intel: improve command decoding

Decode both batch and state buffers.  Decoding is enabled with
INTEL_DEBUG=batch for now.  The functionality should be moved to the debug
layer in the future.
diff --git a/icd/intel/CMakeLists.txt b/icd/intel/CMakeLists.txt
index d64d8d4..434b115 100644
--- a/icd/intel/CMakeLists.txt
+++ b/icd/intel/CMakeLists.txt
@@ -19,6 +19,7 @@
 
 SET(SOURCES 
     cmd.c
+    cmd_decode.c
     cmd_mi.c
     cmd_prepare.c
     cmd_pipeline.c
diff --git a/icd/intel/cmd.c b/icd/intel/cmd.c
index 8a93b4a..29e6f82 100644
--- a/icd/intel/cmd.c
+++ b/icd/intel/cmd.c
@@ -53,6 +53,12 @@
     }
 
     writer->used = 0;
+
+    if (writer->items) {
+        icd_free(writer->items);
+        writer->items = NULL; /* or the next record/reset frees it again */
+        writer->item_alloc = writer->item_used = 0;
+    }
 }
 
 /**
@@ -65,6 +71,7 @@
 
     intel_bo_truncate_relocs(writer->bo, 0);
     writer->used = 0;
+    writer->item_used = 0;
 }
 
 static struct intel_bo *alloc_writer_bo(struct intel_winsys *winsys,
@@ -101,6 +108,7 @@
     }
 
     writer->used = 0;
+    writer->item_used = 0;
 
     writer->ptr = intel_bo_map(writer->bo, true);
     if (!writer->ptr)
@@ -164,6 +172,45 @@
     writer->ptr = new_ptr;
 }
 
+/**
+ * Record an item for later decoding.  The item array doubles when full.  On
+ * allocation failure, all recorded items are dropped and cmd->result is set.
+ */
+void cmd_writer_record(struct intel_cmd *cmd,
+                       enum intel_cmd_writer_type which,
+                       enum intel_cmd_item_type type,
+                       XGL_SIZE offset, XGL_SIZE size)
+{
+    struct intel_cmd_writer *writer = &cmd->writers[which];
+    struct intel_cmd_item *item;
+
+    if (writer->item_used == writer->item_alloc) {
+        const unsigned new_alloc = (writer->item_alloc) ?
+            writer->item_alloc << 1 : 256;
+        struct intel_cmd_item *items;
+
+        items = icd_alloc(sizeof(writer->items[0]) * new_alloc,
+                0, XGL_SYSTEM_ALLOC_DEBUG);
+        if (!items) {
+            writer->item_used = 0;
+            cmd->result = XGL_ERROR_OUT_OF_MEMORY;
+            return;
+        }
+
+        if (writer->item_alloc) { /* writer->items is not valid otherwise */
+            memcpy(items, writer->items, sizeof(items[0]) * writer->item_used);
+            icd_free(writer->items);
+        }
+        writer->items = items;
+        writer->item_alloc = new_alloc;
+    }
+
+    item = &writer->items[writer->item_used++];
+    item->type = type;
+    item->offset = offset;
+    item->size = size;
+}
+
 static void cmd_writer_patch(struct intel_cmd *cmd,
                              enum intel_cmd_writer_type which,
                              XGL_SIZE offset, uint32_t val)
diff --git a/icd/intel/cmd.h b/icd/intel/cmd.h
index 5443de7..81a38c8 100644
--- a/icd/intel/cmd.h
+++ b/icd/intel/cmd.h
@@ -43,6 +43,7 @@
 struct intel_ds_state;
 struct intel_dset;
 
+struct intel_cmd_item;
 struct intel_cmd_reloc;
 
 /*
@@ -204,6 +205,11 @@
     void *ptr;
 
     XGL_SIZE used;
+
+    /* for decoding */
+    struct intel_cmd_item *items;
+    XGL_UINT item_alloc;
+    XGL_UINT item_used;
 };
 
 struct intel_cmd {
@@ -244,6 +250,8 @@
 XGL_RESULT intel_cmd_begin(struct intel_cmd *cmd, XGL_FLAGS flags);
 XGL_RESULT intel_cmd_end(struct intel_cmd *cmd);
 
+void intel_cmd_decode(struct intel_cmd *cmd);
+
 static inline struct intel_bo *intel_cmd_get_batch(const struct intel_cmd *cmd,
                                                    XGL_GPU_SIZE *used)
 {
diff --git a/icd/intel/cmd_decode.c b/icd/intel/cmd_decode.c
new file mode 100644
index 0000000..cc1e1f8
--- /dev/null
+++ b/icd/intel/cmd_decode.c
@@ -0,0 +1,578 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include "genhw/genhw.h"
+#include "kmd/winsys.h"
+#include "cmd_priv.h"
+/* Extract a field from a DWord using the field's __MASK and __SHIFT macros. */
+#define READ(dw, field) (((dw) & field ## __MASK) >> field ## __SHIFT)
+
+static const uint32_t *
+writer_pointer(const struct intel_cmd *cmd,
+               enum intel_cmd_writer_type which,
+               unsigned offset)
+{
+    /* offset is in bytes, relative to the writer's mapped pointer */
+    return (const uint32_t *) ((const char *) cmd->writers[which].ptr + offset);
+}
+
+/* Print the offset, value, and description of a DWord; return the DWord. */
+static uint32_t
+writer_dw(const struct intel_cmd *cmd,
+          enum intel_cmd_writer_type which,
+          unsigned offset, unsigned dw_index,
+          const char *format, ...)
+{
+    const uint32_t *dw = writer_pointer(cmd, which, offset);
+    va_list ap;
+    char desc[16];
+    int len;
+
+    fprintf(stderr, "0x%08x:      0x%08x: ",
+            offset + (dw_index << 2), dw[dw_index]);
+
+    va_start(ap, format);
+    len = vsnprintf(desc, sizeof(desc), format, ap);
+    va_end(ap);
+    if (len < 0)                            /* formatting failed */
+        len = 0;
+    else if (len >= (int) sizeof(desc))     /* description was truncated */
+        len = sizeof(desc) - 1;
+    desc[len] = '\0';
+
+    if (len > 0 && desc[len - 1] == '\n') { /* nothing follows: end the line */
+        desc[len - 1] = '\0';
+        fprintf(stderr, "%8s: \n", desc);
+    } else {
+        fprintf(stderr, "%8s: ", desc);
+    }
+    return dw[dw_index];
+}
+
+static void
+writer_decode_blob(const struct intel_cmd *cmd,
+                   enum intel_cmd_writer_type which,
+                   const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 4; /* bytes per output line */
+    const unsigned count = item->size / state_size; /* a partial tail is skipped */
+    unsigned offset = item->offset;
+    unsigned i;
+    /* dump the item four DWords at a time */
+    for (i = 0; i < count; i++) {
+        const uint32_t *dw = writer_pointer(cmd, which, offset);
+
+        writer_dw(cmd, which, offset, 0, "BLOB%d", i);
+        /* output a single line for all four DWords: first via %f, then as hex */
+        fprintf(stderr, "(% f, % f, % f, % f) "
+                        "(0x%08x, 0x%08x, 0x%08x, 0x%08x)\n",
+                        u_uif(dw[0]), u_uif(dw[1]), u_uif(dw[2]), u_uif(dw[3]),
+                        dw[0], dw[1], dw[2], dw[3]);
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_clip_viewport(const struct intel_cmd *cmd,
+                            enum intel_cmd_writer_type which,
+                            const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 4; /* bytes per viewport */
+    const unsigned count = item->size / state_size;
+    unsigned offset = item->offset;
+    unsigned i;
+    /* four DWords per viewport, each printed with %f via u_uif() */
+    for (i = 0; i < count; i++) {
+        uint32_t dw;
+
+        dw = writer_dw(cmd, which, offset, 0, "CLIP VP%d", i);
+        fprintf(stderr, "xmin = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 1, "CLIP VP%d", i);
+        fprintf(stderr, "xmax = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 2, "CLIP VP%d", i);
+        fprintf(stderr, "ymin = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 3, "CLIP VP%d", i);
+        fprintf(stderr, "ymax = %f\n", u_uif(dw));
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_sf_clip_viewport_gen7(const struct intel_cmd *cmd,
+                                    enum intel_cmd_writer_type which,
+                                    const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 16; /* bytes per viewport */
+    const unsigned count = item->size / state_size;
+    unsigned offset = item->offset;
+    unsigned i;
+    /* 16 DWords per viewport; DW6-7 and DW12-15 are not printed */
+    for (i = 0; i < count; i++) {
+        uint32_t dw;
+
+        dw = writer_dw(cmd, which, offset, 0, "SF_CLIP VP%d", i);
+        fprintf(stderr, "m00 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 1, "SF_CLIP VP%d", i);
+        fprintf(stderr, "m11 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 2, "SF_CLIP VP%d", i);
+        fprintf(stderr, "m22 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 3, "SF_CLIP VP%d", i);
+        fprintf(stderr, "m30 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 4, "SF_CLIP VP%d", i);
+        fprintf(stderr, "m31 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 5, "SF_CLIP VP%d", i);
+        fprintf(stderr, "m32 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 8, "SF_CLIP VP%d", i);
+        fprintf(stderr, "guardband xmin = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 9, "SF_CLIP VP%d", i);
+        fprintf(stderr, "guardband xmax = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 10, "SF_CLIP VP%d", i);
+        fprintf(stderr, "guardband ymin = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 11, "SF_CLIP VP%d", i);
+        fprintf(stderr, "guardband ymax = %f\n", u_uif(dw));
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_sf_viewport_gen6(const struct intel_cmd *cmd,
+                               enum intel_cmd_writer_type which,
+                               const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 8; /* bytes per viewport */
+    const unsigned count = item->size / state_size;
+    unsigned offset = item->offset;
+    unsigned i;
+    /* eight DWords per viewport; only DW0-5 are printed */
+    for (i = 0; i < count; i++) {
+        uint32_t dw;
+
+        dw = writer_dw(cmd, which, offset, 0, "SF VP%d", i);
+        fprintf(stderr, "m00 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 1, "SF VP%d", i);
+        fprintf(stderr, "m11 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 2, "SF VP%d", i);
+        fprintf(stderr, "m22 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 3, "SF VP%d", i);
+        fprintf(stderr, "m30 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 4, "SF VP%d", i);
+        fprintf(stderr, "m31 = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 5, "SF VP%d", i);
+        fprintf(stderr, "m32 = %f\n", u_uif(dw));
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_sf_viewport(const struct intel_cmd *cmd,
+                          enum intel_cmd_writer_type which,
+                          const struct intel_cmd_item *item)
+{
+    if (cmd_gen(cmd) < INTEL_GEN(7))    /* before GEN7: decode as SF VP */
+        writer_decode_sf_viewport_gen6(cmd, which, item);
+    else                                /* GEN7 and later: decode as SF_CLIP VP */
+        writer_decode_sf_clip_viewport_gen7(cmd, which, item);
+}
+
+static void
+writer_decode_scissor_rect(const struct intel_cmd *cmd,
+                           enum intel_cmd_writer_type which,
+                           const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 2; /* bytes per rectangle */
+    const unsigned count = item->size / state_size;
+    unsigned offset = item->offset;
+    unsigned i;
+    /* two DWords per rectangle: DW0 has the minimums, DW1 the maximums */
+    for (i = 0; i < count; i++) {
+        uint32_t dw;
+
+        dw = writer_dw(cmd, which, offset, 0, "SCISSOR%d", i);
+        fprintf(stderr, "xmin %d, ymin %d\n",
+                READ(dw, GEN6_SCISSOR_DW0_MIN_X),
+                READ(dw, GEN6_SCISSOR_DW0_MIN_Y));
+
+        dw = writer_dw(cmd, which, offset, 1, "SCISSOR%d", i);
+        fprintf(stderr, "xmax %d, ymax %d\n",
+                READ(dw, GEN6_SCISSOR_DW1_MAX_X),
+                READ(dw, GEN6_SCISSOR_DW1_MAX_Y));
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_cc_viewport(const struct intel_cmd *cmd,
+                          enum intel_cmd_writer_type which,
+                          const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 2; /* bytes per viewport */
+    const unsigned count = item->size / state_size;
+    unsigned offset = item->offset;
+    unsigned i;
+    /* two DWords per viewport, each printed with %f via u_uif() */
+    for (i = 0; i < count; i++) {
+        uint32_t dw;
+
+        dw = writer_dw(cmd, which, offset, 0, "CC VP%d", i);
+        fprintf(stderr, "min_depth = %f\n", u_uif(dw));
+
+        dw = writer_dw(cmd, which, offset, 1, "CC VP%d", i);
+        fprintf(stderr, "max_depth = %f\n", u_uif(dw));
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_color_calc(const struct intel_cmd *cmd,
+                         enum intel_cmd_writer_type which,
+                         const struct intel_cmd_item *item)
+{
+    uint32_t dw;
+    /* decodes six DWords at item->offset; item->size is not consulted */
+    dw = writer_dw(cmd, which, item->offset, 0, "CC");
+    fprintf(stderr, "alpha test format %s, round disable %d, "
+            "stencil ref %d, bf stencil ref %d\n",
+            READ(dw, GEN6_CC_DW0_ALPHATEST) ? "FLOAT32" : "UNORM8",
+            (bool) (dw & GEN6_CC_DW0_ROUND_DISABLE_DISABLE),
+            READ(dw, GEN6_CC_DW0_STENCIL0_REF),
+            READ(dw, GEN6_CC_DW0_STENCIL1_REF));
+
+    writer_dw(cmd, which, item->offset, 1, "CC\n"); /* DW1: raw value only */
+
+    dw = writer_dw(cmd, which, item->offset, 2, "CC");
+    fprintf(stderr, "constant red %f\n", u_uif(dw));
+
+    dw = writer_dw(cmd, which, item->offset, 3, "CC");
+    fprintf(stderr, "constant green %f\n", u_uif(dw));
+
+    dw = writer_dw(cmd, which, item->offset, 4, "CC");
+    fprintf(stderr, "constant blue %f\n", u_uif(dw));
+
+    dw = writer_dw(cmd, which, item->offset, 5, "CC");
+    fprintf(stderr, "constant alpha %f\n", u_uif(dw));
+}
+
+static void
+writer_decode_depth_stencil(const struct intel_cmd *cmd,
+                            enum intel_cmd_writer_type which,
+                            const struct intel_cmd_item *item)
+{
+    uint32_t dw;
+    /* decodes three DWords at item->offset; item->size is not consulted */
+    dw = writer_dw(cmd, which, item->offset, 0, "D_S");
+    fprintf(stderr, "stencil %sable, func %d, write %sable\n",
+            (dw & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) ? "en" : "dis",
+            READ(dw, GEN6_ZS_DW0_STENCIL0_FUNC),
+            (dw & GEN6_ZS_DW0_STENCIL_WRITE_ENABLE) ? "en" : "dis");
+
+    dw = writer_dw(cmd, which, item->offset, 1, "D_S");
+    fprintf(stderr, "stencil test mask 0x%x, write mask 0x%x\n",
+            READ(dw, GEN6_ZS_DW1_STENCIL0_VALUEMASK),
+            READ(dw, GEN6_ZS_DW1_STENCIL0_WRITEMASK));
+
+    dw = writer_dw(cmd, which, item->offset, 2, "D_S");
+    fprintf(stderr, "depth test %sable, func %d, write %sable\n",
+            (dw & GEN6_ZS_DW2_DEPTH_TEST_ENABLE) ? "en" : "dis",
+            READ(dw, GEN6_ZS_DW2_DEPTH_FUNC),
+            (dw & GEN6_ZS_DW2_DEPTH_WRITE_ENABLE) ? "en" : "dis");
+}
+
+static void
+writer_decode_blend(const struct intel_cmd *cmd,
+                    enum intel_cmd_writer_type which,
+                    const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 2; /* bytes per entry */
+    const unsigned count = item->size / state_size;
+    unsigned offset = item->offset;
+    unsigned i;
+    /* two DWords per entry, printed raw: the trailing '\n' ends each line */
+    for (i = 0; i < count; i++) {
+        writer_dw(cmd, which, offset, 0, "BLEND%d\n", i);
+        writer_dw(cmd, which, offset, 1, "BLEND%d\n", i);
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_sampler(const struct intel_cmd *cmd,
+                      enum intel_cmd_writer_type which,
+                      const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 4; /* bytes per sampler */
+    const unsigned count = item->size / state_size;
+    unsigned offset = item->offset;
+    unsigned i;
+    /* four DWords per sampler; each is labeled but its fields are not decoded */
+    for (i = 0; i < count; i++) {
+        writer_dw(cmd, which, offset, 0, "WM SAMP%d", i);
+        fprintf(stderr, "filtering\n");
+
+        writer_dw(cmd, which, offset, 1, "WM SAMP%d", i);
+        fprintf(stderr, "wrapping, lod\n");
+
+        writer_dw(cmd, which, offset, 2, "WM SAMP%d", i);
+        fprintf(stderr, "default color pointer\n");
+
+        writer_dw(cmd, which, offset, 3, "WM SAMP%d", i);
+        fprintf(stderr, "chroma key, aniso\n");
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_surface_gen7(const struct intel_cmd *cmd,
+                           enum intel_cmd_writer_type which,
+                           const struct intel_cmd_item *item)
+{
+    uint32_t dw;
+    /* decodes eight DWords at item->offset; item->size is not consulted */
+    dw = writer_dw(cmd, which, item->offset, 0, "SURF");
+    fprintf(stderr, "type 0x%x, format 0x%x, tiling %d, %s array\n",
+            READ(dw, GEN7_SURFACE_DW0_TYPE),
+            READ(dw, GEN7_SURFACE_DW0_FORMAT),
+            READ(dw, GEN7_SURFACE_DW0_TILING),
+            (dw & GEN7_SURFACE_DW0_IS_ARRAY) ? "is" : "not");
+
+    writer_dw(cmd, which, item->offset, 1, "SURF"); /* labeled, not decoded */
+    fprintf(stderr, "offset\n");
+
+    dw = writer_dw(cmd, which, item->offset, 2, "SURF");
+    fprintf(stderr, "%dx%d size\n",
+            READ(dw, GEN7_SURFACE_DW2_WIDTH),
+            READ(dw, GEN7_SURFACE_DW2_HEIGHT));
+
+    dw = writer_dw(cmd, which, item->offset, 3, "SURF");
+    fprintf(stderr, "depth %d, pitch %d\n",
+            READ(dw, GEN7_SURFACE_DW3_DEPTH),
+            READ(dw, GEN7_SURFACE_DW3_PITCH));
+
+    dw = writer_dw(cmd, which, item->offset, 4, "SURF");
+    fprintf(stderr, "min array element %d, array extent %d\n",
+            READ(dw, GEN7_SURFACE_DW4_MIN_ARRAY_ELEMENT),
+            READ(dw, GEN7_SURFACE_DW4_RT_VIEW_EXTENT));
+
+    dw = writer_dw(cmd, which, item->offset, 5, "SURF");
+    fprintf(stderr, "mip base %d, mips %d, x,y offset: %d,%d\n",
+            READ(dw, GEN7_SURFACE_DW5_MIN_LOD),
+            READ(dw, GEN7_SURFACE_DW5_MIP_COUNT_LOD),
+            READ(dw, GEN7_SURFACE_DW5_X_OFFSET),
+            READ(dw, GEN7_SURFACE_DW5_Y_OFFSET));
+
+    writer_dw(cmd, which, item->offset, 6, "SURF\n"); /* DW6-7: raw values only */
+    writer_dw(cmd, which, item->offset, 7, "SURF\n");
+}
+
+static void
+writer_decode_surface_gen6(const struct intel_cmd *cmd,
+                           enum intel_cmd_writer_type which,
+                           const struct intel_cmd_item *item)
+{
+    uint32_t dw;
+    /* decodes six DWords at item->offset; item->size is not consulted */
+    dw = writer_dw(cmd, which, item->offset, 0, "SURF");
+    fprintf(stderr, "type 0x%x, format 0x%x\n",
+            READ(dw, GEN6_SURFACE_DW0_TYPE),
+            READ(dw, GEN6_SURFACE_DW0_FORMAT));
+
+    writer_dw(cmd, which, item->offset, 1, "SURF"); /* labeled, not decoded */
+    fprintf(stderr, "offset\n");
+
+    dw = writer_dw(cmd, which, item->offset, 2, "SURF");
+    fprintf(stderr, "%dx%d size, %d mips\n",
+            READ(dw, GEN6_SURFACE_DW2_WIDTH),
+            READ(dw, GEN6_SURFACE_DW2_HEIGHT),
+            READ(dw, GEN6_SURFACE_DW2_MIP_COUNT_LOD));
+
+    dw = writer_dw(cmd, which, item->offset, 3, "SURF");
+    fprintf(stderr, "pitch %d, tiling %d\n",
+            READ(dw, GEN6_SURFACE_DW3_PITCH),
+            READ(dw, GEN6_SURFACE_DW3_TILING));
+
+    dw = writer_dw(cmd, which, item->offset, 4, "SURF");
+    fprintf(stderr, "mip base %d\n",
+            READ(dw, GEN6_SURFACE_DW4_MIN_LOD));
+
+    dw = writer_dw(cmd, which, item->offset, 5, "SURF");
+    fprintf(stderr, "x,y offset: %d,%d\n",
+            READ(dw, GEN6_SURFACE_DW5_X_OFFSET),
+            READ(dw, GEN6_SURFACE_DW5_Y_OFFSET));
+}
+
+static void
+writer_decode_surface(const struct intel_cmd *cmd,
+                      enum intel_cmd_writer_type which,
+                      const struct intel_cmd_item *item)
+{
+    if (cmd_gen(cmd) < INTEL_GEN(7))    /* before GEN7: decoded as six DWords */
+        writer_decode_surface_gen6(cmd, which, item);
+    else                                /* GEN7 and later: eight DWords */
+        writer_decode_surface_gen7(cmd, which, item);
+}
+
+static void
+writer_decode_binding_table(const struct intel_cmd *cmd,
+                            enum intel_cmd_writer_type which,
+                            const struct intel_cmd_item *item)
+{
+    const unsigned state_size = sizeof(uint32_t) * 1; /* bytes per entry */
+    const unsigned count = item->size / state_size;
+    unsigned offset = item->offset;
+    unsigned i;
+    /* one DWord per entry; the raw value and the entry index are printed */
+    for (i = 0; i < count; i++) {
+        writer_dw(cmd, which, offset, 0, "BIND");
+        fprintf(stderr, "BINDING_TABLE_STATE[%d]\n", i);
+
+        offset += state_size;
+    }
+}
+
+static void
+writer_decode_kernel(const struct intel_cmd *cmd,
+                     enum intel_cmd_writer_type which,
+                     const struct intel_cmd_item *item)
+{ /* no-op: nothing is printed for kernel items */
+}
+
+static const struct {
+    void (*func)(const struct intel_cmd *cmd,
+                 enum intel_cmd_writer_type which,
+                 const struct intel_cmd_item *item);
+} writer_decode_table[INTEL_CMD_ITEM_COUNT] = { /* decoders, by item type */
+    [INTEL_CMD_ITEM_BLOB]                = { writer_decode_blob },
+    [INTEL_CMD_ITEM_CLIP_VIEWPORT]       = { writer_decode_clip_viewport },
+    [INTEL_CMD_ITEM_SF_VIEWPORT]         = { writer_decode_sf_viewport },
+    [INTEL_CMD_ITEM_SCISSOR_RECT]        = { writer_decode_scissor_rect },
+    [INTEL_CMD_ITEM_CC_VIEWPORT]         = { writer_decode_cc_viewport },
+    [INTEL_CMD_ITEM_COLOR_CALC]          = { writer_decode_color_calc },
+    [INTEL_CMD_ITEM_DEPTH_STENCIL]       = { writer_decode_depth_stencil },
+    [INTEL_CMD_ITEM_BLEND]               = { writer_decode_blend },
+    [INTEL_CMD_ITEM_SAMPLER]             = { writer_decode_sampler },
+    [INTEL_CMD_ITEM_SURFACE]             = { writer_decode_surface },
+    [INTEL_CMD_ITEM_BINDING_TABLE]       = { writer_decode_binding_table },
+    [INTEL_CMD_ITEM_KERNEL]              = { writer_decode_kernel },
+};
+
+/* Map the writer bo and decode every recorded item of the writer. */
+static void cmd_writer_decode_items(struct intel_cmd *cmd,
+                                    enum intel_cmd_writer_type which)
+{
+    struct intel_cmd_writer *writer = &cmd->writers[which];
+    XGL_UINT i;
+
+    if (!writer->item_used)
+        return;
+
+    writer->ptr = intel_bo_map(writer->bo, false);
+    if (!writer->ptr)
+        return;
+
+    for (i = 0; i < writer->item_used; i++) {
+        const struct intel_cmd_item *item = &writer->items[i];
+        writer_decode_table[item->type].func(cmd, which, item);
+    }
+
+    intel_bo_unmap(writer->bo);
+    writer->ptr = NULL; /* unmapped; cmd_writer_decode() asserts !writer->ptr */
+}
+
+/* Decode one writer; what gets printed depends on the writer type. */
+static void cmd_writer_decode(struct intel_cmd *cmd,
+                              enum intel_cmd_writer_type which)
+{
+    struct intel_cmd_writer *writer = &cmd->writers[which];
+
+    assert(writer->bo && !writer->ptr);
+
+    switch (which) {
+    case INTEL_CMD_WRITER_BATCH:
+        fprintf(stderr, "decoding batch buffer: %lu bytes\n",
+                (unsigned long) writer->used); /* XGL_SIZE may be wider than int */
+        if (writer->used)
+            intel_winsys_decode_bo(cmd->dev->winsys, writer->bo, writer->used);
+        break;
+    case INTEL_CMD_WRITER_STATE:
+        fprintf(stderr, "decoding state buffer: %d states\n",
+                writer->item_used);
+        cmd_writer_decode_items(cmd, which);
+        break;
+    case INTEL_CMD_WRITER_INSTRUCTION:
+        if (true) { /* the else branch (kernel decoding) is never taken */
+            fprintf(stderr, "skipping instruction buffer: %d kernels\n",
+                    writer->item_used);
+        } else {
+            fprintf(stderr, "decoding instruction buffer: %d kernels\n",
+                    writer->item_used);
+
+            cmd_writer_decode_items(cmd, which);
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+/**
+ * Decode every writer of \p cmd, using the items recorded while it was
+ * built.  Call this only after intel_cmd_end() has succeeded.
+ */
+void intel_cmd_decode(struct intel_cmd *cmd)
+{
+    int which = 0;
+
+    assert(cmd->result == XGL_SUCCESS);
+
+    while (which < INTEL_CMD_WRITER_COUNT)
+        cmd_writer_decode(cmd, which++);
+}
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index a810829..abd5cde 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -1000,7 +1000,8 @@
     CMD_ASSERT(cmd, 6, 7.5);
     STATIC_ASSERT(ARRAY_SIZE(state->cmd) >= cmd_len);
 
-    return cmd_state_write(cmd, cmd_align, cmd_len, state->cmd);
+    return cmd_state_write(cmd, INTEL_CMD_ITEM_BLEND,
+            cmd_align, cmd_len, state->cmd);
 }
 
 static uint32_t gen6_DEPTH_STENCIL_STATE(struct intel_cmd *cmd,
@@ -1012,7 +1013,8 @@
     CMD_ASSERT(cmd, 6, 7.5);
     STATIC_ASSERT(ARRAY_SIZE(state->cmd) >= cmd_len);
 
-    return cmd_state_write(cmd, cmd_align, cmd_len, state->cmd);
+    return cmd_state_write(cmd, INTEL_CMD_ITEM_DEPTH_STENCIL,
+            cmd_align, cmd_len, state->cmd);
 }
 
 static uint32_t gen6_COLOR_CALC_STATE(struct intel_cmd *cmd,
@@ -1025,7 +1027,8 @@
 
     CMD_ASSERT(cmd, 6, 7.5);
 
-    offset = cmd_state_pointer(cmd, cmd_align, cmd_len, &dw);
+    offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_COLOR_CALC,
+            cmd_align, cmd_len, &dw);
     dw[0] = stencil_ref;
     dw[1] = 0;
     dw[2] = blend_color[0];
@@ -1296,7 +1299,8 @@
     if (!viewport)
         return;
 
-    offset = cmd_state_write(cmd, viewport->cmd_align * 4,
+    offset = cmd_state_write(cmd, INTEL_CMD_ITEM_SF_VIEWPORT,
+            viewport->cmd_align * 4,
             viewport->cmd_len, viewport->cmd);
 
     gen6_3DSTATE_VIEWPORT_STATE_POINTERS(cmd,
@@ -1355,8 +1359,8 @@
     if (!viewport)
         return;
 
-    offset = cmd_state_write(cmd, viewport->cmd_align * 4,
-            viewport->cmd_len, viewport->cmd);
+    offset = cmd_state_write(cmd, INTEL_CMD_ITEM_SF_VIEWPORT,
+            viewport->cmd_align * 4, viewport->cmd_len, viewport->cmd);
 
     gen7_3dstate_pointer(cmd,
             GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
@@ -1398,7 +1402,7 @@
         const XGL_SIZE alignment = 32;
         const XGL_SIZE size = u_align(sh->pcb_size, alignment);
 
-        offset = cmd_state_pointer(cmd, alignment,
+        offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_BLOB, alignment,
                 size / sizeof(uint32_t), &dw);
         memcpy(dw, sh->pcb, sh->pcb_size);
 
@@ -1437,7 +1441,7 @@
 
         pcb_len = size / alignment;
 
-        offset = cmd_state_pointer(cmd, alignment,
+        offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_BLOB, alignment,
                 size / sizeof(uint32_t), &dw);
         memcpy(dw, sh->pcb, sh->pcb_size);
     }
@@ -1473,7 +1477,7 @@
             {
                 const struct intel_rt_view *view = cmd->bind.att.rt[i];
 
-                offset = cmd_surface_write(cmd,
+                offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_SURFACE,
                         GEN6_ALIGNMENT_SURFACE_STATE * 4,
                         view->cmd_len, view->cmd);
 
@@ -1487,7 +1491,7 @@
                 const struct intel_mem_view *view =
                     &cmd->bind.dyn_view.graphics;
 
-                offset = cmd_surface_write(cmd,
+                offset = cmd_surface_write(cmd, INTEL_CMD_ITEM_SURFACE,
                         GEN6_ALIGNMENT_SURFACE_STATE * 4,
                         view->cmd_len, view->cmd);
 
@@ -1506,7 +1510,8 @@
         binding_table[i] = offset;
     }
 
-    offset = cmd_state_write(cmd, GEN6_ALIGNMENT_BINDING_TABLE_STATE * 4,
+    offset = cmd_state_write(cmd, INTEL_CMD_ITEM_BINDING_TABLE,
+            GEN6_ALIGNMENT_BINDING_TABLE_STATE * 4,
             surface_count, binding_table);
 
     if (cmd_gen(cmd) >= INTEL_GEN(7)) {
diff --git a/icd/intel/cmd_priv.h b/icd/intel/cmd_priv.h
index 6fbd524..eb9cf76 100644
--- a/icd/intel/cmd_priv.h
+++ b/icd/intel/cmd_priv.h
@@ -36,6 +36,34 @@
 #define CMD_ASSERT(cmd, min_gen, max_gen) \
     INTEL_GPU_ASSERT((cmd)->dev->gpu, (min_gen), (max_gen))
 
+enum intel_cmd_item_type { /* type of an item passed to cmd_writer_record() */
+    /* for state buffer */
+    INTEL_CMD_ITEM_BLOB,
+    INTEL_CMD_ITEM_CLIP_VIEWPORT,
+    INTEL_CMD_ITEM_SF_VIEWPORT,
+    INTEL_CMD_ITEM_SCISSOR_RECT,
+    INTEL_CMD_ITEM_CC_VIEWPORT,
+    INTEL_CMD_ITEM_COLOR_CALC,
+    INTEL_CMD_ITEM_DEPTH_STENCIL,
+    INTEL_CMD_ITEM_BLEND,
+    INTEL_CMD_ITEM_SAMPLER,
+
+    /* for surface buffer */
+    INTEL_CMD_ITEM_SURFACE,
+    INTEL_CMD_ITEM_BINDING_TABLE,
+
+    /* for instruction buffer */
+    INTEL_CMD_ITEM_KERNEL,
+
+    INTEL_CMD_ITEM_COUNT, /* number of item types; not an item type itself */
+};
+
+struct intel_cmd_item {
+    enum intel_cmd_item_type type; /* selects the decoder for the item */
+    XGL_SIZE offset;               /* in bytes, from the start of the writer */
+    XGL_SIZE size;                 /* in bytes */
+};
+
 struct intel_cmd_reloc {
     enum intel_cmd_writer_type which;
     XGL_SIZE offset;
@@ -66,6 +94,11 @@
                      enum intel_cmd_writer_type which,
                      XGL_SIZE new_size);
 
+void cmd_writer_record(struct intel_cmd *cmd,
+                       enum intel_cmd_writer_type which,
+                       enum intel_cmd_item_type type,
+                       XGL_SIZE offset, XGL_SIZE size);
+
 /**
  * Return an offset to a region that is aligned to \p alignment and has at
  * least \p size bytes.
@@ -119,6 +152,7 @@
  * Note that \p alignment is in bytes and \p len is in DWords.
  */
 static inline uint32_t cmd_state_pointer(struct intel_cmd *cmd,
+                                         enum intel_cmd_item_type item,
                                          XGL_SIZE alignment, XGL_UINT len,
                                          uint32_t **dw)
 {
@@ -134,6 +168,9 @@
 
     writer->used = offset + size;
 
+    if (intel_debug & INTEL_DEBUG_BATCH)
+        cmd_writer_record(cmd, which, item, offset, size);
+
     return offset;
 }
 
@@ -141,12 +178,13 @@
  * Write a dynamic state to the state buffer.
  */
 static inline uint32_t cmd_state_write(struct intel_cmd *cmd,
+                                       enum intel_cmd_item_type item,
                                        XGL_SIZE alignment, XGL_UINT len,
                                        const uint32_t *dw)
 {
     uint32_t offset, *dst;
 
-    offset = cmd_state_pointer(cmd, alignment, len, &dst);
+    offset = cmd_state_pointer(cmd, item, alignment, len, &dst);
     memcpy(dst, dw, len << 2);
 
     return offset;
@@ -159,10 +197,14 @@
  * Note that \p alignment is in bytes and \p len is in DWords.
  */
 static inline uint32_t cmd_surface_write(struct intel_cmd *cmd,
+                                         enum intel_cmd_item_type item,
                                          XGL_SIZE alignment, XGL_UINT len,
                                          const uint32_t *dw)
 {
-    return cmd_state_write(cmd, alignment, len, dw);
+    assert(item == INTEL_CMD_ITEM_SURFACE ||
+           item == INTEL_CMD_ITEM_BINDING_TABLE); /* the surface buffer items */
+
+    return cmd_state_write(cmd, item, alignment, len, dw);
 }
 
 /**
@@ -208,6 +250,9 @@
 
     writer->used = offset + size;
 
+    if (intel_debug & INTEL_DEBUG_BATCH)
+        cmd_writer_record(cmd, which, INTEL_CMD_ITEM_KERNEL, offset, size);
+
     return offset;
 }
 
diff --git a/icd/intel/queue.c b/icd/intel/queue.c
index 2a8fba1..d9c9e95 100644
--- a/icd/intel/queue.c
+++ b/icd/intel/queue.c
@@ -39,9 +39,6 @@
     struct intel_winsys *winsys = queue->dev->winsys;
     int err;
 
-    if (intel_debug & INTEL_DEBUG_BATCH)
-        intel_winsys_decode_bo(winsys, bo, used);
-
     if (intel_debug & INTEL_DEBUG_NOHW)
         err = 0;
     else
@@ -275,6 +272,9 @@
         ret = queue_submit_bo(queue, bo, used);
         queue->last_submitted_cmd = cmd;
 
+        if (intel_debug & INTEL_DEBUG_BATCH)
+            intel_cmd_decode(cmd);
+
         if (ret != XGL_SUCCESS)
             break;
     }