geometry shaders: Turn on GS stage support
diff --git a/icd/intel/compiler/pipeline/pipeline_compiler_interface.cpp b/icd/intel/compiler/pipeline/pipeline_compiler_interface.cpp
index cfdcdbc..9fb3fca 100644
--- a/icd/intel/compiler/pipeline/pipeline_compiler_interface.cpp
+++ b/icd/intel/compiler/pipeline/pipeline_compiler_interface.cpp
@@ -159,138 +159,169 @@
   fflush(fp);
 }
 
-static void base_prog_dump(FILE *fp, struct brw_stage_prog_data* base)
+static void base_prog_dump(FILE *fp, struct brw_stage_prog_data* stage_prog_data)
 {
-    fprintf(fp, "data->base.binding_table.size_bytes = %u\n",
-                 base->binding_table.size_bytes);
-    fprintf(fp, "data->base.binding_table.pull_constants_start = %u\n",
-                 base->binding_table.pull_constants_start);
-    fprintf(fp, "data->base.binding_table.texture_start = %u\n",
-                 base->binding_table.texture_start);
-    fprintf(fp, "data->base.binding_table.gather_texture_start = %u\n",
-                 base->binding_table.gather_texture_start);
-    fprintf(fp, "data->base.binding_table.ubo_start = %u\n",
-                 base->binding_table.ubo_start);
-    fprintf(fp, "data->base.binding_table.abo_start = %u\n",
-                 base->binding_table.abo_start);
-    fprintf(fp, "data->base.binding_table.shader_time_start = %u\n",
-                 base->binding_table.shader_time_start);
+    fprintf(fp, "stage_prog_data->binding_table.size_bytes = %u\n",
+                 stage_prog_data->binding_table.size_bytes);
+    fprintf(fp, "stage_prog_data->binding_table.pull_constants_start = %u\n",
+                 stage_prog_data->binding_table.pull_constants_start);
+    fprintf(fp, "stage_prog_data->binding_table.texture_start = %u\n",
+                 stage_prog_data->binding_table.texture_start);
+    fprintf(fp, "stage_prog_data->binding_table.gather_texture_start = %u\n",
+                 stage_prog_data->binding_table.gather_texture_start);
+    fprintf(fp, "stage_prog_data->binding_table.ubo_start = %u\n",
+                 stage_prog_data->binding_table.ubo_start);
+    fprintf(fp, "stage_prog_data->binding_table.abo_start = %u\n",
+                 stage_prog_data->binding_table.abo_start);
+    fprintf(fp, "stage_prog_data->binding_table.shader_time_start = %u\n",
+                 stage_prog_data->binding_table.shader_time_start);
 
-    fprintf(fp, "data->base.nr_params = %u\n",
-                 base->nr_params);
-    fprintf(fp, "data->base.nr_pull_params = %u\n",
-                 base->nr_pull_params);
+    fprintf(fp, "stage_prog_data->nr_params = %u\n",
+                 stage_prog_data->nr_params);
+    fprintf(fp, "stage_prog_data->nr_pull_params = %u\n",
+                 stage_prog_data->nr_pull_params);
 
     fprintf(fp, "== push constants: ==\n");
-    fprintf(fp, "data->base.nr_params = %u\n",
-                 base->nr_params);
+    fprintf(fp, "stage_prog_data->nr_params = %u\n",
+                 stage_prog_data->nr_params);
 
-    for (int i = 0; i < base->nr_params; ++i) {
-        fprintf(fp, "data->base.param = %p\n",
-                     base->param);
-        fprintf(fp, "*data->base.param = %p\n",
-                     *base->param);
-        fprintf(fp, "**data->base.param = %f\n",
-                     **base->param);
+    for (unsigned i = 0; i < stage_prog_data->nr_params; ++i) {  // index each entry; the loop used to reprint entry 0
+        fprintf(fp, "&stage_prog_data->param[%u] = %p\n", i,
+                     (const void *) &stage_prog_data->param[i]);
+        fprintf(fp, "stage_prog_data->param[%u] = %p\n", i,
+                     (const void *) stage_prog_data->param[i]);
+        fprintf(fp, "*stage_prog_data->param[%u] = %f\n", i,
+                     *stage_prog_data->param[i]);
     }
 
     fprintf(fp, "== pull constants: ==\n");
-    fprintf(fp, "data->base.nr_pull_params = %u\n",
-                 base->nr_pull_params);
+    fprintf(fp, "stage_prog_data->nr_pull_params = %u\n",
+                 stage_prog_data->nr_pull_params);
 
-    for (int i = 0; i < base->nr_pull_params; ++i) {
-        fprintf(fp, "data->base.pull_param = %p\n",
-                     base->pull_param);
-        fprintf(fp, "*data->base.pull_param = %p\n",
-                     *base->pull_param);
-        fprintf(fp, "**data->base.pull_param = %f\n",
-                     **base->pull_param);
+    for (unsigned i = 0; i < stage_prog_data->nr_pull_params; ++i) {  // same per-entry dump for pull constants
+        fprintf(fp, "&stage_prog_data->pull_param[%u] = %p\n", i,
+                     (const void *) &stage_prog_data->pull_param[i]);
+        fprintf(fp, "stage_prog_data->pull_param[%u] = %p\n", i,
+                     (const void *) stage_prog_data->pull_param[i]);
+        fprintf(fp, "*stage_prog_data->pull_param[%u] = %f\n", i,
+                     *stage_prog_data->pull_param[i]);
     }
 }
 
-static void vs_data_dump(FILE *fp, struct brw_vs_prog_data *data)
+// Prints the brw_vec4_prog_data fields (VUE map plus GRF/CURB/URB/scratch values) to fp; called by the VS and GS dumps.
+static void base_vec4_prog_dump(FILE *fp, struct brw_vec4_prog_data* vec4_prog_data)
+{
+    fprintf(fp, "vec4_prog_data->vue_map.slots_valid = 0x%" PRIX64 "\n",
+                 vec4_prog_data->vue_map.slots_valid);
+
+    for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i)
+        fprintf(fp, "vec4_prog_data->vue_map.varying_to_slot[%i] = %i\n", i,
+               (int) vec4_prog_data->vue_map.varying_to_slot[i]);
+
+    for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i)
+        fprintf(fp, "vec4_prog_data->vue_map.slot_to_varying[%i] = %i\n", i,
+               (int) vec4_prog_data->vue_map.slot_to_varying[i]);
+
+    fprintf(fp, "vec4_prog_data->vue_map.num_slots = %i\n",
+                 vec4_prog_data->vue_map.num_slots);
+    fprintf(fp, "vec4_prog_data->dispatch_grf_start_reg = %u\n",
+                 vec4_prog_data->dispatch_grf_start_reg);
+    fprintf(fp, "vec4_prog_data->curb_read_length = %u\n",
+                 vec4_prog_data->curb_read_length);
+    fprintf(fp, "vec4_prog_data->urb_read_length = %u\n",
+                 vec4_prog_data->urb_read_length);
+    fprintf(fp, "vec4_prog_data->total_grf = %u\n",
+                 vec4_prog_data->total_grf);
+    fprintf(fp, "vec4_prog_data->total_scratch = %u\n",
+                 vec4_prog_data->total_scratch);
+    fprintf(fp, "vec4_prog_data->urb_entry_size = %u\n",
+                 vec4_prog_data->urb_entry_size);
+}
+
+static void vs_data_dump(FILE *fp, struct brw_vs_prog_data *vs_prog_data)
 {
     fprintf(fp, "\n=== begin brw_vs_prog_data ===\n");
 
-    base_prog_dump(fp, &data->base.base);
+    base_prog_dump(fp, &vs_prog_data->base.base);  // stage-generic part: binding table offsets and push/pull params
 
-    fprintf(fp, "data->base.vue_map.slots_valid = 0x%" PRIX64 "\n",
-                 data->base.vue_map.slots_valid);
+    base_vec4_prog_dump(fp, &vs_prog_data->base);  // vec4 part, printed by the helper that the GS dump also calls
 
-    for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i)
-        fprintf(fp, "data->base.vue_map.varying_to_slot[%i] = %i\n", i,
-               (int) data->base.vue_map.varying_to_slot[i]);
-
-    for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i)
-        fprintf(fp, "data->base.vue_map.slot_to_varying[%i] = %i\n", i,
-               (int) data->base.vue_map.slot_to_varying[i]);
-
-    fprintf(fp, "data->base.vue_map.num_slots = %i\n",
-                 data->base.vue_map.num_slots);
-    fprintf(fp, "data->base.dispatch_grf_start_reg = %u\n",
-                 data->base.dispatch_grf_start_reg);
-    fprintf(fp, "data->base.curb_read_length = %u\n",
-                 data->base.curb_read_length);
-    fprintf(fp, "data->base.urb_read_length = %u\n",
-                 data->base.urb_read_length);
-    fprintf(fp, "data->base.total_grf = %u\n",
-                 data->base.total_grf);
-    fprintf(fp, "data->base.total_scratch = %u\n",
-                 data->base.total_scratch);
-    fprintf(fp, "data->base.urb_entry_size = %u\n",
-                 data->base.urb_entry_size);
-
-    fprintf(fp, "data->inputs_read = 0x%" PRIX64 "\n",
-                 data->inputs_read);
-    fprintf(fp, "data->uses_vertexid = %s\n",
-                 data->uses_vertexid ? "true" : "false");
-    fprintf(fp, "data->uses_instanceid = %s\n",
-                 data->uses_instanceid ? "true" : "false");
+    fprintf(fp, "vs_prog_data->inputs_read = 0x%" PRIX64 "\n",
+                 vs_prog_data->inputs_read);
+    fprintf(fp, "vs_prog_data->uses_vertexid = %s\n",
+                 vs_prog_data->uses_vertexid ? "true" : "false");
+    fprintf(fp, "vs_prog_data->uses_instanceid = %s\n",
+                 vs_prog_data->uses_instanceid ? "true" : "false");
 
     fprintf(fp, "=== end brw_vs_prog_data ===\n");
 
     fflush(fp);
 }
 
-static void fs_data_dump(FILE *fp, struct brw_wm_prog_data* data)
+static void gs_data_dump(FILE *fp, struct brw_gs_prog_data *gs_prog_data)
+{
+    // Prints the stage-generic, vec4 and GS-specific prog_data fields to fp between begin/end banners, then flushes fp.
+    fprintf(fp, "\n=== begin brw_gs_prog_data ===\n");
+
+    base_prog_dump(fp, &gs_prog_data->base.base);
+    base_vec4_prog_dump(fp, &gs_prog_data->base);
+
+    fprintf(fp, "gs_prog_data->output_vertex_size_hwords = %u\n",
+                 gs_prog_data->output_vertex_size_hwords);
+    fprintf(fp, "gs_prog_data->output_topology = %u\n",
+                 gs_prog_data->output_topology);
+    fprintf(fp, "gs_prog_data->control_data_header_size_hwords = %u\n",
+                 gs_prog_data->control_data_header_size_hwords);
+    fprintf(fp, "gs_prog_data->control_data_format = %u\n",
+                 gs_prog_data->control_data_format);
+    fprintf(fp, "gs_prog_data->include_primitive_id = %s\n",
+                 gs_prog_data->include_primitive_id ? "true" : "false");
+    fprintf(fp, "gs_prog_data->invocations = %u\n",
+                 gs_prog_data->invocations);
+    fprintf(fp, "gs_prog_data->dual_instanced_dispatch = %s\n",
+                 gs_prog_data->dual_instanced_dispatch ? "true" : "false");
+
+    fprintf(fp, "=== end brw_gs_prog_data ===\n");
+
+    fflush(fp);
+}
+
+static void fs_data_dump(FILE *fp, struct brw_wm_prog_data* wm_prog_data)
 {
     fprintf(fp, "\n=== begin brw_wm_prog_data ===\n");
 
-    base_prog_dump(fp, &data->base);
+    base_prog_dump(fp, &wm_prog_data->base);
 
-    fprintf(fp, "data->curb_read_length = %u\n",
-                 data->curb_read_length);
-    fprintf(fp, "data->num_varying_inputs = %u\n",
-                 data->num_varying_inputs);
-
-    fprintf(fp, "data->first_curbe_grf = %u\n",
-                 data->first_curbe_grf);
-    fprintf(fp, "data->first_curbe_grf_16 = %u\n",
-                 data->first_curbe_grf_16);
-    fprintf(fp, "data->reg_blocks = %u\n",
-                 data->reg_blocks);
-    fprintf(fp, "data->reg_blocks_16 = %u\n",
-                 data->reg_blocks_16);
-    fprintf(fp, "data->total_scratch = %u\n",
-                 data->total_scratch);
-    fprintf(fp, "data->binding_table.render_target_start = %u\n",
-                 data->binding_table.render_target_start);
-
-    fprintf(fp, "data->dual_src_blend = %s\n",
-                 data->dual_src_blend ? "true" : "false");
-    fprintf(fp, "data->uses_pos_offset = %s\n",
-                 data->uses_pos_offset ? "true" : "false");
-    fprintf(fp, "data->uses_omask = %s\n",
-                 data->uses_omask ? "true" : "false");
-    fprintf(fp, "data->prog_offset_16 = %u\n",
-                 data->prog_offset_16);
-
-    fprintf(fp, "data->barycentric_interp_modes = %u\n",
-                 data->barycentric_interp_modes);
+    fprintf(fp, "wm_prog_data->curb_read_length = %u\n",
+                 wm_prog_data->curb_read_length);
+    fprintf(fp, "wm_prog_data->num_varying_inputs = %u\n",
+                 wm_prog_data->num_varying_inputs);
+    fprintf(fp, "wm_prog_data->first_curbe_grf = %u\n",
+                 wm_prog_data->first_curbe_grf);
+    fprintf(fp, "wm_prog_data->first_curbe_grf_16 = %u\n",
+                 wm_prog_data->first_curbe_grf_16);
+    fprintf(fp, "wm_prog_data->reg_blocks = %u\n",
+                 wm_prog_data->reg_blocks);
+    fprintf(fp, "wm_prog_data->reg_blocks_16 = %u\n",
+                 wm_prog_data->reg_blocks_16);
+    fprintf(fp, "wm_prog_data->total_scratch = %u\n",
+                 wm_prog_data->total_scratch);
+    fprintf(fp, "wm_prog_data->binding_table.render_target_start = %u\n",
+                 wm_prog_data->binding_table.render_target_start);
+    fprintf(fp, "wm_prog_data->dual_src_blend = %s\n",
+                 wm_prog_data->dual_src_blend ? "true" : "false");
+    fprintf(fp, "wm_prog_data->uses_pos_offset = %s\n",
+                 wm_prog_data->uses_pos_offset ? "true" : "false");
+    fprintf(fp, "wm_prog_data->uses_omask = %s\n",
+                 wm_prog_data->uses_omask ? "true" : "false");
+    fprintf(fp, "wm_prog_data->prog_offset_16 = %u\n",
+                 wm_prog_data->prog_offset_16);
+    fprintf(fp, "wm_prog_data->barycentric_interp_modes = %u\n",
+                 wm_prog_data->barycentric_interp_modes);
 
     for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
-        fprintf(fp, "data->urb_setup[%i] = %i\n",
-                  i, data->urb_setup[i]);
+        fprintf(fp, "wm_prog_data->urb_setup[%i] = %i\n",
+                  i, wm_prog_data->urb_setup[i]);
     }
 
     fprintf(fp, "=== end brw_wm_prog_data ===\n");
@@ -603,6 +634,77 @@
         }
             break;
 
+        case GL_GEOMETRY_SHADER:
+        {
+            pipe_shader->codeSize = get_gs_program_size(brw->shader_prog);
+
+            pipe_shader->pCode = pipe_interface_alloc(gpu, pipe_shader->codeSize, 0, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
+
+            if (!pipe_shader->pCode) {
+                status = VK_ERROR_OUT_OF_HOST_MEMORY;
+                break;
+            }
+
+            // copy the ISA out of our compile context, it is about to poof away
+            memcpy(pipe_shader->pCode, get_gs_program(brw->shader_prog), pipe_shader->codeSize);
+
+            struct brw_gs_prog_data *data = get_gs_prog_data(brw->shader_prog);
+
+            struct gl_geometry_program *gp = (struct gl_geometry_program *)
+               sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
+
+            // for now, assume inputs have not changed, but need to hook up vue_map_vs
+            pipe_shader->inputs_read = gp->Base.InputsRead;
+
+            // urb entries are reported in pairs, see vec4_gs_visitor::setup_varying_inputs()
+            pipe_shader->in_count = data->base.urb_read_length * 2;
+
+            pipe_shader->enable_user_clip = sh_prog->Geom.UsesClipDistance;
+            pipe_shader->discard_adj      = (sh_prog->Geom.InputType == GL_LINES ||
+                                             sh_prog->Geom.InputType == GL_TRIANGLES);
+
+            assert(VARYING_SLOT_MAX - VARYING_SLOT_CLIP_DIST0 < 64);
+            uint64_t varyings_written = 0;  // bit n is set when varying (VARYING_SLOT_CLIP_DIST0 + n) has a VUE slot
+            for (int i=VARYING_SLOT_CLIP_DIST0; i < VARYING_SLOT_MAX; i++) {
+                if (data->base.vue_map.varying_to_slot[i] >= 0) {
+                    varyings_written |= ((uint64_t) 1 << (i - VARYING_SLOT_CLIP_DIST0));  // 64-bit shift: the assert above allows bit indices up to 63
+                }
+            }
+            pipe_shader->outputs_written = varyings_written;
+            pipe_shader->outputs_offset = BRW_SF_URB_ENTRY_READ_OFFSET * 2;
+            pipe_shader->out_count = data->base.vue_map.num_slots;
+
+            // The following were all programmed in brw_gs_do_compile
+            pipe_shader->output_size_hwords              = data->output_vertex_size_hwords;
+            pipe_shader->output_topology                 = data->output_topology;
+            pipe_shader->control_data_header_size_hwords = data->control_data_header_size_hwords;
+            pipe_shader->control_data_format             = data->control_data_format;
+            pipe_shader->include_primitive_id            = data->include_primitive_id;
+            pipe_shader->invocations                     = data->invocations;
+            pipe_shader->dual_instanced_dispatch         = data->dual_instanced_dispatch;
+
+            // The rest duplicated from VS, merge it and clean up
+            pipe_shader->urb_grf_start = data->base.dispatch_grf_start_reg;
+            pipe_shader->surface_count = data->base.base.binding_table.size_bytes / 4;
+            pipe_shader->ubo_start     = data->base.base.binding_table.ubo_start;
+            pipe_shader->per_thread_scratch_size = data->base.total_scratch;
+
+            status = build_binding_table(gpu, brw, &bt, data->base.base, sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY], VK_SHADER_STAGE_GEOMETRY);
+            if (status != VK_SUCCESS)
+                break;  // stop on failure; without this the debug dump below ran only when the build had failed
+            if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
+                printf("out_count: %d\n", pipe_shader->out_count);
+
+                gs_data_dump(stdout, data);
+
+                fprintf(stdout,"\nISA generated by compiler:\n");
+                fprintf(stdout,"ISA size: %i\n", pipe_shader->codeSize);
+                hexdump(stdout, pipe_shader->pCode, pipe_shader->codeSize);
+                fflush(stdout);
+            }
+        }
+        break;
+
         case GL_FRAGMENT_SHADER:
         {
             // Start pulling bits out of our compile result.
@@ -709,11 +811,9 @@
                 hexdump(stdout, pipe_shader->pCode, pipe_shader->codeSize);
                 fflush(stdout);
             }
-
         }
             break;
 
-        case GL_GEOMETRY_SHADER:
         case GL_COMPUTE_SHADER:
         default:
             assert(0);
diff --git a/icd/intel/compiler/shader/main.cpp b/icd/intel/compiler/shader/main.cpp
index eb16b6c..cba3de5 100644
--- a/icd/intel/compiler/shader/main.cpp
+++ b/icd/intel/compiler/shader/main.cpp
@@ -112,7 +112,7 @@
     const unsigned fileNameLength = strlen(fileName);
     if (fileNameLength < 5 ||
             strncmp(".spv", &fileName[fileNameLength - 4], 4) != 0) {
-        printf("file must be .spv, .vert or .frag\n");
+        printf("file must be .spv, .vert, .geom, or .frag\n");
         return false;
     }
 
@@ -140,16 +140,19 @@
            // Call vkCreateShader on the single shader
 
            printf("Frontend compile %s\n", argv[1]);
+           fflush(stdout);
 
            void *shaderCode;
            size_t size;
 
            if (checkFileExt(argv[1], ".spv")) {
                shaderCode = load_spv_file(argv[1], &size);
-           } else if (checkFileExt(argv[1], ".frag")) {
-               shaderCode = load_glsl_file(argv[1], &size, VK_SHADER_STAGE_FRAGMENT);
            } else if (checkFileExt(argv[1], ".vert")) {
                shaderCode = load_glsl_file(argv[1], &size, VK_SHADER_STAGE_VERTEX);
+           } else if (checkFileExt(argv[1], ".geom")) {
+               shaderCode = load_glsl_file(argv[1], &size, VK_SHADER_STAGE_GEOMETRY);
+           } else if (checkFileExt(argv[1], ".frag")) {
+               shaderCode = load_glsl_file(argv[1], &size, VK_SHADER_STAGE_FRAGMENT);
            } else {
                return EXIT_FAILURE;
            }
@@ -165,6 +168,7 @@
            gpu.gt = 3;
 
            printf("Backend compile %s\n", argv[1]);
+           fflush(stdout);
 
            // struct timespec before;
            // clock_gettime(CLOCK_MONOTONIC, &before);