src/gallium/drivers/svga/svga_shader.c - platform/external/mesa3d - Gitiles

 /**********************************************************
  * Copyright 2008-2012 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
  * files (the "Software"), to deal in the Software without
  * restriction, including without limitation the rights to use, copy,
  * modify, merge, publish, distribute, sublicense, and/or sell copies
  * of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be
  * included in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
  **********************************************************/

 #include "util/u_bitmask.h"
 #include "util/u_memory.h"
 #include "util/u_format.h"
 #include "svga_context.h"
 #include "svga_cmd.h"
 #include "svga_format.h"
 #include "svga_shader.h"
 #include "svga_resource_texture.h"


 /**
  * This bit isn't really used anywhere.  It only serves to help
  * generate a unique "signature" for the vertex shader output bitmask.
  * Shader input/output signatures are used to resolve shader linking
  * issues.
  */
 #define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


 /**
  * Use the shader info to generate a bitmask indicating which generic
  * inputs are used by the shader.  A set bit indicates that GENERIC[i]
  * is used.
  */
 uint64_t
 svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
 {
    unsigned i;
    uint64_t mask = 0x0;

    for (i = 0; i < info->num_inputs; i++) {
       if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
          unsigned j = info->input_semantic_index[i];
          assert(j < sizeof(mask) * 8);
          mask |= ((uint64_t) 1) << j;
       }
    }

    return mask;
 }


 /**
  * Scan shader info to return a bitmask of written outputs.
  */
 uint64_t
 svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
 {
    unsigned i;
    uint64_t mask = 0x0;

    for (i = 0; i < info->num_outputs; i++) {
       switch (info->output_semantic_name[i]) {
       case TGSI_SEMANTIC_GENERIC:
          {
             unsigned j = info->output_semantic_index[i];
             assert(j < sizeof(mask) * 8);
             mask |= ((uint64_t) 1) << j;
          }
          break;
       case TGSI_SEMANTIC_FOG:
          mask |= FOG_GENERIC_BIT;
          break;
       }
    }

    return mask;
 }


 /**
  * Given a mask of used generic variables (as returned by the above functions)
  * fill in a table which maps those indexes to small integers.
  * This table is used by the remap_generic_index() function in
  * svga_tgsi_decl_sm30.c
  * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
  * GENERIC[3] are used.  The remap_table will contain:
  *   table[1] = 0;
  *   table[3] = 1;
  * The remaining table entries will be filled in with the next unused
  * generic index (in this example, 2).
  */
 void
 svga_remap_generics(uint64_t generics_mask,
                     int8_t remap_table[MAX_GENERIC_VARYING])
 {
    /* Note texcoord[0] is reserved so start at 1 */
    unsigned count = 1, i;

    for (i = 0; i < MAX_GENERIC_VARYING; i++) {
       remap_table[i] = -1;
    }

    /* for each bit set in generic_mask */
    while (generics_mask) {
       unsigned index = ffsll(generics_mask) - 1;
       remap_table[index] = count++;
       generics_mask &= ~((uint64_t) 1 << index);
    }
 }


 /**
  * Use the generic remap table to map a TGSI generic varying variable
  * index to a small integer.  If the remapping table doesn't have a
  * valid value for the given index (the table entry is -1) it means
  * the fragment shader doesn't use that VS output.  Just allocate
  * the next free value in that case.  Alternately, we could cull
  * VS instructions that write to register, or replace the register
  * with a dummy temp register.
  * XXX TODO: we should do one of the later as it would save precious
  * texcoord registers.
  */
 int
 svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
                          int generic_index)
 {
    assert(generic_index < MAX_GENERIC_VARYING);

    if (generic_index >= MAX_GENERIC_VARYING) {
       /* just don't return a random/garbage value */
       generic_index = MAX_GENERIC_VARYING - 1;
    }

    if (remap_table[generic_index] == -1) {
       /* This is a VS output that has no matching PS input.  Find a
        * free index.
        */
       int i, max = 0;
       for (i = 0; i < MAX_GENERIC_VARYING; i++) {
          max = MAX2(max, remap_table[i]);
       }
       remap_table[generic_index] = max + 1;
    }

    return remap_table[generic_index];
 }

 static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
    PIPE_SWIZZLE_X,
    PIPE_SWIZZLE_Y,
    PIPE_SWIZZLE_Z,
    PIPE_SWIZZLE_W,
    PIPE_SWIZZLE_0,
    PIPE_SWIZZLE_1,
    PIPE_SWIZZLE_NONE
 };

 static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
    PIPE_SWIZZLE_X,
    PIPE_SWIZZLE_Y,
    PIPE_SWIZZLE_Z,
    PIPE_SWIZZLE_1,
    PIPE_SWIZZLE_0,
    PIPE_SWIZZLE_1,
    PIPE_SWIZZLE_NONE
 };

 /**
  * Initialize the shader-neutral fields of svga_compile_key from context
  * state.  This is basically the texture-related state.
  */
 void
 svga_init_shader_key_common(const struct svga_context *svga,
                             enum pipe_shader_type shader,
                             struct svga_compile_key *key)
 {
    unsigned i, idx = 0;

    assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));

    /* In case the number of samplers and sampler_views doesn't match,
     * loop over the lower of the two counts.
     */
    key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
                             svga->curr.num_samplers[shader]);

    for (i = 0; i < key->num_textures; i++) {
       struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
       const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
       if (view) {
          assert(view->texture);
          assert(view->texture->target < (1 << 4)); /* texture_target:4 */

          /* 1D/2D array textures with one slice are treated as non-arrays
           * by the SVGA3D device.  Convert the texture type here so that
           * we emit the right TEX/SAMPLE instruction in the shader.
           */
          if (view->texture->target == PIPE_TEXTURE_1D_ARRAY ||
              view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
             if (view->texture->array_size == 1) {
                key->tex[i].is_array = 0;
             }
             else {
                assert(view->texture->array_size > 1);
                key->tex[i].is_array = 1;
             }
          }

          /* If we have a non-alpha view into an svga3d surface with an
           * alpha channel, then explicitly set the alpha channel to 1
           * when sampling. Note that we need to check the
           * actual device format to cover also imported surface cases.
           */
          const enum pipe_swizzle *swizzle_tab =
             (view->texture->target != PIPE_BUFFER &&
              !util_format_has_alpha(view->format) &&
              svga_texture_device_format_has_alpha(view->texture)) ?
             set_alpha : copy_alpha;

          key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
          key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
          key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
          key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
       }

       if (sampler) {
          if (!sampler->normalized_coords) {
             assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
             key->tex[i].width_height_idx = idx++;
             key->tex[i].unnormalized = TRUE;
             ++key->num_unnormalized_coords;

             if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
                 sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
                 key->tex[i].texel_bias = TRUE;
             }
          }
       }
    }
 }


 /** Search for a compiled shader variant with the same compile key */
 struct svga_shader_variant *
 svga_search_shader_key(const struct svga_shader *shader,
                        const struct svga_compile_key *key)
 {
    struct svga_shader_variant *variant = shader->variants;

    assert(key);

    for ( ; variant; variant = variant->next) {
       if (svga_compile_keys_equal(key, &variant->key))
          return variant;
    }
    return NULL;
 }

 /** Search for a shader with the same token key */
 struct svga_shader *
 svga_search_shader_token_key(struct svga_shader *pshader,
                              const struct svga_token_key *key)
 {
    struct svga_shader *shader = pshader;

    assert(key);

    for ( ; shader; shader = shader->next) {
       if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
          return shader;
    }
    return NULL;
 }

 /**
  * Helper function to define a gb shader for non-vgpu10 device
  */
 static enum pipe_error
 define_gb_shader_vgpu9(struct svga_context *svga,
                        SVGA3dShaderType type,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
 {
    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
    enum pipe_error ret;

    /**
     * Create gb memory for the shader and upload the shader code.
     * Kernel module will allocate an id for the shader and issue
     * the DefineGBShader command.
     */
    variant->gb_shader = sws->shader_create(sws, type,
                                            variant->tokens, codeLen);

    if (!variant->gb_shader)
       return PIPE_ERROR_OUT_OF_MEMORY;

    ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

    return ret;
 }

 /**
  * Helper function to define a gb shader for vgpu10 device
  */
 static enum pipe_error
 define_gb_shader_vgpu10(struct svga_context *svga,
                         SVGA3dShaderType type,
                         struct svga_shader_variant *variant,
                         unsigned codeLen)
 {
    struct svga_winsys_context *swc = svga->swc;
    enum pipe_error ret;

    /**
     * Shaders in VGPU10 enabled device reside in the device COTable.
     * SVGA driver will allocate an integer ID for the shader and
     * issue DXDefineShader and DXBindShader commands.
     */
    variant->id = util_bitmask_add(svga->shader_id_bm);
    if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
       return PIPE_ERROR_OUT_OF_MEMORY;
    }

    /* Create gb memory for the shader and upload the shader code */
    variant->gb_shader = swc->shader_create(swc,
                                            variant->id, type,
                                            variant->tokens, codeLen);

    if (!variant->gb_shader) {
       /* Free the shader ID */
       assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
       goto fail_no_allocation;
    }

    /**
     * Since we don't want to do any flush within state emission to avoid
     * partial state in a command buffer, it's important to make sure that
     * there is enough room to send both the DXDefineShader & DXBindShader
     * commands in the same command buffer. So let's send both
     * commands in one command reservation. If it fails, we'll undo
     * the shader creation and return an error.
     */
    ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                            variant->id, type, codeLen);

    if (ret != PIPE_OK)
       goto fail;

    return PIPE_OK;

 fail:
    swc->shader_destroy(swc, variant->gb_shader);
    variant->gb_shader = NULL;

 fail_no_allocation:
    util_bitmask_clear(svga->shader_id_bm, variant->id);
    variant->id = UTIL_BITMASK_INVALID_INDEX;

    return PIPE_ERROR_OUT_OF_MEMORY;
 }

 /**
  * Issue the SVGA3D commands to define a new shader.
  * \param variant  contains the shader tokens, etc.  The result->id field will
  *                 be set here.
  */
 enum pipe_error
 svga_define_shader(struct svga_context *svga,
                    SVGA3dShaderType type,
                    struct svga_shader_variant *variant)
 {
    unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
    enum pipe_error ret;

    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

    variant->id = UTIL_BITMASK_INVALID_INDEX;

    if (svga_have_gb_objects(svga)) {
       if (svga_have_vgpu10(svga))
          ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
       else
          ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
    }
    else {
       /* Allocate an integer ID for the shader */
       variant->id = util_bitmask_add(svga->shader_id_bm);
       if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
          ret = PIPE_ERROR_OUT_OF_MEMORY;
          goto done;
       }

       /* Issue SVGA3D device command to define the shader */
       ret = SVGA3D_DefineShader(svga->swc,
                                 variant->id,
                                 type,
                                 variant->tokens,
                                 codeLen);
       if (ret != PIPE_OK) {
          /* free the ID */
          assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
          util_bitmask_clear(svga->shader_id_bm, variant->id);
          variant->id = UTIL_BITMASK_INVALID_INDEX;
       }
    }

 done:
    SVGA_STATS_TIME_POP(svga_sws(svga));
    return ret;
 }


 /**
  * Issue the SVGA3D commands to set/bind a shader.
  * \param result  the shader to bind.
  */
 enum pipe_error
 svga_set_shader(struct svga_context *svga,
                 SVGA3dShaderType type,
                 struct svga_shader_variant *variant)
 {
    enum pipe_error ret;
    unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

    assert(type == SVGA3D_SHADERTYPE_VS ||
           type == SVGA3D_SHADERTYPE_GS ||
           type == SVGA3D_SHADERTYPE_PS);

    if (svga_have_gb_objects(svga)) {
       struct svga_winsys_gb_shader *gbshader =
          variant ? variant->gb_shader : NULL;

       if (svga_have_vgpu10(svga))
          ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
       else
          ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
    }
    else {
       ret = SVGA3D_SetShader(svga->swc, type, id);
    }

    return ret;
 }


 struct svga_shader_variant *
 svga_new_shader_variant(struct svga_context *svga)
 {
    svga->hud.num_shaders++;
    return CALLOC_STRUCT(svga_shader_variant);
 }


 enum pipe_error
 svga_destroy_shader_variant(struct svga_context *svga,
                             SVGA3dShaderType type,
                             struct svga_shader_variant *variant)
 {
    enum pipe_error ret = PIPE_OK;

    if (svga_have_gb_objects(svga) && variant->gb_shader) {
       if (svga_have_vgpu10(svga)) {
          struct svga_winsys_context *swc = svga->swc;
          swc->shader_destroy(swc, variant->gb_shader);
          ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
          if (ret != PIPE_OK) {
             /* flush and try again */
             svga_context_flush(svga, NULL);
             ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
          }
          util_bitmask_clear(svga->shader_id_bm, variant->id);
       }
       else {
          struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
          sws->shader_destroy(sws, variant->gb_shader);
       }
       variant->gb_shader = NULL;
    }
    else {
       if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
          ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
          if (ret != PIPE_OK) {
             /* flush and try again */
             svga_context_flush(svga, NULL);
             ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
             assert(ret == PIPE_OK);
          }
          util_bitmask_clear(svga->shader_id_bm, variant->id);
       }
    }

    FREE((unsigned *)variant->tokens);
    FREE(variant);

    svga->hud.num_shaders--;

    return ret;
 }

 /*
  * Rebind shaders.
  * Called at the beginning of every new command buffer to ensure that
  * shaders are properly paged-in. Instead of sending the SetShader
  * command, this function sends a private allocation command to
  * page in a shader. This avoids emitting redundant state to the device
  * just to page in a resource.
  */
 enum pipe_error
 svga_rebind_shaders(struct svga_context *svga)
 {
    struct svga_winsys_context *swc = svga->swc;
    struct svga_hw_draw_state *hw = &svga->state.hw_draw;
    enum pipe_error ret;

    assert(svga_have_vgpu10(svga));

    /**
     * If the underlying winsys layer does not need resource rebinding,
     * just clear the rebind flags and return.
     */
    if (swc->resource_rebind == NULL) {
       svga->rebind.flags.vs = 0;
       svga->rebind.flags.gs = 0;
       svga->rebind.flags.fs = 0;

       return PIPE_OK;
    }

    if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
       ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
       if (ret != PIPE_OK)
          return ret;
    }
    svga->rebind.flags.vs = 0;

    if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
       ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
       if (ret != PIPE_OK)
          return ret;
    }
    svga->rebind.flags.gs = 0;

    if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
       ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
       if (ret != PIPE_OK)
          return ret;
    }
    svga->rebind.flags.fs = 0;

    return PIPE_OK;
 }
	/**********************************************************
	* Copyright 2008-2012 VMware, Inc. All rights reserved.
	*
	* Permission is hereby granted, free of charge, to any person
	* obtaining a copy of this software and associated documentation
	* files (the "Software"), to deal in the Software without
	* restriction, including without limitation the rights to use, copy,
	* modify, merge, publish, distribute, sublicense, and/or sell copies
	* of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be
	* included in all copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
	* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
	* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
	* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
	* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*
	**********************************************************/

	#include "util/u_bitmask.h"
	#include "util/u_memory.h"
	#include "util/u_format.h"
	#include "svga_context.h"
	#include "svga_cmd.h"
	#include "svga_format.h"
	#include "svga_shader.h"
	#include "svga_resource_texture.h"


	/**
	* This bit isn't really used anywhere. It only serves to help
	* generate a unique "signature" for the vertex shader output bitmask.
	* Shader input/output signatures are used to resolve shader linking
	* issues.
	*/
	#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


	/**
	* Use the shader info to generate a bitmask indicating which generic
	* inputs are used by the shader. A set bit indicates that GENERIC[i]
	* is used.
	*/
	uint64_t
	svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
	{
	unsigned i;
	uint64_t mask = 0x0;

	for (i = 0; i < info->num_inputs; i++) {
	if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
	unsigned j = info->input_semantic_index[i];
	assert(j < sizeof(mask) * 8);
	mask \|= ((uint64_t) 1) << j;
	}
	}

	return mask;
	}


	/**
	* Scan shader info to return a bitmask of written outputs.
	*/
	uint64_t
	svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
	{
	unsigned i;
	uint64_t mask = 0x0;

	for (i = 0; i < info->num_outputs; i++) {
	switch (info->output_semantic_name[i]) {
	case TGSI_SEMANTIC_GENERIC:
	{
	unsigned j = info->output_semantic_index[i];
	assert(j < sizeof(mask) * 8);
	mask \|= ((uint64_t) 1) << j;
	}
	break;
	case TGSI_SEMANTIC_FOG:
	mask \|= FOG_GENERIC_BIT;
	break;
	}
	}

	return mask;
	}



	/**
	* Given a mask of used generic variables (as returned by the above functions)
	* fill in a table which maps those indexes to small integers.
	* This table is used by the remap_generic_index() function in
	* svga_tgsi_decl_sm30.c
	* Example: if generics_mask = binary(1010) it means that GENERIC[1] and
	* GENERIC[3] are used. The remap_table will contain:
	* table[1] = 0;
	* table[3] = 1;
	* The remaining table entries will be filled in with the next unused
	* generic index (in this example, 2).
	*/
	void
	svga_remap_generics(uint64_t generics_mask,
	int8_t remap_table[MAX_GENERIC_VARYING])
	{
	/* Note texcoord[0] is reserved so start at 1 */
	unsigned count = 1, i;

	for (i = 0; i < MAX_GENERIC_VARYING; i++) {
	remap_table[i] = -1;
	}

	/* for each bit set in generic_mask */
	while (generics_mask) {
	unsigned index = ffsll(generics_mask) - 1;
	remap_table[index] = count++;
	generics_mask &= ~((uint64_t) 1 << index);
	}
	}


	/**
	* Use the generic remap table to map a TGSI generic varying variable
	* index to a small integer. If the remapping table doesn't have a
	* valid value for the given index (the table entry is -1) it means
	* the fragment shader doesn't use that VS output. Just allocate
	* the next free value in that case. Alternately, we could cull
	* VS instructions that write to register, or replace the register
	* with a dummy temp register.
	* XXX TODO: we should do one of the later as it would save precious
	* texcoord registers.
	*/
	int
	svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
	int generic_index)
	{
	assert(generic_index < MAX_GENERIC_VARYING);

	if (generic_index >= MAX_GENERIC_VARYING) {
	/* just don't return a random/garbage value */
	generic_index = MAX_GENERIC_VARYING - 1;
	}

	if (remap_table[generic_index] == -1) {
	/* This is a VS output that has no matching PS input. Find a
	* free index.
	*/
	int i, max = 0;
	for (i = 0; i < MAX_GENERIC_VARYING; i++) {
	max = MAX2(max, remap_table[i]);
	}
	remap_table[generic_index] = max + 1;
	}

	return remap_table[generic_index];
	}

	static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
	PIPE_SWIZZLE_X,
	PIPE_SWIZZLE_Y,
	PIPE_SWIZZLE_Z,
	PIPE_SWIZZLE_W,
	PIPE_SWIZZLE_0,
	PIPE_SWIZZLE_1,
	PIPE_SWIZZLE_NONE
	};

	static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
	PIPE_SWIZZLE_X,
	PIPE_SWIZZLE_Y,
	PIPE_SWIZZLE_Z,
	PIPE_SWIZZLE_1,
	PIPE_SWIZZLE_0,
	PIPE_SWIZZLE_1,
	PIPE_SWIZZLE_NONE
	};

	/**
	* Initialize the shader-neutral fields of svga_compile_key from context
	* state. This is basically the texture-related state.
	*/
	void
	svga_init_shader_key_common(const struct svga_context *svga,
	enum pipe_shader_type shader,
	struct svga_compile_key *key)
	{
	unsigned i, idx = 0;

	assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));

	/* In case the number of samplers and sampler_views doesn't match,
	* loop over the lower of the two counts.
	*/
	key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
	svga->curr.num_samplers[shader]);

	for (i = 0; i < key->num_textures; i++) {
	struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
	const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
	if (view) {
	assert(view->texture);
	assert(view->texture->target < (1 << 4)); /* texture_target:4 */

	/* 1D/2D array textures with one slice are treated as non-arrays
	* by the SVGA3D device. Convert the texture type here so that
	* we emit the right TEX/SAMPLE instruction in the shader.
	*/
	if (view->texture->target == PIPE_TEXTURE_1D_ARRAY \|\|
	view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
	if (view->texture->array_size == 1) {
	key->tex[i].is_array = 0;
	}
	else {
	assert(view->texture->array_size > 1);
	key->tex[i].is_array = 1;
	}
	}

	/* If we have a non-alpha view into an svga3d surface with an
	* alpha channel, then explicitly set the alpha channel to 1
	* when sampling. Note that we need to check the
	* actual device format to cover also imported surface cases.
	*/
	const enum pipe_swizzle *swizzle_tab =
	(view->texture->target != PIPE_BUFFER &&
	!util_format_has_alpha(view->format) &&
	svga_texture_device_format_has_alpha(view->texture)) ?
	set_alpha : copy_alpha;

	key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
	key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
	key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
	key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
	}

	if (sampler) {
	if (!sampler->normalized_coords) {
	assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
	key->tex[i].width_height_idx = idx++;
	key->tex[i].unnormalized = TRUE;
	++key->num_unnormalized_coords;

	if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST \|\|
	sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
	key->tex[i].texel_bias = TRUE;
	}
	}
	}
	}
	}


	/** Search for a compiled shader variant with the same compile key */
	struct svga_shader_variant *
	svga_search_shader_key(const struct svga_shader *shader,
	const struct svga_compile_key *key)
	{
	struct svga_shader_variant *variant = shader->variants;

	assert(key);

	for ( ; variant; variant = variant->next) {
	if (svga_compile_keys_equal(key, &variant->key))
	return variant;
	}
	return NULL;
	}

	/** Search for a shader with the same token key */
	struct svga_shader *
	svga_search_shader_token_key(struct svga_shader *pshader,
	const struct svga_token_key *key)
	{
	struct svga_shader *shader = pshader;

	assert(key);

	for ( ; shader; shader = shader->next) {
	if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
	return shader;
	}
	return NULL;
	}

	/**
	* Helper function to define a gb shader for non-vgpu10 device
	*/
	static enum pipe_error
	define_gb_shader_vgpu9(struct svga_context *svga,
	SVGA3dShaderType type,
	struct svga_shader_variant *variant,
	unsigned codeLen)
	{
	struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
	enum pipe_error ret;

	/**
	* Create gb memory for the shader and upload the shader code.
	* Kernel module will allocate an id for the shader and issue
	* the DefineGBShader command.
	*/
	variant->gb_shader = sws->shader_create(sws, type,
	variant->tokens, codeLen);

	if (!variant->gb_shader)
	return PIPE_ERROR_OUT_OF_MEMORY;

	ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

	return ret;
	}

	/**
	* Helper function to define a gb shader for vgpu10 device
	*/
	static enum pipe_error
	define_gb_shader_vgpu10(struct svga_context *svga,
	SVGA3dShaderType type,
	struct svga_shader_variant *variant,
	unsigned codeLen)
	{
	struct svga_winsys_context *swc = svga->swc;
	enum pipe_error ret;

	/**
	* Shaders in VGPU10 enabled device reside in the device COTable.
	* SVGA driver will allocate an integer ID for the shader and
	* issue DXDefineShader and DXBindShader commands.
	*/
	variant->id = util_bitmask_add(svga->shader_id_bm);
	if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
	return PIPE_ERROR_OUT_OF_MEMORY;
	}

	/* Create gb memory for the shader and upload the shader code */
	variant->gb_shader = swc->shader_create(swc,
	variant->id, type,
	variant->tokens, codeLen);

	if (!variant->gb_shader) {
	/* Free the shader ID */
	assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
	goto fail_no_allocation;
	}

	/**
	* Since we don't want to do any flush within state emission to avoid
	* partial state in a command buffer, it's important to make sure that
	* there is enough room to send both the DXDefineShader & DXBindShader
	* commands in the same command buffer. So let's send both
	* commands in one command reservation. If it fails, we'll undo
	* the shader creation and return an error.
	*/
	ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
	variant->id, type, codeLen);

	if (ret != PIPE_OK)
	goto fail;

	return PIPE_OK;

	fail:
	swc->shader_destroy(swc, variant->gb_shader);
	variant->gb_shader = NULL;

	fail_no_allocation:
	util_bitmask_clear(svga->shader_id_bm, variant->id);
	variant->id = UTIL_BITMASK_INVALID_INDEX;

	return PIPE_ERROR_OUT_OF_MEMORY;
	}

	/**
	* Issue the SVGA3D commands to define a new shader.
	* \param variant contains the shader tokens, etc. The result->id field will
	* be set here.
	*/
	enum pipe_error
	svga_define_shader(struct svga_context *svga,
	SVGA3dShaderType type,
	struct svga_shader_variant *variant)
	{
	unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
	enum pipe_error ret;

	SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

	variant->id = UTIL_BITMASK_INVALID_INDEX;

	if (svga_have_gb_objects(svga)) {
	if (svga_have_vgpu10(svga))
	ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
	else
	ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
	}
	else {
	/* Allocate an integer ID for the shader */
	variant->id = util_bitmask_add(svga->shader_id_bm);
	if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
	ret = PIPE_ERROR_OUT_OF_MEMORY;
	goto done;
	}

	/* Issue SVGA3D device command to define the shader */
	ret = SVGA3D_DefineShader(svga->swc,
	variant->id,
	type,
	variant->tokens,
	codeLen);
	if (ret != PIPE_OK) {
	/* free the ID */
	assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
	util_bitmask_clear(svga->shader_id_bm, variant->id);
	variant->id = UTIL_BITMASK_INVALID_INDEX;
	}
	}

	done:
	SVGA_STATS_TIME_POP(svga_sws(svga));
	return ret;
	}


	/**
	* Issue the SVGA3D commands to set/bind a shader.
	* \param result the shader to bind.
	*/
	enum pipe_error
	svga_set_shader(struct svga_context *svga,
	SVGA3dShaderType type,
	struct svga_shader_variant *variant)
	{
	enum pipe_error ret;
	unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

	assert(type == SVGA3D_SHADERTYPE_VS \|\|
	type == SVGA3D_SHADERTYPE_GS \|\|
	type == SVGA3D_SHADERTYPE_PS);

	if (svga_have_gb_objects(svga)) {
	struct svga_winsys_gb_shader *gbshader =
	variant ? variant->gb_shader : NULL;

	if (svga_have_vgpu10(svga))
	ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
	else
	ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
	}
	else {
	ret = SVGA3D_SetShader(svga->swc, type, id);
	}

	return ret;
	}


	struct svga_shader_variant *
	svga_new_shader_variant(struct svga_context *svga)
	{
	svga->hud.num_shaders++;
	return CALLOC_STRUCT(svga_shader_variant);
	}


	enum pipe_error
	svga_destroy_shader_variant(struct svga_context *svga,
	SVGA3dShaderType type,
	struct svga_shader_variant *variant)
	{
	enum pipe_error ret = PIPE_OK;

	if (svga_have_gb_objects(svga) && variant->gb_shader) {
	if (svga_have_vgpu10(svga)) {
	struct svga_winsys_context *swc = svga->swc;
	swc->shader_destroy(swc, variant->gb_shader);
	ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
	if (ret != PIPE_OK) {
	/* flush and try again */
	svga_context_flush(svga, NULL);
	ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
	}
	util_bitmask_clear(svga->shader_id_bm, variant->id);
	}
	else {
	struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
	sws->shader_destroy(sws, variant->gb_shader);
	}
	variant->gb_shader = NULL;
	}
	else {
	if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
	ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
	if (ret != PIPE_OK) {
	/* flush and try again */
	svga_context_flush(svga, NULL);
	ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
	assert(ret == PIPE_OK);
	}
	util_bitmask_clear(svga->shader_id_bm, variant->id);
	}
	}

	FREE((unsigned *)variant->tokens);
	FREE(variant);

	svga->hud.num_shaders--;

	return ret;
	}

	/*
	* Rebind shaders.
	* Called at the beginning of every new command buffer to ensure that
	* shaders are properly paged-in. Instead of sending the SetShader
	* command, this function sends a private allocation command to
	* page in a shader. This avoids emitting redundant state to the device
	* just to page in a resource.
	*/
	enum pipe_error
	svga_rebind_shaders(struct svga_context *svga)
	{
	struct svga_winsys_context *swc = svga->swc;
	struct svga_hw_draw_state *hw = &svga->state.hw_draw;
	enum pipe_error ret;

	assert(svga_have_vgpu10(svga));

	/**
	* If the underlying winsys layer does not need resource rebinding,
	* just clear the rebind flags and return.
	*/
	if (swc->resource_rebind == NULL) {
	svga->rebind.flags.vs = 0;
	svga->rebind.flags.gs = 0;
	svga->rebind.flags.fs = 0;

	return PIPE_OK;
	}

	if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
	ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
	if (ret != PIPE_OK)
	return ret;
	}
	svga->rebind.flags.vs = 0;

	if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
	ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
	if (ret != PIPE_OK)
	return ret;
	}
	svga->rebind.flags.gs = 0;

	if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
	ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
	if (ret != PIPE_OK)
	return ret;
	}
	svga->rebind.flags.fs = 0;

	return PIPE_OK;
	}