llvmpipe: work on clears and coefficients
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 110caaf..695ddc0 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -48,14 +48,17 @@
return rast;
}
-void lp_rast_bind_surfaces( struct lp_rasterizer *rast,
- struct pipe_surface *cbuf,
- struct pipe_surface *zsbuf,
- const float *clear_color,
- double clear_depth,
- unsigned clear_stencil)
+void lp_rast_bind_color( struct lp_rasterizer *rast,
+ struct pipe_surface *cbuf,
+ boolean write_color )
{
pipe_surface_reference(&rast->state.cbuf, cbuf);
+}
+
+void lp_rast_bind_zstencil( struct lp_rasterizer *rast,
+ struct pipe_surface *zsbuf,
+ boolean write_zstencil )
+{
pipe_surface_reference(&rast->state.zsbuf, zsbuf);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 492e4b0..28bb0a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -101,27 +101,17 @@
struct lp_rast_shader_inputs *inputs;
};
-struct clear_tile {
- boolean do_color;
- boolean do_depth_stencil;
- unsigned rgba;
- unsigned depth_stencil;
-};
-
-struct load_tile {
- boolean do_color;
- boolean do_depth_stencil;
-};
struct lp_rasterizer *lp_rast_create( void );
-void lp_rast_bind_surfaces( struct lp_rasterizer *,
- struct pipe_surface *cbuf,
- struct pipe_surface *zsbuf,
- const float *clear_color,
- double clear_depth,
- unsigned clear_stencil);
+void lp_rast_bind_color( struct lp_rasterizer *,
+ struct pipe_surface *cbuf,
+ boolean write_when_done );
+
+void lp_rast_bind_depth( struct lp_rasterizer *,
+ struct pipe_surface *zsbuf,
+ boolean write_when_done );
/* Begining of each tile:
*/
@@ -174,8 +164,7 @@
/* End of tile:
*/
-void lp_rast_end_tile( struct lp_rasterizer *rast,
- boolean write_depth );
+void lp_rast_end_tile( struct lp_rasterizer *rast );
/* Shutdown:
*/
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 9016c4b..57ac854 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -58,6 +58,8 @@
{
unsigned i, j;
+ /* Free binner command lists:
+ */
for (i = 0; i < setup->tiles_x; i++) {
for (j = 0; j < setup->tiles_y; j++) {
struct cmd_block_list *list = &setup->tile[i][j];
@@ -73,6 +75,8 @@
}
}
+ /* Free binned data:
+ */
{
struct data_block_list *list = &setup->data;
struct data_block *block, *tmp;
@@ -84,6 +88,10 @@
list->head = list->tail;
}
+
+ /* Reset some state:
+ */
+ setup->clear.flags = 0;
}
@@ -131,7 +139,7 @@
}
}
- lp_rast_finish_tile( rast );
+ lp_rast_end_tile( rast );
}
}
@@ -144,10 +152,10 @@
begin_binning( struct setup_context *setup )
{
if (setup->fb.color) {
- if (setup->fb.clear_color)
+ if (setup->clear.flags & PIPE_CLEAR_COLOR)
bin_everywhere( setup,
lp_rast_clear_color,
- &setup->clear_data );
+ &setup->clear.color );
else
bin_everywhere( setup,
lp_rast_load_color,
@@ -155,10 +163,10 @@
}
if (setup->fb.zstencil) {
- if (setup->fb.clear_zstencil)
+ if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL)
bin_everywhere( setup,
lp_rast_clear_zstencil,
- &setup->clear_data );
+ &setup->clear.zstencil );
else
bin_everywhere( setup,
lp_rast_load_zstencil,
@@ -176,7 +184,7 @@
execute_clears( struct setup_context *setup )
{
begin_binning( setup );
- rasterize_bins( setup );
+ rasterize_bins( setup, TRUE );
}
@@ -192,7 +200,7 @@
switch (new_state) {
case SETUP_ACTIVE:
if (old_state == SETUP_FLUSHED)
- setup_begin_binning( setup );
+ begin_binning( setup );
break;
case SETUP_CLEARED:
@@ -203,10 +211,10 @@
break;
case SETUP_FLUSHED:
- if (old_state == SETUP_CLEAR)
+ if (old_state == SETUP_CLEARED)
execute_clears( setup );
else
- rasterize_bins( setup );
+ rasterize_bins( setup, TRUE );
break;
}
@@ -271,15 +279,20 @@
}
else {
set_state( setup, SETUP_CLEARED );
+
setup->clear.flags |= flags;
if (flags & PIPE_CLEAR_COLOR) {
- memcpy(setup->clear.color, color, sizeof setup->clear.color);
+ util_pack_color(rgba,
+ setup->fb.cbuf->format,
+ &setup->clear.color.clear_color );
}
if (flags & PIPE_CLEAR_DEPTH_STENCIL) {
- setup->clear.depth = clear_depth;
- setup->clear.stencil = clear_stencil;
+ setup->clear.zstencil.clear_zstencil =
+ util_pack_z_stencil(setup->fb.zsbuf->format,
+ depth,
+ stencil);
}
}
}
@@ -293,6 +306,12 @@
memcpy( setup->interp, interp, nr * sizeof interp[0] );
}
+void
+lp_setup_set_shader_state( struct setup_context *setup,
+ const struct jit_context *jc )
+{
+}
+
static void
first_triangle( struct setup_context *setup,
@@ -324,10 +343,10 @@
}
void
-lp_setup_triangle(struct setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4])
+lp_setup_tri(struct setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
setup->triangle( setup, v0, v1, v2 );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index 6f560f5..7c81307 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -65,6 +65,17 @@
lp_setup_point( struct setup_context *setup,
const float (*v0)[4] );
+
+void
+lp_setup_flush( struct setup_context *setup,
+ unsigned flags );
+
+
+void
+lp_setup_bind_framebuffer( struct setup_context *setup,
+ struct pipe_surface *color,
+ struct pipe_surface *zstencil );
+
void
lp_setup_set_triangle_state( struct setup_context *setup,
unsigned cullmode,
@@ -75,6 +86,10 @@
const enum lp_interp *interp,
unsigned nr );
+void
+lp_setup_set_shader_state( struct setup_context *setup,
+ const struct jit_context *jc );
+
boolean
lp_setup_is_texture_referenced( struct setup_context *setup,
const struct pipe_texture *texture );
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 19d163d..5722e3e 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -88,9 +88,8 @@
struct {
unsigned flags;
- float clear_color[4];
- double clear_depth;
- unsigned clear_stencil;
+ union lp_rast_cmd_arg color;
+ union lp_rast_cmd_arg zstencil;
} clear;
enum {
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 75a0ea8..efd9112 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -38,55 +38,60 @@
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
-static void constant_coef( struct tgsi_interp_coef *coef,
+static void constant_coef( struct lp_rast_triangle *tri,
const float (*v3)[4],
unsigned vert_attr,
unsigned i )
{
- coef->a0[i] = v3[vert_attr][i];
- coef->dadx[i] = 0;
- coef->dady[i] = 0;
+ tri->inputs.a0[i] = v3[vert_attr][i];
+ tri->inputs.dadx[i] = 0;
+ tri->inputs.dady[i] = 0;
}
/**
* Compute a0, dadx and dady for a linearly interpolated coefficient,
* for a triangle.
*/
-static void linear_coef( struct triangle *tri,
- struct tgsi_interp_coef *coef,
+static void linear_coef( struct lp_rast_triangle *tri,
+ unsigned input,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
- unsigned vert_attr,
- unsigned i)
+ unsigned vert_attr)
{
- float a1 = v1[vert_attr][i];
- float a2 = v2[vert_attr][i];
- float a3 = v3[vert_attr][i];
+ unsigned i;
- float da12 = a1 - a2;
- float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+ input *= 4;
- coef->dadx[i] = dadx;
- coef->dady[i] = dady;
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ float a1 = v1[vert_attr][i];
+ float a2 = v2[vert_attr][i];
+ float a3 = v3[vert_attr][i];
- /* calculate a0 as the value which would be sampled for the
- * fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
- *
- * this is neat but unfortunately not a good way to do things for
- * triangles with very large values of dadx or dady as it will
- * result in the subtraction and re-addition from a0 of a very
- * large number, which means we'll end up loosing a lot of the
- * fractional bits and precision from a0. the way to fix this is
- * to define a0 as the sample at a pixel center somewhere near vmin
- * instead - i'll switch to this later.
- */
- coef->a0[i] = (v1[vert_attr][i] -
- (dadx * (v1[0][0] - 0.5f) +
- dady * (v1[0][1] - 0.5f)));
+ float da12 = a1 - a2;
+ float da31 = a3 - a1;
+ float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
+ float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+
+ tri->inputs.dadx[input+i] = dadx;
+ tri->inputs.dady[input+i] = dady;
+
+ /* calculate a0 as the value which would be sampled for the
+ * fragment at (0,0), taking into account that we want to sample at
+ * pixel centers, in other words (0.5, 0.5).
+ *
+ * this is neat but unfortunately not a good way to do things for
+ * triangles with very large values of dadx or dady as it will
+ * result in the subtraction and re-addition from a0 of a very
+ * large number, which means we'll end up losing a lot of the
+ * fractional bits and precision from a0. the way to fix this is
+ * to define a0 as the sample at a pixel center somewhere near vmin
+ * instead - i'll switch to this later.
+ */
+ tri->inputs.a0[input+i] = (v1[vert_attr][i] -
+ (dadx * (v1[0][0] - 0.5f) +
+ dady * (v1[0][1] - 0.5f)));
+ }
}
@@ -98,30 +103,35 @@
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
-static void perspective_coef( struct triangle *tri,
- struct tgsi_interp_coef *coef,
+static void perspective_coef( struct lp_rast_triangle *tri,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
- /* premultiply by 1/w (v[0][3] is always 1/w):
- */
- float a1 = v1[vert_attr][i] * v1[0][3];
- float a2 = v2[vert_attr][i] * v2[0][3];
- float a3 = v3[vert_attr][i] * v3[0][3];
- float da12 = a1 - a2;
- float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+ unsigned i;
+
+ input *= 4;
+
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ float a1 = v1[vert_attr][i] * v1[0][3];
+ float a2 = v2[vert_attr][i] * v2[0][3];
+ float a3 = v3[vert_attr][i] * v3[0][3];
+ float da12 = a1 - a2;
+ float da31 = a3 - a1;
+ float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
+ float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
- coef->dadx[i] = dadx;
- coef->dady[i] = dady;
- coef->a0[i] = (a1 -
- (dadx * (v1[0][0] - 0.5f) +
- dady * (v1[0][1] - 0.5f)));
+ tri->inputs.dadx[input+i] = dadx;
+ tri->inputs.dady[input+i] = dady;
+ tri->inputs.a0[input+i] = (a1 -
+ (dadx * (v1[0][0] - 0.5f) +
+ dady * (v1[0][1] - 0.5f)));
+ }
}
@@ -132,24 +142,26 @@
* We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
*/
static void
-setup_fragcoord_coef(struct triangle *tri, unsigned slot)
+setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot)
{
+ slot *= 4;
+
/*X*/
- tri->coef[slot].a0[0] = 0.0;
- tri->coef[slot].dadx[0] = 1.0;
- tri->coef[slot].dady[0] = 0.0;
+ tri->inputs.a0[slot+0] = 0.0;
+ tri->inputs.dadx[slot+0] = 1.0;
+ tri->inputs.dady[slot+0] = 0.0;
/*Y*/
- tri->coef[slot].a0[1] = 0.0;
- tri->coef[slot].dadx[1] = 0.0;
- tri->coef[slot].dady[1] = 1.0;
+ tri->inputs.a0[slot+1] = 0.0;
+ tri->inputs.dadx[slot+1] = 0.0;
+ tri->inputs.dady[slot+1] = 1.0;
/*Z*/
- tri->coef[slot].a0[2] = tri->position_coef.a0[2];
- tri->coef[slot].dadx[2] = tri->position_coef.dadx[2];
- tri->coef[slot].dady[2] = tri->position_coef.dady[2];
+ tri->inputs.a0[slot+2] = tri->inputs.a0[2];
+ tri->inputs.dadx[slot+2] = tri->inputs.dadx[2];
+ tri->inputs.dady[slot+2] = tri->inputs.dady[2];
/*W*/
- tri->coef[slot].a0[3] = tri->position_coef.a0[3];
- tri->coef[slot].dadx[3] = tri->position_coef.dadx[3];
- tri->coef[slot].dady[3] = tri->position_coef.dady[3];
+ tri->inputs.a0[slot+3] = tri->inputs.a0[3];
+ tri->inputs.dadx[slot+3] = tri->inputs.dadx[3];
+ tri->inputs.dady[slot+3] = tri->inputs.dady[3];
}
@@ -158,50 +170,46 @@
* Compute the tri->coef[] array dadx, dady, a0 values.
*/
static void setup_tri_coefficients( struct setup_context *setup,
- struct triangle *tri,
+ struct lp_rast_triangle *tri,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
boolean frontface )
{
- const struct vertex_info *vinfo = setup->vinfo;
unsigned input;
/* z and w are done by linear interpolation:
*/
- linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2);
- linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3);
+ setup_fragcoord_coef(tri, 0);
+ linear_coef(tri, input, v1, v2, v3, vert_attr, i);
- /* setup interpolation for all the remaining attributes:
+ /* setup interpolation for all the remaining attributes:
*/
- for (input = 0; input < vinfo->num_fs_inputs; input++) {
- unsigned vert_attr = vinfo->attrib[input].src_index;
+ for (input = 0; input < setup->fs.nr_inputs; input++) {
+ unsigned vert_attr = setup->fs.input[input].src_index;
unsigned i;
- switch (vinfo->attrib[input].interp_mode) {
- case INTERP_CONSTANT:
- for (i = 0; i < NUM_CHANNELS; i++)
- constant_coef(tri->coef[input], v3, vert_attr, i);
+ switch (setup->fs.input[input].interp_mode) {
+ case LP_INTERP_CONSTANT:
+ constant_coef(tri, input, v3, vert_attr, i);
break;
- case INTERP_LINEAR:
- for (i = 0; i < NUM_CHANNELS; i++)
- linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i);
+ case LP_INTERP_LINEAR:
+ linear_coef(tri, input, v1, v2, v3, vert_attr, i);
break;
- case INTERP_PERSPECTIVE:
- for (i = 0; i < NUM_CHANNELS; i++)
- perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i);
+ case LP_INTERP_PERSPECTIVE:
+ perspective_coef(tri, input, v1, v2, v3, vert_attr, i);
break;
- case INTERP_POS:
+ case LP_INTERP_POS:
setup_fragcoord_coef(tri, input);
break;
- case INTERP_FACING:
- tri->coef[input].a0[0] = 1.0f - frontface;
- tri->coef[input].dadx[0] = 0.0;
- tri->coef[input].dady[0] = 0.0;
+ case LP_INTERP_FACING:
+ tri->inputs.a0[input*4+0] = 1.0f - frontface;
+ tri->inputs.dadx[input*4+0] = 0.0;
+ tri->da[input].dady[0] = 0.0;
break;
default:
@@ -255,7 +263,7 @@
const float x2 = subpixel_snap(v2[0][0]);
const float x3 = subpixel_snap(v3[0][0]);
- struct triangle *tri = allocate_triangle( setup );
+ struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri );
float area;
float c1, c2, c3;
int i;