draw: Work around an invalid write.
The SSE vertex shader does not seem to honor the execution mask. Pad the
output array to a multiple of four vertices as a workaround.
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 24c538b..121dfc4 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -184,7 +184,7 @@
output_verts->count = input_verts->count;
output_verts->verts =
(struct vertex_header *)MALLOC(output_verts->vertex_size *
- output_verts->count);
+ align(output_verts->count, 4));
vshader->run_linear(vshader,
(const float (*)[4])input_verts->verts->data,