llvmpipe: Factor out and optimize the input interpolation.

Special attention is given to the interpolation of side-by-side quads.
Multiplications are performed only for the first quad. Inputs for
subsequent quads are interpolated exclusively with additions, plus a
perspective divide where necessary.
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ca0a8bf..4f31788 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -14,6 +14,7 @@
 	lp_bld_debug.c \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
+	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_pack.c \
 	lp_bld_unpack.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 10ead09..ec82be1 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -18,6 +18,7 @@
 		'lp_bld_debug.c',
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
+		'lp_bld_interp.c',
 		'lp_bld_intr.c',
 		'lp_bld_pack.c',
 		'lp_bld_unpack.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
new file mode 100644
index 0000000..cfe20a0
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -0,0 +1,377 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_parse.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_interp.h"
+
+/* Label val through lp_build_name: attrib 0 is "pos", attrib N > 0 is "input<N-1>"; chan picks x/y/z/w. */
+static void
+attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
+{
+   if(attrib == 0)
+      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
+   else
+      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
+}
+
+/* Load a0/dadx/dady for every enabled attribute channel, broadcast them, and store them in bld. */
+static void
+coeffs_init(struct lp_build_interp_soa_context *bld,
+            LLVMValueRef a0_ptr,
+            LLVMValueRef dadx_ptr,
+            LLVMValueRef dady_ptr)
+{
+   LLVMBuilderRef builder = bld->base.builder;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
+            LLVMValueRef a0 = NULL;
+            LLVMValueRef dadx = NULL;
+            LLVMValueRef dady = NULL;
+            /* dadx/dady stay NULL for constant attributes; only a0 is loaded for them. */
+            switch( mode ) {
+            case TGSI_INTERPOLATE_PERSPECTIVE:
+               /* fall-through */
+
+            case TGSI_INTERPOLATE_LINEAR:
+               dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
+               dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
+               dadx = lp_build_broadcast_scalar(&bld->base, dadx);
+               dady = lp_build_broadcast_scalar(&bld->base, dady);
+               attrib_name(dadx, attrib, chan, ".dadx");
+               attrib_name(dady, attrib, chan, ".dady");
+               /* fall-through */
+
+            case TGSI_INTERPOLATE_CONSTANT:
+               a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
+               a0 = lp_build_broadcast_scalar(&bld->base, a0);
+               attrib_name(a0, attrib, chan, ".a0");
+               break;
+
+            default:
+               assert(0);
+               break;
+            }
+
+            bld->a0  [attrib][chan] = a0;
+            bld->dadx[attrib][chan] = dadx;
+            bld->dady[attrib][chan] = dady;
+         }
+      }
+   }
+}
+
+
+/**
+ * Multiply coeff by the integer step. Steps 0, 1 and 2 are special-cased as
+ * zero, coeff itself and coeff + coeff; other steps use a constant multiply.
+ * TODO: Should be elsewhere.
+ */
+static LLVMValueRef
+coeff_multiply(struct lp_build_interp_soa_context *bld,
+               LLVMValueRef coeff,
+               int step)
+{
+   LLVMValueRef factor;
+
+   switch(step) {
+   case 0:
+      return bld->base.zero;
+   case 1:
+      return coeff;
+   case 2:
+      return lp_build_add(&bld->base, coeff, coeff);
+   default:
+      factor = lp_build_const_scalar(bld->base.type, (double)step);
+      return lp_build_mul(&bld->base, coeff, factor);
+   }
+}
+
+
+/**
+ * Scale dadx by xstep and dady by ystep (constant attributes are skipped) so later updates only add.
+ */
+static void
+coeffs_update(struct lp_build_interp_soa_context *bld)
+{
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      if (mode != TGSI_INTERPOLATE_CONSTANT) {
+         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+            if(mask & (1 << chan)) {
+               bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
+               bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+            }
+         }
+      }
+   }
+}
+
+/* Evaluate each enabled channel as a0 + x*dadx + y*dady; perspective modes are then multiplied by lp_build_rcp(w). */
+static void
+attribs_init(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef x = bld->pos[0];
+   LLVMValueRef y = bld->pos[1];
+   LLVMValueRef oow = NULL;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef a0   = bld->a0  [attrib][chan];
+            LLVMValueRef dadx = bld->dadx[attrib][chan];
+            LLVMValueRef dady = bld->dady[attrib][chan];
+            LLVMValueRef res;
+
+            res = a0;
+
+            if (mode != TGSI_INTERPOLATE_CONSTANT) {
+               res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx));
+               res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady));
+            }
+
+            /* Keep the value of the attribute before perspective divide for faster updates */
+            bld->attribs_pre[attrib][chan] = res;
+
+            if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
+               LLVMValueRef w = bld->pos[3];
+               assert(attrib != 0);
+               if(!oow)
+                  oow = lp_build_rcp(&bld->base, w);
+               res = lp_build_mul(&bld->base, res, oow);
+            }
+
+            attrib_name(res, attrib, chan, "");
+
+            bld->attribs[attrib][chan] = res;
+         }
+      }
+   }
+}
+
+/* Step each non-constant channel: add the stored dadx/dady to its pre-divide value, then redo the perspective multiply. */
+static void
+attribs_update(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef oow = NULL;
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      unsigned mask = bld->mask[attrib];
+      unsigned mode = bld->mode[attrib];
+
+      if (mode != TGSI_INTERPOLATE_CONSTANT) {
+         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+            if(mask & (1 << chan)) {
+               LLVMValueRef dadx = bld->dadx[attrib][chan];
+               LLVMValueRef dady = bld->dady[attrib][chan];
+               LLVMValueRef res;
+
+               res = bld->attribs_pre[attrib][chan];
+
+               if(bld->xstep)
+                  res = lp_build_add(&bld->base, res, dadx);
+
+               if(bld->ystep)
+                  res = lp_build_add(&bld->base, res, dady);
+
+               bld->attribs_pre[attrib][chan] = res;
+
+               if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
+                  LLVMValueRef w = bld->pos[3];
+                  assert(attrib != 0);
+                  if(!oow)
+                     oow = lp_build_rcp(&bld->base, w);
+                  res = lp_build_mul(&bld->base, res, oow);
+               }
+
+               attrib_name(res, attrib, chan, "");
+
+               bld->attribs[attrib][chan] = res;
+            }
+         }
+      }
+   }
+}
+
+
+/**
+ * Store the initial position x and y values in attribute slot 0.
+ *
+ * x0 and y0 are labelled "pos.x"/"pos.y" and stored unchanged; z and w are interpolated separately.
+ */
+static void
+pos_init(struct lp_build_interp_soa_context *bld,
+         LLVMValueRef x0,
+         LLVMValueRef y0)
+{
+   lp_build_name(x0, "pos.x");
+   lp_build_name(y0, "pos.y");
+
+   bld->attribs[0][0] = x0;
+   bld->attribs[0][1] = y0;
+}
+
+/* Advance pos.x / pos.y by the constant xstep / ystep; a zero step emits no add. */
+static void
+pos_update(struct lp_build_interp_soa_context *bld)
+{
+   LLVMValueRef x = bld->attribs[0][0];
+   LLVMValueRef y = bld->attribs[0][1];
+
+   if(bld->xstep)
+      x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep));
+
+   if(bld->ystep)
+      y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep));
+
+   lp_build_name(x, "pos.x");
+   lp_build_name(y, "pos.y");
+
+   bld->attribs[0][0] = x;
+   bld->attribs[0][1] = y;
+}
+
+/* Set up bld: scan the TGSI input declarations, load coefficients, compute the initial attributes, then scale the gradients. */
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         const struct tgsi_token *tokens,
+                         LLVMBuilderRef builder,
+                         union lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0,
+                         int xstep,
+                         int ystep)
+{
+   struct tgsi_parse_context parse;
+   struct tgsi_full_declaration *decl;
+
+   memset(bld, 0, sizeof *bld);
+
+   lp_build_context_init(&bld->base, builder, type);
+
+   /* Convenience views: attribute slot 0 is the position, slots 1.. are the shader inputs */
+   bld->pos = bld->attribs[0];
+   bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
+
+   /* Position: only z and w are interpolated (linearly); x and y are stored by pos_init */
+   bld->num_attribs = 1;
+   bld->mask[0] = TGSI_WRITEMASK_ZW;
+   bld->mode[0] = TGSI_INTERPOLATE_LINEAR;
+
+   /* Inputs: record usage mask and interpolation mode of each declared input, shifted by one slot */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         decl = &parse.FullToken.FullDeclaration;
+         if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+            unsigned first, last, mask;
+            unsigned attrib;
+
+            first = decl->DeclarationRange.First;
+            last = decl->DeclarationRange.Last;
+            mask = decl->Declaration.UsageMask;
+
+            for( attrib = first; attrib <= last; ++attrib ) {
+               bld->mask[1 + attrib] = mask;
+               bld->mode[1 + attrib] = decl->Declaration.Interpolate;
+            }
+
+            bld->num_attribs = MAX2(bld->num_attribs, 1 + last + 1);
+         }
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+   tgsi_parse_free( &parse );
+
+   coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+
+   pos_init(bld, x0, y0);
+
+   attribs_init(bld);
+   /* Steps are applied only now: attribs_init above multiplies x/y by the unscaled dadx/dady. */
+   bld->xstep = xstep;
+   bld->ystep = ystep;
+
+   coeffs_update(bld);
+}
+
+
+/**
+ * Advance the position and the interpolated inputs by one (xstep, ystep) step: pos.x/y first, then the rest.
+ */
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld)
+{
+   pos_update(bld);
+
+   attribs_update(bld);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
new file mode 100644
index 0000000..9194f62
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -0,0 +1,99 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * Special attention is given to the interpolation of side-by-side quads.
+ * Multiplications are performed only for the first quad. Inputs for
+ * subsequent quads are interpolated exclusively with additions, plus a
+ * perspective divide where necessary.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_INTERP_H
+#define LP_BLD_INTERP_H
+
+
+#include <llvm-c/Core.h>
+
+#include "tgsi/tgsi_exec.h"
+
+#include "lp_bld_type.h"
+
+
+struct tgsi_token;
+
+/* Code-generation state for interpolating the fragment position and the shader inputs. */
+struct lp_build_interp_soa_context
+{
+   struct lp_build_context base;
+   /* Attribute slot 0 is the position; slot 1 + i is shader input i. */
+   unsigned num_attribs;
+   unsigned mask[1 + PIPE_MAX_SHADER_INPUTS];
+   unsigned mode[1 + PIPE_MAX_SHADER_INPUTS];
+   /* Per-channel coefficients; once init returns, dadx/dady are already scaled by xstep/ystep. */
+   LLVMValueRef a0  [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   /* Position increment applied by each lp_build_interp_soa_update call. */
+   int xstep;
+   int ystep;
+
+   /* Attribute values before the perspective multiply, kept so updates can simply add */
+   LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+   /* Final attribute values (after the perspective multiply, where applicable). */
+   LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+   /*
+    * Convenience pointers: pos aliases attribs[0], inputs aliases attribs[1..]. Callers may read these.
+    */
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[NUM_CHANNELS];
+};
+
+/* Initialise bld and emit the interpolation at (x0, y0); xstep/ystep give the increment used by each later update. */
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         const struct tgsi_token *tokens,
+                         LLVMBuilderRef builder,
+                         union lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0,
+                         int xstep,
+                         int ystep);
+/* Advance bld->pos and the interpolated bld->inputs by one (xstep, ystep) increment. */
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld);
+
+
+#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 86380a1..d42ab99 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -54,11 +54,9 @@
                   const struct tgsi_token *tokens,
                   union lp_type type,
                   struct lp_build_mask_context *mask,
-                  LLVMValueRef *pos,
-                  LLVMValueRef a0_ptr,
-                  LLVMValueRef dadx_ptr,
-                  LLVMValueRef dady_ptr,
                   LLVMValueRef consts_ptr,
+                  const LLVMValueRef *pos,
+                  const LLVMValueRef (*inputs)[4],
                   LLVMValueRef (*outputs)[4],
                   LLVMValueRef samplers_ptr);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 60cf5e9..1335ba8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -83,19 +83,12 @@
 {
    struct lp_build_context base;
 
-   LLVMValueRef x, y, w;
-   LLVMValueRef a0_ptr;
-   LLVMValueRef dadx_ptr;
-   LLVMValueRef dady_ptr;
-
    LLVMValueRef consts_ptr;
+   const LLVMValueRef *pos;
+   const LLVMValueRef (*inputs)[NUM_CHANNELS];
    LLVMValueRef (*outputs)[NUM_CHANNELS];
    LLVMValueRef samplers_ptr;
 
-   LLVMValueRef oow;
-
-   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-
    LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
    LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
 
@@ -1350,93 +1343,16 @@
    return 1;
 }
 
-static void
-emit_declaration(
-   struct lp_build_tgsi_soa_context *bld,
-   struct tgsi_full_declaration *decl )
-{
-   if( decl->Declaration.File == TGSI_FILE_INPUT ) {
-      LLVMBuilderRef builder = bld->base.builder;
-      unsigned first, last, mask;
-      unsigned attrib, chan;
-
-      first = decl->DeclarationRange.First;
-      last = decl->DeclarationRange.Last;
-      mask = decl->Declaration.UsageMask;
-
-      for( attrib = first; attrib <= last; attrib++ ) {
-         for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
-            LLVMValueRef input = bld->base.undef;
-
-            if( mask & (1 << chan) ) {
-               LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), (1 + attrib)*NUM_CHANNELS + chan, 0);
-               LLVMValueRef a0;
-               LLVMValueRef dadx;
-               LLVMValueRef dady;
-
-               switch( decl->Declaration.Interpolate ) {
-               case TGSI_INTERPOLATE_PERSPECTIVE:
-                  /* fall-through */
-
-               case TGSI_INTERPOLATE_LINEAR: {
-                  LLVMValueRef dadx_ptr = LLVMBuildGEP(builder, bld->dadx_ptr, &index, 1, "");
-                  LLVMValueRef dady_ptr = LLVMBuildGEP(builder, bld->dady_ptr, &index, 1, "");
-                  dadx = LLVMBuildLoad(builder, dadx_ptr, "");
-                  dady = LLVMBuildLoad(builder, dady_ptr, "");
-                  dadx = lp_build_broadcast_scalar(&bld->base, dadx);
-                  dady = lp_build_broadcast_scalar(&bld->base, dady);
-                  lp_build_name(dadx, "dadx_%u.%c", attrib, "xyzw"[chan]);
-                  lp_build_name(dady, "dady_%u.%c", attrib, "xyzw"[chan]);
-                  /* fall-through */
-               }
-
-               case TGSI_INTERPOLATE_CONSTANT: {
-                  LLVMValueRef a0_ptr = LLVMBuildGEP(builder, bld->a0_ptr, &index, 1, "");
-                  a0 = LLVMBuildLoad(builder, a0_ptr, "");
-                  a0 = lp_build_broadcast_scalar(&bld->base, a0);
-                  lp_build_name(a0, "a0_%u.%c", attrib, "xyzw"[chan]);
-                  break;
-               }
-
-               default:
-                  assert(0);
-                  break;
-               }
-
-               input = a0;
-
-               if (decl->Declaration.Interpolate != TGSI_INTERPOLATE_CONSTANT) {
-                  input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->x, dadx));
-                  input = lp_build_add(&bld->base, input, lp_build_mul(&bld->base, bld->y, dady));
-               }
-
-               if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
-                  if(!bld->oow)
-                     bld->oow = lp_build_rcp(&bld->base, bld->w);
-                  input = lp_build_mul(&bld->base, input, bld->oow);
-               }
-
-               lp_build_name(input, "input%u.%c", attrib, "xyzw"[chan]);
-            }
-
-            bld->inputs[attrib][chan] = input;
-         }
-      }
-   }
-}
-
 
 void
 lp_build_tgsi_soa(LLVMBuilderRef builder,
                   const struct tgsi_token *tokens,
                   union lp_type type,
                   struct lp_build_mask_context *mask,
-                  LLVMValueRef *pos,
-                  LLVMValueRef a0_ptr,
-                  LLVMValueRef dadx_ptr,
-                  LLVMValueRef dady_ptr,
                   LLVMValueRef consts_ptr,
-                  LLVMValueRef (*outputs)[4],
+                  const LLVMValueRef *pos,
+                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
+                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                   LLVMValueRef samplers_ptr)
 {
    struct lp_build_tgsi_soa_context bld;
@@ -1448,12 +1364,8 @@
    memset(&bld, 0, sizeof bld);
    lp_build_context_init(&bld.base, builder, type);
    bld.mask = mask;
-   bld.x = pos[0];
-   bld.y = pos[1];
-   bld.w = pos[3];
-   bld.a0_ptr = a0_ptr;
-   bld.dadx_ptr = dadx_ptr;
-   bld.dady_ptr = dady_ptr;
+   bld.pos = pos;
+   bld.inputs = inputs;
    bld.outputs = outputs;
    bld.consts_ptr = consts_ptr;
    bld.samplers_ptr = samplers_ptr;
@@ -1465,9 +1377,7 @@
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
-            emit_declaration( &bld, &parse.FullToken.FullDeclaration );
-         }
+         /* Input already interpolated */
          break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index e639f9c..361b306 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -69,9 +69,11 @@
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_parse.h"
 #include "lp_bld_type.h"
+#include "lp_bld_const.h"
 #include "lp_bld_conv.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_depth.h"
+#include "lp_bld_interp.h"
 #include "lp_bld_tgsi.h"
 #include "lp_bld_alpha.h"
 #include "lp_bld_blend.h"
@@ -88,22 +90,16 @@
 static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
 
 
-/**
- * Generate the position vectors.
- *
- * TODO: This should be called only once per fragment pipeline, for the first
- * quad, and the neighboring quad positions obtained by additions.
- *
- * Parameter x, y are the integer values with the quad upper left coordinates.
+/*
+ * Derive from the quad's upper left scalar coordinates the coordinates for
+ * all other quad pixels
  */
 static void
-generate_pos(LLVMBuilderRef builder,
-             LLVMValueRef x,
-             LLVMValueRef y,
-             LLVMValueRef a0_ptr,
-             LLVMValueRef dadx_ptr,
-             LLVMValueRef dady_ptr,
-             LLVMValueRef *pos)
+generate_pos0(LLVMBuilderRef builder,
+              LLVMValueRef x,
+              LLVMValueRef y,
+              LLVMValueRef *x0,
+              LLVMValueRef *y0)
 {
    LLVMTypeRef int_elem_type = LLVMInt32Type();
    LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE);
@@ -111,14 +107,8 @@
    LLVMTypeRef vec_type = LLVMVectorType(elem_type, QUAD_SIZE);
    LLVMValueRef x_offsets[QUAD_SIZE];
    LLVMValueRef y_offsets[QUAD_SIZE];
-   unsigned chan;
    unsigned i;
 
-   /*
-    * Derive from the quad's upper left scalar coordinates the coordinates for
-    * all other quad pixels
-    */
-
    x = lp_build_broadcast(builder, int_vec_type, x);
    y = lp_build_broadcast(builder, int_vec_type, y);
 
@@ -130,33 +120,8 @@
    x = LLVMBuildAdd(builder, x, LLVMConstVector(x_offsets, QUAD_SIZE), "");
    y = LLVMBuildAdd(builder, y, LLVMConstVector(y_offsets, QUAD_SIZE), "");
 
-   x = LLVMBuildSIToFP(builder, x, vec_type, "");
-   y = LLVMBuildSIToFP(builder, y, vec_type, "");
-
-   pos[0] = x;
-   pos[1] = y;
-
-   /* 
-    * Calculate z and w from the interpolation factors.
-    */
-
-   for(chan = 2; chan < NUM_CHANNELS; ++chan) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
-      LLVMValueRef a0   = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr,   &index, 1, ""), "");
-      LLVMValueRef dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
-      LLVMValueRef dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
-      LLVMValueRef res;
-      a0   = lp_build_broadcast(builder, vec_type, a0);
-      dadx = lp_build_broadcast(builder, vec_type, dadx);
-      dady = lp_build_broadcast(builder, vec_type, dady);
-      res = a0;
-      res = LLVMBuildAdd(builder, res, LLVMBuildMul(builder, dadx, x, ""), "");
-      res = LLVMBuildAdd(builder, res, LLVMBuildMul(builder, dady, y, ""), "");
-      pos[chan] = res;
-   }
-
-   for(chan = 0; chan < NUM_CHANNELS; ++chan)
-      lp_build_name(pos[chan], "pos.%c", "xyzw"[chan]);
+   *x0 = LLVMBuildSIToFP(builder, x, vec_type, "");
+   *y0 = LLVMBuildSIToFP(builder, y, vec_type, "");
 }
 
 
@@ -218,11 +183,7 @@
             union lp_type type,
             LLVMValueRef context_ptr,
             unsigned i,
-            LLVMValueRef x,
-            LLVMValueRef y,
-            LLVMValueRef a0_ptr,
-            LLVMValueRef dadx_ptr,
-            LLVMValueRef dady_ptr,
+            const struct lp_build_interp_soa_context *interp,
             LLVMValueRef *pmask,
             LLVMValueRef *color,
             LLVMValueRef depth_ptr)
@@ -233,8 +194,8 @@
    LLVMTypeRef int_vec_type;
    LLVMValueRef consts_ptr;
    LLVMValueRef samplers_ptr;
-   LLVMValueRef pos[NUM_CHANNELS];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   LLVMValueRef z = interp->pos[2];
    struct lp_build_mask_context mask;
    boolean early_depth_test;
    unsigned attrib;
@@ -247,8 +208,6 @@
    consts_ptr = lp_jit_context_constants(builder, context_ptr);
    samplers_ptr = lp_jit_context_samplers(builder, context_ptr);
 
-   generate_pos(builder, x, y, a0_ptr, dadx_ptr, dady_ptr, pos);
-
    lp_build_mask_begin(&mask, builder, type, *pmask);
 
    early_depth_test =
@@ -260,14 +219,14 @@
 
    if(early_depth_test)
       generate_depth(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
+                     type, &mask,
+                     z, depth_ptr);
 
    memset(outputs, 0, sizeof outputs);
 
    lp_build_tgsi_soa(builder, tokens, type, &mask,
-                     pos, a0_ptr, dadx_ptr, dady_ptr,
-                     consts_ptr, outputs, samplers_ptr);
+                     consts_ptr, interp->pos, interp->inputs,
+                     outputs, samplers_ptr);
 
    for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -300,7 +259,7 @@
 
             case TGSI_SEMANTIC_POSITION:
                if(chan == 2)
-                  pos[2] = outputs[attrib][chan];
+                  z = outputs[attrib][chan];
                break;
             }
          }
@@ -309,8 +268,8 @@
 
    if(!early_depth_test)
       generate_depth(lp, builder, &key->depth,
-                          type, &mask,
-                          pos[2], depth_ptr);
+                     type, &mask,
+                     z, depth_ptr);
 
    lp_build_mask_end(&mask);
 
@@ -400,6 +359,9 @@
    LLVMValueRef depth_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
+   LLVMValueRef x0;
+   LLVMValueRef y0;
+   struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
    LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
    LLVMValueRef blend_mask;
@@ -516,14 +478,19 @@
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
+   generate_pos0(builder, x, y, &x0, &y0);
+
+   lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type,
+                            a0_ptr, dadx_ptr, dady_ptr,
+                            x0, y0, 2, 0);
+
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
       LLVMValueRef out_color[NUM_CHANNELS];
-      LLVMValueRef x_i;
       LLVMValueRef depth_ptr_i;
 
-      /* TODO: Reuse position interpolation */
-      x_i = LLVMBuildAdd(builder, x, LLVMConstInt(LLVMInt32Type(), 2*i, 0), "");
+      if(i != 0)
+         lp_build_interp_soa_update(&interp);
 
       fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
       depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
@@ -533,8 +500,7 @@
                   fs_type,
                   context_ptr,
                   i,
-                  x_i, y,
-                  a0_ptr, dadx_ptr, dady_ptr,
+                  &interp,
                   &fs_mask[i],
                   out_color,
                   depth_ptr_i);