Merge commit 'origin/master' into gallium-0.2
diff --git a/progs/glsl/Makefile b/progs/glsl/Makefile
index 0849223..ff6359d 100644
--- a/progs/glsl/Makefile
+++ b/progs/glsl/Makefile
@@ -202,6 +202,14 @@
 
 
 
+vert-or-frag-only.o: vert-or-frag-only.c extfuncs.h shaderutil.h
+	$(CC) -c -I$(INCDIR) $(CFLAGS) vert-or-frag-only.c
+
+vert-or-frag-only: vert-or-frag-only.o shaderutil.o
+	$(CC) -I$(INCDIR) $(CFLAGS) $(LDFLAGS) vert-or-frag-only.o shaderutil.o $(LIBS) -o $@
+
+
+
 vert-tex.o: vert-tex.c extfuncs.h shaderutil.h
 	$(APP_CC) -c -I$(INCDIR) $(CFLAGS) vert-tex.c
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 25b4ee8..4a95413 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -420,7 +420,7 @@
 		 BRW_MATH_FUNCTION_EXP, 
 		 brw_writemask(dst, WRITEMASK_Z),
 		 brw_swizzle1(arg0, 0), 
-		 BRW_MATH_PRECISION_PARTIAL);
+		 BRW_MATH_PRECISION_FULL);
    }  
 
    if (dst.dw1.bits.writemask & WRITEMASK_W) {
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index e3ce149..fb1a051 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -32,6 +32,7 @@
 #include "main/mtypes.h"
 #include "main/macros.h"
 #include "main/bufferobj.h"
+#include "main/state.h"
 #include "swrast/swrast.h"
 
 #include "intel_screen.h"
@@ -149,8 +150,18 @@
    return count;
 }
 
-
-
+/**
+ * Returns the low Y value of the vertical range given, flipped according to
+ * whether the framebuffer is or not.
+ */
+static inline int
+y_flip(struct gl_framebuffer *fb, int y, int height)
+{
+   if (fb->Name != 0)
+      return y;
+   else
+      return fb->Height - y - height;
+}
 
 /*
  * Render a bitmap.
@@ -171,6 +182,11 @@
    unsigned int num_cliprects;
    drm_clip_rect_t *cliprects;
    int x_off, y_off;
+   GLsizei bitmap_width = width;
+   GLsizei bitmap_height = height;
+
+   /* Update draw buffer bounds */
+   _mesa_update_state(ctx);
 
    if (!dst)
        return GL_FALSE;
@@ -202,8 +218,6 @@
 
    intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
    if (num_cliprects != 0) {
-      drm_clip_rect_t dest_rect;
-      GLint srcx = 0, srcy = 0;
       GLuint i;
       GLint orig_dstx = dstx;
       GLint orig_dsty = dsty;
@@ -214,45 +228,26 @@
 				&dstx, &dsty, &width, &height))
             goto out;
 
-      /* Convert from GL to hardware coordinates.  Transform original points
-       * along with it so that we can look at cliprects in hw coordinates and
-       * map back to points in the source space.
-       */
-      if (fb->Name == 0) {
-	 /* bitmap to a system framebuffer */
-	 dstx = x_off + dstx;
-	 dsty = y_off + (fb->Height - dsty - height);
-	 orig_dstx = x_off + orig_dstx;
-	 orig_dsty = y_off + (fb->Height - orig_dsty - height);
-      } else {
-	 /* bitmap to a user framebuffer object */
-	 dstx = x_off + dstx;
-	 dsty = y_off + dsty;
-	 orig_dstx = x_off + orig_dstx;
-	 orig_dsty = y_off + orig_dsty;
-      }
-
-      dest_rect.x1 = dstx;
-      dest_rect.y1 = dsty;
-      dest_rect.x2 = dstx + width;
-      dest_rect.y2 = dsty + height;
+      dstx = x_off + dstx;
+      dsty = y_off + y_flip(fb, dsty, height);
 
       for (i = 0; i < num_cliprects; i++) {
-         drm_clip_rect_t rect;
-	 int box_w, box_h;
+	 int box_x, box_y, box_w, box_h;
 	 GLint px, py;
 	 GLuint stipple[32];  
 
-         if (!intel_intersect_cliprects(&rect, &dest_rect, &cliprects[i]))
-            continue;
+	 box_x = dstx;
+	 box_y = dsty;
+	 box_w = width;
+	 box_h = height;
 
-	 /* Now go back to GL coordinates to figure out what subset of
-	  * the bitmap we are uploading for this cliprect:
-	  */
-	 box_w = rect.x2 - rect.x1;
-	 box_h = rect.y2 - rect.y1;
-	 srcx = rect.x1 - orig_dstx;
-	 srcy = rect.y1 - orig_dsty;
+	 /* Clip to drawable cliprect */
+         if (!_mesa_clip_to_region(cliprects[i].x1,
+				   cliprects[i].y1,
+				   cliprects[i].x2 - cliprects[i].x1,
+				   cliprects[i].y2 - cliprects[i].y1,
+				   &box_x, &box_y, &box_w, &box_h))
+	    continue;
 
 #define DY 32
 #define DX 32
@@ -273,10 +268,16 @@
 
 	       /* May need to adjust this when padding has been introduced in
 		* sz above:
+		*
+		* Have to translate destination coordinates back into source
+		* coordinates.
 		*/
-	       if (get_bitmap_rect(width, height, unpack, 
+	       if (get_bitmap_rect(bitmap_width, bitmap_height, unpack,
 				   bitmap,
-				   srcx + px, srcy + py, w, h,
+				   -orig_dstx + (box_x + px - x_off),
+				   -orig_dsty + y_flip(fb,
+						       box_y + py - y_off, h),
+				   w, h,
 				   (GLubyte *)stipple,
 				   8,
 				   fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0)
@@ -293,8 +294,8 @@
 						  dst->buffer,
 						  0,
 						  dst->tiling,
-						  rect.x1 + px,
-						  rect.y2 - (py + h),
+						  box_x + px,
+						  box_y + py,
 						  w, h,
 						  logic_op);
 	    } 
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
index 61d1296..447c649 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -266,6 +266,9 @@
    drm_clip_rect_t *cliprects;
    int x_off, y_off;
 
+   /* Update draw buffer bounds */
+   _mesa_update_state(ctx);
+
    /* Copypixels can be more than a straight copy.  Ensure all the
     * extra operations are disabled:
     */
@@ -308,8 +311,8 @@
       /* Clip to source buffer. */
       orig_srcx = srcx;
       orig_srcy = srcy;
-      if (!_mesa_clip_to_region(read_fb->_Xmin, read_fb->_Ymin,
-				read_fb->_Xmax, read_fb->_Ymax,
+      if (!_mesa_clip_to_region(0, 0,
+				read_fb->Width, read_fb->Height,
 				&srcx, &srcy, &width, &height))
 	 goto out;
       /* Adjust dst coords for our post-clipped source origin */
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index b893990..08437aa 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -112,56 +112,47 @@
                                                        intelImage->level);
       const GLint orig_x = x;
       const GLint orig_y = y;
-      const struct gl_framebuffer *fb = ctx->DrawBuffer;
+      GLshort src_pitch;
 
-      if (_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax,
-                               &x, &y, &width, &height)) {
-	 GLshort src_pitch;
+      /* Update dst for clipped src.  Need to also clip the source rect. */
+      dstx += x - orig_x;
+      dsty += y - orig_y;
 
-         /* Update dst for clipped src.  Need to also clip the source rect.
-          */
-         dstx += x - orig_x;
-         dsty += y - orig_y;
+      /* image_offset may be non-page-aligned, but that's illegal for tiling. */
+      assert(intelImage->mt->region->tiling == I915_TILING_NONE);
 
-	 /* image_offset may be non-page-aligned, but that's illegal for tiling.
+      if (ctx->ReadBuffer->Name == 0) {
+	 /* reading from a window, adjust x, y */
+	 __DRIdrawablePrivate *dPriv = intel->driDrawable;
+	 y = dPriv->y + (dPriv->h - (y + height));
+	 x += dPriv->x;
+
+	 /* Invert the data coming from the source rectangle due to GL
+	  * and hardware disagreeing on where y=0 is.
+	  *
+	  * It appears that our offsets and pitches get mangled
+	  * appropriately by the hardware, and we don't need to adjust them
+	  * on our own.
 	  */
-	 assert(intelImage->mt->region->tiling == I915_TILING_NONE);
-
-         if (ctx->ReadBuffer->Name == 0) {
-            /* reading from a window, adjust x, y */
-            __DRIdrawablePrivate *dPriv = intel->driDrawable;
-	    y = dPriv->y + (dPriv->h - (y + height));
-            x += dPriv->x;
-
-	    /* Invert the data coming from the source rectangle due to GL
-	     * and hardware disagreeing on where y=0 is.
-	     *
-	     * It appears that our offsets and pitches get mangled
-	     * appropriately by the hardware, and we don't need to adjust them
-	     * on our own.
-	     */
-	    src_pitch = -src->pitch;
-         }
-         else {
-            /* reading from a FBO, y is already oriented the way we like */
-	    src_pitch = src->pitch;
-         }
-
-         intelEmitCopyBlit(intel,
-                           intelImage->mt->cpp,
-                           src_pitch,
-                           src->buffer,
-                           0,
-			   src->tiling,
-                           intelImage->mt->pitch,
-                           intelImage->mt->region->buffer,
-                           image_offset,
-			   intelImage->mt->region->tiling,
-                           x, y, dstx, dsty, width, height,
-			   GL_COPY);
+	 src_pitch = -src->pitch;
+      } else {
+	 /* reading from a FBO, y is already oriented the way we like */
+	 src_pitch = src->pitch;
       }
-   }
 
+      intelEmitCopyBlit(intel,
+			intelImage->mt->cpp,
+			src_pitch,
+			src->buffer,
+			0,
+			src->tiling,
+			intelImage->mt->pitch,
+			intelImage->mt->region->buffer,
+			image_offset,
+			intelImage->mt->region->tiling,
+			x, y, dstx, dsty, width, height,
+			GL_COPY);
+   }
 
    UNLOCK_HARDWARE(intel);
 
@@ -188,6 +179,7 @@
       _mesa_select_tex_object(ctx, texUnit, target);
    struct gl_texture_image *texImage =
       _mesa_select_tex_image(ctx, texObj, target, level);
+   int srcx, srcy, dstx, dsty, height;
 
    if (border)
       goto fail;
@@ -199,10 +191,20 @@
                           width, border,
                           GL_RGBA, CHAN_TYPE, NULL,
                           &ctx->DefaultPacking, texObj, texImage);
+   srcx = x;
+   srcy = y;
+   dstx = 0;
+   dsty = 0;
+   height = 1;
+   if (!_mesa_clip_copytexsubimage(ctx,
+				   &dstx, &dsty,
+				   &srcx, &srcy,
+				   &width, &height))
+      return;
 
    if (!do_copy_texsubimage(intel_context(ctx), target,
                             intel_texture_image(texImage),
-                            internalFormat, 0, 0, x, y, width, 1))
+                            internalFormat, 0, 0, x, y, width, height))
       goto fail;
 
    return;
@@ -224,10 +226,21 @@
       _mesa_select_tex_object(ctx, texUnit, target);
    struct gl_texture_image *texImage =
       _mesa_select_tex_image(ctx, texObj, target, level);
+   int srcx, srcy, dstx, dsty;
 
    if (border)
       goto fail;
 
+   srcx = x;
+   srcy = y;
+   dstx = 0;
+   dsty = 0;
+   if (!_mesa_clip_copytexsubimage(ctx,
+				   &dstx, &dsty,
+				   &srcx, &srcy,
+				   &width, &height))
+      return;
+
    /* Setup or redefine the texture object, mipmap tree and texture
     * image.  Don't populate yet.  
     */
diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c
index d3282d4..c205b4b 100644
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -5152,7 +5152,7 @@
 
    /* right clipping */
    if (*x + *width > xmax)
-      *width -= (*x + *width - xmax - 1);
+      *width -= (*x + *width - xmax);
 
    if (*width <= 0)
       return GL_FALSE;
@@ -5165,7 +5165,7 @@
 
    /* top (or bottom) clipping */
    if (*y + *height > ymax)
-      *height -= (*y + *height - ymax - 1);
+      *height -= (*y + *height - ymax);
 
    if (*height <= 0)
       return GL_FALSE;
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index 13d90e7..9e051ac 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -46,6 +46,67 @@
 
 
 /**
+ * \name Support macros for do_row and do_row_3d
+ *
+ * The macro madness is here for two reasons.  First, it compacts the code
+ * slightly.  Second, it makes it much easier to adjust the specifics of the
+ * filter to tune the rounding characteristics.
+ */
+/*@{*/
+#define DECLARE_ROW_POINTERS(t, e) \
+      const t(*rowA)[e] = (const t(*)[e]) srcRowA; \
+      const t(*rowB)[e] = (const t(*)[e]) srcRowB; \
+      const t(*rowC)[e] = (const t(*)[e]) srcRowC; \
+      const t(*rowD)[e] = (const t(*)[e]) srcRowD; \
+      t(*dst)[e] = (t(*)[e]) dstRow
+
+#define DECLARE_ROW_POINTERS0(t) \
+      const t *rowA = (const t *) srcRowA; \
+      const t *rowB = (const t *) srcRowB; \
+      const t *rowC = (const t *) srcRowC; \
+      const t *rowD = (const t *) srcRowD; \
+      t *dst = (t *) dstRow
+
+#define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \
+   ((unsigned) Aj + (unsigned) Ak \
+    + (unsigned) Bj + (unsigned) Bk \
+    + (unsigned) Cj + (unsigned) Ck \
+    + (unsigned) Dj + (unsigned) Dk \
+    + 4) >> 3
+
+#define FILTER_3D(e) \
+   do { \
+      dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \
+                                rowB[j][e], rowB[k][e], \
+                                rowC[j][e], rowC[k][e], \
+                                rowD[j][e], rowD[k][e]); \
+   } while(0)
+   
+#define FILTER_F_3D(e) \
+   do { \
+      dst[i][e] = (rowA[j][e] + rowA[k][e] \
+                   + rowB[j][e] + rowB[k][e] \
+                   + rowC[j][e] + rowC[k][e] \
+                   + rowD[j][e] + rowD[k][e]) * 0.125F; \
+   } while(0)
+
+#define FILTER_HF_3D(e) \
+   do { \
+      const GLfloat aj = _mesa_half_to_float(rowA[j][e]); \
+      const GLfloat ak = _mesa_half_to_float(rowA[k][e]); \
+      const GLfloat bj = _mesa_half_to_float(rowB[j][e]); \
+      const GLfloat bk = _mesa_half_to_float(rowB[k][e]); \
+      const GLfloat cj = _mesa_half_to_float(rowC[j][e]); \
+      const GLfloat ck = _mesa_half_to_float(rowC[k][e]); \
+      const GLfloat dj = _mesa_half_to_float(rowD[j][e]); \
+      const GLfloat dk = _mesa_half_to_float(rowD[k][e]); \
+      dst[i][e] = _mesa_float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \
+                                      * 0.125F); \
+   } while(0)
+/*@}*/
+
+
+/**
  * Average together two rows of a source image to produce a single new
  * row in the dest image.  It's legal for the two source rows to point
  * to the same data.  The source width must be equal to either the
@@ -361,7 +422,7 @@
          const GLint rowAr0 = rowA[j] & 0x1f;
          const GLint rowAr1 = rowA[k] & 0x1f;
          const GLint rowBr0 = rowB[j] & 0x1f;
-         const GLint rowBr1 = rowB[k] & 0xf;
+         const GLint rowBr1 = rowB[k] & 0x1f;
          const GLint rowAg0 = (rowA[j] >> 5) & 0x1f;
          const GLint rowAg1 = (rowA[k] >> 5) & 0x1f;
          const GLint rowBg0 = (rowB[j] >> 5) & 0x1f;
@@ -412,6 +473,384 @@
 }
 
 
+/**
+ * Average together four rows of a source image to produce a single new
+ * row in the dest image.  It's legal for the two source rows to point
+ * to the same data.  The source width must be equal to either the
+ * dest width or two times the dest width.
+ *
+ * \param datatype  GL pixel type \c GL_UNSIGNED_BYTE, \c GL_UNSIGNED_SHORT,
+ *                  \c GL_FLOAT, etc.
+ * \param comps     number of components per pixel (1..4)
+ * \param srcWidth  Width of a row in the source data
+ * \param srcRowA   Pointer to one of the rows of source data
+ * \param srcRowB   Pointer to one of the rows of source data
+ * \param srcRowC   Pointer to one of the rows of source data
+ * \param srcRowD   Pointer to one of the rows of source data
+ * \param dstWidth  Width of a row in the destination data
+ * \param srcRowA   Pointer to the row of destination data
+ */
+static void
+do_row_3D(GLenum datatype, GLuint comps, GLint srcWidth,
+          const GLvoid *srcRowA, const GLvoid *srcRowB,
+          const GLvoid *srcRowC, const GLvoid *srcRowD,
+          GLint dstWidth, GLvoid *dstRow)
+{
+   const GLuint k0 = (srcWidth == dstWidth) ? 0 : 1;
+   const GLuint colStride = (srcWidth == dstWidth) ? 1 : 2;
+   GLuint i, j, k;
+
+   ASSERT(comps >= 1);
+   ASSERT(comps <= 4);
+
+   if ((datatype == GL_UNSIGNED_BYTE) && (comps == 4)) {
+      DECLARE_ROW_POINTERS(GLubyte, 4);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+         FILTER_3D(2);
+         FILTER_3D(3);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_BYTE) && (comps == 3)) {
+      DECLARE_ROW_POINTERS(GLubyte, 3);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+         FILTER_3D(2);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_BYTE) && (comps == 2)) {
+      DECLARE_ROW_POINTERS(GLubyte, 2);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_BYTE) && (comps == 1)) {
+      DECLARE_ROW_POINTERS(GLubyte, 1);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_SHORT) && (comps == 4)) {
+      DECLARE_ROW_POINTERS(GLushort, 4);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+         FILTER_3D(2);
+         FILTER_3D(3);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_SHORT) && (comps == 3)) {
+      DECLARE_ROW_POINTERS(GLushort, 3);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+         FILTER_3D(2);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_SHORT) && (comps == 2)) {
+      DECLARE_ROW_POINTERS(GLushort, 2);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+         FILTER_3D(1);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_SHORT) && (comps == 1)) {
+      DECLARE_ROW_POINTERS(GLushort, 1);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_3D(0);
+      }
+   }
+   else if ((datatype == GL_FLOAT) && (comps == 4)) {
+      DECLARE_ROW_POINTERS(GLfloat, 4);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_F_3D(0);
+         FILTER_F_3D(1);
+         FILTER_F_3D(2);
+         FILTER_F_3D(3);
+      }
+   }
+   else if ((datatype == GL_FLOAT) && (comps == 3)) {
+      DECLARE_ROW_POINTERS(GLfloat, 3);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_F_3D(0);
+         FILTER_F_3D(1);
+         FILTER_F_3D(2);
+      }
+   }
+   else if ((datatype == GL_FLOAT) && (comps == 2)) {
+      DECLARE_ROW_POINTERS(GLfloat, 2);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_F_3D(0);
+         FILTER_F_3D(1);
+      }
+   }
+   else if ((datatype == GL_FLOAT) && (comps == 1)) {
+      DECLARE_ROW_POINTERS(GLfloat, 1);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_F_3D(0);
+      }
+   }
+   else if ((datatype == GL_HALF_FLOAT_ARB) && (comps == 4)) {
+      DECLARE_ROW_POINTERS(GLhalfARB, 4);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_HF_3D(0);
+         FILTER_HF_3D(1);
+         FILTER_HF_3D(2);
+         FILTER_HF_3D(3);
+      }
+   }
+   else if ((datatype == GL_HALF_FLOAT_ARB) && (comps == 3)) {
+      DECLARE_ROW_POINTERS(GLhalfARB, 4);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_HF_3D(0);
+         FILTER_HF_3D(1);
+         FILTER_HF_3D(2);
+      }
+   }
+   else if ((datatype == GL_HALF_FLOAT_ARB) && (comps == 2)) {
+      DECLARE_ROW_POINTERS(GLhalfARB, 4);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_HF_3D(0);
+         FILTER_HF_3D(1);
+      }
+   }
+   else if ((datatype == GL_HALF_FLOAT_ARB) && (comps == 1)) {
+      DECLARE_ROW_POINTERS(GLhalfARB, 4);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         FILTER_HF_3D(0);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_INT) && (comps == 1)) {
+      const GLuint *rowA = (const GLuint *) srcRowA;
+      const GLuint *rowB = (const GLuint *) srcRowB;
+      const GLuint *rowC = (const GLuint *) srcRowC;
+      const GLuint *rowD = (const GLuint *) srcRowD;
+      GLfloat *dst = (GLfloat *) dstRow;
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k])
+                               + ((uint64_t) rowB[j] + (uint64_t) rowB[k])
+                               + ((uint64_t) rowC[j] + (uint64_t) rowC[k])
+                               + ((uint64_t) rowD[j] + (uint64_t) rowD[k]));
+         dst[i] = (GLfloat)((double) tmp * 0.125);
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_SHORT_5_6_5) && (comps == 3)) {
+      DECLARE_ROW_POINTERS0(GLushort);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const GLint rowAr0 = rowA[j] & 0x1f;
+         const GLint rowAr1 = rowA[k] & 0x1f;
+         const GLint rowBr0 = rowB[j] & 0x1f;
+         const GLint rowBr1 = rowB[k] & 0x1f;
+         const GLint rowCr0 = rowC[j] & 0x1f;
+         const GLint rowCr1 = rowC[k] & 0x1f;
+         const GLint rowDr0 = rowD[j] & 0x1f;
+         const GLint rowDr1 = rowD[k] & 0x1f;
+         const GLint rowAg0 = (rowA[j] >> 5) & 0x3f;
+         const GLint rowAg1 = (rowA[k] >> 5) & 0x3f;
+         const GLint rowBg0 = (rowB[j] >> 5) & 0x3f;
+         const GLint rowBg1 = (rowB[k] >> 5) & 0x3f;
+         const GLint rowCg0 = (rowC[j] >> 5) & 0x3f;
+         const GLint rowCg1 = (rowC[k] >> 5) & 0x3f;
+         const GLint rowDg0 = (rowD[j] >> 5) & 0x3f;
+         const GLint rowDg1 = (rowD[k] >> 5) & 0x3f;
+         const GLint rowAb0 = (rowA[j] >> 11) & 0x1f;
+         const GLint rowAb1 = (rowA[k] >> 11) & 0x1f;
+         const GLint rowBb0 = (rowB[j] >> 11) & 0x1f;
+         const GLint rowBb1 = (rowB[k] >> 11) & 0x1f;
+         const GLint rowCb0 = (rowC[j] >> 11) & 0x1f;
+         const GLint rowCb1 = (rowC[k] >> 11) & 0x1f;
+         const GLint rowDb0 = (rowD[j] >> 11) & 0x1f;
+         const GLint rowDb1 = (rowD[k] >> 11) & 0x1f;
+         const GLint r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+                                       rowCr0, rowCr1, rowDr0, rowDr1);
+         const GLint g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+                                       rowCg0, rowCg1, rowDg0, rowDg1);
+         const GLint b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+                                       rowCb0, rowCb1, rowDb0, rowDb1);
+         dst[i] = (b << 11) | (g << 5) | r;
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_SHORT_4_4_4_4) && (comps == 4)) {
+      DECLARE_ROW_POINTERS0(GLushort);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const GLint rowAr0 = rowA[j] & 0xf;
+         const GLint rowAr1 = rowA[k] & 0xf;
+         const GLint rowBr0 = rowB[j] & 0xf;
+         const GLint rowBr1 = rowB[k] & 0xf;
+         const GLint rowCr0 = rowC[j] & 0xf;
+         const GLint rowCr1 = rowC[k] & 0xf;
+         const GLint rowDr0 = rowD[j] & 0xf;
+         const GLint rowDr1 = rowD[k] & 0xf;
+         const GLint rowAg0 = (rowA[j] >> 4) & 0xf;
+         const GLint rowAg1 = (rowA[k] >> 4) & 0xf;
+         const GLint rowBg0 = (rowB[j] >> 4) & 0xf;
+         const GLint rowBg1 = (rowB[k] >> 4) & 0xf;
+         const GLint rowCg0 = (rowC[j] >> 4) & 0xf;
+         const GLint rowCg1 = (rowC[k] >> 4) & 0xf;
+         const GLint rowDg0 = (rowD[j] >> 4) & 0xf;
+         const GLint rowDg1 = (rowD[k] >> 4) & 0xf;
+         const GLint rowAb0 = (rowA[j] >> 8) & 0xf;
+         const GLint rowAb1 = (rowA[k] >> 8) & 0xf;
+         const GLint rowBb0 = (rowB[j] >> 8) & 0xf;
+         const GLint rowBb1 = (rowB[k] >> 8) & 0xf;
+         const GLint rowCb0 = (rowC[j] >> 8) & 0xf;
+         const GLint rowCb1 = (rowC[k] >> 8) & 0xf;
+         const GLint rowDb0 = (rowD[j] >> 8) & 0xf;
+         const GLint rowDb1 = (rowD[k] >> 8) & 0xf;
+         const GLint rowAa0 = (rowA[j] >> 12) & 0xf;
+         const GLint rowAa1 = (rowA[k] >> 12) & 0xf;
+         const GLint rowBa0 = (rowB[j] >> 12) & 0xf;
+         const GLint rowBa1 = (rowB[k] >> 12) & 0xf;
+         const GLint rowCa0 = (rowC[j] >> 12) & 0xf;
+         const GLint rowCa1 = (rowC[k] >> 12) & 0xf;
+         const GLint rowDa0 = (rowD[j] >> 12) & 0xf;
+         const GLint rowDa1 = (rowD[k] >> 12) & 0xf;
+         const GLint r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+                                       rowCr0, rowCr1, rowDr0, rowDr1);
+         const GLint g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+                                       rowCg0, rowCg1, rowDg0, rowDg1);
+         const GLint b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+                                       rowCb0, rowCb1, rowDb0, rowDb1);
+         const GLint a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
+                                       rowCa0, rowCa1, rowDa0, rowDa1);
+
+         dst[i] = (a << 12) | (b << 8) | (g << 4) | r;
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_SHORT_1_5_5_5_REV) && (comps == 4)) {
+      DECLARE_ROW_POINTERS0(GLushort);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const GLint rowAr0 = rowA[j] & 0x1f;
+         const GLint rowAr1 = rowA[k] & 0x1f;
+         const GLint rowBr0 = rowB[j] & 0x1f;
+         const GLint rowBr1 = rowB[k] & 0x1f;
+         const GLint rowCr0 = rowC[j] & 0x1f;
+         const GLint rowCr1 = rowC[k] & 0x1f;
+         const GLint rowDr0 = rowD[j] & 0x1f;
+         const GLint rowDr1 = rowD[k] & 0x1f;
+         const GLint rowAg0 = (rowA[j] >> 5) & 0x1f;
+         const GLint rowAg1 = (rowA[k] >> 5) & 0x1f;
+         const GLint rowBg0 = (rowB[j] >> 5) & 0x1f;
+         const GLint rowBg1 = (rowB[k] >> 5) & 0x1f;
+         const GLint rowCg0 = (rowC[j] >> 5) & 0x1f;
+         const GLint rowCg1 = (rowC[k] >> 5) & 0x1f;
+         const GLint rowDg0 = (rowD[j] >> 5) & 0x1f;
+         const GLint rowDg1 = (rowD[k] >> 5) & 0x1f;
+         const GLint rowAb0 = (rowA[j] >> 10) & 0x1f;
+         const GLint rowAb1 = (rowA[k] >> 10) & 0x1f;
+         const GLint rowBb0 = (rowB[j] >> 10) & 0x1f;
+         const GLint rowBb1 = (rowB[k] >> 10) & 0x1f;
+         const GLint rowCb0 = (rowC[j] >> 10) & 0x1f;
+         const GLint rowCb1 = (rowC[k] >> 10) & 0x1f;
+         const GLint rowDb0 = (rowD[j] >> 10) & 0x1f;
+         const GLint rowDb1 = (rowD[k] >> 10) & 0x1f;
+         const GLint rowAa0 = (rowA[j] >> 15) & 0x1;
+         const GLint rowAa1 = (rowA[k] >> 15) & 0x1;
+         const GLint rowBa0 = (rowB[j] >> 15) & 0x1;
+         const GLint rowBa1 = (rowB[k] >> 15) & 0x1;
+         const GLint rowCa0 = (rowC[j] >> 15) & 0x1;
+         const GLint rowCa1 = (rowC[k] >> 15) & 0x1;
+         const GLint rowDa0 = (rowD[j] >> 15) & 0x1;
+         const GLint rowDa1 = (rowD[k] >> 15) & 0x1;
+         const GLint r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+                                       rowCr0, rowCr1, rowDr0, rowDr1);
+         const GLint g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+                                       rowCg0, rowCg1, rowDg0, rowDg1);
+         const GLint b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+                                       rowCb0, rowCb1, rowDb0, rowDb1);
+         const GLint a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
+                                       rowCa0, rowCa1, rowDa0, rowDa1);
+
+         dst[i] = (a << 15) | (b << 10) | (g << 5) | r;
+      }
+   }
+   else if ((datatype == GL_UNSIGNED_BYTE_3_3_2) && (comps == 3)) {
+      DECLARE_ROW_POINTERS0(GLushort);
+
+      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
+           i++, j += colStride, k += colStride) {
+         const GLint rowAr0 = rowA[j] & 0x3;
+         const GLint rowAr1 = rowA[k] & 0x3;
+         const GLint rowBr0 = rowB[j] & 0x3;
+         const GLint rowBr1 = rowB[k] & 0x3;
+         const GLint rowCr0 = rowC[j] & 0x3;
+         const GLint rowCr1 = rowC[k] & 0x3;
+         const GLint rowDr0 = rowD[j] & 0x3;
+         const GLint rowDr1 = rowD[k] & 0x3;
+         const GLint rowAg0 = (rowA[j] >> 2) & 0x7;
+         const GLint rowAg1 = (rowA[k] >> 2) & 0x7;
+         const GLint rowBg0 = (rowB[j] >> 2) & 0x7;
+         const GLint rowBg1 = (rowB[k] >> 2) & 0x7;
+         const GLint rowCg0 = (rowC[j] >> 2) & 0x7;
+         const GLint rowCg1 = (rowC[k] >> 2) & 0x7;
+         const GLint rowDg0 = (rowD[j] >> 2) & 0x7;
+         const GLint rowDg1 = (rowD[k] >> 2) & 0x7;
+         const GLint rowAb0 = (rowA[j] >> 5) & 0x7;
+         const GLint rowAb1 = (rowA[k] >> 5) & 0x7;
+         const GLint rowBb0 = (rowB[j] >> 5) & 0x7;
+         const GLint rowBb1 = (rowB[k] >> 5) & 0x7;
+         const GLint rowCb0 = (rowC[j] >> 5) & 0x7;
+         const GLint rowCb1 = (rowC[k] >> 5) & 0x7;
+         const GLint rowDb0 = (rowD[j] >> 5) & 0x7;
+         const GLint rowDb1 = (rowD[k] >> 5) & 0x7;
+         const GLint r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
+                                       rowCr0, rowCr1, rowDr0, rowDr1);
+         const GLint g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
+                                       rowCg0, rowCg1, rowDg0, rowDg1);
+         const GLint b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
+                                       rowCb0, rowCb1, rowDb0, rowDb1);
+         dst[i] = (b << 5) | (g << 2) | r;
+      }
+   }
+   else {
+      _mesa_problem(NULL, "bad format in do_row()");
+   }
+}
+
+
 /*
  * These functions generate a 1/2-size mipmap image from a source image.
  * Texture borders are handled by copying or averaging the source image's
@@ -544,7 +983,6 @@
    const GLint dstWidthNB = dstWidth - 2 * border;
    const GLint dstHeightNB = dstHeight - 2 * border;
    const GLint dstDepthNB = dstDepth - 2 * border;
-   GLvoid *tmpRowA, *tmpRowB;
    GLint img, row;
    GLint bytesPerSrcImage, bytesPerDstImage;
    GLint bytesPerSrcRow, bytesPerDstRow;
@@ -552,15 +990,6 @@
 
    (void) srcDepthNB; /* silence warnings */
 
-   /* Need two temporary row buffers */
-   tmpRowA = _mesa_malloc(srcWidth * bpt);
-   if (!tmpRowA)
-      return;
-   tmpRowB = _mesa_malloc(srcWidth * bpt);
-   if (!tmpRowB) {
-      _mesa_free(tmpRowA);
-      return;
-   }
 
    bytesPerSrcImage = srcWidth * srcHeight * bpt;
    bytesPerDstImage = dstWidth * dstHeight * bpt;
@@ -607,15 +1036,11 @@
       GLubyte *dstImgRow = imgDst;
 
       for (row = 0; row < dstHeightNB; row++) {
-         /* Average together two rows from first src image */
-         do_row(datatype, comps, srcWidthNB, srcImgARowA, srcImgARowB,
-                srcWidthNB, tmpRowA);
-         /* Average together two rows from second src image */
-         do_row(datatype, comps, srcWidthNB, srcImgBRowA, srcImgBRowB,
-                srcWidthNB, tmpRowB);
-         /* Average together the temp rows to make the final row */
-         do_row(datatype, comps, srcWidthNB, tmpRowA, tmpRowB,
-                dstWidthNB, dstImgRow);
+         do_row_3D(datatype, comps, srcWidthNB, 
+                   srcImgARowA, srcImgARowB,
+                   srcImgBRowA, srcImgBRowB,
+                   dstWidthNB, dstImgRow);
+
          /* advance to next rows */
          srcImgARowA += bytesPerSrcRow + srcRowOffset;
          srcImgARowB += bytesPerSrcRow + srcRowOffset;
@@ -625,8 +1050,6 @@
       }
    }
 
-   _mesa_free(tmpRowA);
-   _mesa_free(tmpRowB);
 
    /* Luckily we can leverage the make_2d_mipmap() function here! */
    if (border > 0) {
diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c
index 8163ae6..a93733c 100644
--- a/src/mesa/shader/prog_execute.c
+++ b/src/mesa/shader/prog_execute.c
@@ -963,7 +963,10 @@
          {
             GLfloat a[4], result[4];
             fetch_vector1(&inst->SrcReg[0], machine, a);
-            result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
+	    /* The fast LOG2 macro doesn't meet the precision requirements.
+	     */
+            result[0] = result[1] = result[2] = result[3] =
+		(log(a[0]) * 1.442695F);
             store_vector4(inst, machine, result);
          }
          break;
@@ -1022,7 +1025,11 @@
                   GLfloat mantissa = FREXPF(t[0], &exponent);
                   q[0] = (GLfloat) (exponent - 1);
                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
-                  q[2] = (GLfloat) (q[0] + LOG2(q[1]));
+
+		  /* The fast LOG2 macro doesn't meet the precision
+		   * requirements.
+		   */
+                  q[2] = (log(t[0]) * 1.442695F);
                }
             }
             else {