merge from master
diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c
index 370b56c..65d6545 100644
--- a/src/mesa/drivers/dri/common/dri_bufmgr.c
+++ b/src/mesa/drivers/dri/common/dri_bufmgr.c
@@ -431,7 +431,7 @@
 driBOCreateList(int target, drmBOList * list)
 {
    _glthread_LOCK_MUTEX(bmMutex);
-   BM_CKFATAL(drmBOCreateList(20, list));
+   BM_CKFATAL(drmBOCreateList(target, list));
    _glthread_UNLOCK_MUTEX(bmMutex);
 }
 
diff --git a/src/mesa/drivers/dri/i810/i810context.c b/src/mesa/drivers/dri/i810/i810context.c
index db8f7a1..f0332d9 100644
--- a/src/mesa/drivers/dri/i810/i810context.c
+++ b/src/mesa/drivers/dri/i810/i810context.c
@@ -65,6 +65,7 @@
 
 #define need_GL_ARB_multisample
 #define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
 #include "extension_helper.h"
 
 #ifndef I810_DEBUG
@@ -129,6 +130,7 @@
     { "GL_ARB_texture_env_combine",        NULL },
     { "GL_ARB_texture_env_crossbar",       NULL },
     { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
     { "GL_EXT_stencil_wrap",               NULL },
     { "GL_EXT_texture_edge_clamp",         NULL },
     { "GL_EXT_texture_env_combine",        NULL },
diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h
index 3ba792a..694cd4c 100644
--- a/src/mesa/drivers/dri/i915/i915_reg.h
+++ b/src/mesa/drivers/dri/i915/i915_reg.h
@@ -138,7 +138,7 @@
 
 
 /* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
-#define _3DSTATE_DEPTH_SUBRECT_DISABLE    (CMD_3D | (0x1c<<24) | (0x11<19) | 0x2)
+#define _3DSTATE_DEPTH_SUBRECT_DISABLE    (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2)
 
 /* p161 */
 #define _3DSTATE_DST_BUF_VARS_CMD	(CMD_3D | (0x1d<<24) | (0x85<<16))
diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c
index 5692289..d126208 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -813,7 +813,7 @@
 				       I1_LOAD_S(4) |
 				       I1_LOAD_S(5) |
 				       I1_LOAD_S(6) | 
-				       (4));
+				       (3));
       i915->state.Ctx[I915_CTXREG_LIS2] = 0;
       i915->state.Ctx[I915_CTXREG_LIS4] = 0;
       i915->state.Ctx[I915_CTXREG_LIS5] = 0;
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index 82a421b..c920fc4 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -180,7 +180,7 @@
     */
    OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 
 	     I1_LOAD_S(3) |
-	     (1));
+	     (0));
    OUT_BATCH(0);
  
    /* XXX: Use this */
diff --git a/src/mesa/drivers/dri/i915tex/i915_reg.h b/src/mesa/drivers/dri/i915tex/i915_reg.h
index 04b1999..34c6821 100644
--- a/src/mesa/drivers/dri/i915tex/i915_reg.h
+++ b/src/mesa/drivers/dri/i915tex/i915_reg.h
@@ -138,7 +138,7 @@
 
 
 /* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
-#define _3DSTATE_DEPTH_SUBRECT_DISABLE    (CMD_3D | (0x1c<<24) | (0x11<19) | 0x2)
+#define _3DSTATE_DEPTH_SUBRECT_DISABLE    (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2)
 
 /* p161 */
 #define _3DSTATE_DST_BUF_VARS_CMD	(CMD_3D | (0x1d<<24) | (0x85<<16))
diff --git a/src/mesa/drivers/dri/i915tex/i915_state.c b/src/mesa/drivers/dri/i915tex/i915_state.c
index 4816afc..c2c2ff3 100644
--- a/src/mesa/drivers/dri/i915tex/i915_state.c
+++ b/src/mesa/drivers/dri/i915tex/i915_state.c
@@ -859,7 +859,7 @@
       i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
                                          I1_LOAD_S(2) |
                                          I1_LOAD_S(4) |
-                                         I1_LOAD_S(5) | I1_LOAD_S(6) | (4));
+                                         I1_LOAD_S(5) | I1_LOAD_S(6) | (3));
       i915->state.Ctx[I915_CTXREG_LIS2] = 0;
       i915->state.Ctx[I915_CTXREG_LIS4] = 0;
       i915->state.Ctx[I915_CTXREG_LIS5] = 0;
diff --git a/src/mesa/drivers/dri/i915tex/intel_buffers.c b/src/mesa/drivers/dri/i915tex/intel_buffers.c
index 45fd2fa..62ff54b 100644
--- a/src/mesa/drivers/dri/i915tex/intel_buffers.c
+++ b/src/mesa/drivers/dri/i915tex/intel_buffers.c
@@ -327,6 +327,9 @@
 	 }
 
 	 for (i = 0; i < intel_fb->pf_num_pages; i++) {
+	    if (!intel_fb->color_rb[i])
+	       continue;
+
 	    vbl.request.sequence = intel_fb->color_rb[i]->vbl_pending;
 	    drmWaitVBlank(intel->driFd, &vbl);
 	 }
@@ -336,7 +339,8 @@
 	 intel_fb->vbl_waited = intel_fb->vbl_seq;
 
 	 for (i = 0; i < intel_fb->pf_num_pages; i++) {
-	    intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited;
+	    if (intel_fb->color_rb[i])
+	       intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited;
 	 }
       }
    } else {
diff --git a/src/mesa/drivers/dri/i915tex/intel_context.c b/src/mesa/drivers/dri/i915tex/intel_context.c
index 5c2cdf0..acda7b1 100644
--- a/src/mesa/drivers/dri/i915tex/intel_context.c
+++ b/src/mesa/drivers/dri/i915tex/intel_context.c
@@ -581,11 +581,7 @@
       }
 
       /* set GLframebuffer size to match window, if needed */
-      if (intel_fb->Base.Width != driDrawPriv->w) {
-         _mesa_resize_framebuffer(&intel->ctx, &intel_fb->Base,
-                                  driDrawPriv->w, driDrawPriv->h);
-      }         
-      if (readFb->Width != driReadPriv->w) {
+      if (driReadPriv != driDrawPriv && readFb->Width != driReadPriv->w) {
          _mesa_resize_framebuffer(&intel->ctx, readFb,
                                   driReadPriv->w, driReadPriv->h);
       }         
diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c
index c3ffa9e..9d4b986 100644
--- a/src/mesa/drivers/dri/i965/brw_tex.c
+++ b/src/mesa/drivers/dri/i965/brw_tex.c
@@ -148,8 +148,9 @@
          return &_mesa_texformat_ycbcr_rev;
 
    case GL_COMPRESSED_RGB_FXT1_3DFX:
+       return &_mesa_texformat_rgb_fxt1;
    case GL_COMPRESSED_RGBA_FXT1_3DFX:
-     return &_mesa_texformat_rgb_fxt1;
+       return &_mesa_texformat_rgba_fxt1;
 
    case GL_RGB_S3TC:
    case GL_RGB4_S3TC:
diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c
index 21e961c..35adc48 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c
@@ -849,14 +849,13 @@
    /* Calculate spot attenuation:
     */
    if (!p->state->unit[i].light_spotcutoff_is_180) {
-      struct ureg spot_dir = register_param3(p, STATE_LIGHT, i,
-					     STATE_SPOT_DIRECTION);
+      struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
+						  STATE_SPOT_DIR_NORMALIZED, i);
       struct ureg spot = get_temp(p);
       struct ureg slt = get_temp(p);
-	       
-      emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! */
-      emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot);
-      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot);
+
+      emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot_dir_norm);
+      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
 
@@ -894,7 +893,7 @@
 
 
 /* Need to add some addtional parameters to allow lighting in object
- * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye
+ * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
  * space lighting.
  */
 static void build_lighting( struct tnl_program *p )
@@ -987,7 +986,14 @@
 	     */
 	    VPpli = register_param3(p, STATE_LIGHT, i, 
 				    STATE_POSITION_NORMALIZED); 
-	    half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+            if (p->state->light_local_viewer) {
+                struct ureg eye_hat = get_eye_position_normalized(p);
+                half = get_temp(p);
+                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+                emit_normalize_vec3(p, half, half);
+            } else {
+                half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+            }
 	 } 
 	 else {
 	    struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 
@@ -1158,7 +1164,8 @@
 
    if (p->state->fog_option &&
        p->state->tnl_do_vertex_fog) {
-      struct ureg params = register_param1(p, STATE_FOG_PARAMS);
+      struct ureg params = register_param2(p, STATE_INTERNAL,
+					   STATE_FOG_PARAMS_OPTIMIZED);
       struct ureg tmp = get_temp(p);
       struct ureg id = get_identity_param(p);
 
@@ -1166,24 +1173,21 @@
 
       switch (p->state->fog_option) {
       case FOG_LINEAR: {
-	 emit_op1(p, OPCODE_ABS, tmp, 0, input); 
-	 emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), tmp); 
-	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); 
+	 emit_op1(p, OPCODE_ABS, tmp, 0, input);
+	 emit_op3(p, OPCODE_MAD, tmp, 0, tmp, swizzle1(params,X), swizzle1(params,Y));
 	 emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
 	 emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
 	 break;
       }
       case FOG_EXP:
 	 emit_op1(p, OPCODE_ABS, tmp, 0, input); 
-	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); 
-	 emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, 
-		  register_const1f(p, M_E), ureg_negate(tmp)); 
+	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z));
+	 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp));
 	 break;
       case FOG_EXP2:
-	 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); 
+	 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
 	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 
-	 emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, 
-		  register_const1f(p, M_E), ureg_negate(tmp)); 
+	 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp));
 	 break;
       }
       
diff --git a/src/mesa/drivers/dri/mach64/mach64_context.c b/src/mesa/drivers/dri/mach64/mach64_context.c
index 5a6c301..ad661e1 100644
--- a/src/mesa/drivers/dri/mach64/mach64_context.c
+++ b/src/mesa/drivers/dri/mach64/mach64_context.c
@@ -58,6 +58,7 @@
 #include "vblank.h"
 
 #define need_GL_ARB_multisample
+#define need_GL_ARB_vertex_buffer_object
 #include "extension_helper.h"
 
 #ifndef MACH64_DEBUG
@@ -83,6 +84,7 @@
 {
     { "GL_ARB_multisample",                GL_ARB_multisample_functions },
     { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
     { "GL_EXT_texture_edge_clamp",         NULL },
     { "GL_MESA_ycbcr_texture",             NULL },
     { "GL_SGIS_generate_mipmap",           NULL },
diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c
index 67a6f8b..ca2c8fa 100644
--- a/src/mesa/drivers/dri/mga/mga_xmesa.c
+++ b/src/mesa/drivers/dri/mga/mga_xmesa.c
@@ -72,6 +72,7 @@
 
 #define need_GL_ARB_multisample
 #define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
 #define need_GL_ARB_vertex_program
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_multi_draw_arrays
@@ -401,6 +402,7 @@
    { "GL_ARB_multisample",            GL_ARB_multisample_functions },
    { "GL_ARB_texture_compression",    GL_ARB_texture_compression_functions },
    { "GL_ARB_texture_rectangle",      NULL },
+   { "GL_ARB_vertex_buffer_object",   GL_ARB_vertex_buffer_object_functions },
    { "GL_EXT_blend_logic_op",         NULL },
    { "GL_EXT_fog_coord",              GL_EXT_fog_coord_functions },
    { "GL_EXT_multi_draw_arrays",      GL_EXT_multi_draw_arrays_functions },
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c
index 627679a..8e11eb6 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_context.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c
@@ -352,6 +352,7 @@
 		OUT_RING       (((box->y2 - box->y1) << 16) |
 				(box->x2 - box->x1));
 	}
+	FIRE_RING();
 
 	UNLOCK_HARDWARE(nmesa);
 #endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h
index f79a867..87e4479 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_context.h
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h
@@ -45,6 +45,7 @@
 #include "xmlconfig.h"
 
 typedef struct nouveau_fifo_t{
+	int channel;
 	u_int32_t* buffer;
 	u_int32_t* mmio;
 	u_int32_t put_base;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fifo.c b/src/mesa/drivers/dri/nouveau/nouveau_fifo.c
index 67b5aa4..bd2b2ed 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fifo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fifo.c
@@ -124,6 +124,7 @@
 	}
 
 	/* Setup our initial FIFO tracking params */
+	nmesa->fifo.channel  = fifo_init.channel;
 	nmesa->fifo.put_base = fifo_init.put_base;
 	nmesa->fifo.current  = 0;
 	nmesa->fifo.put      = 0;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c
index 468b18e..b71acff 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_object.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c
@@ -10,8 +10,9 @@
 	drm_nouveau_object_init_t cto;
 	int ret;
 
-	cto.handle = handle;
-	cto.class  = class;
+	cto.channel = nmesa->fifo.channel;
+	cto.handle  = handle;
+	cto.class   = class;
 	ret = drmCommandWrite(nmesa->driFd, DRM_NOUVEAU_OBJECT_INIT, &cto, sizeof(cto));
 
 	return ret == 0;
@@ -28,12 +29,13 @@
 	drm_nouveau_dma_object_init_t dma;
 	int ret;
 
-	dma.class  = class;
-	dma.handle = handle;
-	dma.target = target;
-	dma.access = access;
-	dma.offset = offset;
-	dma.size   = size;
+	dma.channel = nmesa->fifo.channel;
+	dma.class   = class;
+	dma.handle  = handle;
+	dma.target  = target;
+	dma.access  = access;
+	dma.offset  = offset;
+	dma.size    = size;
 	ret = drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_DMA_OBJECT_INIT,
 				  &dma, sizeof(dma));
 	return ret == 0;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.c b/src/mesa/drivers/dri/nouveau/nouveau_screen.c
index e00080f..65bde99 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_screen.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.c
@@ -328,7 +328,7 @@
 	static const __DRIversion ddx_expected = { 1, 2, 0 };
 	static const __DRIversion dri_expected = { 4, 0, 0 };
 	static const __DRIversion drm_expected = { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL };
-#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 4
+#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 5
 #error nouveau_drm.h version doesn't match expected version
 #endif
 	dri_interface = interface;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c
index bee8d5a..b6837c5 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_shader.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c
@@ -49,6 +49,7 @@
 static void
 nouveauBindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
 {
+   NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog);
 }
 
 static struct gl_program *
@@ -56,7 +57,10 @@
 {
    nouveauShader *nvs;
 
+   NVSDBG("target=%s, id=%d\n", _mesa_lookup_enum_by_nr(target), id);
+
    nvs = CALLOC_STRUCT(_nouveauShader);
+   NVSDBG("prog=%p\n", nvs);
    switch (target) {
    case GL_VERTEX_PROGRAM_ARB:
       return _mesa_init_vertex_program(ctx, &nvs->mesa.vp, target, id);
@@ -76,6 +80,8 @@
 {
    nouveauShader *nvs = (nouveauShader *)prog;
 
+   NVSDBG("prog=%p\n", prog);
+
    if (nvs->translated)
       FREE(nvs->program);
    _mesa_delete_program(ctx, prog);
@@ -87,9 +93,13 @@
 {
    nouveauShader *nvs = (nouveauShader *)prog;
 
+   NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog);
+
    if (nvs->translated)
       FREE(nvs->program);
-   nvs->translated = 0;
+
+   nvs->error      = GL_FALSE;
+   nvs->translated = GL_FALSE;
 
    _tnl_program_string(ctx, target, prog);
 }
@@ -99,6 +109,8 @@
 {
    nouveauShader *nvs = (nouveauShader *)prog;
 
+   NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog);
+
    return nvs->translated;
 }
 
@@ -109,6 +121,8 @@
    struct gl_program_parameter_list *plist;
    int i;
 
+   NVSDBG("prog=%p\n", nvs);
+
    /* Translate to HW format now if necessary */
    if (!nvs->translated) {
       /* Mesa ASM shader -> nouveauShader */
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h
index b2df354..7125a2a 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h
+++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h
@@ -4,6 +4,12 @@
 #include "mtypes.h"
 #include "bufferobj.h"
 
+/* Trace to stderr, prefixed with __func__, only when NOUVEAU_DEBUG has DEBUG_SHADERS set. */
+#define NVSDBG(fmt, args...) do {                             \
+	if (NOUVEAU_DEBUG & DEBUG_SHADERS) {                  \
+		fprintf(stderr, "%s: "fmt, __func__, ##args); \
+	}                                                     \
+} while(0)
 typedef struct _nvsFunc nvsFunc;
 
 #define NVS_MAX_TEMPS   32
@@ -45,6 +51,7 @@
    nvsFunc *func;
 
    /* State of the final program */
+   GLboolean error;
    GLboolean translated;
    GLboolean on_hardware;
    unsigned int *program;
@@ -418,6 +425,12 @@
    return reg;
 }
 
+/* Log a printf-style error to stderr and flag nvs: error = GL_TRUE, translated = GL_FALSE. */
+#define nvsProgramError(nvs,fmt,args...) do {                           \
+	fprintf(stderr, "nvsProgramError (%s): "fmt, __func__, ##args); \
+	(nvs)->error = GL_TRUE;                                         \
+	(nvs)->translated = GL_FALSE;                                   \
+} while(0)
 extern GLboolean nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs);
 extern void nvsDisasmHWShader(nvsPtr);
 extern void nvsDumpFragmentList(nvsFragmentHeader *f, int lvl);
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c
index abba59d..8c203cc 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c
@@ -264,6 +264,23 @@
 	return mask;
 }
 
+/*
+ * Tell whether a Mesa program opcode is one of the texture opcodes.
+ *
+ * op: the instruction opcode to classify.
+ * Returns GL_TRUE for OPCODE_TEX, OPCODE_TXB, OPCODE_TXD, OPCODE_TXL
+ * and OPCODE_TXP; GL_FALSE for any other opcode.
+ */
+static GLboolean
+pass0_opcode_is_tex(enum prog_opcode op)
+{
+	const int is_tex = (op == OPCODE_TEX || op == OPCODE_TXB ||
+			    op == OPCODE_TXD || op == OPCODE_TXL ||
+			    op == OPCODE_TXP);
+
+	return is_tex ? GL_TRUE : GL_FALSE;
+}
+
 static nvsTexTarget
 pass0_make_tex_target(GLuint mesa)
 {
@@ -724,7 +741,11 @@
 				(inst->SaturateMode != SATURATE_OFF),
 				src[0], src[1], src[2]);
 		nvsinst->tex_unit   = inst->TexSrcUnit;
-		nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget);
+		if (pass0_opcode_is_tex(inst->Opcode))
+			nvsinst->tex_target =
+				pass0_make_tex_target(inst->TexSrcTarget);
+		else
+			nvsinst->tex_target = NVS_TEX_TARGET_UNKNOWN;
 
 		ret = GL_TRUE;
 	} else
@@ -907,7 +928,7 @@
 	}
 }
 
-static void
+static GLboolean
 pass0_resolve_mesa_consts(nouveauShader *nvs)
 {
 	struct pass0_rec *rec = nvs->pass_rec;
@@ -928,6 +949,11 @@
 	for (i=0; i<plist->NumParameters; i++) {
 		int hw = rec->mesa_const_base + i;
 
+		/* NOTE(review): assumes params[] has NVS_MAX_CONSTS entries */
+		if (hw >= NVS_MAX_CONSTS) {
+			nvsProgramError(nvs, "hw = %d >= NVS_MAX_CONSTS!\n", hw);
+			return GL_FALSE;
+		}
 		switch (plist->Parameters[i].Type) {
 		case PROGRAM_NAMED_PARAM:
 		case PROGRAM_STATE_VAR:
@@ -941,10 +967,13 @@
 			COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]);
 			break;
 		default:
-			assert(0);
-			break;
+			nvsProgramError(nvs, "hit bad type=%d on param %d\n",
+					plist->Parameters[i].Type, i);
+			return GL_FALSE;
 		}
 	}
+
+	return GL_TRUE;
 }
 
 GLboolean
@@ -957,6 +986,16 @@
 	struct pass0_rec *rec;
 	int ret = GL_FALSE;
 
+	NVSDBG("start: nvs=%p\n", nvs);
+
+	/* Previously detected an error, and haven't received a new program
+	 * string, so fail immediately.
+	 */
+	if (nvs->error) {
+		NVSDBG("failed previous compile attempt, not retrying\n");
+		return GL_FALSE;
+	}
+
 	rec = CALLOC_STRUCT(pass0_rec);
 	if (!rec)
 		return GL_FALSE;
@@ -1001,7 +1040,8 @@
 
 	ret = pass0_translate_instructions(nvs, 0, 0, nvs->program_tree);
 	if (ret)
-		pass0_resolve_mesa_consts(nvs);
+		ret = pass0_resolve_mesa_consts(nvs);	
+	
 	/*XXX: if (!ret) DESTROY TREE!!! */
 
 	FREE(rec);
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c
index 90c57d3..78c1401 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c
@@ -2,11 +2,13 @@
 #include "macros.h"
 #include "enums.h"
 
+#include "nouveau_context.h"
 #include "nouveau_shader.h"
 
 GLboolean
 nouveau_shader_pass1(nvsPtr nvs)
 {
+   NVSDBG("start: nvs=%p\n", nvs);
 
    return GL_TRUE;
 }
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c
index 6eb9de4..cd27dac 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c
@@ -210,6 +210,8 @@
 	struct pass2_rec *rec;
 	int i;
 
+	NVSDBG("start: nvs=%p\n", nvs);
+
 	rec = calloc(1, sizeof(struct pass2_rec));
 	for (i=0; i<NVS_MAX_TEMPS; i++)
 		rec->temps[i] = -1;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_sync.c b/src/mesa/drivers/dri/nouveau/nouveau_sync.c
index 428b19b..30e6696 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_sync.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_sync.c
@@ -124,7 +124,7 @@
 	while (time <= timeout) {
 		if (n[NV_NOTIFY_STATE/4] & NV_NOTIFY_STATE_ERROR_CODE_MASK) {
 			MESSAGE("Notifier returned error: 0x%04x\n",
-					n[NV_NOTIFY_STATE] &
+					n[NV_NOTIFY_STATE/4] &
 					NV_NOTIFY_STATE_ERROR_CODE_MASK);
 			return GL_FALSE;
 		}
diff --git a/src/mesa/drivers/dri/r128/r128_context.c b/src/mesa/drivers/dri/r128/r128_context.c
index 89ddafa..95e54a6 100644
--- a/src/mesa/drivers/dri/r128/r128_context.c
+++ b/src/mesa/drivers/dri/r128/r128_context.c
@@ -68,6 +68,7 @@
 
 #define need_GL_ARB_multisample
 #define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
 #define need_GL_EXT_blend_minmax
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_secondary_color
@@ -80,6 +81,7 @@
     { "GL_ARB_texture_compression",        GL_ARB_texture_compression_functions },
     { "GL_ARB_texture_env_add",            NULL },
     { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
     { "GL_EXT_blend_subtract",             GL_EXT_blend_minmax_functions },
     { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
     { "GL_EXT_texture_edge_clamp",         NULL },
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index fc6eb93..3abcdf9 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -673,11 +673,13 @@
 				&newCtx->vbl_seq );
       }
 
-      if ( newCtx->dri.drawable != driDrawPriv ||
-           newCtx->dri.readable != driReadPriv ) {
-	 newCtx->dri.drawable = driDrawPriv;
-	 newCtx->dri.readable = driReadPriv;
+      newCtx->dri.readable = driReadPriv;
 
+      if ( newCtx->dri.drawable != driDrawPriv ||
+           newCtx->lastStamp != driDrawPriv->lastStamp ) {
+	 newCtx->dri.drawable = driDrawPriv;
+
+	 r200SetCliprects(newCtx);
 	 r200UpdateWindow( newCtx->glCtx );
 	 r200UpdateViewportOffset( newCtx->glCtx );
       }
diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c
index bcc0c91..9ffdb2b 100644
--- a/src/mesa/drivers/dri/r200/r200_lock.c
+++ b/src/mesa/drivers/dri/r200/r200_lock.c
@@ -92,13 +92,9 @@
 
    if ( rmesa->lastStamp != drawable->lastStamp ) {
       r200UpdatePageFlipping( rmesa );
-      if (rmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT)
-         r200SetCliprects( rmesa, GL_BACK_LEFT );
-      else
-         r200SetCliprects( rmesa, GL_FRONT_LEFT );
+      r200SetCliprects( rmesa );
       r200UpdateViewportOffset( rmesa->glCtx );
       driUpdateFramebufferSize(rmesa->glCtx, drawable);
-      rmesa->lastStamp = drawable->lastStamp;
    }
 
    R200_STATECHANGE( rmesa, ctx );
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
index bdb487f..16726d7 100644
--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -1691,6 +1691,11 @@
 #define SUBPIXEL_X 0.125
 #define SUBPIXEL_Y 0.125
 
+
+/**
+ * Called when window size or position changes or viewport or depth range
+ * state is changed.  We update the hardware viewport state here.
+ */
 void r200UpdateWindow( GLcontext *ctx )
 {
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
@@ -1843,19 +1848,18 @@
 }
 
 
-void r200SetCliprects( r200ContextPtr rmesa, GLenum mode )
+/*
+ * Set up the cliprects for either front or back-buffer drawing.
+ */
+void r200SetCliprects( r200ContextPtr rmesa )
 {
    __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
    __DRIdrawablePrivate *const readable = rmesa->dri.readable;
    GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
    GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
 
-   switch ( mode ) {
-   case GL_FRONT_LEFT:
-      rmesa->numClipRects = drawable->numClipRects;
-      rmesa->pClipRects = drawable->pClipRects;
-      break;
-   case GL_BACK_LEFT:
+   if (draw_fb->_ColorDrawBufferMask[0]
+       == BUFFER_BIT_BACK_LEFT) {
       /* Can't ignore 2d windows if we are page flipping.
        */
       if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
@@ -1866,11 +1870,12 @@
          rmesa->numClipRects = drawable->numBackClipRects;
          rmesa->pClipRects = drawable->pBackClipRects;
       }
-      break;
-   default:
-      fprintf(stderr, "bad mode in r200SetCliprects\n");
-      return;
    }
+   else {
+     /* front buffer (or none, or multiple buffers) */
+     rmesa->numClipRects = drawable->numClipRects;
+     rmesa->pClipRects = drawable->pClipRects;
+  }
 
    if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
       _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
@@ -1889,6 +1894,8 @@
 
    if (rmesa->state.scissor.enabled)
       r200RecalcScissorRects( rmesa );
+
+   rmesa->lastStamp = drawable->lastStamp;
 }
 
 
@@ -1908,19 +1915,17 @@
     */
    switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
    case BUFFER_BIT_FRONT_LEFT:
-      FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      r200SetCliprects( rmesa, GL_FRONT_LEFT );
-      break;
    case BUFFER_BIT_BACK_LEFT:
       FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      r200SetCliprects( rmesa, GL_BACK_LEFT );
       break;
    default:
-      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      /* 0 (GL_NONE) buffers or multiple color drawing buffers */
       FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE );
       return;
    }
 
+   r200SetCliprects( rmesa );
+
    /* We'll set the drawing engine's offset/pitch parameters later
     * when we update other state.
     */
diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h
index 98c6fbe..f34090b 100644
--- a/src/mesa/drivers/dri/r200/r200_state.h
+++ b/src/mesa/drivers/dri/r200/r200_state.h
@@ -44,7 +44,7 @@
 
 extern void r200UpdateMaterial( GLcontext *ctx );
 
-extern void r200SetCliprects( r200ContextPtr rmesa, GLenum mode );
+extern void r200SetCliprects( r200ContextPtr rmesa );
 extern void r200RecalcScissorRects( r200ContextPtr rmesa );
 extern void r200UpdateViewportOffset( GLcontext *ctx );
 extern void r200UpdateWindow( GLcontext *ctx );
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 2ee2328..d4bf0ae 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -39,7 +39,7 @@
 		 r300_texmem.c \
 		 r300_tex.c \
 		 r300_texstate.c \
-		 r300_vertexprog.c \
+		 r300_vertprog.c \
 		 r300_fragprog.c \
 		 r300_shader.c \
 		 r300_maos.c \
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index 2c7b5aa..0fb2e5a 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -174,11 +174,12 @@
 	dest ++;
 	r300->cmdbuf.count_used ++;
 
+	/* Emit cache flush */
 	*dest = cmdpacket0(R300_TX_CNTL, 1);
 	dest ++;
 	r300->cmdbuf.count_used ++;
 	
-	*dest = 0x0;
+	*dest = R300_TX_FLUSH;
 	dest ++;
 	r300->cmdbuf.count_used ++;
 	
@@ -291,14 +292,14 @@
 	/* Initialize state atoms */
 	ALLOC_STATE( vpt, always, R300_VPT_CMDSIZE, "vpt", 0 );
 		r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
-	ALLOC_STATE( unk2080, always, 2, "unk2080", 0 );
-		r300->hw.unk2080.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1);
+	ALLOC_STATE( vap_cntl, always, 2, "vap_cntl", 0 );
+		r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1);
 	ALLOC_STATE( vte, always, 3, "vte", 0 );
 		r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
 	ALLOC_STATE( unk2134, always, 3, "unk2134", 0 );
 		r300->hw.unk2134.cmd[0] = cmdpacket0(0x2134, 2);
-	ALLOC_STATE( unk2140, always, 2, "unk2140", 0 );
-		r300->hw.unk2140.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
+	ALLOC_STATE( vap_cntl_status, always, 2, "vap_cntl_status", 0 );
+		r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
 	ALLOC_STATE( vir[0], variable, R300_VIR_CMDSIZE, "vir/0", 0 );
 		r300->hw.vir[0].cmd[R300_VIR_CMD_0] = cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1);
 	ALLOC_STATE( vir[1], variable, R300_VIR_CMDSIZE, "vir/1", 1 );
@@ -335,18 +336,18 @@
 		r300->hw.lcntl.cmd[0] = cmdpacket0(R300_RE_LINE_CNT, 1);
 	ALLOC_STATE( unk4260, always, 4, "unk4260", 0 );
 		r300->hw.unk4260.cmd[0] = cmdpacket0(0x4260, 3);
-	ALLOC_STATE( unk4274, always, 5, "unk4274", 0 );
-		r300->hw.unk4274.cmd[0] = cmdpacket0(R300_RE_SHADE, 4);
-	ALLOC_STATE( unk4288, always, 4, "unk4288", 0 );
-		r300->hw.unk4288.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3);
+	ALLOC_STATE( shade, always, 5, "shade", 0 );
+		r300->hw.shade.cmd[0] = cmdpacket0(R300_RE_SHADE, 4);
+	ALLOC_STATE( polygon_mode, always, 4, "polygon_mode", 0 );
+		r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3);
 	ALLOC_STATE( fogp, always, 3, "fogp", 0 );
 		r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2);
-	ALLOC_STATE( unk42A0, always, 2, "unk42A0", 0 );
-		r300->hw.unk42A0.cmd[0] = cmdpacket0(0x42A0, 1);
+	ALLOC_STATE( zbias_cntl, always, 2, "zbias_cntl", 0 );
+		r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1);
 	ALLOC_STATE( zbs, always, R300_ZBS_CMDSIZE, "zbs", 0 );
 		r300->hw.zbs.cmd[R300_ZBS_CMD_0] = cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4);
-	ALLOC_STATE( unk42B4, always, 2, "unk42B4", 0 );
-		r300->hw.unk42B4.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1);
+	ALLOC_STATE( occlusion_cntl, always, 2, "occlusion_cntl", 0 );
+		r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1);
 	ALLOC_STATE( cul, always, R300_CUL_CMDSIZE, "cul", 0 );
 		r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1);
 	ALLOC_STATE( unk42C0, always, 3, "unk42C0", 0 );
@@ -392,8 +393,8 @@
 		r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
 	ALLOC_STATE( cmk, always, R300_CMK_CMDSIZE, "cmk", 0 );
 		r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(R300_RB3D_COLORMASK, 1);
-	ALLOC_STATE( unk4E10, always, 4, "unk4E10", 0 );
-		r300->hw.unk4E10.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3);
+	ALLOC_STATE( blend_color, always, 4, "blend_color", 0 );
+		r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3);
 	ALLOC_STATE( cb, always, R300_CB_CMDSIZE, "cb", 0 );
 		r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
 		r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
@@ -405,8 +406,8 @@
 		r300->hw.unk4EA0.cmd[0] = cmdpacket0(0x4EA0, 2);
 	ALLOC_STATE( zs, always, R300_ZS_CMDSIZE, "zstencil", 0 );
 		r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3);
-	ALLOC_STATE( unk4F10, always, 5, "unk4F10", 0 );
-		r300->hw.unk4F10.cmd[0] = cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4);
+	ALLOC_STATE( zstencil_format, always, 5, "zstencil_format", 0 );
+		r300->hw.zstencil_format.cmd[0] = cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4);
 	ALLOC_STATE( zb, always, R300_ZB_CMDSIZE, "zb", 0 );
 		r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_RB3D_DEPTHOFFSET, 2);
 	ALLOC_STATE( unk4F28, always, 2, "unk4F28", 0 );
@@ -429,8 +430,8 @@
 	ALLOC_STATE( tex.filter, variable, mtu+1, "tex_filter", 0 );
 		r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER_0, 0);
 
-	ALLOC_STATE( tex.unknown1, variable, mtu+1, "tex_unknown1", 0 );
-		r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, 0);
+	ALLOC_STATE( tex.filter_1, variable, mtu+1, "tex_filter_1", 0 );
+		r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, 0);
 
 	ALLOC_STATE( tex.size, variable, mtu+1, "tex_size", 0 );
 		r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0);
@@ -444,8 +445,8 @@
 	ALLOC_STATE( tex.offset, variable, mtu+1, "tex_offset", 0 );
 		r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, 0);
 
-	ALLOC_STATE( tex.unknown4, variable, mtu+1, "tex_unknown4", 0 );
-		r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
+	ALLOC_STATE( tex.chroma_key, variable, mtu+1, "tex_chroma_key", 0 );
+		r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
 
 	ALLOC_STATE( tex.border_color, variable, mtu+1, "tex_border_color", 0 );
 		r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_BORDER_COLOR_0, 0);
@@ -456,10 +457,10 @@
 	r300->hw.atomlist.name = "atom-list";
 
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.vpt);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2080);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.vap_cntl);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.vte);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2134);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2140);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.vap_cntl_status);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[0]);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[1]);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.vic);
@@ -478,12 +479,12 @@
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4230);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.lcntl);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4260);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4274);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4288);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.shade);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.polygon_mode);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.fogp);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42A0);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.zbias_cntl);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.zbs);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42B4);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.occlusion_cntl);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.cul);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42C0);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.rc);
@@ -506,13 +507,13 @@
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E00);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.bld);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.cmk);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E10);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.blend_color);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.cb);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E50);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E88);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4EA0);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.zs);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F10);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.zstencil_format);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.zb);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F28);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F30);
@@ -524,12 +525,12 @@
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.vps);
 
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.filter);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown1);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.filter_1);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.size);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.format);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.pitch);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.offset);
-	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown4);
+	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.chroma_key);
 	insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.border_color);
 
 	r300->hw.is_dirty = GL_TRUE;
@@ -544,9 +545,9 @@
 		size = 64*256;
 
 	if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) {
-		fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%u\n",
+		fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zu\n",
 			sizeof(drm_r300_cmd_header_t));
-		fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%u\n",
+		fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zu\n",
 			sizeof(drm_radeon_cmd_buffer_t));
 		fprintf(stderr,
 			"Allocating %d bytes command buffer (max state is %d bytes)\n",
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index a339b2f..fe261db 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -71,7 +71,7 @@
 /* Checkpoint.. for convenience */
 #define CPT	{ fprintf(stderr, "%s:%s line %d\n", __FILE__, __FUNCTION__, __LINE__); }
 /* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
-   I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble 
+   I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
    with other compilers ... GLUE!
 */
 #if 1
@@ -180,6 +180,7 @@
 	/* hardware register values */
 	/* Note that R200 has 8 registers per texture and R300 only 7 */
 	GLuint filter;
+	GLuint filter_1;
 	GLuint pitch_reg;
 	GLuint size;	/* npot only */
 	GLuint format;
@@ -432,11 +433,11 @@
 	int		max_state_size;	/* in dwords */
 
 	struct r300_state_atom vpt;	/* viewport (1D98) */
-	struct r300_state_atom unk2080;	/* (2080) */
+	struct r300_state_atom vap_cntl;
 	struct r300_state_atom vof;     /* VAP output format register 0x2090 */
 	struct r300_state_atom vte;	/* (20B0) */
 	struct r300_state_atom unk2134;	/* (2134) */
-	struct r300_state_atom unk2140;	/* (2140) */
+	struct r300_state_atom vap_cntl_status;
 	struct r300_state_atom vir[2];	/* vap input route (2150/21E0) */
 	struct r300_state_atom vic;	/* vap input control (2180) */
 	struct r300_state_atom unk21DC; /* (21DC) */
@@ -452,13 +453,13 @@
 	struct r300_state_atom unk4230; /* (4230) */
 	struct r300_state_atom lcntl;	/* line control */
 	struct r300_state_atom unk4260; /* (4260) */
-	struct r300_state_atom unk4274; /* (4274) */
-	struct r300_state_atom unk4288; /* (4288) */
+	struct r300_state_atom shade;
+	struct r300_state_atom polygon_mode;
 	struct r300_state_atom fogp;	/* fog parameters (4294) */
 	struct r300_state_atom unk429C; /* (429C) */
-	struct r300_state_atom unk42A0;	/* (42A0) */
+	struct r300_state_atom zbias_cntl;
 	struct r300_state_atom zbs;	/* zbias (42A4) */
-	struct r300_state_atom unk42B4; /* (42B4) */
+	struct r300_state_atom occlusion_cntl;
 	struct r300_state_atom cul;	/* cull cntl (42B8) */
 	struct r300_state_atom unk42C0; /* (42C0) */
 	struct r300_state_atom rc;	/* rs control (4300) */
@@ -478,13 +479,13 @@
 	struct r300_state_atom unk4E00;	/* (4E00) */
 	struct r300_state_atom bld;	/* blending (4E04) */
 	struct r300_state_atom cmk;	/* colormask (4E0C) */
-	struct r300_state_atom unk4E10;	/* constant blend color + ??? (4E10) */
+	struct r300_state_atom blend_color;	/* constant blend color */
 	struct r300_state_atom cb;	/* colorbuffer (4E28) */
 	struct r300_state_atom unk4E50;	/* (4E50) */
 	struct r300_state_atom unk4E88;	/* (4E88) */
 	struct r300_state_atom unk4EA0;	/* (4E88) I saw it only written on RV350 hardware..  */
 	struct r300_state_atom zs;	/* zstencil control (4F00) */
-	struct r300_state_atom unk4F10;	/* (4F10) */
+	struct r300_state_atom zstencil_format;
 	struct r300_state_atom zb;	/* z buffer (4F20) */
 	struct r300_state_atom unk4F28;	/* (4F28) */
 	struct r300_state_atom unk4F30;	/* (4F30) */
@@ -501,12 +502,12 @@
 		   updating the whole thing at once */
 	struct {
 		struct r300_state_atom filter;
-		struct r300_state_atom unknown1;
+		struct r300_state_atom filter_1;
 		struct r300_state_atom size;
 		struct r300_state_atom format;
 		struct r300_state_atom pitch;
 		struct r300_state_atom offset;
-		struct r300_state_atom unknown4;
+		struct r300_state_atom chroma_key;
 		struct r300_state_atom border_color;
 		} tex;
 	struct r300_state_atom txe;	/* tex enable (4104) */
@@ -547,7 +548,7 @@
 /* Perhaps more if we store programs in vmem? */
 /* drm_r300_cmd_header_t->vpu->count is unsigned char */
 #define VSF_MAX_FRAGMENT_LENGTH (255*4)
-	
+
 /* Can be tested with colormat currently. */
 #define VSF_MAX_FRAGMENT_TEMPS (14)
 
@@ -592,7 +593,7 @@
 	int unknown_ptr2;  /* pointer within program space */
 	int unknown_ptr3;  /* pointer within program space */
 	};
-	
+
 extern int hw_tcl_on;
 
 //#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current)
@@ -601,16 +602,16 @@
 /* Should but doesnt work */
 //#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp)
 
-//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && (OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit)))) : 
+//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && (OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit)))) :
 //	(r300->state.render_inputs & (_TNL_BIT_TEX0<<(unit))))
-//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit))) : 
+//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit))) :
 //	ctx->Texture.Unit[unit]._ReallyEnabled && r300->state.render_inputs & (_TNL_BIT_TEX0<<(unit)))
 
 #define TMU_ENABLED(ctx, unit) (ctx->Texture.Unit[unit]._ReallyEnabled)
 
 /* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday.
  * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly.
- */	
+ */
 
 struct r300_vertex_program_key {
 	GLuint InputsRead;
@@ -621,9 +622,9 @@
 	struct r300_vertex_program *next;
 	struct r300_vertex_program_key key;
 	int translated;
-	
+
 	struct r300_vertex_shader_fragment program;
-	
+
 	int pos_end;
 	int num_temporaries; /* Number of temp vars used by program */
 	int wpos_idx;
@@ -646,38 +647,89 @@
 #define PFS_NUM_TEMP_REGS	32
 #define PFS_NUM_CONST_REGS	16
 
-/* Tracking data for Mesa registers */
+/* Mapping Mesa registers to R300 temporaries */
 struct reg_acc {
        int reg;        /* Assigned hw temp */
        unsigned int refcount; /* Number of uses by mesa program */
 };
 
-struct r300_pfs_compile_state {
-       int v_pos, s_pos;       /* highest ALU slots used */
+/**
+ * Describe the current lifetime information for an R300 temporary
+ */
+struct reg_lifetime {
+	/* Index of the first slot where this register is free in the sense
+	   that it can be used as a new destination register.
+	   This is -1 if the register has been assigned to a Mesa register
+	   and the last access to the register has not yet been emitted */
+	int free;
 
-       /* Track some information gathered during opcode
-        * construction.
-        * 
-        * NOTE: Data is only set by the code, and isn't used yet.
-        */
-       struct {
-               int vsrc[3];
-               int ssrc[3];
-               int umask;
-       } slot[PFS_MAX_ALU_INST];
+	/* Index of the first slot where this register is currently reserved.
+	   This is used to stop e.g. a scalar operation from being moved
+	   before the allocation time of a register that was first allocated
+	   for a vector operation. */
+	int reserved;
 
-       /* Used to map Mesa's inputs/temps onto hardware temps */
-       int temp_in_use;
-       struct reg_acc temps[PFS_NUM_TEMP_REGS];
-       struct reg_acc inputs[32]; /* don't actually need 32... */
+	/* Index of the first slot in which the register can be used as a
+	   source without losing the value that is written by the last
+	   emitted instruction that writes to the register */
+	int vector_valid;
+	int scalar_valid;
 
-       /* Track usage of hardware temps, for register allocation,
-        * indirection detection, etc. */
-       int hwreg_in_use;
-       GLuint used_in_node;
-       GLuint dest_in_node;
+	/* Index to the slot where the register was last read.
+	   This is also the first slot in which the register may be written again */
+	int vector_lastread;
+	int scalar_lastread;
 };
 
+
+/**
+ * Store usage information about an ALU instruction slot during the
+ * compilation of a fragment program.
+ */
+#define SLOT_SRC_VECTOR  (1<<0)
+#define SLOT_SRC_SCALAR  (1<<3)
+#define SLOT_SRC_BOTH    (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR)
+#define SLOT_OP_VECTOR   (1<<16)
+#define SLOT_OP_SCALAR   (1<<17)
+#define SLOT_OP_BOTH     (SLOT_OP_VECTOR | SLOT_OP_SCALAR)
+
+struct r300_pfs_compile_slot {
+	/* Bitmask indicating which parts of the slot are used, using SLOT_ constants
+	   defined above */
+	unsigned int used;
+
+	/* Selected sources */
+	int vsrc[3];
+	int ssrc[3];
+};
+
+/**
+ * Store information during compilation of fragment programs.
+ */
+struct r300_pfs_compile_state {
+	int nrslots;       /* number of ALU slots used so far */
+
+	/* Track which (parts of) slots are already filled with instructions */
+	struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST];
+
+	/* Track the validity of R300 temporaries */
+	struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS];
+
+	/* Used to map Mesa's inputs/temps onto hardware temps */
+	int temp_in_use;
+	struct reg_acc temps[PFS_NUM_TEMP_REGS];
+	struct reg_acc inputs[32]; /* don't actually need 32... */
+
+	/* Track usage of hardware temps, for register allocation,
+	 * indirection detection, etc. */
+	GLuint used_in_node;
+	GLuint dest_in_node;
+};
+
+/**
+ * Store everything about a fragment program that is needed
+ * to render with that program.
+ */
 struct r300_fragment_program {
 	struct gl_fragment_program mesa_program;
 
@@ -715,23 +767,18 @@
 	int tex_offset;
 	int tex_end;
 
-	/* Hardware constants */
-	GLfloat constant[PFS_NUM_CONST_REGS][4];
+	/* Hardware constants.
+	 * Contains a pointer to the value. The destination of the pointer
+	 * is supposed to be updated when GL state changes.
+	 * Typically, this is either a pointer into
+	 * gl_program_parameter_list::ParameterValues, or a pointer to a
+	 * global constant (e.g. for sin/cos-approximation)
+	 */
+	const GLfloat* constant[PFS_NUM_CONST_REGS];
 	int const_nr;
 
-	/* Tracked parameters */
-	struct {
-		int idx;			/* hardware index */
-		GLfloat *values;	/* pointer to values */
-	} param[PFS_NUM_CONST_REGS];
-	int param_nr;
-	GLboolean params_uptodate;
-
 	int max_temp_idx;
 
-	/* the index of the sin constant is stored here */
-	GLint const_sin[2];
-	
 	GLuint optimization;
 };
 
@@ -758,10 +805,10 @@
 	void *Elts;
 	int elt_size;
 	int elt_min, elt_max; /* debug */
-	
+
 	struct dt AttribPtr[VERT_ATTRIB_MAX];
-	
-	const struct _mesa_prim  *Primitive;	              
+
+	const struct _mesa_prim  *Primitive;
 	GLuint      PrimitiveCount;
 	GLint LockFirst;
 	GLsizei LockCount;
@@ -793,16 +840,16 @@
 
 	GLuint *Elts;
 	struct r300_dma_region elt_dma;
-	
-	DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. 
-				 They are the same as tnl->render_inputs for fixed pipeline */	
-	
+
+	DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for.
+				 They are the same as tnl->render_inputs for fixed pipeline */
+
 	struct {
 		int transform_offset;  /* Transform matrix offset, -1 if none */
 		} vap_param;  /* vertex processor parameter allocation - tells where to write parameters */
-	
+
 	struct r300_stencilbuffer_state stencil;
-	
+
 };
 
 #define R300_FALLBACK_NONE 0
@@ -854,7 +901,7 @@
 	struct gl_buffer_object mesa_obj;
 	int id;
 };
-		
+
 #define R300_CONTEXT(ctx)		((r300ContextPtr)(ctx->DriverCtx))
 
 static __inline GLuint r300PackColor( GLuint cpp,
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index f18a982..c407dfb 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -50,6 +50,7 @@
 #include "r300_context.h"
 #include "r300_fragprog.h"
 #include "r300_reg.h"
+#include "r300_state.h"
 
 /*
  * Usefull macros and values
@@ -94,8 +95,9 @@
 #define REG_NEGV_SHIFT		18
 #define REG_NEGS_SHIFT		19
 #define REG_ABS_SHIFT		20
-#define REG_NO_USE_SHIFT	21
-#define REG_VALID_SHIFT		22
+#define REG_NO_USE_SHIFT	21 // Hack for refcounting
+#define REG_VALID_SHIFT		22 // Does the register contain a defined value?
+#define REG_BUILTIN_SHIFT   23 // Is it a builtin (like all zero/all one)?
 
 #define REG_TYPE_MASK		(0x03 << REG_TYPE_SHIFT)
 #define REG_INDEX_MASK		(0x3F << REG_INDEX_SHIFT)
@@ -106,12 +108,14 @@
 #define REG_ABS_MASK		(0x01 << REG_ABS_SHIFT)
 #define REG_NO_USE_MASK		(0x01 << REG_NO_USE_SHIFT)
 #define REG_VALID_MASK		(0x01 << REG_VALID_SHIFT)
+#define REG_BUILTIN_MASK	(0x01 << REG_BUILTIN_SHIFT)
 
-#define REG(type, index, vswz, sswz, nouse, valid)			\
+#define REG(type, index, vswz, sswz, nouse, valid, builtin)	\
 	(((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) |			\
 	 ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) |		\
 	 ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) |		\
 	 ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) |		\
+	 ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) |	\
 	 ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) |			\
 	 ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK))
 #define REG_GET_TYPE(reg)						\
@@ -126,6 +130,8 @@
 	((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
 #define REG_GET_VALID(reg)						\
 	((reg & REG_VALID_MASK) >> REG_VALID_SHIFT)
+#define REG_GET_BUILTIN(reg)						\
+	((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT)
 #define REG_SET_TYPE(reg, type)						\
 	reg = ((reg & ~REG_TYPE_MASK) |					\
 	       ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK))
@@ -144,6 +150,9 @@
 #define REG_SET_VALID(reg, valid)					\
 	reg = ((reg & ~REG_VALID_MASK) |				\
 	       ((valid << REG_VALID_SHIFT) & REG_VALID_MASK))
+#define REG_SET_BUILTIN(reg, builtin)					\
+	reg = ((reg & ~REG_BUILTIN_MASK) |				\
+	       ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK))
 #define REG_ABS(reg)							\
 	reg = (reg | REG_ABS_MASK)
 #define REG_NEGV(reg)							\
@@ -184,9 +193,6 @@
  *
  * REG_VSWZ/REG_SSWZ is an index into this table
  */
-#define SLOT_VECTOR	(1<<0)
-#define SLOT_SCALAR	(1<<3)
-#define SLOT_BOTH	(SLOT_VECTOR | SLOT_SCALAR)
 
 /* mapping from SWIZZLE_* to r300 native values for scalar insns */
 #define SWIZZLE_HALF 6
@@ -202,14 +208,14 @@
 	GLuint flags;
 } v_swiz[] = {
 /* native swizzles */
-	{ MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_VECTOR },
-	{ MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_VECTOR },
-	{ MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_VECTOR },
-	{ MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_VECTOR },
-	{ MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A,     1, SLOT_SCALAR },
-	{ MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_VECTOR },
-	{ MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_VECTOR },
-	{ MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_BOTH },
+	{ MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR },
+	{ MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR },
+	{ MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR },
+	{ MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR },
+	{ MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A,     1, SLOT_SRC_SCALAR },
+	{ MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR },
+	{ MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR },
+	{ MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH },
 	{ MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0},
 	{ MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0},
 	{ MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0},
@@ -241,10 +247,10 @@
 	int stride;	/* difference between SRC0/1/2 */
 	GLuint flags;
 } s_swiz[] = {
-	{ R300_FPI2_ARGA_SRC0C_X, 3, SLOT_VECTOR },
-	{ R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_VECTOR },
-	{ R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_VECTOR },
-	{ R300_FPI2_ARGA_SRC0A  , 1, SLOT_SCALAR },
+	{ R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR },
+	{ R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR },
+	{ R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR },
+	{ R300_FPI2_ARGA_SRC0A  , 1, SLOT_SRC_SCALAR },
 	{ R300_FPI2_ARGA_ZERO   , 0, 0 },
 	{ R300_FPI2_ARGA_ONE    , 0, 0 },
 	{ R300_FPI2_ARGA_HALF   , 0, 0 }
@@ -256,6 +262,7 @@
 				SWIZZLE_XYZ,
 				SWIZZLE_W,
 				GL_FALSE,
+				GL_FALSE,
 				GL_FALSE);
 
 /* constant one source */
@@ -264,6 +271,7 @@
 				  SWIZZLE_111,
 				  SWIZZLE_ONE,
 				  GL_FALSE,
+				  GL_TRUE,
 				  GL_TRUE);
 
 /* constant half source */
@@ -272,6 +280,7 @@
 				   SWIZZLE_HHH,
 				   SWIZZLE_HALF,
 				   GL_FALSE,
+				   GL_TRUE,
 				   GL_TRUE);
 
 /* constant zero source */
@@ -280,6 +289,7 @@
 				   SWIZZLE_000,
 				   SWIZZLE_ZERO,
 				   GL_FALSE,
+				   GL_TRUE,
 				   GL_TRUE);
 
 /*
@@ -291,47 +301,105 @@
 				GLuint src0, GLuint src1, GLuint src2,
 				int flags);
 
-/*
- * Helper functions prototypes
+/**
+ * Get an R300 temporary that can be written to in the given slot.
  */
-static int get_hw_temp(struct r300_fragment_program *rp)
+static int get_hw_temp(struct r300_fragment_program *rp, int slot)
 {
 	COMPILE_STATE;
-	int r = ffs(~cs->hwreg_in_use);
-	if (!r) {
+	int r;
+
+	for(r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
+		if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot)
+			break;
+	}
+
+	if (r >= PFS_NUM_TEMP_REGS) {
 		ERROR("Out of hardware temps\n");
 		return 0;
 	}
 
-	cs->hwreg_in_use |= (1 << --r);
+	// Reserved is used to avoid the following scenario:
+	//  R300 temporary X is first assigned to Mesa temporary Y during vector ops
+	//  R300 temporary X is then assigned to Mesa temporary Z for further vector ops
+	//  Then scalar ops on Mesa temporary Z are emitted and move back in time
+	//  to overwrite the value of temporary Y.
+	// End scenario.
+	cs->hwtemps[r].reserved = cs->hwtemps[r].free;
+	cs->hwtemps[r].free = -1;
+
+	// Reset to some value that won't mess things up when the user
+	// tries to read from a temporary that hasn't been assigned a value yet.
+	// In the normal case, vector_valid and scalar_valid should be set to
+	// a sane value by the first emit that writes to this temporary.
+	cs->hwtemps[r].vector_valid = 0;
+	cs->hwtemps[r].scalar_valid = 0;
+
 	if (r > rp->max_temp_idx)
 		rp->max_temp_idx = r;
 
 	return r;
 }
 
+/**
+ * Get an R300 temporary that will act as a TEX destination register.
+ */
 static int get_hw_temp_tex(struct r300_fragment_program *rp)
 {
 	COMPILE_STATE;
 	int r;
 
-	r = ffs(~(cs->hwreg_in_use | cs->used_in_node));
-	if (!r)
-		return get_hw_temp(rp); /* Will cause an indirection */
+	for(r = 0; r < PFS_NUM_TEMP_REGS; ++r) {
+		if (cs->used_in_node & (1 << r))
+			continue;
 
-	cs->hwreg_in_use |= (1 << --r);
+		// Note: Be very careful here
+		if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0)
+			break;
+	}
+
+	if (r >= PFS_NUM_TEMP_REGS)
+		return get_hw_temp(rp, 0); /* Will cause an indirection */
+
+	cs->hwtemps[r].reserved = cs->hwtemps[r].free;
+	cs->hwtemps[r].free = -1;
+
+	// Reset to some value that won't mess things up when the user
+	// tries to read from a temporary that hasn't been assigned a value yet.
+	// In the normal case, vector_valid and scalar_valid should be set to
+	// a sane value by the first emit that writes to this temporary.
+	cs->hwtemps[r].vector_valid = cs->nrslots;
+	cs->hwtemps[r].scalar_valid = cs->nrslots;
+
 	if (r > rp->max_temp_idx)
 		rp->max_temp_idx = r;
 
 	return r;
 }
 
+/**
+ * Mark the given hardware register as free.
+ */
 static void free_hw_temp(struct r300_fragment_program *rp, int idx)
 {
 	COMPILE_STATE;
-	cs->hwreg_in_use &= ~(1<<idx);
+
+	// Be very careful here. Consider sequences like
+	//  MAD r0, r1,r2,r3
+	//  TEX r4, ...
+	// The TEX instruction may be moved in front of the MAD instruction
+	// due to the way nodes work. We don't want to alias r1 and r4 in
+	// this case.
+	// I'm certain the register allocation could be further sanitized,
+	// but it's tricky because of stuff that can happen inside emit_tex
+	// and emit_arith.
+	cs->hwtemps[idx].free = cs->nrslots+1;
 }
 
+
+/**
+ * Create a new Mesa temporary register.
+ */
 static GLuint get_temp_reg(struct r300_fragment_program *rp)
 {
 	COMPILE_STATE;
@@ -354,6 +422,10 @@
 	return r;
 }
 
+/**
+ * Create a new Mesa temporary register that will act as the destination
+ * register for a texture read.
+ */
 static GLuint get_temp_reg_tex(struct r300_fragment_program *rp)
 {
 	COMPILE_STATE;
@@ -376,6 +448,9 @@
 	return r;
 }
 
+/**
+ * Free a Mesa temporary and the associated R300 temporary.
+ */
 static void free_temp(struct r300_fragment_program *rp, GLuint r)
 {
 	COMPILE_STATE;
@@ -383,7 +458,7 @@
 
 	if (!(cs->temp_in_use & (1 << index)))
 		return;
-	
+
 	if (REG_GET_TYPE(r) == REG_TYPE_TEMP) {
 		free_hw_temp(rp, cs->temps[index].reg);
 		cs->temps[index].reg = -1;
@@ -394,47 +469,39 @@
 	}
 }
 
-static GLuint emit_param4fv(struct r300_fragment_program *rp,
-			    GLfloat *values)
+/**
+ * Emit a hardware constant/parameter.
+ *
+ * \param cp Stable pointer to an array of 4 floats.
+ *  The pointer must be stable in the sense that it remains valid
+ *  and keeps holding the contents of the constant/parameter throughout the
+ *  lifetime of the fragment program (actually, up until the next time the
+ *  fragment program is translated).
+ */
+static GLuint emit_const4fv(struct r300_fragment_program *rp, const GLfloat* cp)
 {
-	GLuint r = undef;
-	GLuint index;
-	int pidx;
+	GLuint reg = undef;
+	int index;
 
-	pidx = rp->param_nr++;
-	index = rp->const_nr++;
-	if (pidx >= PFS_NUM_CONST_REGS || index >= PFS_NUM_CONST_REGS) {
-		ERROR("Out of const/param slots!\n");
-		return r;
+	for(index = 0; index < rp->const_nr; ++index) {
+		if (rp->constant[index] == cp)
+			break;
 	}
 
-	rp->param[pidx].idx = index;
-	rp->param[pidx].values = values;
-	rp->params_uptodate = GL_FALSE;
+	if (index >= rp->const_nr) {
+		if (index >= PFS_NUM_CONST_REGS) {
+			ERROR("Out of hw constants!\n");
+			return reg;
+		}
 
-	REG_SET_TYPE(r, REG_TYPE_CONST);
-	REG_SET_INDEX(r, index);
-	REG_SET_VALID(r, GL_TRUE);
-	return r;
-}
-
-static GLuint emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
-{ 
-	GLuint r = undef;
-	GLuint index;
-
-	index = rp->const_nr++;
-	if (index >= PFS_NUM_CONST_REGS) {
-		ERROR("Out of hw constants!\n");
-		return r;
+		rp->const_nr++;
+		rp->constant[index] = cp;
 	}
 
-	COPY_4V(rp->constant[index], cp);
-
-	REG_SET_TYPE(r, REG_TYPE_CONST);
-	REG_SET_INDEX(r, index);
-	REG_SET_VALID(r, GL_TRUE);
-	return r;
+	REG_SET_TYPE(reg, REG_TYPE_CONST);
+	REG_SET_INDEX(reg, index);
+	REG_SET_VALID(reg, GL_TRUE);
+	return reg;
 }
 
 static inline GLuint negate(GLuint r)
@@ -617,7 +684,7 @@
 	    GLuint offset;
 	    for(i=0; i < 4; ++i){
 		offset = GET_SWZ(arbswz, i);
-		
+
 		newswz |= (offset <= 3)?GET_SWZ(vsrcswz, offset) << i*3:offset << i*3;
 	    }
 
@@ -688,16 +755,16 @@
 		REG_SET_TYPE(r, REG_TYPE_INPUT);
 		break;
 	case PROGRAM_LOCAL_PARAM:
-		r = emit_param4fv(rp,
+		r = emit_const4fv(rp,
 				  rp->mesa_program.Base.LocalParams[fpsrc.Index]);
 		break;
 	case PROGRAM_ENV_PARAM:
-		r = emit_param4fv(rp,
+		r = emit_const4fv(rp,
 				  rp->ctx->FragmentProgram.Parameters[fpsrc.Index]);
 		break;
 	case PROGRAM_STATE_VAR:
 	case PROGRAM_NAMED_PARAM:
-		r = emit_param4fv(rp,
+		r = emit_const4fv(rp,
 				  rp->mesa_program.Base.Parameters->ParameterValues[fpsrc.Index]);
 		break;
 	default:
@@ -726,7 +793,7 @@
 		       struct prog_dst_register dest)
 {
 	GLuint r = undef;
-	
+
 	switch (dest.File) {
 	case PROGRAM_TEMPORARY:
 		REG_SET_INDEX(r, dest.Index);
@@ -762,10 +829,10 @@
 	switch(REG_GET_TYPE(src)) {
 	case REG_TYPE_TEMP:
 		/* NOTE: if reg==-1 here, a source is being read that
-		 * 	 hasn't been written to. Undefined results
+		 * 	 hasn't been written to. Undefined results.
 		 */
 		if (cs->temps[index].reg == -1)
-			cs->temps[index].reg = get_hw_temp(rp);
+			cs->temps[index].reg = get_hw_temp(rp, cs->nrslots);
 
 		idx = cs->temps[index].reg;
 
@@ -795,7 +862,8 @@
 
 static int t_hw_dst(struct r300_fragment_program *rp,
 		    GLuint dest,
-		    GLboolean tex)
+		    GLboolean tex,
+		    int slot)
 {
 	COMPILE_STATE;
 	int idx;
@@ -806,7 +874,7 @@
 	case REG_TYPE_TEMP:
 		if (cs->temps[REG_GET_INDEX(dest)].reg == -1) {
 			if (!tex) {
-				cs->temps[index].reg = get_hw_temp(rp);
+				cs->temps[index].reg = get_hw_temp(rp, slot);
 			} else {
 				cs->temps[index].reg = get_hw_temp_tex(rp);
 			}
@@ -835,30 +903,24 @@
 		ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
 		return 0;
 	}
-	
+
 	return idx;
 }
 
-static void emit_nop(struct r300_fragment_program *rp,
-		     GLuint mask,
-		     GLboolean sync)
+static void emit_nop(struct r300_fragment_program *rp)
 {
 	COMPILE_STATE;
-	
-	if (sync)
-		cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
 
-	if (mask & WRITEMASK_XYZ) {
-		rp->alu.inst[cs->v_pos].inst0 = NOP_INST0;
-		rp->alu.inst[cs->v_pos].inst1 = NOP_INST1;
-		cs->v_pos++;
+	if (cs->nrslots >= PFS_MAX_ALU_INST) {
+		ERROR("Out of ALU instruction slots\n");
+		return;
 	}
 
-	if (mask & WRITEMASK_W) {
-		rp->alu.inst[cs->s_pos].inst2 = NOP_INST2;
-		rp->alu.inst[cs->s_pos].inst3 = NOP_INST3;
-		cs->s_pos++;
-	}
+	rp->alu.inst[cs->nrslots].inst0 = NOP_INST0;
+	rp->alu.inst[cs->nrslots].inst1 = NOP_INST1;
+	rp->alu.inst[cs->nrslots].inst2 = NOP_INST2;
+	rp->alu.inst[cs->nrslots].inst3 = NOP_INST3;
+	cs->nrslots++;
 }
 
 static void emit_tex(struct r300_fragment_program *rp,
@@ -871,7 +933,7 @@
 	GLuint din = cs->dest_in_node, uin = cs->used_in_node;
 	int unit = fpi->TexSrcUnit;
 	int hwsrc, hwdest;
-	
+
 	/* Resolve source/dest to hardware registers */
 	hwsrc = t_hw_src(rp, coord, GL_TRUE);
 	if (opcode != R300_FPITX_OP_KIL) {
@@ -882,8 +944,8 @@
 			rdest = dest;
 			dest = get_temp_reg_tex(rp);
 		}
-		hwdest = t_hw_dst(rp, dest, GL_TRUE);
-		
+		hwdest = t_hw_dst(rp, dest, GL_TRUE, rp->node[rp->cur_node].alu_offset);
+
 		/* Use a temp that hasn't been used in this node, rather
 		 * than causing an indirection
 		 */
@@ -896,23 +958,19 @@
 		hwdest = 0;
 		unit = 0;
 	}
-	
+
 	/* Indirection if source has been written in this node, or if the
 	 * dest has been read/written in this node
 	 */
 	if ((REG_GET_TYPE(coord) != REG_TYPE_CONST &&
 	     (din & (1<<hwsrc))) || (uin & (1<<hwdest))) {
-			
+
 		/* Finish off current node */
-		cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
-		if (rp->node[rp->cur_node].alu_offset == cs->v_pos) {
-			/* No alu instructions in the node? Emit a NOP. */
-			emit_nop(rp, WRITEMASK_XYZW, GL_TRUE);
-			cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
-		}
-				
+		if (rp->node[rp->cur_node].alu_offset == cs->nrslots)
+			emit_nop(rp);
+
 		rp->node[rp->cur_node].alu_end =
-				cs->v_pos - rp->node[rp->cur_node].alu_offset - 1;
+				cs->nrslots - rp->node[rp->cur_node].alu_offset - 1;
 		assert(rp->node[rp->cur_node].alu_end >= 0);
 
 		if (++rp->cur_node >= PFS_MAX_TEX_INDIRECT) {
@@ -922,14 +980,14 @@
 
 		/* Start new node */
 		rp->node[rp->cur_node].tex_offset = rp->tex.length;
-		rp->node[rp->cur_node].alu_offset = cs->v_pos;
+		rp->node[rp->cur_node].alu_offset = cs->nrslots;
 		rp->node[rp->cur_node].tex_end = -1;
-		rp->node[rp->cur_node].alu_end = -1;	
+		rp->node[rp->cur_node].alu_end = -1;
 		rp->node[rp->cur_node].flags = 0;
 		cs->used_in_node = 0;
 		cs->dest_in_node = 0;
 	}
-	
+
 	if (rp->cur_node == 0)
 		rp->first_node_has_tex = 1;
 
@@ -940,7 +998,7 @@
 		/* not entirely sure about this */
 		| (opcode << R300_FPITX_OPCODE_SHIFT);
 
-	cs->dest_in_node |= (1 << hwdest); 
+	cs->dest_in_node |= (1 << hwdest);
 	if (REG_GET_TYPE(coord) != REG_TYPE_CONST)
 		cs->used_in_node |= (1 << hwsrc);
 
@@ -954,84 +1012,272 @@
 	}
 }
 
-/* Add sources to FPI1/FPI3 lists.  If source is already on list,
- * reuse the index instead of wasting a source.
+
+/**
+ * Returns the first slot where we could possibly allow writing to dest,
+ * according to register allocation.
  */
-static int add_src(struct r300_fragment_program *rp,
-		   int reg,
-		   int pos,
-		   int srcmask)
+static int get_earliest_allowed_write(
+		struct r300_fragment_program* rp,
+		GLuint dest, int mask)
 {
 	COMPILE_STATE;
-	int csm, i;
-	
-	/* Look for matches */
-	for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) {	
-		/* If sources have been allocated in this position(s)... */
-		if ((cs->slot[pos].umask & csm) == csm) {
-			/* ... and the register number(s) match, re-use the
-			   source */
-			if (srcmask == SLOT_VECTOR &&
-			    cs->slot[pos].vsrc[i] == reg)
-				return i;
-			if (srcmask == SLOT_SCALAR &&
-			    cs->slot[pos].ssrc[i] == reg)
-				return i;
-			if (srcmask == SLOT_BOTH &&
-			    cs->slot[pos].vsrc[i] == reg &&
-			    cs->slot[pos].ssrc[i] == reg)
-				return i;
+	int idx;
+	int pos;
+	GLuint index = REG_GET_INDEX(dest);
+	assert(REG_GET_VALID(dest));
+
+	switch(REG_GET_TYPE(dest)) {
+		case REG_TYPE_TEMP:
+			if (cs->temps[index].reg == -1)
+				return 0;
+
+			idx = cs->temps[index].reg;
+			break;
+		case REG_TYPE_OUTPUT:
+			return 0;
+		default:
+			ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
+			return 0;
+	}
+
+	pos = cs->hwtemps[idx].reserved;
+	if (mask & WRITEMASK_XYZ) {
+		if (pos < cs->hwtemps[idx].vector_lastread)
+			pos = cs->hwtemps[idx].vector_lastread;
+	}
+	if (mask & WRITEMASK_W) {
+		if (pos < cs->hwtemps[idx].scalar_lastread)
+			pos = cs->hwtemps[idx].scalar_lastread;
+	}
+
+	return pos;
+}
+
+
+/**
+ * Allocates a slot for an ALU instruction that can consist of
+ * a vector part or a scalar part or both.
+ *
+ * Sources from src (src[0] to src[argc-1]) are added to the slot in the
+ * appropriate position (vector and/or scalar), and their positions are
+ * recorded in the srcpos array.
+ *
+ * This function emits instruction code for the source fetch and the
+ * argument selection. It does not emit instruction code for the
+ * opcode or the destination selection.
+ *
+ * @return the index of the slot
+ */
+static int find_and_prepare_slot(struct r300_fragment_program* rp,
+		GLboolean emit_vop,
+		GLboolean emit_sop,
+		int argc,
+		GLuint* src,
+		GLuint dest,
+		int mask)
+{
+	COMPILE_STATE;
+	int hwsrc[3];
+	int srcpos[3];
+	unsigned int used;
+	int tempused;
+	int tempvsrc[3];
+	int tempssrc[3];
+	int pos;
+	int regnr;
+	int i,j;
+
+	// Determine instruction slots, whether sources are required on
+	// vector or scalar side, and the smallest slot number where
+	// all source registers are available
+	used = 0;
+	if (emit_vop)
+		used |= SLOT_OP_VECTOR;
+	if (emit_sop)
+		used |= SLOT_OP_SCALAR;
+
+	pos = get_earliest_allowed_write(rp, dest, mask);
+
+	if (rp->node[rp->cur_node].alu_offset > pos)
+		pos = rp->node[rp->cur_node].alu_offset;
+	for(i = 0; i < argc; ++i) {
+		if (!REG_GET_BUILTIN(src[i])) {
+			if (emit_vop)
+				used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i;
+			if (emit_sop)
+				used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i;
+		}
+
+		hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */
+		regnr = hwsrc[i] & 31;
+
+		if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) {
+			if (used & (SLOT_SRC_VECTOR << i)) {
+				if (cs->hwtemps[regnr].vector_valid > pos)
+					pos = cs->hwtemps[regnr].vector_valid;
+			}
+			if (used & (SLOT_SRC_SCALAR << i)) {
+				if (cs->hwtemps[regnr].scalar_valid > pos)
+					pos = cs->hwtemps[regnr].scalar_valid;
+			}
 		}
 	}
 
-	/* Look for free spaces */
-	for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) {
-		/* If the position(s) haven't been allocated */
-		if ((cs->slot[pos].umask & csm) == 0) {
-			cs->slot[pos].umask |= csm;
+	// Find a slot that fits
+	for(; ; ++pos) {
+		if (cs->slot[pos].used & used & SLOT_OP_BOTH)
+			continue;
 
-			if (srcmask & SLOT_VECTOR)
-				cs->slot[pos].vsrc[i] = reg;
-			if (srcmask & SLOT_SCALAR)
-				cs->slot[pos].ssrc[i] = reg;
-			return i;
-		}	
+		if (pos >= cs->nrslots) {
+			if (cs->nrslots >= PFS_MAX_ALU_INST) {
+				ERROR("Out of ALU instruction slots\n");
+				return -1;
+			}
+
+			rp->alu.inst[pos].inst0 = NOP_INST0;
+			rp->alu.inst[pos].inst1 = NOP_INST1;
+			rp->alu.inst[pos].inst2 = NOP_INST2;
+			rp->alu.inst[pos].inst3 = NOP_INST3;
+
+			cs->nrslots++;
+		}
+
+		// Note: When we need both parts (vector and scalar) of a source,
+		// we always try to put them into the same position. This makes the
+		// code easier to read, and it is optimal (i.e. one doesn't gain
+		// anything by splitting the parts).
+		// It also avoids headaches with swizzles that access both parts (e.g. WXY)
+		tempused = cs->slot[pos].used;
+		for(i = 0; i < 3; ++i) {
+			tempvsrc[i] = cs->slot[pos].vsrc[i];
+			tempssrc[i] = cs->slot[pos].ssrc[i];
+		}
+
+		for(i = 0; i < argc; ++i) {
+			int flags = (used >> i) & SLOT_SRC_BOTH;
+
+			if (!flags) {
+				srcpos[i] = 0;
+				continue;
+			}
+
+			for(j = 0; j < 3; ++j) {
+				if ((tempused >> j) & flags & SLOT_SRC_VECTOR) {
+					if (tempvsrc[j] != hwsrc[i])
+						continue;
+				}
+
+				if ((tempused >> j) & flags & SLOT_SRC_SCALAR) {
+					if (tempssrc[j] != hwsrc[i])
+						continue;
+				}
+
+				break;
+			}
+
+			if (j == 3)
+				break;
+
+			srcpos[i] = j;
+			tempused |= flags << j;
+			if (flags & SLOT_SRC_VECTOR)
+				tempvsrc[j] = hwsrc[i];
+			if (flags & SLOT_SRC_SCALAR)
+				tempssrc[j] = hwsrc[i];
+		}
+
+		if (i == argc)
+			break;
 	}
-	
-	//ERROR("Failed to allocate sources in FPI1/FPI3!\n");
-	return 0;
-}
 
-/* Determine whether or not to position opcode in the same ALU slot for both
- * vector and scalar portions of an instruction.
- *
- * It's not necessary to force the first case, but it makes disassembled
- * shaders easier to read.
- */
-static GLboolean force_same_slot(int vop,
-				 int sop,
-				 GLboolean emit_vop,
-				 GLboolean emit_sop,
-				 int argc,
-				 GLuint *src)
-{
-	int i;
+	// Found a slot, reserve it
+	cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH);
+	for(i = 0; i < 3; ++i) {
+		cs->slot[pos].vsrc[i] = tempvsrc[i];
+		cs->slot[pos].ssrc[i] = tempssrc[i];
+	}
 
-	if (emit_vop && emit_sop)
-		return GL_TRUE;
+	for(i = 0; i < argc; ++i) {
+		if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) {
+			int regnr = hwsrc[i] & 31;
 
-	if (emit_vop && vop == R300_FPI0_OUTC_REPL_ALPHA)
-		return GL_TRUE;
+			if (used & (SLOT_SRC_VECTOR << i)) {
+				if (cs->hwtemps[regnr].vector_lastread < pos)
+					cs->hwtemps[regnr].vector_lastread = pos;
+			}
+			if (used & (SLOT_SRC_SCALAR << i)) {
+				if (cs->hwtemps[regnr].scalar_lastread < pos)
+					cs->hwtemps[regnr].scalar_lastread = pos;
+			}
+		}
+	}
 
+	// Emit the source fetch code
+	rp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK;
+	rp->alu.inst[pos].inst1 |=
+			((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) |
+			 (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) |
+			 (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT));
+
+	rp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK;
+	rp->alu.inst[pos].inst3 |=
+			((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) |
+			 (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) |
+			 (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT));
+
+	// Emit the argument selection code
 	if (emit_vop) {
-		for (i=0;i<argc;i++)
-			if (REG_GET_VSWZ(src[i]) == SWIZZLE_WZY)
-				return GL_TRUE;
+		int swz[3];
+
+		for(i = 0; i < 3; ++i) {
+			if (i < argc) {
+				swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base +
+				            (srcpos[i] * v_swiz[REG_GET_VSWZ(src[i])].stride)) |
+					((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) |
+					((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
+			} else {
+				swz[i] = R300_FPI0_ARGC_ZERO;
+			}
+		}
+
+		rp->alu.inst[pos].inst0 &=
+				~(R300_FPI0_ARG0C_MASK|R300_FPI0_ARG1C_MASK|R300_FPI0_ARG2C_MASK);
+		rp->alu.inst[pos].inst0 |=
+				(swz[0] << R300_FPI0_ARG0C_SHIFT) |
+				(swz[1] << R300_FPI0_ARG1C_SHIFT) |
+				(swz[2] << R300_FPI0_ARG2C_SHIFT);
 	}
 
-	return GL_FALSE;
+	if (emit_sop) {
+		int swz[3];
+		/* Scalar (alpha) argument selection: negation is taken from the scalar negate bit (REG_NEGS_MASK), not the vector one. */
+		for(i = 0; i < 3; ++i) {
+			if (i < argc) {
+				swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base +
+						(srcpos[i] * s_swiz[REG_GET_SSWZ(src[i])].stride)) |
+						((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) |
+						((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
+			} else {
+				swz[i] = R300_FPI2_ARGA_ZERO; /* unused arguments select zero */
+			}
+		}
+
+		rp->alu.inst[pos].inst2 &=
+				~(R300_FPI2_ARG0A_MASK|R300_FPI2_ARG1A_MASK|R300_FPI2_ARG2A_MASK);
+		rp->alu.inst[pos].inst2 |=
+				(swz[0] << R300_FPI2_ARG0A_SHIFT) |
+				(swz[1] << R300_FPI2_ARG1A_SHIFT) |
+				(swz[2] << R300_FPI2_ARG2A_SHIFT);
+	}
+
+	return pos;
 }
 
+
+/**
+ * Append an ALU instruction to the instruction list.
+ */
 static void emit_arith(struct r300_fragment_program *rp,
 		       int op,
 		       GLuint dest,
@@ -1043,145 +1289,81 @@
 {
 	COMPILE_STATE;
 	GLuint src[3] = { src0, src1, src2 };
-	int hwsrc[3], sswz[3], vswz[3];
 	int hwdest;
-	GLboolean emit_vop = GL_FALSE, emit_sop = GL_FALSE;
+	GLboolean emit_vop, emit_sop;
 	int vop, sop, argc;
-	int vpos, spos;
-	int i;
+	int pos;
 
 	vop = r300_fpop[op].v_op;
 	sop = r300_fpop[op].s_op;
 	argc = r300_fpop[op].argc;
 
+	if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT &&
+	    REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) {
+		if (mask & WRITEMASK_Z) {
+			mask = WRITEMASK_W;
+		} else {
+			return;
+		}
+	}
+
+	emit_vop = GL_FALSE;
+	emit_sop = GL_FALSE;
 	if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3)
 		emit_vop = GL_TRUE;
 	if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA)
 		emit_sop = GL_TRUE;
 
-	if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT &&
-	    REG_GET_INDEX(dest) == FRAG_RESULT_DEPR)
-		emit_vop = GL_FALSE;
-					
-	if (force_same_slot(vop, sop, emit_vop, emit_sop, argc, src)) {
-		vpos = spos = MAX2(cs->v_pos, cs->s_pos);
-	} else {
-		vpos = cs->v_pos;
-		spos = cs->s_pos;
-		/* Here is where we'd decide on where a safe place is to
-		 * combine this instruction with a previous one.
-		 *
-		 * This is extremely simple for now.. if a source depends
-		 * on the opposite stream, force the same instruction.
-		 */
-		for (i=0;i<3;i++) {
-			if (emit_vop &&
-			    (v_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_SCALAR)) {
-				vpos = spos = MAX2(vpos, spos);
-				break;
-			}
-			if (emit_sop &&
-			    (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) {
-				vpos = spos = MAX2(vpos, spos);
-				break;
-			}
-		}
-	}
-	
-	/* - Convert src->hwsrc, record for FPI1/FPI3
-	 * - Determine ARG parts of FPI0/FPI2, unused args are filled
-	 *   with ARG_ZERO.
-	 */	
-	for (i=0;i<3;i++) {
-		int srcpos;
-		
-		if (i >= argc) {
-			vswz[i] = R300_FPI0_ARGC_ZERO;
-			sswz[i] = R300_FPI2_ARGA_ZERO;
-			continue;
-		}
-		
-		hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE);	
+	pos = find_and_prepare_slot(rp, emit_vop, emit_sop, argc, src, dest, mask);
+	if (pos < 0)
+		return;
 
-		if (emit_vop && vop != R300_FPI0_OUTC_REPL_ALPHA) {
-			srcpos = add_src(rp, hwsrc[i], vpos,
-					 v_swiz[REG_GET_VSWZ(src[i])].flags);
-			vswz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base +
-				   (srcpos *
-				    v_swiz[REG_GET_VSWZ(src[i])].stride)) |
-				((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) |
-				((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
-		} else vswz[i] = R300_FPI0_ARGC_ZERO;
-		
-		if (emit_sop) {
-			srcpos = add_src(rp, hwsrc[i], spos,
-					 s_swiz[REG_GET_SSWZ(src[i])].flags);
-			sswz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base +
-				   (srcpos *
-				    s_swiz[REG_GET_SSWZ(src[i])].stride)) |
-				((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) |
-				((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
-		} else sswz[i] = R300_FPI2_ARGA_ZERO;
-	}
-	hwdest = t_hw_dst(rp, dest, GL_FALSE);
-	
+	hwdest = t_hw_dst(rp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */
+
 	if (flags & PFS_FLAG_SAT) {
 		vop |= R300_FPI0_OUTC_SAT;
 		sop |= R300_FPI2_OUTA_SAT;
 	}
 
 	/* Throw the pieces together and get FPI0/1 */
-	rp->alu.inst[vpos].inst1 =
-			((cs->slot[vpos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) |
-			 (cs->slot[vpos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) |
-			 (cs->slot[vpos].vsrc[2] << R300_FPI1_SRC2C_SHIFT));
 	if (emit_vop) {
-		rp->alu.inst[vpos].inst0 = vop |
-				(vswz[0] << R300_FPI0_ARG0C_SHIFT) |
-				(vswz[1] << R300_FPI0_ARG1C_SHIFT) |
-				(vswz[2] << R300_FPI0_ARG2C_SHIFT);
+		rp->alu.inst[pos].inst0 |= vop;
 
-		rp->alu.inst[vpos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;
+		rp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;
+
 		if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
 			if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
-				rp->alu.inst[vpos].inst1 |=
+				rp->alu.inst[pos].inst1 |=
 					(mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT;
 			} else assert(0);
 		} else {
-			rp->alu.inst[vpos].inst1 |=
+			rp->alu.inst[pos].inst1 |=
 					(mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_REG_MASK_SHIFT;
+
+			cs->hwtemps[hwdest].vector_valid = pos+1;
 		}
-		cs->v_pos = vpos+1;
-	} else if (spos >= vpos)
-		rp->alu.inst[spos].inst0 = NOP_INST0;
+	}
 
 	/* And now FPI2/3 */
-	rp->alu.inst[spos].inst3 =
-			((cs->slot[spos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) |
-			 (cs->slot[spos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) |
-			 (cs->slot[spos].ssrc[2] << R300_FPI3_SRC2A_SHIFT));
 	if (emit_sop) {
-		rp->alu.inst[spos].inst2 = sop |
-				sswz[0] << R300_FPI2_ARG0A_SHIFT |
-				sswz[1] << R300_FPI2_ARG1A_SHIFT |
-				sswz[2] << R300_FPI2_ARG2A_SHIFT;
+		rp->alu.inst[pos].inst2 |= sop;
 
 		if (mask & WRITEMASK_W) {
 			if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
 				if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
-					rp->alu.inst[spos].inst3 |= 
+					rp->alu.inst[pos].inst3 |=
 							(hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT;
 				} else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) {
-					rp->alu.inst[spos].inst3 |= R300_FPI3_DSTA_DEPTH;
+					rp->alu.inst[pos].inst3 |= R300_FPI3_DSTA_DEPTH;
 				} else assert(0);
 			} else {
-				rp->alu.inst[spos].inst3 |=
+				rp->alu.inst[pos].inst3 |=
 						(hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_REG;
+
+				cs->hwtemps[hwdest].scalar_valid = pos+1;
 			}
 		}
-		cs->s_pos = spos+1;
-	} else if (vpos >= spos)
-		rp->alu.inst[vpos].inst2 = NOP_INST2;
+	}
 
 	return;
 }
@@ -1204,34 +1386,130 @@
 }
 #endif
 
-static void make_sin_const(struct r300_fragment_program *rp)
+static GLfloat SinCosConsts[2][4] = {
+	{
+		1.273239545,  // 4/PI
+		-0.405284735, // -4/(PI*PI)
+		3.141592654,  // PI
+		0.2225        // weight
+	},
+	{
+		0.75,
+		0.0,
+		0.159154943,  // 1/(2*PI)
+		6.283185307   // 2*PI
+	}
+};
+
+
+/**
+ * Emit a LIT instruction.
+ * \p flags may be PFS_FLAG_SAT
+ *
+ * Definition of LIT (from ARB_fragment_program):
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots. So unless there's some special undocumented opcode,
+ * this implementation is potentially optimal. Unfortunately,
+ * emit_arith is a bit too conservative because it doesn't understand
+ * partial writes to the vector component.
+ */
+static const GLfloat LitConst[4] = { 127.999999, 127.999999, 127.999999, -127.999999 };
+
+static void emit_lit(struct r300_fragment_program *rp,
+		GLuint dest,
+		int mask,
+		GLuint src,
+		int flags)
 {
-	if(rp->const_sin[0] == -1){
-	    GLfloat cnstv[4];
+	COMPILE_STATE;
+	GLuint cnst;
+	int needTemporary;
+	GLuint temp;
 
-	    cnstv[0] = 1.273239545; // 4/PI
-	    cnstv[1] =-0.405284735; // -4/(PI*PI)
-	    cnstv[2] = 3.141592654; // PI
-	    cnstv[3] = 0.2225;      // weight
-	    rp->const_sin[0] = emit_const4fv(rp, cnstv);
+	cnst = emit_const4fv(rp, LitConst);
 
-	    cnstv[0] = 0.75;
-	    cnstv[1] = 0.0;
-	    cnstv[2] = 0.159154943; // 1/(2*PI)
-	    cnstv[3] = 6.283185307; // 2*PI
-	    rp->const_sin[1] = emit_const4fv(rp, cnstv);
+	needTemporary = 0;
+	if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) {
+		needTemporary = 1;
+	} else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
+		// LIT is typically followed by DP3/DP4, so there's no point
+		// in creating special code for this case
+		needTemporary = 1;
+	}
+
+	if (needTemporary) {
+		temp = keep(get_temp_reg(rp));
+	} else {
+		temp = keep(dest);
+	}
+
+	// Note: The order of emit_arith inside the slots is relevant,
+	// because emit_arith only looks at scalar vs. vector when resolving
+	// dependencies, and it does not consider individual vector components,
+	// so swizzling between the two parts can create fake dependencies.
+
+	// First slot
+	emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_XY,
+	           keep(src), pfs_zero, undef, 0);
+	emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_W,
+	           src, cnst, undef, 0);
+
+	// Second slot
+	emit_arith(rp, PFS_OP_MIN, temp, WRITEMASK_Z,
+	           swizzle(temp, W, W, W, W), cnst, undef, 0);
+	emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
+	           swizzle(temp, Y, Y, Y, Y), undef, undef, 0);
+
+	// Third slot
+	// If desired, we saturate the y result here.
+	// This does not affect the use as a condition variable in the CMP later
+	emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+	           temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0);
+	emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+	           swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags);
+
+	// Fourth slot
+	emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+	           pfs_one, pfs_one, pfs_zero, 0);
+	emit_arith(rp, PFS_OP_EX2, temp, WRITEMASK_W,
+	           temp, undef, undef, 0);
+
+	// Fifth slot
+	emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
+	           pfs_zero, swizzle(temp, W, W, W, W), negate(swizzle(temp, Y, Y, Y, Y)), flags);
+	emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+	           pfs_one, pfs_one, pfs_zero, 0);
+
+	if (needTemporary) {
+		emit_arith(rp, PFS_OP_MAD, dest, mask,
+			           temp, pfs_one, pfs_zero, flags);
+		free_temp(rp, temp);
+	} else {
+		// Decrease refcount of the destination
+		t_hw_dst(rp, dest, GL_FALSE, cs->nrslots);
 	}
 }
 
+
 static GLboolean parse_program(struct r300_fragment_program *rp)
-{	
+{
 	struct gl_fragment_program *mp = &rp->mesa_program;
 	const struct prog_instruction *inst = mp->Base.Instructions;
 	struct prog_instruction *fpi;
 	GLuint src[3], dest, temp[2];
-	GLuint cnst;
 	int flags, mask = 0;
-	GLfloat cnstv[4] = {0.0, 0.0, 0.0, 0.0};
+	int const_sin[2];
 
 	if (!inst || inst[0].Opcode == OPCODE_END) {
 		ERROR("empty program?\n");
@@ -1284,15 +1562,16 @@
 			 *   result = sin(x)
 			 */
 			temp[0] = get_temp_reg(rp);
-			make_sin_const(rp);
+			const_sin[0] = emit_const4fv(rp, SinCosConsts[0]);
+			const_sin[1] = emit_const4fv(rp, SinCosConsts[1]);
 			src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
 
 			/* add 0.5*PI and do range reduction */
 
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X,
 				   swizzle(src[0], X, X, X, X),
-				   swizzle(rp->const_sin[1], Z, Z, Z, Z),
-				   swizzle(rp->const_sin[1], X, X, X, X),
+				   swizzle(const_sin[1], Z, Z, Z, Z),
+				   swizzle(const_sin[1], X, X, X, X),
 				   0);
 
 			emit_arith(rp, PFS_OP_FRC, temp[0], WRITEMASK_X,
@@ -1303,15 +1582,15 @@
 
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z,
 				   swizzle(temp[0], X, X, X, X),
-				   swizzle(rp->const_sin[1], W, W, W, W), //2*PI
-				   negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI
+				   swizzle(const_sin[1], W, W, W, W), //2*PI
+				   negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI
 				   0);
 
 			/* SIN */
 
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y,
 				   swizzle(temp[0], Z, Z, Z, Z),
-				   rp->const_sin[0],
+				   const_sin[0],
 				   pfs_zero,
 				   0);
 
@@ -1320,7 +1599,7 @@
 				   absolute(swizzle(temp[0], Z, Z, Z, Z)),
 				   swizzle(temp[0], X, X, X, X),
 				   0);
-			
+
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Y,
 				   swizzle(temp[0], X, X, X, X),
 				   absolute(swizzle(temp[0], X, X, X, X)),
@@ -1330,7 +1609,7 @@
 
 	    		emit_arith(rp, PFS_OP_MAD, dest, mask,
 				   swizzle(temp[0], Y, Y, Y, Y),
-				   swizzle(rp->const_sin[0], W, W, W, W),
+				   swizzle(const_sin[0], W, W, W, W),
 				   swizzle(temp[0], X, X, X, X),
 				   flags);
 
@@ -1364,12 +1643,12 @@
 				   0);
 			emit_arith(rp, PFS_OP_DP4, dest, mask,
 				   temp[0], src[1], undef,
-				   flags);	
+				   flags);
 			free_temp(rp, temp[0]);
 #else
 			emit_arith(rp, PFS_OP_DP4, dest, mask,
 				   swizzle(src[0], X, Y, Z, ONE), src[1],
-				   undef, flags);	
+				   undef, flags);
 #endif
 			break;
 		case OPCODE_DST:
@@ -1400,7 +1679,7 @@
 				   src[0], undef, undef,
 				   flags);
 			break;
-		case OPCODE_FLR:		
+		case OPCODE_FLR:
 			src[0] = t_src(rp, fpi->SrcReg[0]);
 			temp[0] = get_temp_reg(rp);
 			/* FRC temp, src0
@@ -1430,66 +1709,8 @@
 				   flags);
 			break;
 		case OPCODE_LIT:
-			/* LIT
-			 * if (s.x < 0) t.x = 0; else t.x = s.x;
-			 * if (s.y < 0) t.y = 0; else t.y = s.y;
-			 * if (s.w >  128.0) t.w =  128.0; else t.w = s.w;
-			 * if (s.w < -128.0) t.w = -128.0; else t.w = s.w;
-			 * r.x = 1.0
-			 * if (t.x > 0) r.y = pow(t.y, t.w); else r.y = 0;
-			 * Also r.y = 0 if t.y < 0
-			 * For the t.x > 0 FGLRX use the CMPH opcode which
-			 * change the compare to (t.x + 0.5) > 0.5 we may
-			 * save one instruction by doing CMP -t.x 
-			 */
-			cnstv[0] = cnstv[1] = cnstv[2] = cnstv[3] = 0.50001;
 			src[0] = t_src(rp, fpi->SrcReg[0]);
-			temp[0] = get_temp_reg(rp);
-			cnst = emit_const4fv(rp, cnstv);
-			emit_arith(rp, PFS_OP_CMP, temp[0],
-				   WRITEMASK_X | WRITEMASK_Y,
-				   src[0], pfs_zero, src[0], flags);
-			emit_arith(rp, PFS_OP_MIN, temp[0], WRITEMASK_Z,
-				   swizzle(keep(src[0]), W, W, W, W),
-				   cnst, undef, flags);
-			emit_arith(rp, PFS_OP_LG2, temp[0], WRITEMASK_W,
-				   swizzle(temp[0], Y, Y, Y, Y),
-				   undef, undef, flags);
-			emit_arith(rp, PFS_OP_MAX, temp[0], WRITEMASK_Z,
-				   temp[0], negate(cnst), undef, flags);
-			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_W,
-				   temp[0], swizzle(temp[0], Z, Z, Z, Z),
-				   pfs_zero, flags);
-			emit_arith(rp, PFS_OP_EX2, temp[0], WRITEMASK_W,
-				   temp[0], undef, undef, flags);
-			emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
-				   swizzle(keep(temp[0]), X, X, X, X),
-				   pfs_one, pfs_zero, flags);
-#if 0
-			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X,
-				   temp[0], pfs_one, pfs_half, flags);
-			emit_arith(rp, PFS_OP_CMPH, temp[0], WRITEMASK_Z,
-				   swizzle(keep(temp[0]), W, W, W, W),
-				   pfs_zero, swizzle(keep(temp[0]), X, X, X, X),
-				   flags);
-#else
-			emit_arith(rp, PFS_OP_CMP, temp[0], WRITEMASK_Z,
-				   pfs_zero,
-				   swizzle(keep(temp[0]), W, W, W, W),
-				   negate(swizzle(keep(temp[0]), X, X, X, X)),
-				   flags);
-#endif
-			emit_arith(rp, PFS_OP_CMP, dest, WRITEMASK_Z,
-				   pfs_zero, temp[0],
-				   negate(swizzle(keep(temp[0]), Y, Y, Y, Y)),
-				   flags);
-			emit_arith(rp, PFS_OP_MAD, dest,
-				   WRITEMASK_X | WRITEMASK_W,
-				   pfs_one,
-				   pfs_one,
-				   pfs_zero,
-				   flags);
-			free_temp(rp, temp[0]);
+			emit_lit(rp, dest, mask, src[0], flags);
 			break;
 		case OPCODE_LRP:
 			src[0] = t_src(rp, fpi->SrcReg[0]);
@@ -1508,7 +1729,7 @@
 				   src[0], src[1], temp[0],
 				   flags);
 			free_temp(rp, temp[0]);
-			break;			
+			break;
 		case OPCODE_MAD:
 			src[0] = t_src(rp, fpi->SrcReg[0]);
 			src[1] = t_src(rp, fpi->SrcReg[1]);
@@ -1535,7 +1756,7 @@
 		case OPCODE_SWZ:
 			src[0] = t_src(rp, fpi->SrcReg[0]);
 			emit_arith(rp, PFS_OP_MAD, dest, mask,
-				   src[0], pfs_one, pfs_zero, 
+				   src[0], pfs_one, pfs_zero,
 				   flags);
 			break;
 		case OPCODE_MUL:
@@ -1548,7 +1769,7 @@
 		case OPCODE_POW:
 			src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
 			src[1] = t_scalar_src(rp, fpi->SrcReg[1]);
-			temp[0] = get_temp_reg(rp);	
+			temp[0] = get_temp_reg(rp);
 			emit_arith(rp, PFS_OP_LG2, temp[0], WRITEMASK_W,
 				   src[0], undef, undef,
 				   0);
@@ -1582,19 +1803,20 @@
 			 */
 			temp[0] = get_temp_reg(rp);
 			temp[1] = get_temp_reg(rp);
-			make_sin_const(rp);
+			const_sin[0] = emit_const4fv(rp, SinCosConsts[0]);
+			const_sin[1] = emit_const4fv(rp, SinCosConsts[1]);
 			src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
 
 			/* x = -abs(x)+0.5*PI */
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z,
-				   swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI
+				   swizzle(const_sin[0], Z, Z, Z, Z), //PI
 				   pfs_half,
 				   negate(abs(swizzle(keep(src[0]), X, X, X, X))),
 				   0);
 
 			/* C*x (sin) */
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_W,
-				   swizzle(rp->const_sin[0], Y, Y, Y, Y),
+				   swizzle(const_sin[0], Y, Y, Y, Y),
 				   swizzle(keep(src[0]), X, X, X, X),
 				   pfs_zero,
 				   0);
@@ -1602,13 +1824,13 @@
 			/* B*x, C*x (cos) */
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y,
 			           swizzle(temp[0], Z, Z, Z, Z),
-				   rp->const_sin[0],
+				   const_sin[0],
 			           pfs_zero,
 				   0);
 
 			/* B*x (sin) */
 			emit_arith(rp, PFS_OP_MAD, temp[1], WRITEMASK_W,
-				   swizzle(rp->const_sin[0], X, X, X, X),
+				   swizzle(const_sin[0], X, X, X, X),
 				   keep(src[0]),
 				   pfs_zero,
 				   0);
@@ -1638,7 +1860,7 @@
 			/* dest.xy = mad(temp.xy, P, temp2.wz) */
 			emit_arith(rp, PFS_OP_MAD, dest, mask & (WRITEMASK_X | WRITEMASK_Y),
 				   temp[0],
-				   swizzle(rp->const_sin[0], W, W, W, W),
+				   swizzle(const_sin[0], W, W, W, W),
 				   swizzle(temp[1], W, Z, Y, X),
 				   flags);
 
@@ -1669,7 +1891,8 @@
 			 */
 
 			temp[0] = get_temp_reg(rp);
-			make_sin_const(rp);
+			const_sin[0] = emit_const4fv(rp, SinCosConsts[0]);
+			const_sin[1] = emit_const4fv(rp, SinCosConsts[1]);
 			src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
 
 
@@ -1677,7 +1900,7 @@
 
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X,
 				   swizzle(keep(src[0]), X, X, X, X),
-				   swizzle(rp->const_sin[1], Z, Z, Z, Z),
+				   swizzle(const_sin[1], Z, Z, Z, Z),
 				   pfs_half,
 				   0);
 
@@ -1689,15 +1912,15 @@
 
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z,
 				   swizzle(temp[0], X, X, X, X),
-				   swizzle(rp->const_sin[1], W, W, W, W), //2*PI
-				   negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI
+				   swizzle(const_sin[1], W, W, W, W), //2*PI
+				   negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI
 				   0);
 
 			/* SIN */
 
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y,
 				   swizzle(temp[0], Z, Z, Z, Z),
-				   rp->const_sin[0],
+				   const_sin[0],
 				   pfs_zero,
 				   0);
 
@@ -1706,7 +1929,7 @@
 				   absolute(swizzle(temp[0], Z, Z, Z, Z)),
 				   swizzle(temp[0], X, X, X, X),
 				   0);
-			
+
 			emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Y,
 				   swizzle(temp[0], X, X, X, X),
 				   absolute(swizzle(temp[0], X, X, X, X)),
@@ -1716,7 +1939,7 @@
 
 	    		emit_arith(rp, PFS_OP_MAD, dest, mask,
 				   swizzle(temp[0], Y, Y, Y, Y),
-				   swizzle(rp->const_sin[0], W, W, W, W),
+				   swizzle(const_sin[0], W, W, W, W),
 				   swizzle(temp[0], X, X, X, X),
 				   flags);
 
@@ -1763,7 +1986,7 @@
 				   swizzle(keep(src[1]), Y, Z, X, W),
 				   pfs_zero,
 				   0);
-			/* dest.xyz = src0.yzx * src1.zxy - temp 
+			/* dest.xyz = src0.yzx * src1.zxy - temp
 			 * dest.w	= undefined
 			 * */
 			emit_arith(rp, PFS_OP_MAD, dest, mask & WRITEMASK_XYZ,
@@ -1788,13 +2011,99 @@
 	return GL_TRUE;
 }
 
+static void insert_wpos(struct gl_program *prog)
+{
+	GLint tokens[6] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0, 0 };
+	struct prog_instruction *fpi;
+	GLuint window_index;
+	int i = 0;
+	GLuint tempregi = prog->NumTemporaries;
+	/* should do something else if no temps left... */
+	prog->NumTemporaries++;
+
+	fpi = _mesa_alloc_instructions (prog->NumInstructions + 3);
+	_mesa_init_instructions (fpi, prog->NumInstructions + 3);
+
+	/* perspective divide */
+	fpi[i].Opcode = OPCODE_RCP;
+
+	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+	fpi[i].DstReg.Index = tempregi;
+	fpi[i].DstReg.WriteMask = WRITEMASK_W;
+	fpi[i].DstReg.CondMask = COND_TR;
+
+	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
+	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+	fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
+	i++;
+
+	fpi[i].Opcode = OPCODE_MUL;
+
+	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+	fpi[i].DstReg.Index = tempregi;
+	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
+	fpi[i].DstReg.CondMask = COND_TR;
+
+	fpi[i].SrcReg[0].File = PROGRAM_INPUT;
+	fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+	fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+	fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
+	fpi[i].SrcReg[1].Index = tempregi;
+	fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
+	i++;
+
+	/* viewport transformation */
+	window_index = _mesa_add_state_reference(prog->Parameters, tokens);
+
+	fpi[i].Opcode = OPCODE_MAD;
+
+	fpi[i].DstReg.File = PROGRAM_TEMPORARY;
+	fpi[i].DstReg.Index = tempregi;
+	fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
+	fpi[i].DstReg.CondMask = COND_TR;
+
+	fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
+	fpi[i].SrcReg[0].Index = tempregi;
+	fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+	fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
+	fpi[i].SrcReg[1].Index = window_index;
+	fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+	fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
+	fpi[i].SrcReg[2].Index = window_index;
+	fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+	i++;
+
+	_mesa_copy_instructions (&fpi[i], prog->Instructions, prog->NumInstructions);
+
+	free(prog->Instructions);
+
+	prog->Instructions = fpi;
+
+	prog->NumInstructions += i;
+	fpi = &prog->Instructions[prog->NumInstructions-1];
+
+	assert(fpi->Opcode == OPCODE_END);
+
+	for(fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++){
+		for(i=0; i<3; i++)
+		    if( fpi->SrcReg[i].File == PROGRAM_INPUT &&
+			fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS ){
+			    fpi->SrcReg[i].File = PROGRAM_TEMPORARY;
+			    fpi->SrcReg[i].Index = tempregi;
+    		    }
+	}
+}
+
 /* - Init structures
  * - Determine what hwregs each input corresponds to
  */
 static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp)
 {
 	struct r300_pfs_compile_state *cs = NULL;
-	struct gl_fragment_program *mp = &rp->mesa_program;	
+	struct gl_fragment_program *mp = &rp->mesa_program;
 	struct prog_instruction *fpi;
 	GLuint InputsRead = mp->Base.InputsRead;
 	GLuint temps_used = 0; /* for rp->temps[] */
@@ -1809,13 +2118,10 @@
 	rp->cur_node   = 0;
 	rp->first_node_has_tex = 0;
 	rp->const_nr   = 0;
-	rp->param_nr   = 0;
-	rp->params_uptodate = GL_FALSE;
 	rp->max_temp_idx = 0;
 	rp->node[0].alu_end = -1;
 	rp->node[0].tex_end = -1;
-	rp->const_sin[0] = -1;
-	
+
 	_mesa_memset(cs, 0, sizeof(*rp->cs));
 	for (i=0;i<PFS_MAX_ALU_INST;i++) {
 		for (j=0;j<3;j++) {
@@ -1823,7 +2129,7 @@
 			cs->slot[i].ssrc[j] = SRC_CONST;
 		}
 	}
-	
+
 	/* Work out what temps the Mesa inputs correspond to, this must match
 	 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
 	 * configures itself based on the fragprog's InputsRead
@@ -1836,7 +2142,7 @@
 	for (i=0;i<rp->ctx->Const.MaxTextureUnits;i++) {
 		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
 			cs->inputs[FRAG_ATTRIB_TEX0+i].refcount = 0;
-			cs->inputs[FRAG_ATTRIB_TEX0+i].reg = get_hw_temp(rp);
+			cs->inputs[FRAG_ATTRIB_TEX0+i].reg = get_hw_temp(rp, 0);
 		}
 	}
 	InputsRead &= ~FRAG_BITS_TEX_ANY;
@@ -1844,21 +2150,22 @@
 	/* fragment position treated as a texcoord */
 	if (InputsRead & FRAG_BIT_WPOS) {
 		cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
-		cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(rp);
+		cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(rp, 0);
+		insert_wpos(&mp->Base);
 	}
 	InputsRead &= ~FRAG_BIT_WPOS;
 
 	/* Then primary colour */
 	if (InputsRead & FRAG_BIT_COL0) {
 		cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
-		cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp);
+		cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp, 0);
 	}
 	InputsRead &= ~FRAG_BIT_COL0;
-	
+
 	/* Secondary color */
 	if (InputsRead & FRAG_BIT_COL1) {
 		cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
-		cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(rp);
+		cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(rp, 0);
 	}
 	InputsRead &= ~FRAG_BIT_COL1;
 
@@ -1881,7 +2188,7 @@
 
 	for (fpi=mp->Base.Instructions;fpi->Opcode != OPCODE_END; fpi++) {
 		int idx;
-		
+
 		for (i=0;i<3;i++) {
 			idx = fpi->SrcReg[i].Index;
 			switch (fpi->SrcReg[i].File) {
@@ -1916,16 +2223,10 @@
 static void update_params(struct r300_fragment_program *rp)
 {
 	struct gl_fragment_program *mp = &rp->mesa_program;
-	int i;
 
 	/* Ask Mesa nicely to fill in ParameterValues for us */
-	if (rp->param_nr)
+	if (mp->Base.Parameters)
 		_mesa_load_state_parameters(rp->ctx, mp->Base.Parameters);
-
-	for (i=0;i<rp->param_nr;i++)
-		COPY_4V(rp->constant[rp->param[i].idx], rp->param[i].values);
-
-	rp->params_uptodate = GL_TRUE;
 }
 
 void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_program *rp)
@@ -1933,7 +2234,7 @@
 	struct r300_pfs_compile_state *cs = NULL;
 
 	if (!rp->translated) {
-		
+
 		init_program(r300, rp);
 		cs = rp->cs;
 
@@ -1941,22 +2242,22 @@
 			dump_program(rp);
 			return;
 		}
-		
+
 		/* Finish off */
-		cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
 		rp->node[rp->cur_node].alu_end =
-				cs->v_pos - rp->node[rp->cur_node].alu_offset - 1;
+				cs->nrslots - rp->node[rp->cur_node].alu_offset - 1;
 		if (rp->node[rp->cur_node].tex_end < 0)
 			rp->node[rp->cur_node].tex_end = 0;
 		rp->alu_offset = 0;
-		rp->alu_end    = cs->v_pos - 1;
+		rp->alu_end    = cs->nrslots - 1;
 		rp->tex_offset = 0;
 		rp->tex_end    = rp->tex.length ? rp->tex.length - 1 : 0;
 		assert(rp->node[rp->cur_node].alu_end >= 0);
 		assert(rp->alu_end >= 0);
-	
+
 		rp->translated = GL_TRUE;
-		if (0) dump_program(rp);
+		if (RADEON_DEBUG & DEBUG_PIXEL) dump_program(rp);
+		r300UpdateStateParameters(rp->ctx, _NEW_PROGRAM);
 	}
 
 	update_params(rp);
@@ -1965,11 +2266,11 @@
 /* just some random things... */
 static void dump_program(struct r300_fragment_program *rp)
 {
-	int i;
+	int n, i, j;
 	static int pc = 0;
 
 	fprintf(stderr, "pc=%d*************************************\n", pc++);
-			
+
 	fprintf(stderr, "Mesa program:\n");
 	fprintf(stderr, "-------------\n");
 		_mesa_print_program(&rp->mesa_program.Base);
@@ -1977,47 +2278,180 @@
 
 	fprintf(stderr, "Hardware program\n");
 	fprintf(stderr, "----------------\n");
-	
-	fprintf(stderr, "tex:\n");
-	
-	for(i=0;i<rp->tex.length;i++) {
-		fprintf(stderr, "%08x\n", rp->tex.inst[i]);
-	}
-	
-	for (i=0;i<(rp->cur_node+1);i++) {
+
+	for (n = 0; n < (rp->cur_node+1); n++) {
 		fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "\
-			"alu_end: %d, tex_end: %d\n", i,
-			rp->node[i].alu_offset,
-			rp->node[i].tex_offset,
-			rp->node[i].alu_end,
-			rp->node[i].tex_end);
+			"alu_end: %d, tex_end: %d\n", n,
+			rp->node[n].alu_offset,
+			rp->node[n].tex_offset,
+			rp->node[n].alu_end,
+			rp->node[n].tex_end);
+
+		if (rp->tex.length) {	/* TEX listing is printed only when tex.length is non-zero */
+			fprintf(stderr, "  TEX:\n");
+			for(i = rp->node[n].tex_offset; i <= rp->node[n].tex_offset+rp->node[n].tex_end; ++i) {	/* tex_end is relative to tex_offset; bound is inclusive */
+				const char* instr;
+
+				switch((rp->tex.inst[i] >> R300_FPITX_OPCODE_SHIFT) & 15) {	/* 4-bit opcode field */
+				case R300_FPITX_OP_TEX:
+					instr = "TEX";
+					break;
+				case R300_FPITX_OP_KIL:
+					instr = "KIL";
+					break;
+				case R300_FPITX_OP_TXP:
+					instr = "TXP";
+					break;
+				case R300_FPITX_OP_TXB:
+					instr = "TXB";
+					break;
+				default:
+					instr = "UNKNOWN";	/* any opcode value not listed above */
+				}
+
+				fprintf(stderr, "    %s t%i, %c%i, texture[%i]   (%08x)\n",	/* decoded fields, then the raw instruction word */
+						instr,
+						(rp->tex.inst[i] >> R300_FPITX_DST_SHIFT) & 31,	/* destination index (5 bits) */
+						(rp->tex.inst[i] & R300_FPITX_SRC_CONST) ? 'c': 't',	/* 'c' when the SRC_CONST bit is set, else 't' */
+						(rp->tex.inst[i] >> R300_FPITX_SRC_SHIFT) & 31,	/* source index (5 bits) */
+						(rp->tex.inst[i] & R300_FPITX_IMAGE_MASK) >> R300_FPITX_IMAGE_SHIFT,	/* IMAGE field, printed as texture[N] */
+						rp->tex.inst[i]);
+			}
+		}
+
+		for(i = rp->node[n].alu_offset; i <= rp->node[n].alu_offset+rp->node[n].alu_end; ++i) {
+			char srcc[3][10], dstc[20];
+			char srca[3][10], dsta[20];
+			char argc[3][20];
+			char arga[3][20];
+			char flags[5], tmp[10];
+
+			for(j = 0; j < 3; ++j) {	/* name the three source operands: srcc from inst1 (xyz row), srca from inst3 (w row) */
+				int regc = rp->alu.inst[i].inst1 >> (j*6);	/* source j occupies a 6-bit field */
+				int rega = rp->alu.inst[i].inst3 >> (j*6);
+
+				sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', regc & 31);	/* bit 5 set prints 'c', else 't'; low 5 bits are the index */
+				sprintf(srca[j], "%c%i", (rega & 32) ? 'c' : 't', rega & 31);
+			}
+
+			dstc[0] = 0;	/* stays an empty string when no colour write-mask bit is set */
+			sprintf(flags, "%s%s%s",	/* components enabled in the DSTC_REG_* mask */
+					(rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "",
+					(rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "",
+					(rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : "");
+			if (flags[0] != 0) {	/* at least one component is enabled */
+				sprintf(dstc, "t%i.%s ",
+						(rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31,
+						flags);
+			}
+			sprintf(flags, "%s%s%s",	/* components enabled in the DSTC_OUTPUT_* mask */
+					(rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "",
+					(rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "",
+					(rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				sprintf(tmp, "o%i.%s",
+						(rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31,	/* same DSTC index field as the "t" form above */
+						flags);
+				strcat(dstc, tmp);	/* appended after any "tN.xyz " text already in dstc */
+			}
+
+			dsta[0] = 0;	/* stays an empty string when none of the three DSTA bits is set */
+			if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) {
+				sprintf(dsta, "t%i.w ", (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31);
+			}
+			if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) {
+				sprintf(tmp, "o%i.w ", (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31);	/* same DSTA index field as the "t" form above */
+				strcat(dsta, tmp);
+			}
+			if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) {
+				strcat(dsta, "Z");	/* DSTA_DEPTH bit is shown as a trailing "Z" */
+			}
+
+			fprintf(stderr, "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
+			                "       w: %3s %3s %3s -> %-20s (%08x)\n",
+					i,
+					srcc[0], srcc[1], srcc[2], dstc, rp->alu.inst[i].inst1,
+					srca[0], srca[1], srca[2], dsta, rp->alu.inst[i].inst3);
+
+			for(j = 0; j < 3; ++j) {	/* decode the three argument selectors: argc from inst0 (xyz row), arga from inst2 (w row) */
+				int regc = rp->alu.inst[i].inst0 >> (j*7);	/* argument j occupies a 7-bit field */
+				int rega = rp->alu.inst[i].inst2 >> (j*7);
+				int d;
+				char buf[20];
+
+				d = regc & 31;	/* low 5 bits select the operand; bits 5 and 6 are tested below */
+				if (d < 12) {	/* 0..11: d/4 picks the colour source, d%4 the swizzle */
+					switch(d % 4) {	/* NOTE(review): no default, so buf is unset unless the four labels cover 0..3 - confirm */
+						case R300_FPI0_ARGC_SRC0C_XYZ:
+							sprintf(buf, "%s.xyz", srcc[d / 4]);
+							break;
+						case R300_FPI0_ARGC_SRC0C_XXX:
+							sprintf(buf, "%s.xxx", srcc[d / 4]);
+							break;
+						case R300_FPI0_ARGC_SRC0C_YYY:
+							sprintf(buf, "%s.yyy", srcc[d / 4]);
+							break;
+						case R300_FPI0_ARGC_SRC0C_ZZZ:
+							sprintf(buf, "%s.zzz", srcc[d / 4]);
+							break;
+					}
+				} else if (d < 15) {	/* 12..14: srca[0..2] printed as .www */
+					sprintf(buf, "%s.www", srca[d-12]);
+				} else if (d == 20) {
+					sprintf(buf, "0.0");
+				} else if (d == 21) {
+					sprintf(buf, "1.0");
+				} else if (d == 22) {
+					sprintf(buf, "0.5");
+				} else if (d >= 23 && d < 32) {	/* 23..31: d/3 picks one of three swizzles, d%3 the colour source */
+					d -= 23;
+					switch(d/3) {
+						case 0:
+							sprintf(buf, "%s.yzx", srcc[d % 3]);
+							break;
+						case 1:
+							sprintf(buf, "%s.zxy", srcc[d % 3]);
+							break;
+						case 2:
+							sprintf(buf, "%s.Wzy", srcc[d % 3]);
+							break;
+					}
+				} else {	/* 15..19: printed as the raw selector number */
+					sprintf(buf, "%i", d);
+				}
+
+				sprintf(argc[j], "%s%s%s%s",	/* bit 5 prints a leading "-", bit 6 wraps the operand in "|...|" (presumably negate / absolute value) */
+						(regc & 32) ? "-" : "",
+						(regc & 64) ? "|" : "",
+						buf,
+						(regc & 64) ? "|" : "");
+
+				d = rega & 31;	/* same field layout for the w-row argument */
+				if (d < 9) {	/* 0..8: component d%3 (x/y/z) of colour source d/3 */
+					sprintf(buf, "%s.%c", srcc[d / 3], 'x' + (char)(d%3));
+				} else if (d < 12) {	/* 9..11: srca[0..2] printed as .w */
+					sprintf(buf, "%s.w", srca[d-9]);
+				} else if (d == 16) {
+					sprintf(buf, "0.0");
+				} else if (d == 17) {
+					sprintf(buf, "1.0");
+				} else if (d == 18) {
+					sprintf(buf, "0.5");
+				} else {	/* any other selector is printed as its raw number */
+					sprintf(buf, "%i", d);
+				}
+
+				sprintf(arga[j], "%s%s%s%s",	/* same "-" and "|...|" decoration as the xyz row */
+						(rega & 32) ? "-" : "",
+						(rega & 64) ? "|" : "",
+						buf,
+						(rega & 64) ? "|" : "");
+			}
+
+			fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x\n"
+			                "       w: %8s %8s %8s    op: %08x\n",
+					argc[0], argc[1], argc[2], rp->alu.inst[i].inst0,
+					arga[0], arga[1], arga[2], rp->alu.inst[i].inst2);
+		}
 	}
-	
-	fprintf(stderr, "%08x\n",
-		((rp->tex_end << 16) | (R300_PFS_TEXI_0 >> 2)));
-	for (i=0;i<=rp->tex_end;i++)
-		fprintf(stderr, "%08x\n", rp->tex.inst[i]);
-
-	/* dump program in pretty_print_command_stream.tcl-readable format */
-	fprintf(stderr, "%08x\n",
-		((rp->alu_end << 16) | (R300_PFS_INSTR0_0 >> 2)));
-	for (i=0;i<=rp->alu_end;i++)
-		fprintf(stderr, "%08x\n", rp->alu.inst[i].inst0);
-
-	fprintf(stderr, "%08x\n",
-		((rp->alu_end << 16) | (R300_PFS_INSTR1_0 >> 2)));
-	for (i=0;i<=rp->alu_end;i++)
-		fprintf(stderr, "%08x\n", rp->alu.inst[i].inst1);
-
-	fprintf(stderr, "%08x\n",
-		((rp->alu_end << 16) | (R300_PFS_INSTR2_0 >> 2)));
-	for (i=0;i<=rp->alu_end;i++)
-		fprintf(stderr, "%08x\n", rp->alu.inst[i].inst2);
-
-	fprintf(stderr, "%08x\n",
-		((rp->alu_end << 16) | (R300_PFS_INSTR3_0 >> 2)));
-	for (i=0;i<=rp->alu_end;i++)
-		fprintf(stderr, "%08x\n", rp->alu.inst[i].inst3);
-
-	fprintf(stderr, "00000000\n");
 }
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
index 6fa34ee..11e2d42 100644
--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
@@ -162,11 +162,11 @@
 	cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
 
 	reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
-	e32(0x0000000a);
+	e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
 	  
 
-	reg_start(0x4f18,0);
-	e32(0x00000003);
+	reg_start(R300_RB3D_ZCACHE_CTLSTAT,0);
+	e32(R300_RB3D_ZCACHE_UNKNOWN_03);
 	cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
 }
 
@@ -389,7 +389,7 @@
 #ifdef USER_BUFFERS
 #include "radeon_mm.h"
 
-void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
+static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
 {
 	struct r300_dma_buffer *dmabuf;
 	size = MAX2(size, RADEON_BUFFER_SIZE*16);
@@ -503,7 +503,7 @@
 }
 
 #else
-void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
+static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
 {
 	struct r300_dma_buffer *dmabuf;
 	int fd = rmesa->radeon.dri.fd;
diff --git a/src/mesa/drivers/dri/r300/r300_maos.c b/src/mesa/drivers/dri/r300/r300_maos.c
index fcb87cb..b0d96f7 100644
--- a/src/mesa/drivers/dri/r300/r300_maos.c
+++ b/src/mesa/drivers/dri/r300/r300_maos.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */
 /*
 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
 
diff --git a/src/mesa/drivers/dri/r300/r300_maos.h b/src/mesa/drivers/dri/r300/r300_maos.h
index 679f1c2..ab28317 100644
--- a/src/mesa/drivers/dri/r300/r300_maos.h
+++ b/src/mesa/drivers/dri/r300/r300_maos.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */
 /*
 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
 
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index 69bc994..1f4a2d2 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -497,6 +497,7 @@
 
 /* Zero to flush caches. */
 #define R300_TX_CNTL                        0x4100
+#define R300_TX_FLUSH                       0x0
 
 /* The upper enable bits are guessed, based on fglrx reported limits. */
 #define R300_TX_ENABLE                      0x4104
@@ -571,6 +572,7 @@
  * Some of the tests indicate that fgl has a fallback implementation of zbias
  * via pixel shaders.
  */
+#define R300_RE_ZBIAS_CNTL                    0x42A0 /* GUESS */
 #define R300_RE_ZBIAS_T_FACTOR                0x42A4
 #define R300_RE_ZBIAS_T_CONSTANT              0x42A8
 #define R300_RE_ZBIAS_W_FACTOR                0x42AC
@@ -1045,7 +1047,7 @@
  * WRT swizzling. If, for example, you want to load an R component into an
  * Alpha operand, this R component is taken from a *color* source, not from
  * an alpha source. The corresponding register doesn't even have to appear in
- * the alpha sources list. (I hope this alll makes sense to you)
+ * the alpha sources list. (I hope this all makes sense to you)
  *
  * Destination selection
  * The destination register index is in FPI1 (color) and FPI3 (alpha)
@@ -1072,6 +1074,7 @@
 #       define R300_FPI1_SRC2C_SHIFT             12
 #       define R300_FPI1_SRC2C_MASK              (31 << 12)
 #       define R300_FPI1_SRC2C_CONST             (1 << 17)
+#       define R300_FPI1_SRC_MASK                0x0003ffff
 #       define R300_FPI1_DSTC_SHIFT              18
 #       define R300_FPI1_DSTC_MASK               (31 << 18)
 #		define R300_FPI1_DSTC_REG_MASK_SHIFT     23
@@ -1093,6 +1096,7 @@
 #       define R300_FPI3_SRC2A_SHIFT             12
 #       define R300_FPI3_SRC2A_MASK              (31 << 12)
 #       define R300_FPI3_SRC2A_CONST             (1 << 17)
+#       define R300_FPI3_SRC_MASK                0x0003ffff
 #       define R300_FPI3_DSTA_SHIFT              18
 #       define R300_FPI3_DSTA_MASK               (31 << 18)
 #       define R300_FPI3_DSTA_REG                (1 << 23)
@@ -1322,8 +1326,8 @@
  * Set to 0A before 3D operations, set to 02 afterwards.
  */
 #define R300_RB3D_DSTCACHE_CTLSTAT          0x4E4C
-#       define R300_RB3D_DSTCACHE_02             0x00000002
-#       define R300_RB3D_DSTCACHE_0A             0x0000000A
+#       define R300_RB3D_DSTCACHE_UNKNOWN_02             0x00000002
+#       define R300_RB3D_DSTCACHE_UNKNOWN_0A             0x0000000A
 
 /* gap */
 /* There seems to be no "write only" setting, so use Z-test = ALWAYS
@@ -1394,6 +1398,12 @@
 
 /* gap */
 
+#define R300_RB3D_ZCACHE_CTLSTAT            0x4F18 /* GUESS */
+#       define R300_RB3D_ZCACHE_UNKNOWN_01  0x1
+#       define R300_RB3D_ZCACHE_UNKNOWN_03  0x3
+
+/* gap */
+
 #define R300_RB3D_DEPTHOFFSET               0x4F20
 #define R300_RB3D_DEPTHPITCH                0x4F24
 #       define R300_DEPTHPITCH_MASK              0x00001FF8 /* GUESS */
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index 211c451..0864558 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -344,10 +344,10 @@
 	r300UpdateShaderStates(rmesa);
 	
 	reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
-	e32(0x0000000a);
+	e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
 
-	reg_start(0x4f18,0);
-	e32(0x00000003);
+	reg_start(R300_RB3D_ZCACHE_CTLSTAT,0);
+	e32(R300_RB3D_ZCACHE_UNKNOWN_03);
 	
 	r300EmitState(rmesa);
 	
@@ -360,10 +360,10 @@
 	}
 
 	reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
-	e32(0x0000000a/*0x2*/);
+	e32(R300_RB3D_DSTCACHE_UNKNOWN_0A /*R300_RB3D_DSTCACHE_UNKNOWN_02*/);
 
-	reg_start(0x4f18,0);
-	e32(0x00000003/*0x1*/);
+	reg_start(R300_RB3D_ZCACHE_CTLSTAT,0);
+	e32(R300_RB3D_ZCACHE_UNKNOWN_03 /*R300_RB3D_ZCACHE_UNKNOWN_01*/);
 
 #ifdef USER_BUFFERS
 	r300UseArrays(ctx);
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index e8bce93..41c07a3 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -12,15 +12,6 @@
 {
 	switch(target){
 		case GL_VERTEX_PROGRAM_ARB:
-		//rmesa->curr_vp = (struct gl_vertex_program *)vp;
-		//vp->ref_count++;
-#if 0
-		if((vp->ref_count % 1500) == 0) {
-			fprintf(stderr, "id %p, ref_count %d\n", vp, vp->ref_count);
-			_mesa_print_program(&vp->mesa_program.Base);
-		}
-#endif
-		
 		case GL_FRAGMENT_PROGRAM_ARB:
 		break;
 		default:
@@ -54,18 +45,9 @@
 	return NULL;	
 }
 
-
 static void
 r300DeleteProgram(GLcontext *ctx, struct gl_program *prog)
 {
-#if 0
-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-	struct r300_vertex_program *vp=(void *)prog;
-	
-	if(rmesa->curr_vp == vp)
-		rmesa->curr_vp = NULL;
-#endif
-
 	_mesa_delete_program(ctx, prog);
 }
 
@@ -93,9 +75,6 @@
 static GLboolean
 r300IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
 {
-	//struct r300_vertex_program *vp=(void *)prog;
-	//r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
 	return 1;
 }
 
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 50dcee4..fff1165 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -71,14 +71,14 @@
 	GLubyte color[4];
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 
-	R300_STATECHANGE(rmesa, unk4E10);
+	R300_STATECHANGE(rmesa, blend_color);
 
 	CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);
 	CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
 	CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
 	CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
 
-	rmesa->hw.unk4E10.cmd[1] = r300PackColor(4, color[3], color[0],
+	rmesa->hw.blend_color.cmd[1] = r300PackColor(4, color[3], color[0],
 						 color[1], color[2]);
 }
 
@@ -337,17 +337,17 @@
 	*/
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
 
-	R300_STATECHANGE(r300, unk4F10);
+	R300_STATECHANGE(r300, zstencil_format);
 	if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
 		/* disable early Z */
-		r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_DISABLE;
+		r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
 	else {
 		if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER)
 			/* enable early Z */
-			r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_ENABLE;
+			r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE;
 		else
 			/* disable early Z */
-			r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_DISABLE;
+			r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE;
 	}
 }
 
@@ -533,11 +533,11 @@
 		break;
 
 	case GL_POLYGON_OFFSET_FILL:
-		R300_STATECHANGE(r300, unk42B4);
+		R300_STATECHANGE(r300, occlusion_cntl);
 		if(state){
-			r300->hw.unk42B4.cmd[1] |= (3<<0);
+			r300->hw.occlusion_cntl.cmd[1] |= (3<<0);
 		} else {
-			r300->hw.unk42B4.cmd[1] &= ~(3<<0);
+			r300->hw.occlusion_cntl.cmd[1] &= ~(3<<0);
 		}
 		break;
 	default:
@@ -591,9 +591,9 @@
 		}
 	}
 
-	if (r300->hw.unk4288.cmd[1] != hw_mode) {
-		R300_STATECHANGE(r300, unk4288);
-		r300->hw.unk4288.cmd[1] = hw_mode;
+	if (r300->hw.polygon_mode.cmd[1] != hw_mode) {
+		R300_STATECHANGE(r300, polygon_mode);
+		r300->hw.polygon_mode.cmd[1] = hw_mode;
 	}
 }
 
@@ -832,13 +832,13 @@
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
 	
-	R300_STATECHANGE(rmesa, unk4274);
+	R300_STATECHANGE(rmesa, shade);
 	switch (mode) {
 	case GL_FLAT:
-		rmesa->hw.unk4274.cmd[2] = R300_RE_SHADE_MODEL_FLAT;
+		rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT;
 		break;
 	case GL_SMOOTH:
-		rmesa->hw.unk4274.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH;
+		rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH;
 		break;
 	default:
 		return;
@@ -1070,8 +1070,8 @@
     	switch(state[1])
 	{
 	case STATE_R300_WINDOW_DIMENSION:
-	    value[0] = r300->radeon.dri.drawable->w;	/* width */
-    	    value[1] = r300->radeon.dri.drawable->h;	/* height */
+	    value[0] = r300->radeon.dri.drawable->w*0.5f;/* width*0.5 */
+    	    value[1] = r300->radeon.dri.drawable->h*0.5f;/* height*0.5 */
 	    value[2] = 0.5F; 				/* for moving range [-1 1] -> [0 1] */
     	    value[3] = 1.0F; 				/* not used */
 	    break;
@@ -1085,20 +1085,20 @@
  * Update R300's own internal state parameters.
  * For now just STATE_R300_WINDOW_DIMENSION
  */
-static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
 {
-	struct r300_vertex_program_cont *vpc;
+	struct r300_fragment_program *fp;
 	struct gl_program_parameter_list *paramList;
 	GLuint i;
 
 	if(!(new_state & (_NEW_BUFFERS|_NEW_PROGRAM)))
 	    return;
 
-	vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
-	if (!vpc)
+	fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
+	if (!fp)
 	    return;
 
-	paramList = vpc->mesa_program.Base.Parameters;
+	paramList = fp->mesa_program.Base.Parameters;
 
 	if (!paramList)
 	    return;
@@ -1221,12 +1221,12 @@
 
 	R300_STATECHANGE(r300, txe);
 	R300_STATECHANGE(r300, tex.filter);
-	R300_STATECHANGE(r300, tex.unknown1);
+	R300_STATECHANGE(r300, tex.filter_1);
 	R300_STATECHANGE(r300, tex.size);
 	R300_STATECHANGE(r300, tex.format);
 	R300_STATECHANGE(r300, tex.pitch);
 	R300_STATECHANGE(r300, tex.offset);
-	R300_STATECHANGE(r300, tex.unknown4);
+	R300_STATECHANGE(r300, tex.chroma_key);
 	R300_STATECHANGE(r300, tex.border_color);
 	
 	r300->hw.txe.cmd[R300_TXE_ENABLE]=0x0;
@@ -1263,7 +1263,7 @@
 			
 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = gen_fixed_filter(t->filter) | (hw_tmu << 28);
 			/* Currently disabled! */
-			r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80;
+			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80;
 			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->size;
 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->format;
 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pitch_reg;
@@ -1277,7 +1277,7 @@
 				WARN_ONCE("micro tiling enabled!\n");
 			}
 			
-			r300->hw.tex.unknown4.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0;
+			r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0;
 			r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pp_border_color;
 			
 			last_hw_tmu = hw_tmu;
@@ -1287,12 +1287,12 @@
 	}
 	
 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1);
-	r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
+	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
 	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
 	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_PITCH_0, last_hw_tmu + 1);
 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
-	r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
+	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
 	
 	
@@ -1977,7 +1977,7 @@
 		/* Initialize magic registers
 		 TODO : learn what they really do, or get rid of
 		 those we don't have to touch */
-	r300->hw.unk2080.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */
+	r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */
 
 	r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
 				| R300_VPORT_X_OFFSET_ENA
@@ -1991,9 +1991,9 @@
 	r300->hw.unk2134.cmd[1] = 0x00FFFFFF;
 	r300->hw.unk2134.cmd[2] = 0x00000000;
 	if (_mesa_little_endian())
-		r300->hw.unk2140.cmd[1] = 0x00000000;
+		r300->hw.vap_cntl_status.cmd[1] = 0x00000000;
 	else
-		r300->hw.unk2140.cmd[1] = 0x00000002;
+		r300->hw.vap_cntl_status.cmd[1] = 0x00000002;
 
 #if 0 /* Done in setup routing */
 	((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = 1;
@@ -2084,16 +2084,16 @@
 	r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0);
 	r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0);
 
-	r300->hw.unk4274.cmd[1] = 0x00000002;
+	r300->hw.shade.cmd[1] = 0x00000002;
 	r300ShadeModel(ctx, ctx->Light.ShadeModel);
-	r300->hw.unk4274.cmd[3] = 0x00000000;
-	r300->hw.unk4274.cmd[4] = 0x00000000;
+	r300->hw.shade.cmd[3] = 0x00000000;
+	r300->hw.shade.cmd[4] = 0x00000000;
 
 	r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
 	r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
-	r300->hw.unk4288.cmd[2] = 0x00000001;
-	r300->hw.unk4288.cmd[3] = 0x00000000;
-	r300->hw.unk42A0.cmd[1] = 0x00000000;
+	r300->hw.polygon_mode.cmd[2] = 0x00000001;
+	r300->hw.polygon_mode.cmd[3] = 0x00000000;
+	r300->hw.zbias_cntl.cmd[1] = 0x00000000;
 
 	r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits);
 	r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
@@ -2151,8 +2151,8 @@
 #endif
 
 	r300BlendColor(ctx, ctx->Color.BlendColor);
-	r300->hw.unk4E10.cmd[2] = 0;
-	r300->hw.unk4E10.cmd[3] = 0;
+	r300->hw.blend_color.cmd[2] = 0;
+	r300->hw.blend_color.cmd[3] = 0;
 	
 	/* Again, r300ClearBuffer uses this */
 	r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset +
@@ -2184,10 +2184,10 @@
 
 	switch (ctx->Visual.depthBits) {
 	case 16:
-		r300->hw.unk4F10.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z;
+		r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z;
 	break;
 	case 24:
-		r300->hw.unk4F10.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z;
+		r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z;
 	break;
 	default:
 		fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
@@ -2196,10 +2196,10 @@
 			
 	}
 	/* z compress? */
-	//r300->hw.unk4F10.cmd[1] |= R300_DEPTH_FORMAT_UNK32;
+	//r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32;
 	
-	r300->hw.unk4F10.cmd[3] = 0x00000003;
-	r300->hw.unk4F10.cmd[4] = 0x00000000;
+	r300->hw.zstencil_format.cmd[3] = 0x00000003;
+	r300->hw.zstencil_format.cmd[4] = 0x00000000;
 
 	r300->hw.zb.cmd[R300_ZB_OFFSET] =
 		r300->radeon.radeonScreen->depthOffset +
diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
index f6a5065..52e606f 100644
--- a/src/mesa/drivers/dri/r300/r300_state.h
+++ b/src/mesa/drivers/dri/r300/r300_state.h
@@ -61,6 +61,7 @@
 	
 extern void r300ResetHwState(r300ContextPtr r300);
 
+extern void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state);
 extern void r300InitState(r300ContextPtr r300);
 extern void r300InitStateFuncs(struct dd_function_table* functions);
 extern void r300UpdateViewportOffset( GLcontext *ctx );
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 6348ba1..eb72802 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_tex.c,v 1.2 2002/11/05 17:46:08 tsi Exp $ */
 /*
 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
 
diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
index a18ff0e..10aabc8 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.h
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_tex.h,v 1.1 2002/10/30 12:51:53 alanh Exp $ */
 /*
 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
 
diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
index 96973c0..f531b54 100644
--- a/src/mesa/drivers/dri/r300/r300_texmem.c
+++ b/src/mesa/drivers/dri/r300/r300_texmem.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_texmem.c,v 1.5 2002/12/17 00:32:56 dawes Exp $ */
 /**************************************************************************
 
 Copyright (C) Tungsten Graphics 2002.  All Rights Reserved.
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index cc356af..4bc0ea1 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_texstate.c,v 1.3 2003/02/15 22:18:47 dawes Exp $ */
 /*
 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
 
diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
similarity index 90%
rename from src/mesa/drivers/dri/r300/r300_vertexprog.c
rename to src/mesa/drivers/dri/r300/r300_vertprog.c
index cc8e077..aa2f201 100644
--- a/src/mesa/drivers/dri/r300/r300_vertexprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -891,8 +891,8 @@
 #endif
 	paramList = prog->Parameters;
 
-	vpi = malloc((prog->NumInstructions + 4) * sizeof(struct prog_instruction));
-	memset(vpi, 0, 4 * sizeof(struct prog_instruction));
+	vpi = _mesa_alloc_instructions (prog->NumInstructions + 4);
+	_mesa_init_instructions (vpi, prog->NumInstructions + 4);
 
 	for (i=0; i < 4; i++) {
 		GLint idx;
@@ -910,11 +910,11 @@
 
 		vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
 		vpi[i].SrcReg[0].Index = idx;
-		vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
+		vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
 
 		vpi[i].SrcReg[1].File = PROGRAM_INPUT;
 		vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
-		vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
+		vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
 #else
 		if (i == 0)
 			vpi[i].Opcode = OPCODE_MUL;
@@ -934,7 +934,7 @@
 
 		vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
 		vpi[i].SrcReg[0].Index = idx;
-		vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
+		vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
 
 		vpi[i].SrcReg[1].File = PROGRAM_INPUT;
 		vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
@@ -943,12 +943,12 @@
 		if (i > 0) {
 			vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
 			vpi[i].SrcReg[2].Index = 0;
-			vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
+			vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
 		}
 #endif					
 	}
 
-	memcpy(&vpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction));
+	_mesa_copy_instructions (&vpi[i], prog->Instructions, prog->NumInstructions);
 
 	free(prog->Instructions);
 
@@ -964,23 +964,19 @@
 		       struct gl_program *prog,
 		       GLuint temp_index)
 {
-
-        gl_state_index tokens[STATE_LENGTH]
-           = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 };
 	struct prog_instruction *vpi;
 	struct prog_instruction *vpi_insert;
-	GLuint window_index;
 	int i = 0;
 	
-	vpi = malloc((prog->NumInstructions + 5) * sizeof(struct prog_instruction));
+	vpi = _mesa_alloc_instructions (prog->NumInstructions + 2);
+	_mesa_init_instructions (vpi, prog->NumInstructions + 2);
 	/* all but END */
-	memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction));
+	_mesa_copy_instructions (vpi, prog->Instructions, prog->NumInstructions - 1);
 	/* END */
-	memcpy(&vpi[prog->NumInstructions + 4], &prog->Instructions[prog->NumInstructions - 1],
-		sizeof(struct prog_instruction));
-	
+	_mesa_copy_instructions (&vpi[prog->NumInstructions + 1],
+				 &prog->Instructions[prog->NumInstructions - 1],
+				 1);
 	vpi_insert = &vpi[prog->NumInstructions - 1];
-	memset(vpi_insert, 0, 5 * sizeof(struct prog_instruction));
 
 	vpi_insert[i].Opcode = OPCODE_MOV;
 
@@ -991,62 +987,10 @@
 
 	vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
 	vpi_insert[i].SrcReg[0].Index = temp_index;
-	vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
+	vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
 	i++;
 
-	/* perspective divide */
-	vpi_insert[i].Opcode = OPCODE_RCP;
-
-	vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY;
-	vpi_insert[i].DstReg.Index = temp_index;
-	vpi_insert[i].DstReg.WriteMask = WRITEMASK_W;
-	vpi_insert[i].DstReg.CondMask = COND_TR;
-
-	vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
-	vpi_insert[i].SrcReg[0].Index = temp_index;
-	vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
-	i++;
-
-	vpi_insert[i].Opcode = OPCODE_MUL;
-
-	vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY;
-	vpi_insert[i].DstReg.Index = temp_index;
-	vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZ;
-	vpi_insert[i].DstReg.CondMask = COND_TR;
-
-	vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
-	vpi_insert[i].SrcReg[0].Index = temp_index;
-	vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
-	vpi_insert[i].SrcReg[1].File = PROGRAM_TEMPORARY;
-	vpi_insert[i].SrcReg[1].Index = temp_index;
-	vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_ZERO);
-	i++;
-
-	/* viewport transformation */
-	window_index = _mesa_add_state_reference(prog->Parameters, tokens);
-
-	vpi_insert[i].Opcode = OPCODE_MAD;
-
-	vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY;
-	vpi_insert[i].DstReg.Index = temp_index;
-	vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZ;
-	vpi_insert[i].DstReg.CondMask = COND_TR;
-
-	vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
-	vpi_insert[i].SrcReg[0].Index = temp_index;
-	vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
-	vpi_insert[i].SrcReg[1].File = PROGRAM_STATE_VAR;
-	vpi_insert[i].SrcReg[1].Index = window_index;
-	vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_ZERO);
-
-	vpi_insert[i].SrcReg[2].File = PROGRAM_STATE_VAR;
-	vpi_insert[i].SrcReg[2].Index = window_index;
-	vpi_insert[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_ZERO);
-	i++;
-
-	vpi_insert[i].Opcode = OPCODE_MUL;
+	vpi_insert[i].Opcode = OPCODE_MOV;
 
 	vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
 	vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx;
@@ -1055,11 +999,7 @@
 
 	vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
 	vpi_insert[i].SrcReg[0].Index = temp_index;
-	vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W);
-
-	vpi_insert[i].SrcReg[1].File = PROGRAM_STATE_VAR;
-	vpi_insert[i].SrcReg[1].Index = window_index;
-	vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_ONE);
+	vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
 	i++;
 
 	free(prog->Instructions);
diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
index 66d1b15..22f943e 100644
--- a/src/mesa/drivers/dri/r300/radeon_context.c
+++ b/src/mesa/drivers/dri/r300/radeon_context.c
@@ -70,7 +70,10 @@
 
 	switch (name) {
 	case GL_VENDOR:
-		return (GLubyte *) "Tungsten Graphics, Inc.";
+		if (IS_R300_CLASS(radeon->radeonScreen))
+			return (GLubyte *) "DRI R300 Project";
+		else
+			return (GLubyte *) "Tungsten Graphics, Inc.";
 
 	case GL_RENDERER:
 	{
diff --git a/src/mesa/drivers/dri/r300/radeon_mm.c b/src/mesa/drivers/dri/r300/radeon_mm.c
index 32ed1f4..1502dac 100644
--- a/src/mesa/drivers/dri/r300/radeon_mm.c
+++ b/src/mesa/drivers/dri/r300/radeon_mm.c
@@ -284,7 +284,7 @@
 	}
 	
 	reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
-	e32(0x0000000a);
+	e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
 	
 	reg_start(0x342c,0);
 	e32(0x00000005);
diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
index 1b74f67..cc779d6 100644
--- a/src/mesa/drivers/dri/r300/radeon_span.c
+++ b/src/mesa/drivers/dri/r300/radeon_span.c
@@ -1,4 +1,3 @@
-/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.c,v 1.6 2002/10/30 12:51:56 alanh Exp $ */
 /**************************************************************************
 
 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 6bc2c4a..d7c2d14 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -64,6 +64,7 @@
 
 #define need_GL_ARB_multisample
 #define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
 #define need_GL_EXT_blend_minmax
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_secondary_color
@@ -122,6 +123,7 @@
     { "GL_ARB_texture_env_crossbar",       NULL },
     { "GL_ARB_texture_env_dot3",           NULL },
     { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
     { "GL_EXT_blend_logic_op",             NULL },
     { "GL_EXT_blend_subtract",             GL_EXT_blend_minmax_functions },
     { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
@@ -594,12 +596,14 @@
 	 driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags,
 				&newCtx->vbl_seq );
       }
-      
-      if ( (newCtx->dri.drawable != driDrawPriv)
-	   || (newCtx->dri.readable != driReadPriv) ) {
-	 newCtx->dri.drawable = driDrawPriv;
-	 newCtx->dri.readable = driReadPriv;
 
+      newCtx->dri.readable = driReadPriv;
+
+      if ( (newCtx->dri.drawable != driDrawPriv) ||
+           newCtx->lastStamp != driDrawPriv->lastStamp ) {
+	 newCtx->dri.drawable = driDrawPriv;
+
+	 radeonSetCliprects(newCtx);
 	 radeonUpdateWindow( newCtx->glCtx );
 	 radeonUpdateViewportOffset( newCtx->glCtx );
       }
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
index e6ab6af..cdf8a19 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -96,7 +96,6 @@
       radeonSetCliprects( rmesa );
       radeonUpdateViewportOffset( rmesa->glCtx );
       driUpdateFramebufferSize(rmesa->glCtx, drawable);
-      rmesa->lastStamp = drawable->lastStamp;
    }
 
    RADEON_STATECHANGE( rmesa, ctx );
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index e19202f..4de05c7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -1675,6 +1675,8 @@
 
    if (rmesa->state.scissor.enabled)
       radeonRecalcScissorRects( rmesa );
+
+   rmesa->lastStamp = drawable->lastStamp;
 }
 
 
diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c
index f859217..43422db 100644
--- a/src/mesa/drivers/dri/savage/savage_xmesa.c
+++ b/src/mesa/drivers/dri/savage/savage_xmesa.c
@@ -61,6 +61,7 @@
 
 #define need_GL_ARB_multisample
 #define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
 #define need_GL_EXT_secondary_color
 #include "extension_helper.h"
 
@@ -135,6 +136,7 @@
     { "GL_ARB_multisample",                GL_ARB_multisample_functions },
     { "GL_ARB_multitexture",               NULL },
     { "GL_ARB_texture_compression",        GL_ARB_texture_compression_functions },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
     { "GL_EXT_stencil_wrap",               NULL },
     { "GL_EXT_texture_lod_bias",           NULL },
     { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
diff --git a/src/mesa/drivers/dri/sis/sis_context.c b/src/mesa/drivers/dri/sis/sis_context.c
index 89b81da..b21df0a 100644
--- a/src/mesa/drivers/dri/sis/sis_context.c
+++ b/src/mesa/drivers/dri/sis/sis_context.c
@@ -60,6 +60,7 @@
 
 #define need_GL_ARB_multisample
 #define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_secondary_color
 #include "extension_helper.h"
@@ -79,6 +80,7 @@
     { "GL_ARB_texture_border_clamp",       NULL },
     { "GL_ARB_texture_compression",        GL_ARB_texture_compression_functions },
     { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
     /*{ "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },*/
     { "GL_EXT_texture_lod_bias",           NULL },
     { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
diff --git a/src/mesa/drivers/dri/unichrome/via_context.c b/src/mesa/drivers/dri/unichrome/via_context.c
index bc5a414..4d25d32 100644
--- a/src/mesa/drivers/dri/unichrome/via_context.c
+++ b/src/mesa/drivers/dri/unichrome/via_context.c
@@ -64,6 +64,7 @@
 
 #define need_GL_ARB_multisample
 #define need_GL_ARB_point_parameters
+#define need_GL_ARB_vertex_buffer_object
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_secondary_color
 #include "extension_helper.h"
@@ -372,6 +373,7 @@
     { "GL_ARB_texture_env_combine",        NULL },
 /*    { "GL_ARB_texture_env_dot3",           NULL }, */
     { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
     { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
     { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
     { "GL_EXT_stencil_wrap",               NULL },
@@ -766,9 +768,7 @@
 				  drawable);
    }
 
-   draw_buffer->drawXoff = (GLuint)(((drawable->x * bytePerPixel) & 0x1f) / 
-			      bytePerPixel);  
-   draw_buffer->drawX = drawable->x - draw_buffer->drawXoff;
+   draw_buffer->drawX = drawable->x;
    draw_buffer->drawY = drawable->y;
    draw_buffer->drawW = drawable->w;
    draw_buffer->drawH = drawable->h;
@@ -780,9 +780,7 @@
 				     readable);
       }
 
-      read_buffer->drawXoff = (GLuint)(((readable->x * bytePerPixel) & 0x1f) / 
-				       bytePerPixel);  
-      read_buffer->drawX = readable->x - read_buffer->drawXoff;
+      read_buffer->drawX = readable->x;
       read_buffer->drawY = readable->y;
       read_buffer->drawW = readable->w;
       read_buffer->drawH = readable->h;
@@ -793,13 +791,24 @@
 			draw_buffer->drawX * bytePerPixel);
 
    vmesa->front.origMap = (vmesa->front.map + 
-			   draw_buffer->drawY * vmesa->front.pitch + 
-			   draw_buffer->drawX * bytePerPixel);
+			draw_buffer->drawY * vmesa->front.pitch + 
+			draw_buffer->drawX * bytePerPixel);
 
-   vmesa->back.orig = vmesa->back.offset;
-   vmesa->depth.orig = vmesa->depth.offset;   
-   vmesa->back.origMap = vmesa->back.map;
-   vmesa->depth.origMap = vmesa->depth.map;
+   vmesa->back.orig = (vmesa->back.offset +
+			draw_buffer->drawY * vmesa->back.pitch +
+			draw_buffer->drawX * bytePerPixel);
+
+   vmesa->back.origMap = (vmesa->back.map +
+			draw_buffer->drawY * vmesa->back.pitch +
+			draw_buffer->drawX * bytePerPixel);
+
+   vmesa->depth.orig = (vmesa->depth.offset +
+			draw_buffer->drawY * vmesa->depth.pitch +
+			draw_buffer->drawX * bytePerPixel);   
+
+   vmesa->depth.origMap = (vmesa->depth.map +
+			draw_buffer->drawY * vmesa->depth.pitch +
+			draw_buffer->drawX * bytePerPixel);
 
    viaCalcViewport(vmesa->glCtx);
 }
diff --git a/src/mesa/drivers/dri/unichrome/via_context.h b/src/mesa/drivers/dri/unichrome/via_context.h
index 77161a8..fecd278 100644
--- a/src/mesa/drivers/dri/unichrome/via_context.h
+++ b/src/mesa/drivers/dri/unichrome/via_context.h
@@ -104,11 +104,6 @@
    int drawW;                  
    int drawH;    
 
-   int drawXoff;		/* drawX is 32byte aligned - this is
-				 * the delta to the real origin, in
-				 * pixel units.
-				 */
-
    __DRIdrawablePrivate *dPriv;
 };
 
diff --git a/src/mesa/drivers/dri/unichrome/via_ioctl.c b/src/mesa/drivers/dri/unichrome/via_ioctl.c
index 5d102de..4a733fb 100644
--- a/src/mesa/drivers/dri/unichrome/via_ioctl.c
+++ b/src/mesa/drivers/dri/unichrome/via_ioctl.c
@@ -187,7 +187,7 @@
       int w = pbox[i].x2 - pbox[i].x1;
       int h = pbox[i].y2 - pbox[i].y1;
 
-      int offset = (buffer->orig + 
+      int offset = (buffer->offset + 
 		    y * buffer->pitch + 
 		    x * bytePerPixel);
 
@@ -276,7 +276,7 @@
 
       /* flip top to bottom */
       cy = dPriv->h - cy - ch;
-      cx += vrb->drawX + vrb->drawXoff;
+      cx += vrb->drawX;
       cy += vrb->drawY;
         
       if (!all) {
@@ -359,8 +359,8 @@
       GLint w = b->x2 - b->x1;
       GLint h = b->y2 - b->y1;
 	
-      GLuint src = back->orig + y * back->pitch + x * bytePerPixel;
-      GLuint dest = front->orig + y * front->pitch + x * bytePerPixel;
+      GLuint src = back->offset + y * back->pitch + x * bytePerPixel;
+      GLuint dest = front->offset + y * front->pitch + x * bytePerPixel;
 
       viaBlit(vmesa, 
 	      bytePerPixel << 3, 
@@ -747,7 +747,7 @@
 		    : HC_HDBFM_RGB565);
 
    GLuint pitch = buffer->pitch;
-   GLuint offset = buffer->orig;
+   GLuint offset = buffer->offset;
 
    if (0)
       fprintf(stderr, "emit cliprect for box %d,%d %d,%d\n", 
@@ -768,7 +768,7 @@
    vb[4] = (HC_SubA_HDBBasL << 24) | (offset & 0xFFFFFF);
    vb[5] = (HC_SubA_HDBBasH << 24) | ((offset & 0xFF000000) >> 24); 
 
-   vb[6] = (HC_SubA_HSPXYOS << 24) | ((31 - buffer->drawXoff) << HC_HSPXOS_SHIFT);
+   vb[6] = (HC_SubA_HSPXYOS << 24);
    vb[7] = (HC_SubA_HDBFM << 24) | HC_HDBLoc_Local | format | pitch;
 }
 
@@ -887,22 +887,18 @@
       struct via_renderbuffer *const vrb = 
 	(struct via_renderbuffer *) dPriv->driverPrivate;
 
-
       for (i = 0; i < vmesa->numClipRects; i++) {
 	 drm_clip_rect_t b;
 
-	 b.x1 = pbox[i].x1 - (vrb->drawX + vrb->drawXoff);
-	 b.x2 = pbox[i].x2 - (vrb->drawX + vrb->drawXoff);
-	 b.y1 = pbox[i].y1 - vrb->drawY;
-	 b.y2 = pbox[i].y2 - vrb->drawY;
+	 b.x1 = pbox[i].x1;
+	 b.x2 = pbox[i].x2;
+	 b.y1 = pbox[i].y1;
+	 b.y2 = pbox[i].y2;
 
 	 if (vmesa->scissor &&
 	     !intersect_rect(&b, &b, &vmesa->scissorRect)) 
 	    continue;
 
-	 b.x1 += vrb->drawXoff;
-	 b.x2 += vrb->drawXoff;
-
 	 via_emit_cliprect(vmesa, &b);
 
 	 if (fire_buffer(vmesa) != 0) {
diff --git a/src/mesa/drivers/dri/unichrome/via_span.c b/src/mesa/drivers/dri/unichrome/via_span.c
index f1ed980..3a16dad 100644
--- a/src/mesa/drivers/dri/unichrome/via_span.c
+++ b/src/mesa/drivers/dri/unichrome/via_span.c
@@ -46,7 +46,7 @@
     GLuint pitch = vrb->pitch;                                          \
     GLuint height = dPriv->h;                                        	\
     GLint p = 0;							\
-    char *buf = (char *)(vrb->origMap + vrb->drawXoff * vrb->bpp);      \
+    char *buf = (char *)(vrb->origMap);					\
     (void) p;
 
 /* ================================================================
@@ -82,7 +82,7 @@
     __DRIdrawablePrivate *dPriv = vrb->dPriv;                       \
     GLuint depth_pitch = vrb->pitch;                                \
     GLuint height = dPriv->h;                                       \
-    char *buf = (char *)(vrb->map + (vrb->drawXoff * vrb->bpp/8))
+    char *buf = (char *)(vrb->map)
 
 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
 
diff --git a/src/mesa/drivers/dri/unichrome/via_state.c b/src/mesa/drivers/dri/unichrome/via_state.c
index 30b9dc2..bccdbe9 100644
--- a/src/mesa/drivers/dri/unichrome/via_state.c
+++ b/src/mesa/drivers/dri/unichrome/via_state.c
@@ -500,10 +500,8 @@
 
       OUT_RING( HC_HEADER2 );                     
       OUT_RING( (HC_ParaType_NotTex << 16) );
-      OUT_RING( (HC_SubA_HSPXYOS << 24) | 
-		(((32- vrb->drawXoff) & 0x1f) << HC_HSPXOS_SHIFT));
-      OUT_RING( (HC_SubA_HSPXYOS << 24) | 
-		(((32 - vrb->drawXoff) & 0x1f) << HC_HSPXOS_SHIFT));
+      OUT_RING( (HC_SubA_HSPXYOS << 24) );
+      OUT_RING( (HC_SubA_HSPXYOS << 24) );
 
       ADVANCE_RING();
    }
@@ -712,12 +710,8 @@
 }
 
 
-/* =============================================================
- */
 
-
-/* Using drawXoff like this is incorrect outside of locked regions.
- * This hardware just isn't capable of private back buffers without
+/* This hardware just isn't capable of private back buffers without
  * glitches and/or a hefty locking scheme.
  */
 void viaCalcViewport(GLcontext *ctx)
@@ -729,12 +723,10 @@
     const GLfloat *v = ctx->Viewport._WindowMap.m;
     GLfloat *m = vmesa->ViewportMatrix.m;
     
-    /* See also via_translate_vertex.
-     */
     m[MAT_SX] =   v[MAT_SX];
-    m[MAT_TX] =   v[MAT_TX] + SUBPIXEL_X + vrb->drawXoff;
+    m[MAT_TX] =   v[MAT_TX] + vrb->drawX + SUBPIXEL_X;
     m[MAT_SY] = - v[MAT_SY];
-    m[MAT_TY] = - v[MAT_TY] + dPriv->h + SUBPIXEL_Y;
+    m[MAT_TY] = - v[MAT_TY] + vrb->drawY + SUBPIXEL_Y + vrb->drawH;
     m[MAT_SZ] =   v[MAT_SZ] * (1.0 / vmesa->depth_max);
     m[MAT_TZ] =   v[MAT_TZ] * (1.0 / vmesa->depth_max);
 }
diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c
index 776928d..b513dc8 100644
--- a/src/mesa/drivers/x11/xm_api.c
+++ b/src/mesa/drivers/x11/xm_api.c
@@ -483,6 +483,12 @@
 
          /* mark as delete pending */
          fb->DeletePending = GL_TRUE;
+
+         /* Since the X window for the XMesaBuffer is going away, we don't
+          * want to dereference this pointer in the future.
+          */
+         b->frontxrb->drawable = 0;
+
          /* Unreference.  If count = zero we'll really delete the buffer */
          _mesa_unreference_framebuffer(&fb);
 
@@ -1836,16 +1842,18 @@
  *  1. the first time a buffer is bound to a context.
  *  2. from glViewport to poll for window size changes
  *  3. from the XMesaResizeBuffers() API function.
+ * Note: it's possible (and legal) for xmctx to be NULL.  That can happen
+ * when resizing a buffer when no rendering context is bound.
  */
 void
 xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer)
 {
    GLuint width, height;
-   xmesa_get_window_size(xmctx->display, drawBuffer, &width, &height);
+   xmesa_get_window_size(drawBuffer->display, drawBuffer, &width, &height);
    if (drawBuffer->mesa_buffer.Width != width ||
        drawBuffer->mesa_buffer.Height != height) {
-      _mesa_resize_framebuffer(&(xmctx->mesa),
-                               &(drawBuffer->mesa_buffer), width, height);
+      GLcontext *ctx = xmctx ? &xmctx->mesa : NULL;
+      _mesa_resize_framebuffer(ctx, &(drawBuffer->mesa_buffer), width, height);
    }
    drawBuffer->mesa_buffer.Initialized = GL_TRUE; /* XXX TEMPORARY? */
 }
@@ -2169,7 +2177,7 @@
       }
 #endif
      if (b->backxrb->ximage) {
-	 /* Copy Ximage from host's memory to server's window */
+	 /* Copy Ximage (back buf) from client memory to server window */
 #if defined(USE_XSHM) && !defined(XFree86Server)
 	 if (b->shm) {
             /*_glthread_LOCK_MUTEX(_xmesa_lock);*/
@@ -2191,8 +2199,8 @@
             /*_glthread_UNLOCK_MUTEX(_xmesa_lock);*/
          }
       }
-      else {
-	 /* Copy pixmap to window on server */
+      else if (b->backxrb->pixmap) {
+	 /* Copy pixmap (back buf) to window (front buf) on server */
          /*_glthread_LOCK_MUTEX(_xmesa_lock);*/
 	 XMesaCopyArea( b->xm_visual->display,
 			b->backxrb->pixmap,   /* source drawable */
@@ -2493,6 +2501,8 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    XMesaContext xmctx = XMESA_CONTEXT(ctx);
+   if (!xmctx)
+      return;
    xmesa_check_and_update_buffer_size(xmctx, b);
 }
 
diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c
index 747971a..c1fa233 100644
--- a/src/mesa/drivers/x11/xm_buffer.c
+++ b/src/mesa/drivers/x11/xm_buffer.c
@@ -168,9 +168,6 @@
 static void
 alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height)
 {
-   if (width == 0 || height == 0)
-      return;
-
    if (b->db_mode == BACK_XIMAGE) {
       /* Deallocate the old backxrb->ximage, if any */
       if (b->backxrb->ximage) {
@@ -186,6 +183,9 @@
 	 b->backxrb->ximage = NULL;
       }
 
+      if (width == 0 || height == 0)
+         return;
+
       /* Allocate new back buffer */
 #ifdef XFree86Server
       /* Allocate a regular XImage for the back buffer. */
@@ -218,20 +218,20 @@
       b->backxrb->pixmap = None;
    }
    else if (b->db_mode == BACK_PIXMAP) {
-      if (!width)
-         width = 1;
-      if (!height)
-         height = 1;
-
       /* Free the old back pixmap */
       if (b->backxrb->pixmap) {
-	 XMesaFreePixmap(b->xm_visual->display, b->backxrb->pixmap);
+         XMesaFreePixmap(b->xm_visual->display, b->backxrb->pixmap);
+         b->backxrb->pixmap = 0;
       }
-      /* Allocate new back pixmap */
-      b->backxrb->pixmap = XMesaCreatePixmap(b->xm_visual->display,
-                                             b->frontxrb->drawable,
-                                             width, height,
-                                             GET_VISUAL_DEPTH(b->xm_visual));
+
+      if (width > 0 && height > 0) {
+         /* Allocate new back pixmap */
+         b->backxrb->pixmap = XMesaCreatePixmap(b->xm_visual->display,
+                                                b->frontxrb->drawable,
+                                                width, height,
+                                                GET_VISUAL_DEPTH(b->xm_visual));
+      }
+
       b->backxrb->ximage = NULL;
    }
 }
@@ -250,6 +250,7 @@
 
 /**
  * Reallocate renderbuffer storage for front color buffer.
+ * Called via gl_renderbuffer::AllocStorage()
  */
 static GLboolean
 xmesa_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
@@ -260,6 +261,7 @@
    /* just clear these to be sure we don't accidentally use them */
    xrb->origin1 = NULL;
    xrb->origin2 = NULL;
+   xrb->origin3 = NULL;
    xrb->origin4 = NULL;
 
    /* for the FLIP macro: */
@@ -275,6 +277,7 @@
 
 /**
  * Reallocate renderbuffer storage for back color buffer.
+ * Called via gl_renderbuffer::AllocStorage()
  */
 static GLboolean
 xmesa_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
@@ -309,8 +312,12 @@
       xrb->origin4 = (GLuint *) xrb->ximage->data + xrb->width4 * (height - 1);
    }
    else {
-      /* this assertion will fail if we happend to run out of memory */
-      /*assert(xrb->pixmap);*/
+      /* out of memory or buffer size is 0 x 0 */
+      xrb->width1 = xrb->width2 = xrb->width3 = xrb->width4 = 0;
+      xrb->origin1 = NULL;
+      xrb->origin2 = NULL;
+      xrb->origin3 = NULL;
+      xrb->origin4 = NULL;
    }
 
    return GL_TRUE;
@@ -362,16 +369,13 @@
 {
    XMesaBuffer b = XMESA_BUFFER(fb);
 
-#ifdef XFree86Server
-   int client = 0;
-   if (b->frontxrb->drawable)
-       client = CLIENT_ID(b->frontxrb->drawable->id);
-#endif
-
    if (b->num_alloced > 0) {
       /* If no other buffer uses this X colormap then free the colors. */
       if (!xmesa_find_buffer(b->display, b->cmap, b)) {
 #ifdef XFree86Server
+         int client = 0;
+         if (b->frontxrb->drawable)
+            client = CLIENT_ID(b->frontxrb->drawable->id);
          (void)FreeColors(b->cmap, client,
                           b->num_alloced, b->alloced_colors, 0);
 #else
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 2b1a35f..0df8d23 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -138,9 +138,9 @@
       attr->Blend = ctx->Color.BlendEnabled;
       attr->ClipPlanes = ctx->Transform.ClipPlanesEnabled;
       attr->ColorMaterial = ctx->Light.ColorMaterialEnabled;
-      attr->ColorTable = ctx->Pixel.ColorTableEnabled;
-      attr->PostColorMatrixColorTable = ctx->Pixel.PostColorMatrixColorTableEnabled;
-      attr->PostConvolutionColorTable = ctx->Pixel.PostConvolutionColorTableEnabled;
+      for (i = 0; i < COLORTABLE_MAX; i++) {
+         attr->ColorTable[i] = ctx->Pixel.ColorTableEnabled[i];
+      }
       attr->Convolution1D = ctx->Pixel.Convolution1DEnabled;
       attr->Convolution2D = ctx->Pixel.Convolution2DEnabled;
       attr->Separable2D = ctx->Pixel.Separable2DEnabled;
@@ -432,14 +432,15 @@
 
    TEST_AND_UPDATE(ctx->Light.ColorMaterialEnabled, enable->ColorMaterial,
                    GL_COLOR_MATERIAL);
-   TEST_AND_UPDATE(ctx->Pixel.ColorTableEnabled, enable->ColorTable,
+   TEST_AND_UPDATE(ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION],
+                   enable->ColorTable[COLORTABLE_PRECONVOLUTION],
                    GL_COLOR_TABLE);
-   TEST_AND_UPDATE(ctx->Pixel.PostColorMatrixColorTableEnabled,
-                   enable->PostColorMatrixColorTable,
-                   GL_POST_COLOR_MATRIX_COLOR_TABLE);
-   TEST_AND_UPDATE(ctx->Pixel.PostConvolutionColorTableEnabled,
-                   enable->PostConvolutionColorTable,
+   TEST_AND_UPDATE(ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION],
+                   enable->ColorTable[COLORTABLE_POSTCONVOLUTION],
                    GL_POST_CONVOLUTION_COLOR_TABLE);
+   TEST_AND_UPDATE(ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX],
+                   enable->ColorTable[COLORTABLE_POSTCOLORMATRIX],
+                   GL_POST_COLOR_MATRIX_COLOR_TABLE);
    TEST_AND_UPDATE(ctx->Polygon.CullFlag, enable->CullFace, GL_CULL_FACE);
    TEST_AND_UPDATE(ctx->Depth.Test, enable->DepthTest, GL_DEPTH_TEST);
    TEST_AND_UPDATE(ctx->Color.DitherFlag, enable->Dither, GL_DITHER);
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 0e3ed15..11bd173 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -140,6 +140,9 @@
       return;
    }
 
+   if (ctx->DrawBuffer->Width == 0 || ctx->DrawBuffer->Height == 0)
+      return;
+
    if (ctx->RenderMode == GL_RENDER) {
       GLbitfield bufferMask;
 
diff --git a/src/mesa/main/colortab.c b/src/mesa/main/colortab.c
index 2ad5c30..610acba 100644
--- a/src/mesa/main/colortab.c
+++ b/src/mesa/main/colortab.c
@@ -291,15 +291,17 @@
                   GLsizei width, GLenum format, GLenum type,
                   const GLvoid *data )
 {
+   static const GLfloat one[4] = { 1.0, 1.0, 1.0, 1.0 };
+   static const GLfloat zero[4] = { 0.0, 0.0, 0.0, 0.0 };
    GET_CURRENT_CONTEXT(ctx);
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
    struct gl_texture_object *texObj = NULL;
    struct gl_color_table *table = NULL;
    GLboolean proxy = GL_FALSE;
    GLint baseFormat;
-   GLfloat rScale = 1.0, gScale = 1.0, bScale = 1.0, aScale = 1.0;
-   GLfloat rBias  = 0.0, gBias  = 0.0, bBias  = 0.0, aBias  = 0.0;
+   const GLfloat *scale = one, *bias = zero;
    GLint comps;
+
    ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); /* too complex */
 
    switch (target) {
@@ -350,18 +352,12 @@
          table = &ctx->Texture.Palette;
          break;
       case GL_COLOR_TABLE:
-         table = &ctx->ColorTable;
-         rScale = ctx->Pixel.ColorTableScale[0];
-         gScale = ctx->Pixel.ColorTableScale[1];
-         bScale = ctx->Pixel.ColorTableScale[2];
-         aScale = ctx->Pixel.ColorTableScale[3];
-         rBias = ctx->Pixel.ColorTableBias[0];
-         gBias = ctx->Pixel.ColorTableBias[1];
-         bBias = ctx->Pixel.ColorTableBias[2];
-         aBias = ctx->Pixel.ColorTableBias[3];
+         table = &ctx->ColorTable[COLORTABLE_PRECONVOLUTION];
+         scale = ctx->Pixel.ColorTableScale[COLORTABLE_PRECONVOLUTION];
+         bias = ctx->Pixel.ColorTableBias[COLORTABLE_PRECONVOLUTION];
          break;
       case GL_PROXY_COLOR_TABLE:
-         table = &ctx->ProxyColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_PRECONVOLUTION];
          proxy = GL_TRUE;
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
@@ -370,14 +366,8 @@
             return;
          }
          table = &(texUnit->ColorTable);
-         rScale = ctx->Pixel.TextureColorTableScale[0];
-         gScale = ctx->Pixel.TextureColorTableScale[1];
-         bScale = ctx->Pixel.TextureColorTableScale[2];
-         aScale = ctx->Pixel.TextureColorTableScale[3];
-         rBias = ctx->Pixel.TextureColorTableBias[0];
-         gBias = ctx->Pixel.TextureColorTableBias[1];
-         bBias = ctx->Pixel.TextureColorTableBias[2];
-         aBias = ctx->Pixel.TextureColorTableBias[3];
+         scale = ctx->Pixel.TextureColorTableScale;
+         bias = ctx->Pixel.TextureColorTableBias;
          break;
       case GL_PROXY_TEXTURE_COLOR_TABLE_SGI:
          if (!ctx->Extensions.SGI_texture_color_table) {
@@ -388,33 +378,21 @@
          proxy = GL_TRUE;
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE:
-         table = &ctx->PostConvolutionColorTable;
-         rScale = ctx->Pixel.PCCTscale[0];
-         gScale = ctx->Pixel.PCCTscale[1];
-         bScale = ctx->Pixel.PCCTscale[2];
-         aScale = ctx->Pixel.PCCTscale[3];
-         rBias = ctx->Pixel.PCCTbias[0];
-         gBias = ctx->Pixel.PCCTbias[1];
-         bBias = ctx->Pixel.PCCTbias[2];
-         aBias = ctx->Pixel.PCCTbias[3];
+         table = &ctx->ColorTable[COLORTABLE_POSTCONVOLUTION];
+         scale = ctx->Pixel.ColorTableScale[COLORTABLE_POSTCONVOLUTION];
+         bias = ctx->Pixel.ColorTableBias[COLORTABLE_POSTCONVOLUTION];
          break;
       case GL_PROXY_POST_CONVOLUTION_COLOR_TABLE:
-         table = &ctx->ProxyPostConvolutionColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_POSTCONVOLUTION];
          proxy = GL_TRUE;
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE:
-         table = &ctx->PostColorMatrixColorTable;
-         rScale = ctx->Pixel.PCMCTscale[0];
-         gScale = ctx->Pixel.PCMCTscale[1];
-         bScale = ctx->Pixel.PCMCTscale[2];
-         aScale = ctx->Pixel.PCMCTscale[3];
-         rBias = ctx->Pixel.PCMCTbias[0];
-         gBias = ctx->Pixel.PCMCTbias[1];
-         bBias = ctx->Pixel.PCMCTbias[2];
-         aBias = ctx->Pixel.PCMCTbias[3];
+         table = &ctx->ColorTable[COLORTABLE_POSTCOLORMATRIX];
+         scale = ctx->Pixel.ColorTableScale[COLORTABLE_POSTCOLORMATRIX];
+         bias = ctx->Pixel.ColorTableBias[COLORTABLE_POSTCOLORMATRIX];
          break;
       case GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE:
-         table = &ctx->ProxyPostColorMatrixColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_POSTCOLORMATRIX];
          proxy = GL_TRUE;
          break;
       default:
@@ -483,10 +461,10 @@
 	 store_colortable_entries(ctx, table,
 				  0, width,  /* start, count */
 				  format, type, data,
-				  rScale, rBias,
-				  gScale, gBias,
-				  bScale, bBias,
-				  aScale, aBias);
+				  scale[0], bias[0],
+				  scale[1], bias[1],
+				  scale[2], bias[2],
+				  scale[3], bias[3]);
       }
    } /* proxy */
 
@@ -510,12 +488,14 @@
                      GLsizei count, GLenum format, GLenum type,
                      const GLvoid *data )
 {
+   static const GLfloat one[4] = { 1.0, 1.0, 1.0, 1.0 };
+   static const GLfloat zero[4] = { 0.0, 0.0, 0.0, 0.0 };
    GET_CURRENT_CONTEXT(ctx);
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
    struct gl_texture_object *texObj = NULL;
    struct gl_color_table *table = NULL;
-   GLfloat rScale = 1.0, gScale = 1.0, bScale = 1.0, aScale = 1.0;
-   GLfloat rBias  = 0.0, gBias  = 0.0, bBias  = 0.0, aBias  = 0.0;
+   const GLfloat *scale = one, *bias = zero;
+
    ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
 
    switch (target) {
@@ -543,15 +523,9 @@
          table = &ctx->Texture.Palette;
          break;
       case GL_COLOR_TABLE:
-         table = &ctx->ColorTable;
-         rScale = ctx->Pixel.ColorTableScale[0];
-         gScale = ctx->Pixel.ColorTableScale[1];
-         bScale = ctx->Pixel.ColorTableScale[2];
-         aScale = ctx->Pixel.ColorTableScale[3];
-         rBias = ctx->Pixel.ColorTableBias[0];
-         gBias = ctx->Pixel.ColorTableBias[1];
-         bBias = ctx->Pixel.ColorTableBias[2];
-         aBias = ctx->Pixel.ColorTableBias[3];
+         table = &ctx->ColorTable[COLORTABLE_PRECONVOLUTION];
+         scale = ctx->Pixel.ColorTableScale[COLORTABLE_PRECONVOLUTION];
+         bias = ctx->Pixel.ColorTableBias[COLORTABLE_PRECONVOLUTION];
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
          if (!ctx->Extensions.SGI_texture_color_table) {
@@ -559,36 +533,18 @@
             return;
          }
          table = &(texUnit->ColorTable);
-         rScale = ctx->Pixel.TextureColorTableScale[0];
-         gScale = ctx->Pixel.TextureColorTableScale[1];
-         bScale = ctx->Pixel.TextureColorTableScale[2];
-         aScale = ctx->Pixel.TextureColorTableScale[3];
-         rBias = ctx->Pixel.TextureColorTableBias[0];
-         gBias = ctx->Pixel.TextureColorTableBias[1];
-         bBias = ctx->Pixel.TextureColorTableBias[2];
-         aBias = ctx->Pixel.TextureColorTableBias[3];
+         scale = ctx->Pixel.TextureColorTableScale;
+         bias = ctx->Pixel.TextureColorTableBias;
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE:
-         table = &ctx->PostConvolutionColorTable;
-         rScale = ctx->Pixel.PCCTscale[0];
-         gScale = ctx->Pixel.PCCTscale[1];
-         bScale = ctx->Pixel.PCCTscale[2];
-         aScale = ctx->Pixel.PCCTscale[3];
-         rBias = ctx->Pixel.PCCTbias[0];
-         gBias = ctx->Pixel.PCCTbias[1];
-         bBias = ctx->Pixel.PCCTbias[2];
-         aBias = ctx->Pixel.PCCTbias[3];
+         table = &ctx->ColorTable[COLORTABLE_POSTCONVOLUTION];
+         scale = ctx->Pixel.ColorTableScale[COLORTABLE_POSTCONVOLUTION];
+         bias = ctx->Pixel.ColorTableBias[COLORTABLE_POSTCONVOLUTION];
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE:
-         table = &ctx->PostColorMatrixColorTable;
-         rScale = ctx->Pixel.PCMCTscale[0];
-         gScale = ctx->Pixel.PCMCTscale[1];
-         bScale = ctx->Pixel.PCMCTscale[2];
-         aScale = ctx->Pixel.PCMCTscale[3];
-         rBias = ctx->Pixel.PCMCTbias[0];
-         gBias = ctx->Pixel.PCMCTbias[1];
-         bBias = ctx->Pixel.PCMCTbias[2];
-         aBias = ctx->Pixel.PCMCTbias[3];
+         table = &ctx->ColorTable[COLORTABLE_POSTCOLORMATRIX];
+         scale = ctx->Pixel.ColorTableScale[COLORTABLE_POSTCOLORMATRIX];
+         bias = ctx->Pixel.ColorTableBias[COLORTABLE_POSTCOLORMATRIX];
          break;
       default:
          _mesa_error(ctx, GL_INVALID_ENUM, "glColorSubTable(target)");
@@ -623,10 +579,10 @@
 
    store_colortable_entries(ctx, table, start, count,
 			    format, type, data,
-			    rScale, rBias,
-			    gScale, gBias,
-			    bScale, bBias,
-			    aScale, aBias);
+                            scale[0], bias[0],
+                            scale[1], bias[1],
+                            scale[2], bias[2],
+                            scale[3], bias[3]);
 
    if (texObj || target == GL_SHARED_TEXTURE_PALETTE_EXT) {
       /* per-texture object palette */
@@ -700,7 +656,7 @@
          table = &ctx->Texture.Palette;
          break;
       case GL_COLOR_TABLE:
-         table = &ctx->ColorTable;
+         table = &ctx->ColorTable[COLORTABLE_PRECONVOLUTION];
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
          if (!ctx->Extensions.SGI_texture_color_table) {
@@ -710,10 +666,10 @@
          table = &(texUnit->ColorTable);
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE:
-         table = &ctx->PostConvolutionColorTable;
+         table = &ctx->ColorTable[COLORTABLE_POSTCONVOLUTION];
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE:
-         table = &ctx->PostColorMatrixColorTable;
+         table = &ctx->ColorTable[COLORTABLE_POSTCOLORMATRIX];
          break;
       default:
          _mesa_error(ctx, GL_INVALID_ENUM, "glGetColorTable(target)");
@@ -831,16 +787,10 @@
    switch (target) {
       case GL_COLOR_TABLE_SGI:
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            ctx->Pixel.ColorTableScale[0] = params[0];
-            ctx->Pixel.ColorTableScale[1] = params[1];
-            ctx->Pixel.ColorTableScale[2] = params[2];
-            ctx->Pixel.ColorTableScale[3] = params[3];
+            COPY_4V(ctx->Pixel.ColorTableScale[COLORTABLE_PRECONVOLUTION], params);
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            ctx->Pixel.ColorTableBias[0] = params[0];
-            ctx->Pixel.ColorTableBias[1] = params[1];
-            ctx->Pixel.ColorTableBias[2] = params[2];
-            ctx->Pixel.ColorTableBias[3] = params[3];
+            COPY_4V(ctx->Pixel.ColorTableBias[COLORTABLE_PRECONVOLUTION], params);
          }
          else {
             _mesa_error(ctx, GL_INVALID_ENUM, "glColorTableParameterfv(pname)");
@@ -853,16 +803,10 @@
             return;
          }
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            ctx->Pixel.TextureColorTableScale[0] = params[0];
-            ctx->Pixel.TextureColorTableScale[1] = params[1];
-            ctx->Pixel.TextureColorTableScale[2] = params[2];
-            ctx->Pixel.TextureColorTableScale[3] = params[3];
+            COPY_4V(ctx->Pixel.TextureColorTableScale, params);
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            ctx->Pixel.TextureColorTableBias[0] = params[0];
-            ctx->Pixel.TextureColorTableBias[1] = params[1];
-            ctx->Pixel.TextureColorTableBias[2] = params[2];
-            ctx->Pixel.TextureColorTableBias[3] = params[3];
+            COPY_4V(ctx->Pixel.TextureColorTableBias, params);
          }
          else {
             _mesa_error(ctx, GL_INVALID_ENUM, "glColorTableParameterfv(pname)");
@@ -871,16 +815,10 @@
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE_SGI:
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            ctx->Pixel.PCCTscale[0] = params[0];
-            ctx->Pixel.PCCTscale[1] = params[1];
-            ctx->Pixel.PCCTscale[2] = params[2];
-            ctx->Pixel.PCCTscale[3] = params[3];
+            COPY_4V(ctx->Pixel.ColorTableScale[COLORTABLE_POSTCONVOLUTION], params);
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            ctx->Pixel.PCCTbias[0] = params[0];
-            ctx->Pixel.PCCTbias[1] = params[1];
-            ctx->Pixel.PCCTbias[2] = params[2];
-            ctx->Pixel.PCCTbias[3] = params[3];
+            COPY_4V(ctx->Pixel.ColorTableBias[COLORTABLE_POSTCONVOLUTION], params);
          }
          else {
             _mesa_error(ctx, GL_INVALID_ENUM, "glColorTableParameterfv(pname)");
@@ -889,16 +827,10 @@
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI:
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            ctx->Pixel.PCMCTscale[0] = params[0];
-            ctx->Pixel.PCMCTscale[1] = params[1];
-            ctx->Pixel.PCMCTscale[2] = params[2];
-            ctx->Pixel.PCMCTscale[3] = params[3];
+            COPY_4V(ctx->Pixel.ColorTableScale[COLORTABLE_POSTCOLORMATRIX], params);
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            ctx->Pixel.PCMCTbias[0] = params[0];
-            ctx->Pixel.PCMCTbias[1] = params[1];
-            ctx->Pixel.PCMCTbias[2] = params[2];
-            ctx->Pixel.PCMCTbias[3] = params[3];
+            COPY_4V(ctx->Pixel.ColorTableBias[COLORTABLE_POSTCOLORMATRIX], params);
          }
          else {
             _mesa_error(ctx, GL_INVALID_ENUM, "glColorTableParameterfv(pname)");
@@ -985,24 +917,18 @@
          table = &ctx->Texture.Palette;
          break;
       case GL_COLOR_TABLE:
-         table = &ctx->ColorTable;
+         table = &ctx->ColorTable[COLORTABLE_PRECONVOLUTION];
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            params[0] = ctx->Pixel.ColorTableScale[0];
-            params[1] = ctx->Pixel.ColorTableScale[1];
-            params[2] = ctx->Pixel.ColorTableScale[2];
-            params[3] = ctx->Pixel.ColorTableScale[3];
+            COPY_4V(params, ctx->Pixel.ColorTableScale[COLORTABLE_PRECONVOLUTION]);
             return;
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            params[0] = ctx->Pixel.ColorTableBias[0];
-            params[1] = ctx->Pixel.ColorTableBias[1];
-            params[2] = ctx->Pixel.ColorTableBias[2];
-            params[3] = ctx->Pixel.ColorTableBias[3];
+            COPY_4V(params, ctx->Pixel.ColorTableBias[COLORTABLE_PRECONVOLUTION]);
             return;
          }
          break;
       case GL_PROXY_COLOR_TABLE:
-         table = &ctx->ProxyColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_PRECONVOLUTION];
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
          if (!ctx->Extensions.SGI_texture_color_table) {
@@ -1011,17 +937,11 @@
          }
          table = &(texUnit->ColorTable);
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            params[0] = ctx->Pixel.TextureColorTableScale[0];
-            params[1] = ctx->Pixel.TextureColorTableScale[1];
-            params[2] = ctx->Pixel.TextureColorTableScale[2];
-            params[3] = ctx->Pixel.TextureColorTableScale[3];
+            COPY_4V(params, ctx->Pixel.TextureColorTableScale);
             return;
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            params[0] = ctx->Pixel.TextureColorTableBias[0];
-            params[1] = ctx->Pixel.TextureColorTableBias[1];
-            params[2] = ctx->Pixel.TextureColorTableBias[2];
-            params[3] = ctx->Pixel.TextureColorTableBias[3];
+            COPY_4V(params, ctx->Pixel.TextureColorTableBias);
             return;
          }
          break;
@@ -1033,44 +953,32 @@
          table = &(texUnit->ProxyColorTable);
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE:
-         table = &ctx->PostConvolutionColorTable;
+         table = &ctx->ColorTable[COLORTABLE_POSTCONVOLUTION];
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            params[0] = ctx->Pixel.PCCTscale[0];
-            params[1] = ctx->Pixel.PCCTscale[1];
-            params[2] = ctx->Pixel.PCCTscale[2];
-            params[3] = ctx->Pixel.PCCTscale[3];
+            COPY_4V(params, ctx->Pixel.ColorTableScale[COLORTABLE_POSTCONVOLUTION]);
             return;
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            params[0] = ctx->Pixel.PCCTbias[0];
-            params[1] = ctx->Pixel.PCCTbias[1];
-            params[2] = ctx->Pixel.PCCTbias[2];
-            params[3] = ctx->Pixel.PCCTbias[3];
+            COPY_4V(params, ctx->Pixel.ColorTableBias[COLORTABLE_POSTCONVOLUTION]);
             return;
          }
          break;
       case GL_PROXY_POST_CONVOLUTION_COLOR_TABLE:
-         table = &ctx->ProxyPostConvolutionColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_POSTCONVOLUTION];
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE:
-         table = &ctx->PostColorMatrixColorTable;
+         table = &ctx->ColorTable[COLORTABLE_POSTCOLORMATRIX];
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            params[0] = ctx->Pixel.PCMCTscale[0];
-            params[1] = ctx->Pixel.PCMCTscale[1];
-            params[2] = ctx->Pixel.PCMCTscale[2];
-            params[3] = ctx->Pixel.PCMCTscale[3];
+            COPY_4V(params, ctx->Pixel.ColorTableScale[COLORTABLE_POSTCOLORMATRIX]);
             return;
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            params[0] = ctx->Pixel.PCMCTbias[0];
-            params[1] = ctx->Pixel.PCMCTbias[1];
-            params[2] = ctx->Pixel.PCMCTbias[2];
-            params[3] = ctx->Pixel.PCMCTbias[3];
+            COPY_4V(params, ctx->Pixel.ColorTableBias[COLORTABLE_POSTCOLORMATRIX]);
             return;
          }
          break;
       case GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE:
-         table = &ctx->ProxyPostColorMatrixColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_POSTCOLORMATRIX];
          break;
       default:
          _mesa_error(ctx, GL_INVALID_ENUM, "glGetColorTableParameterfv(target)");
@@ -1159,24 +1067,26 @@
          table = &ctx->Texture.Palette;
          break;
       case GL_COLOR_TABLE:
-         table = &ctx->ColorTable;
+         table = &ctx->ColorTable[COLORTABLE_PRECONVOLUTION];
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            params[0] = (GLint) ctx->Pixel.ColorTableScale[0];
-            params[1] = (GLint) ctx->Pixel.ColorTableScale[1];
-            params[2] = (GLint) ctx->Pixel.ColorTableScale[2];
-            params[3] = (GLint) ctx->Pixel.ColorTableScale[3];
+            GLfloat *scale = ctx->Pixel.ColorTableScale[COLORTABLE_PRECONVOLUTION];
+            params[0] = (GLint) scale[0];
+            params[1] = (GLint) scale[1];
+            params[2] = (GLint) scale[2];
+            params[3] = (GLint) scale[3];
             return;
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            params[0] = (GLint) ctx->Pixel.ColorTableBias[0];
-            params[1] = (GLint) ctx->Pixel.ColorTableBias[1];
-            params[2] = (GLint) ctx->Pixel.ColorTableBias[2];
-            params[3] = (GLint) ctx->Pixel.ColorTableBias[3];
+            GLfloat *bias = ctx->Pixel.ColorTableBias[COLORTABLE_PRECONVOLUTION];
+            params[0] = (GLint) bias[0];
+            params[1] = (GLint) bias[1];
+            params[2] = (GLint) bias[2];
+            params[3] = (GLint) bias[3];
             return;
          }
          break;
       case GL_PROXY_COLOR_TABLE:
-         table = &ctx->ProxyColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_PRECONVOLUTION];
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
          if (!ctx->Extensions.SGI_texture_color_table) {
@@ -1207,44 +1117,48 @@
          table = &(texUnit->ProxyColorTable);
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE:
-         table = &ctx->PostConvolutionColorTable;
+         table = &ctx->ColorTable[COLORTABLE_POSTCONVOLUTION];
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            params[0] = (GLint) ctx->Pixel.PCCTscale[0];
-            params[1] = (GLint) ctx->Pixel.PCCTscale[1];
-            params[2] = (GLint) ctx->Pixel.PCCTscale[2];
-            params[3] = (GLint) ctx->Pixel.PCCTscale[3];
+            GLfloat *scale = ctx->Pixel.ColorTableScale[COLORTABLE_POSTCONVOLUTION];
+            params[0] = (GLint) scale[0];
+            params[1] = (GLint) scale[1];
+            params[2] = (GLint) scale[2];
+            params[3] = (GLint) scale[3];
             return;
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            params[0] = (GLint) ctx->Pixel.PCCTbias[0];
-            params[1] = (GLint) ctx->Pixel.PCCTbias[1];
-            params[2] = (GLint) ctx->Pixel.PCCTbias[2];
-            params[3] = (GLint) ctx->Pixel.PCCTbias[3];
+            GLfloat *bias = ctx->Pixel.ColorTableBias[COLORTABLE_POSTCONVOLUTION];
+            params[0] = (GLint) bias[0];
+            params[1] = (GLint) bias[1];
+            params[2] = (GLint) bias[2];
+            params[3] = (GLint) bias[3];
             return;
          }
          break;
       case GL_PROXY_POST_CONVOLUTION_COLOR_TABLE:
-         table = &ctx->ProxyPostConvolutionColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_POSTCONVOLUTION];
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE:
-         table = &ctx->PostColorMatrixColorTable;
+         table = &ctx->ColorTable[COLORTABLE_POSTCOLORMATRIX];
          if (pname == GL_COLOR_TABLE_SCALE_SGI) {
-            params[0] = (GLint) ctx->Pixel.PCMCTscale[0];
-            params[1] = (GLint) ctx->Pixel.PCMCTscale[1];
-            params[2] = (GLint) ctx->Pixel.PCMCTscale[2];
-            params[3] = (GLint) ctx->Pixel.PCMCTscale[3];
+            GLfloat *scale = ctx->Pixel.ColorTableScale[COLORTABLE_POSTCOLORMATRIX];
+            params[0] = (GLint) scale[0];
+            params[1] = (GLint) scale[1];
+            params[2] = (GLint) scale[2];
+            params[3] = (GLint) scale[3];
             return;
          }
          else if (pname == GL_COLOR_TABLE_BIAS_SGI) {
-            params[0] = (GLint) ctx->Pixel.PCMCTbias[0];
-            params[1] = (GLint) ctx->Pixel.PCMCTbias[1];
-            params[2] = (GLint) ctx->Pixel.PCMCTbias[2];
-            params[3] = (GLint) ctx->Pixel.PCMCTbias[3];
+            GLfloat *bias = ctx->Pixel.ColorTableBias[COLORTABLE_POSTCOLORMATRIX];
+            params[0] = (GLint) bias[0];
+            params[1] = (GLint) bias[1];
+            params[2] = (GLint) bias[2];
+            params[3] = (GLint) bias[3];
             return;
          }
          break;
       case GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE:
-         table = &ctx->ProxyPostColorMatrixColorTable;
+         table = &ctx->ProxyColorTable[COLORTABLE_POSTCOLORMATRIX];
          break;
       default:
          _mesa_error(ctx, GL_INVALID_ENUM, "glGetColorTableParameteriv(target)");
@@ -1320,13 +1234,11 @@
 void
 _mesa_init_colortables( GLcontext * ctx )
 {
-   /* Color tables */
-   _mesa_init_colortable(&ctx->ColorTable);
-   _mesa_init_colortable(&ctx->ProxyColorTable);
-   _mesa_init_colortable(&ctx->PostConvolutionColorTable);
-   _mesa_init_colortable(&ctx->ProxyPostConvolutionColorTable);
-   _mesa_init_colortable(&ctx->PostColorMatrixColorTable);
-   _mesa_init_colortable(&ctx->ProxyPostColorMatrixColorTable);
+   GLuint i;
+   for (i = 0; i < COLORTABLE_MAX; i++) {
+      _mesa_init_colortable(&ctx->ColorTable[i]);
+      _mesa_init_colortable(&ctx->ProxyColorTable[i]);
+   }
 }
 
 
@@ -1336,10 +1248,9 @@
 void
 _mesa_free_colortables_data( GLcontext *ctx )
 {
-   _mesa_free_colortable_data(&ctx->ColorTable);
-   _mesa_free_colortable_data(&ctx->ProxyColorTable);
-   _mesa_free_colortable_data(&ctx->PostConvolutionColorTable);
-   _mesa_free_colortable_data(&ctx->ProxyPostConvolutionColorTable);
-   _mesa_free_colortable_data(&ctx->PostColorMatrixColorTable);
-   _mesa_free_colortable_data(&ctx->ProxyPostColorMatrixColorTable);
+   GLuint i;
+   for (i = 0; i < COLORTABLE_MAX; i++) {
+      _mesa_free_colortable_data(&ctx->ColorTable[i]);
+      _mesa_free_colortable_data(&ctx->ProxyColorTable[i]);
+   }
 }
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index 0d54c29..11b4ad6 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -663,24 +663,24 @@
       /* GL_SGI_color_table */
       case GL_COLOR_TABLE_SGI:
          CHECK_EXTENSION(SGI_color_table, cap);
-         if (ctx->Pixel.ColorTableEnabled == state)
+         if (ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION] == state)
             return;
          FLUSH_VERTICES(ctx, _NEW_PIXEL);
-         ctx->Pixel.ColorTableEnabled = state;
+         ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION] = state;
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE_SGI:
          CHECK_EXTENSION(SGI_color_table, cap);
-         if (ctx->Pixel.PostConvolutionColorTableEnabled == state)
+         if (ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION] == state)
             return;
          FLUSH_VERTICES(ctx, _NEW_PIXEL);
-         ctx->Pixel.PostConvolutionColorTableEnabled = state;
+         ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION] = state;
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI:
          CHECK_EXTENSION(SGI_color_table, cap);
-         if (ctx->Pixel.PostColorMatrixColorTableEnabled == state)
+         if (ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX] == state)
             return;
          FLUSH_VERTICES(ctx, _NEW_PIXEL);
-         ctx->Pixel.PostColorMatrixColorTableEnabled = state;
+         ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX] = state;
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
          CHECK_EXTENSION(SGI_texture_color_table, cap);
@@ -1192,13 +1192,13 @@
       /* GL_SGI_color_table */
       case GL_COLOR_TABLE_SGI:
          CHECK_EXTENSION(SGI_color_table);
-         return ctx->Pixel.ColorTableEnabled;
+         return ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION];
       case GL_POST_CONVOLUTION_COLOR_TABLE_SGI:
          CHECK_EXTENSION(SGI_color_table);
-         return ctx->Pixel.PostConvolutionColorTableEnabled;
+         return ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION];
       case GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI:
          CHECK_EXTENSION(SGI_color_table);
-         return ctx->Pixel.PostColorMatrixColorTableEnabled;
+         return ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX];
 
       /* GL_SGI_texture_color_table */
       case GL_TEXTURE_COLOR_TABLE_SGI:
diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index 3136a95..cd4f594 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -647,7 +647,7 @@
             const GLuint bufferBit = 1 << i;
             if (bufferBit & bufferMask) {
                struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
-               if (rb) {
+               if (rb && rb->Width > 0 && rb->Height > 0) {
                   fb->_ColorDrawBuffers[output][count] = rb;
                   count++;
                }
@@ -673,7 +673,10 @@
 update_color_read_buffer(GLcontext *ctx, struct gl_framebuffer *fb)
 {
    (void) ctx;
-   if (fb->_ColorReadBufferIndex == -1 || fb->DeletePending) {
+   if (fb->_ColorReadBufferIndex == -1 ||
+       fb->DeletePending ||
+       fb->Width == 0 ||
+       fb->Height == 0) {
       fb->_ColorReadBuffer = NULL; /* legal! */
    }
    else {
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index d09e061..eb81ee4 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -677,34 +677,34 @@
          params[0] = ENUM_TO_BOOLEAN(ctx->Hint.PerspectiveCorrection);
          break;
       case GL_PIXEL_MAP_A_TO_A_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapAtoAsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.AtoA.Size);
          break;
       case GL_PIXEL_MAP_B_TO_B_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapBtoBsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.BtoB.Size);
          break;
       case GL_PIXEL_MAP_G_TO_G_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapGtoGsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.GtoG.Size);
          break;
       case GL_PIXEL_MAP_I_TO_A_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapItoAsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.ItoA.Size);
          break;
       case GL_PIXEL_MAP_I_TO_B_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapItoBsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.ItoB.Size);
          break;
       case GL_PIXEL_MAP_I_TO_G_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapItoGsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.ItoG.Size);
          break;
       case GL_PIXEL_MAP_I_TO_I_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapItoIsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.ItoI.Size);
          break;
       case GL_PIXEL_MAP_I_TO_R_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapItoRsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.ItoR.Size);
          break;
       case GL_PIXEL_MAP_R_TO_R_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapRtoRsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.RtoR.Size);
          break;
       case GL_PIXEL_MAP_S_TO_S_SIZE:
-         params[0] = INT_TO_BOOLEAN(ctx->Pixel.MapStoSsize);
+         params[0] = INT_TO_BOOLEAN(ctx->PixelMaps.StoS.Size);
          break;
       case GL_POINT_SIZE:
          params[0] = FLOAT_TO_BOOLEAN(ctx->Point.Size);
@@ -1283,15 +1283,15 @@
          break;
       case GL_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetBooleanv");
-         params[0] = ctx->Pixel.ColorTableEnabled;
+         params[0] = ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION];
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetBooleanv");
-         params[0] = ctx->Pixel.PostConvolutionColorTableEnabled;
+         params[0] = ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION];
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetBooleanv");
-         params[0] = ctx->Pixel.PostColorMatrixColorTableEnabled;
+         params[0] = ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX];
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_texture_color_table, "GetBooleanv");
@@ -2508,34 +2508,34 @@
          params[0] = ENUM_TO_FLOAT(ctx->Hint.PerspectiveCorrection);
          break;
       case GL_PIXEL_MAP_A_TO_A_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapAtoAsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.AtoA.Size);
          break;
       case GL_PIXEL_MAP_B_TO_B_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapBtoBsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.BtoB.Size);
          break;
       case GL_PIXEL_MAP_G_TO_G_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapGtoGsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.GtoG.Size);
          break;
       case GL_PIXEL_MAP_I_TO_A_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapItoAsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.ItoA.Size);
          break;
       case GL_PIXEL_MAP_I_TO_B_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapItoBsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.ItoB.Size);
          break;
       case GL_PIXEL_MAP_I_TO_G_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapItoGsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.ItoG.Size);
          break;
       case GL_PIXEL_MAP_I_TO_I_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapItoIsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.ItoI.Size);
          break;
       case GL_PIXEL_MAP_I_TO_R_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapItoRsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.ItoR.Size);
          break;
       case GL_PIXEL_MAP_R_TO_R_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapRtoRsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.RtoR.Size);
          break;
       case GL_PIXEL_MAP_S_TO_S_SIZE:
-         params[0] = (GLfloat)(ctx->Pixel.MapStoSsize);
+         params[0] = (GLfloat)(ctx->PixelMaps.StoS.Size);
          break;
       case GL_POINT_SIZE:
          params[0] = ctx->Point.Size;
@@ -3114,15 +3114,15 @@
          break;
       case GL_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetFloatv");
-         params[0] = BOOLEAN_TO_FLOAT(ctx->Pixel.ColorTableEnabled);
+         params[0] = BOOLEAN_TO_FLOAT(ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION]);
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetFloatv");
-         params[0] = BOOLEAN_TO_FLOAT(ctx->Pixel.PostConvolutionColorTableEnabled);
+         params[0] = BOOLEAN_TO_FLOAT(ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION]);
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetFloatv");
-         params[0] = BOOLEAN_TO_FLOAT(ctx->Pixel.PostColorMatrixColorTableEnabled);
+         params[0] = BOOLEAN_TO_FLOAT(ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX]);
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_texture_color_table, "GetFloatv");
@@ -4339,34 +4339,34 @@
          params[0] = ENUM_TO_INT(ctx->Hint.PerspectiveCorrection);
          break;
       case GL_PIXEL_MAP_A_TO_A_SIZE:
-         params[0] = ctx->Pixel.MapAtoAsize;
+         params[0] = ctx->PixelMaps.AtoA.Size;
          break;
       case GL_PIXEL_MAP_B_TO_B_SIZE:
-         params[0] = ctx->Pixel.MapBtoBsize;
+         params[0] = ctx->PixelMaps.BtoB.Size;
          break;
       case GL_PIXEL_MAP_G_TO_G_SIZE:
-         params[0] = ctx->Pixel.MapGtoGsize;
+         params[0] = ctx->PixelMaps.GtoG.Size;
          break;
       case GL_PIXEL_MAP_I_TO_A_SIZE:
-         params[0] = ctx->Pixel.MapItoAsize;
+         params[0] = ctx->PixelMaps.ItoA.Size;
          break;
       case GL_PIXEL_MAP_I_TO_B_SIZE:
-         params[0] = ctx->Pixel.MapItoBsize;
+         params[0] = ctx->PixelMaps.ItoB.Size;
          break;
       case GL_PIXEL_MAP_I_TO_G_SIZE:
-         params[0] = ctx->Pixel.MapItoGsize;
+         params[0] = ctx->PixelMaps.ItoG.Size;
          break;
       case GL_PIXEL_MAP_I_TO_I_SIZE:
-         params[0] = ctx->Pixel.MapItoIsize;
+         params[0] = ctx->PixelMaps.ItoI.Size;
          break;
       case GL_PIXEL_MAP_I_TO_R_SIZE:
-         params[0] = ctx->Pixel.MapItoRsize;
+         params[0] = ctx->PixelMaps.ItoR.Size;
          break;
       case GL_PIXEL_MAP_R_TO_R_SIZE:
-         params[0] = ctx->Pixel.MapRtoRsize;
+         params[0] = ctx->PixelMaps.RtoR.Size;
          break;
       case GL_PIXEL_MAP_S_TO_S_SIZE:
-         params[0] = ctx->Pixel.MapStoSsize;
+         params[0] = ctx->PixelMaps.StoS.Size;
          break;
       case GL_POINT_SIZE:
          params[0] = IROUND(ctx->Point.Size);
@@ -4945,15 +4945,15 @@
          break;
       case GL_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetIntegerv");
-         params[0] = BOOLEAN_TO_INT(ctx->Pixel.ColorTableEnabled);
+         params[0] = BOOLEAN_TO_INT(ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION]);
          break;
       case GL_POST_CONVOLUTION_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetIntegerv");
-         params[0] = BOOLEAN_TO_INT(ctx->Pixel.PostConvolutionColorTableEnabled);
+         params[0] = BOOLEAN_TO_INT(ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION]);
          break;
       case GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_color_table, "GetIntegerv");
-         params[0] = BOOLEAN_TO_INT(ctx->Pixel.PostColorMatrixColorTableEnabled);
+         params[0] = BOOLEAN_TO_INT(ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX]);
          break;
       case GL_TEXTURE_COLOR_TABLE_SGI:
          CHECK_EXT1(SGI_texture_color_table, "GetIntegerv");
diff --git a/src/mesa/main/get_gen.py b/src/mesa/main/get_gen.py
index 3e66946..33be768 100644
--- a/src/mesa/main/get_gen.py
+++ b/src/mesa/main/get_gen.py
@@ -337,16 +337,16 @@
 	( "GL_PACK_INVERT_MESA", GLboolean, ["ctx->Pack.Invert"], "", None ),
 	( "GL_PERSPECTIVE_CORRECTION_HINT", GLenum,
 	  ["ctx->Hint.PerspectiveCorrection"], "", None ),
-	( "GL_PIXEL_MAP_A_TO_A_SIZE", GLint, ["ctx->Pixel.MapAtoAsize"], "", None ),
-	( "GL_PIXEL_MAP_B_TO_B_SIZE", GLint, ["ctx->Pixel.MapBtoBsize"], "", None ),
-	( "GL_PIXEL_MAP_G_TO_G_SIZE", GLint, ["ctx->Pixel.MapGtoGsize"], "", None ),
-	( "GL_PIXEL_MAP_I_TO_A_SIZE", GLint, ["ctx->Pixel.MapItoAsize"], "", None ),
-	( "GL_PIXEL_MAP_I_TO_B_SIZE", GLint, ["ctx->Pixel.MapItoBsize"], "", None ),
-	( "GL_PIXEL_MAP_I_TO_G_SIZE", GLint, ["ctx->Pixel.MapItoGsize"], "", None ),
-	( "GL_PIXEL_MAP_I_TO_I_SIZE", GLint, ["ctx->Pixel.MapItoIsize"], "", None ),
-	( "GL_PIXEL_MAP_I_TO_R_SIZE", GLint, ["ctx->Pixel.MapItoRsize"], "", None ),
-	( "GL_PIXEL_MAP_R_TO_R_SIZE", GLint, ["ctx->Pixel.MapRtoRsize"], "", None ),
-	( "GL_PIXEL_MAP_S_TO_S_SIZE", GLint, ["ctx->Pixel.MapStoSsize"], "", None ),
+	( "GL_PIXEL_MAP_A_TO_A_SIZE", GLint, ["ctx->PixelMaps.AtoA.Size"], "", None ),
+	( "GL_PIXEL_MAP_B_TO_B_SIZE", GLint, ["ctx->PixelMaps.BtoB.Size"], "", None ),
+	( "GL_PIXEL_MAP_G_TO_G_SIZE", GLint, ["ctx->PixelMaps.GtoG.Size"], "", None ),
+	( "GL_PIXEL_MAP_I_TO_A_SIZE", GLint, ["ctx->PixelMaps.ItoA.Size"], "", None ),
+	( "GL_PIXEL_MAP_I_TO_B_SIZE", GLint, ["ctx->PixelMaps.ItoB.Size"], "", None ),
+	( "GL_PIXEL_MAP_I_TO_G_SIZE", GLint, ["ctx->PixelMaps.ItoG.Size"], "", None ),
+	( "GL_PIXEL_MAP_I_TO_I_SIZE", GLint, ["ctx->PixelMaps.ItoI.Size"], "", None ),
+	( "GL_PIXEL_MAP_I_TO_R_SIZE", GLint, ["ctx->PixelMaps.ItoR.Size"], "", None ),
+	( "GL_PIXEL_MAP_R_TO_R_SIZE", GLint, ["ctx->PixelMaps.RtoR.Size"], "", None ),
+	( "GL_PIXEL_MAP_S_TO_S_SIZE", GLint, ["ctx->PixelMaps.StoS.Size"], "", None ),
 	( "GL_POINT_SIZE", GLfloat, ["ctx->Point.Size"], "", None ),
 	( "GL_POINT_SIZE_GRANULARITY", GLfloat,
 	  ["ctx->Const.PointSizeGranularity"], "", None ),
@@ -624,11 +624,11 @@
 
 	# GL_SGI_color_table / GL_ARB_imaging
 	( "GL_COLOR_TABLE_SGI", GLboolean,
-	  ["ctx->Pixel.ColorTableEnabled"], "", ["SGI_color_table"] ),
+	  ["ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION]"], "", ["SGI_color_table"] ),
 	( "GL_POST_CONVOLUTION_COLOR_TABLE_SGI", GLboolean,
-	  ["ctx->Pixel.PostConvolutionColorTableEnabled"], "", ["SGI_color_table"] ),
+	  ["ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION]"], "", ["SGI_color_table"] ),
 	( "GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI", GLboolean,
-	  ["ctx->Pixel.PostColorMatrixColorTableEnabled"], "", ["SGI_color_table"] ),
+	  ["ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX]"], "", ["SGI_color_table"] ),
 
 	# GL_SGI_texture_color_table
 	( "GL_TEXTURE_COLOR_TABLE_SGI", GLboolean,
diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c
index eb91ebb..394a7c6 100644
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -1,8 +1,8 @@
 /*
  * Mesa 3-D graphics library
- * Version:  6.5.2
+ * Version:  6.5.3
  *
- * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -60,6 +60,34 @@
 
 
 /**
+ * \return GL_TRUE if \p type is a packed pixel type, GL_FALSE otherwise.
+ */
+static GLboolean
+_mesa_type_is_packed(GLenum type)
+{
+   switch (type) {
+   case GL_UNSIGNED_BYTE_3_3_2:
+   case GL_UNSIGNED_BYTE_2_3_3_REV:
+   case GL_UNSIGNED_SHORT_5_6_5:
+   case GL_UNSIGNED_SHORT_5_6_5_REV:
+   case GL_UNSIGNED_SHORT_4_4_4_4:
+   case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+   case GL_UNSIGNED_SHORT_5_5_5_1:
+   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+   case GL_UNSIGNED_INT_8_8_8_8:
+   case GL_UNSIGNED_INT_8_8_8_8_REV:
+   case GL_UNSIGNED_INT_10_10_10_2:
+   case GL_UNSIGNED_INT_2_10_10_10_REV:
+   case GL_UNSIGNED_SHORT_8_8_MESA:
+   case GL_UNSIGNED_SHORT_8_8_REV_MESA:
+   case GL_UNSIGNED_INT_24_8_EXT:
+      return GL_TRUE;
+   default:
+      return GL_FALSE;
+   }
+}
+
+/**
  * Flip the 8 bits in each byte of the given array.
  *
  * \param p array.
@@ -651,39 +679,34 @@
 
 
 /**
- * Compute the stride between image rows.
+ * Compute the stride (in bytes) between image rows.
  *
  * \param packing the pixelstore attributes
  * \param width image width.
  * \param format pixel format.
  * \param type pixel data type.
  * 
- * \return the stride in bytes for the given parameters.
+ * \return the stride in bytes, or -1 if format/type is not a valid combination
  */
 GLint
 _mesa_image_row_stride( const struct gl_pixelstore_attrib *packing,
                         GLint width, GLenum format, GLenum type )
 {
+   GLint bytesPerRow, remainder;
+
    ASSERT(packing);
+
    if (type == GL_BITMAP) {
-      /* BITMAP data */
-      GLint bytes;
       if (packing->RowLength == 0) {
-         bytes = (width + 7) / 8;
+         bytesPerRow = (width + 7) / 8;
       }
       else {
-         bytes = (packing->RowLength + 7) / 8;
+         bytesPerRow = (packing->RowLength + 7) / 8;
       }
-      if (packing->Invert) {
-         /* negate the bytes per row (negative row stride) */
-         bytes = -bytes;
-      }
-      return bytes;
    }
    else {
       /* Non-BITMAP data */
       const GLint bytesPerPixel = _mesa_bytes_per_pixel(format, type);
-      GLint bytesPerRow, remainder;
       if (bytesPerPixel <= 0)
          return -1;  /* error */
       if (packing->RowLength == 0) {
@@ -692,13 +715,19 @@
       else {
          bytesPerRow = bytesPerPixel * packing->RowLength;
       }
-      remainder = bytesPerRow % packing->Alignment;
-      if (remainder > 0)
-         bytesPerRow += (packing->Alignment - remainder);
-      if (packing->Invert)
-         bytesPerRow = -bytesPerRow;
-      return bytesPerRow;
    }
+
+   remainder = bytesPerRow % packing->Alignment;
+   if (remainder > 0) {
+      bytesPerRow += (packing->Alignment - remainder);
+   }
+
+   if (packing->Invert) {
+      /* negate the bytes per row (negative row stride) */
+      bytesPerRow = -bytesPerRow;
+   }
+
+   return bytesPerRow;
 }
 
 
@@ -1007,7 +1036,7 @@
    }
    /* GL_COLOR_TABLE lookup */
    if (transferOps & IMAGE_COLOR_TABLE_BIT) {
-      _mesa_lookup_rgba_float(&ctx->ColorTable, n, rgba);
+      _mesa_lookup_rgba_float(&ctx->ColorTable[COLORTABLE_PRECONVOLUTION], n, rgba);
    }
    /* convolution */
    if (transferOps & IMAGE_CONVOLUTION_BIT) {
@@ -1028,7 +1057,7 @@
    }
    /* GL_POST_CONVOLUTION_COLOR_TABLE lookup */
    if (transferOps & IMAGE_POST_CONVOLUTION_COLOR_TABLE_BIT) {
-      _mesa_lookup_rgba_float(&ctx->PostConvolutionColorTable, n, rgba);
+      _mesa_lookup_rgba_float(&ctx->ColorTable[COLORTABLE_POSTCONVOLUTION], n, rgba);
    }
    /* color matrix transform */
    if (transferOps & IMAGE_COLOR_MATRIX_BIT) {
@@ -1036,7 +1065,7 @@
    }
    /* GL_POST_COLOR_MATRIX_COLOR_TABLE lookup */
    if (transferOps & IMAGE_POST_COLOR_MATRIX_COLOR_TABLE_BIT) {
-      _mesa_lookup_rgba_float(&ctx->PostColorMatrixColorTable, n, rgba);
+      _mesa_lookup_rgba_float(&ctx->ColorTable[COLORTABLE_POSTCOLORMATRIX], n, rgba);
    }
    /* update histogram count */
    if (transferOps & IMAGE_HISTOGRAM_BIT) {
@@ -1100,11 +1129,11 @@
       shift_and_offset_ci(ctx, n, indexes);
    }
    if (transferOps & IMAGE_MAP_COLOR_BIT) {
-      const GLuint mask = ctx->Pixel.MapItoIsize - 1;
+      const GLuint mask = ctx->PixelMaps.ItoI.Size - 1;
       GLuint i;
       for (i = 0; i < n; i++) {
          const GLuint j = indexes[i] & mask;
-         indexes[i] = IROUND(ctx->Pixel.MapItoI[j]);
+         indexes[i] = IROUND(ctx->PixelMaps.ItoI.Map[j]);
       }
    }
 }
@@ -1140,10 +1169,10 @@
       }
    }
    if (ctx->Pixel.MapStencilFlag) {
-      GLuint mask = ctx->Pixel.MapStoSsize - 1;
+      GLuint mask = ctx->PixelMaps.StoS.Size - 1;
       GLuint i;
       for (i = 0; i < n; i++) {
-         stencil[i] = ctx->Pixel.MapStoS[ stencil[i] & mask ];
+         stencil[i] = ctx->PixelMaps.StoS.Map[ stencil[i] & mask ];
       }
    }
 }
@@ -1182,24 +1211,15 @@
 
    if (dstFormat == GL_LUMINANCE || dstFormat == GL_LUMINANCE_ALPHA) {
       /* compute luminance values */
-      if (transferOps & IMAGE_RED_TO_LUMINANCE) {
-         /* Luminance = Red (glGetTexImage) */
+      if (dstType != GL_FLOAT || ctx->Color.ClampReadColor == GL_TRUE) {
          for (i = 0; i < n; i++) {
-            luminance[i] = rgba[i][RCOMP];
+            GLfloat sum = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
+            luminance[i] = CLAMP(sum, 0.0F, 1.0F);
          }
       }
       else {
-         /* Luminance = Red + Green + Blue (glReadPixels) */
-         if (dstType != GL_FLOAT || ctx->Color.ClampReadColor == GL_TRUE) {
-            for (i = 0; i < n; i++) {
-               GLfloat sum = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
-               luminance[i] = CLAMP(sum, 0.0F, 1.0F);
-            }
-         }
-         else {
-            for (i = 0; i < n; i++) {
-               luminance[i] = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
-            }
+         for (i = 0; i < n; i++) {
+            luminance[i] = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
          }
       }
    }
@@ -1425,9 +1445,6 @@
                default:
                   _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
             }
-            if (dstPacking->SwapBytes) {
-               _mesa_swap2( (GLushort *) dst, n * comps);
-            }
          }
          break;
       case GL_SHORT:
@@ -1501,9 +1518,6 @@
                default:
                   _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
             }
-            if (dstPacking->SwapBytes) {
-               _mesa_swap2( (GLushort *) dst, n * comps );
-            }
          }
          break;
       case GL_UNSIGNED_INT:
@@ -1577,9 +1591,6 @@
                default:
                   _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
             }
-            if (dstPacking->SwapBytes) {
-               _mesa_swap4( (GLuint *) dst, n * comps );
-            }
          }
          break;
       case GL_INT:
@@ -1653,9 +1664,6 @@
                default:
                   _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
             }
-            if (dstPacking->SwapBytes) {
-               _mesa_swap4( (GLuint *) dst, n * comps );
-            }
          }
          break;
       case GL_FLOAT:
@@ -1729,9 +1737,6 @@
                default:
                   _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
             }
-            if (dstPacking->SwapBytes) {
-               _mesa_swap4( (GLuint *) dst, n * comps );
-            }
          }
          break;
       case GL_HALF_FLOAT_ARB:
@@ -1805,9 +1810,6 @@
                default:
                   _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
             }
-            if (dstPacking->SwapBytes) {
-               _mesa_swap2( (GLushort *) dst, n * comps );
-            }
          }
          break;
       case GL_UNSIGNED_BYTE_3_3_2:
@@ -2084,6 +2086,21 @@
          break;
       default:
          _mesa_problem(ctx, "bad type in _mesa_pack_rgba_span_float");
+         return;
+   }
+
+   /* Byte-swap the packed span in place when the destination packing asks
+    * for it.  Elements of 2 or 4 bytes (per _mesa_sizeof_packed_type) are
+    * swapped; any other element size is left as written.
+    */
+   if (dstPacking->SwapBytes) {
+      const GLint swapSize = _mesa_sizeof_packed_type(dstType);
+      if (swapSize == 2) {
+         _mesa_swap2((GLushort *) dstAddr, n * comps);
+      }
+      else if (swapSize == 4) {
+         _mesa_swap4((GLuint *) dstAddr, n * comps);
+      }
    }
 }
 
@@ -3662,10 +3679,10 @@
 
          if (ctx->Pixel.MapStencilFlag) {
             /* Apply stencil lookup table */
-            GLuint mask = ctx->Pixel.MapStoSsize - 1;
+            GLuint mask = ctx->PixelMaps.StoS.Size - 1;
             GLuint i;
             for (i=0;i<n;i++) {
-               indexes[i] = ctx->Pixel.MapStoS[ indexes[i] & mask ];
+               indexes[i] = ctx->PixelMaps.StoS.Map[ indexes[i] & mask ];
             }
          }
       }
@@ -3851,6 +3868,22 @@
    }
 }
 
+#define DEPTH_VALUES(GLTYPE, GLTYPE2FLOAT)                              \
+    do {                                                                \
+        GLuint i;                                                       \
+        const GLTYPE *src = (const GLTYPE *)source;                     \
+        for (i = 0; i < n; i++) {                                       \
+            GLTYPE value = src[i];                                      \
+            if (srcPacking->SwapBytes) {                                \
+                if (sizeof(GLTYPE) == 2) {                              \
+                    SWAP2BYTE(value);                                   \
+                } else if (sizeof(GLTYPE) == 4) {                       \
+                    SWAP4BYTE(value);                                   \
+                }                                                       \
+            }                                                           \
+            depthValues[i] = CLAMP(GLTYPE2FLOAT(value), 0.0F, 1.0F);    \
+        }                                                               \
+    } while (0)
 
 void
 _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n,
@@ -3872,59 +3905,23 @@
 
    switch (srcType) {
       case GL_BYTE:
-         {
-            GLuint i;
-            const GLubyte *src = (const GLubyte *) source;
-            for (i = 0; i < n; i++) {
-               depthValues[i] = BYTE_TO_FLOAT(src[i]);
-            }
-         }
-         break;
+          DEPTH_VALUES(GLbyte, BYTE_TO_FLOAT);
+          break;
       case GL_UNSIGNED_BYTE:
-         {
-            GLuint i;
-            const GLubyte *src = (const GLubyte *) source;
-            for (i = 0; i < n; i++) {
-               depthValues[i] = UBYTE_TO_FLOAT(src[i]);
-            }
-         }
-         break;
+          DEPTH_VALUES(GLubyte, UBYTE_TO_FLOAT);
+          break;
       case GL_SHORT:
-         {
-            GLuint i;
-            const GLshort *src = (const GLshort *) source;
-            for (i = 0; i < n; i++) {
-               depthValues[i] = SHORT_TO_FLOAT(src[i]);
-            }
-         }
-         break;
+          DEPTH_VALUES(GLshort, SHORT_TO_FLOAT);
+          break;
       case GL_UNSIGNED_SHORT:
-         {
-            GLuint i;
-            const GLushort *src = (const GLushort *) source;
-            for (i = 0; i < n; i++) {
-               depthValues[i] = USHORT_TO_FLOAT(src[i]);
-            }
-         }
-         break;
+          DEPTH_VALUES(GLushort, USHORT_TO_FLOAT);
+          break;
       case GL_INT:
-         {
-            GLuint i;
-            const GLint *src = (const GLint *) source;
-            for (i = 0; i < n; i++) {
-               depthValues[i] = INT_TO_FLOAT(src[i]);
-            }
-         }
-         break;
+          DEPTH_VALUES(GLint, INT_TO_FLOAT);
+          break;
       case GL_UNSIGNED_INT:
-         {
-            GLuint i;
-            const GLuint *src = (const GLuint *) source;
-            for (i = 0; i < n; i++) {
-               depthValues[i] = UINT_TO_FLOAT(src[i]);
-            }
-         }
-         break;
+          DEPTH_VALUES(GLuint, UINT_TO_FLOAT);
+          break;
       case GL_UNSIGNED_INT_24_8_EXT: /* GL_EXT_packed_depth_stencil */
          if (dstType == GL_UNSIGNED_INT &&
              depthScale == (GLfloat) 0xffffff &&
@@ -3934,7 +3931,11 @@
             GLuint *zValues = (GLuint *) dest;
             GLuint i;
             for (i = 0; i < n; i++) {
-               zValues[i] = src[i] & 0xffffff00;
+                GLuint value = src[i];
+                if (srcPacking->SwapBytes) {
+                    SWAP4BYTE(value);
+                }
+                zValues[i] = value & 0xffffff00;
             }
             return;
          }
@@ -3943,19 +3944,27 @@
             const GLfloat scale = 1.0f / 0xffffff;
             GLuint i;
             for (i = 0; i < n; i++) {
-               depthValues[i] = (src[i] >> 8) * scale;
+                GLuint value = src[i];
+                if (srcPacking->SwapBytes) {
+                    SWAP4BYTE(value);
+                }
+                depthValues[i] = (value >> 8) * scale;
             }
          }
          break;
       case GL_FLOAT:
-         _mesa_memcpy(depthValues, source, n * sizeof(GLfloat));
-         break;
+          DEPTH_VALUES(GLfloat, 1*);
+          break;
       case GL_HALF_FLOAT_ARB:
          {
             GLuint i;
             const GLhalfARB *src = (const GLhalfARB *) source;
             for (i = 0; i < n; i++) {
-               depthValues[i] = _mesa_half_to_float(src[i]);
+                GLhalfARB value = src[i];
+                if (srcPacking->SwapBytes) {
+                    SWAP2BYTE(value);
+                }
+               depthValues[i] = _mesa_half_to_float(value);
             }
          }
          break;
@@ -4186,14 +4195,18 @@
 
    if (type == GL_BITMAP) {
       bytesPerRow = (width + 7) >> 3;
-      flipBytes = !unpack->LsbFirst;
+      flipBytes = unpack->LsbFirst;
       swap2 = swap4 = GL_FALSE;
       compsPerRow = 0;
    }
    else {
       const GLint bytesPerPixel = _mesa_bytes_per_pixel(format, type);
-      const GLint components = _mesa_components_in_format(format);
+      GLint components = _mesa_components_in_format(format);
       GLint bytesPerComp;
+
+      if (_mesa_type_is_packed(type))
+          components = 1;
+
       if (bytesPerPixel <= 0 || components <= 0)
          return NULL;   /* bad format or type.  generate error later */
       bytesPerRow = bytesPerPixel * width;
@@ -4218,7 +4231,61 @@
          for (row = 0; row < height; row++) {
             const GLvoid *src = _mesa_image_address(dimensions, unpack, pixels,
                                width, height, format, type, img, row, 0);
-            _mesa_memcpy(dst, src, bytesPerRow);
+            /* A bitmap row whose SkipPixels is not a multiple of 8 starts
+             * mid-byte in the source, so it is repacked bit by bit (MSB
+             * first in dst) instead of being copied with memcpy. */
+            if ((type == GL_BITMAP) && (unpack->SkipPixels & 0x7)) {
+               const GLubyte *s = src;
+               GLubyte *d = dst;
+               GLubyte dstMask = 128;
+               GLint i;
+               flipBytes = GL_FALSE;  /* bit order is handled by the repack */
+               if (unpack->LsbFirst) {
+                  GLubyte srcMask = 1 << (unpack->SkipPixels & 0x7);
+                  for (i = 0; i < width; i++) {
+                     /* clear a dst byte only when its first bit is stored,
+                      * so no byte beyond the last pixel is ever written */
+                     if (dstMask == 128)
+                        *d = 0;
+                     if (*s & srcMask)
+                        *d |= dstMask;
+                     if (srcMask == 128) {
+                        srcMask = 1;
+                        s++;
+                     } else {
+                        srcMask = srcMask << 1;
+                     }
+                     if (dstMask == 1) {
+                        dstMask = 128;
+                        d++;
+                     } else {
+                        dstMask = dstMask >> 1;
+                     }
+                  }
+               } else {
+                  GLubyte srcMask = 128 >> (unpack->SkipPixels & 0x7);
+                  for (i = 0; i < width; i++) {
+                     if (dstMask == 128)
+                        *d = 0;
+                     if (*s & srcMask)
+                        *d |= dstMask;
+                     if (srcMask == 1) {
+                        srcMask = 128;
+                        s++;
+                     } else {
+                        srcMask = srcMask >> 1;
+                     }
+                     if (dstMask == 1) {
+                        dstMask = 128;
+                        d++;
+                     } else {
+                        dstMask = dstMask >> 1;
+                     }
+                  }
+               }
+            } else {
+               _mesa_memcpy(dst, src, bytesPerRow);
+            }
             /* byte flipping/swapping */
             if (flipBytes) {
                flip_bytes((GLubyte *) dst, bytesPerRow);
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 25a5a3c..0c9bf20 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -381,6 +381,13 @@
                             BUFFER_BIT_COLOR7)
 
 
+/** The pixel transfer path has three color tables: */
+/*@{*/
+#define COLORTABLE_PRECONVOLUTION  0
+#define COLORTABLE_POSTCONVOLUTION 1
+#define COLORTABLE_POSTCOLORMATRIX 2
+#define COLORTABLE_MAX 3
+/*@}*/
 
 
 /**
@@ -661,9 +668,7 @@
    GLboolean Blend;
    GLbitfield ClipPlanes;
    GLboolean ColorMaterial;
-   GLboolean ColorTable;                /* SGI_color_table */
-   GLboolean PostColorMatrixColorTable; /* SGI_color_table */
-   GLboolean PostConvolutionColorTable; /* SGI_color_table */
+   GLboolean ColorTable[COLORTABLE_MAX];
    GLboolean Convolution1D;
    GLboolean Convolution2D;
    GLboolean Separable2D;
@@ -963,74 +968,91 @@
 
 
 /**
+ * A pixelmap (see glPixelMap)
+ */
+struct gl_pixelmap
+{
+   GLint Size;
+   GLfloat Map[MAX_PIXEL_MAP_TABLE];
+   GLubyte Map8[MAX_PIXEL_MAP_TABLE];  /**< converted to 8-bit color */
+};
+
+
+/**
+ * Collection of all pixelmaps
+ */
+struct gl_pixelmaps
+{
+   struct gl_pixelmap RtoR;  /**< i.e. GL_PIXEL_MAP_R_TO_R */
+   struct gl_pixelmap GtoG;
+   struct gl_pixelmap BtoB;
+   struct gl_pixelmap AtoA;
+   struct gl_pixelmap ItoR;
+   struct gl_pixelmap ItoG;
+   struct gl_pixelmap ItoB;
+   struct gl_pixelmap ItoA;
+   struct gl_pixelmap ItoI;
+   struct gl_pixelmap StoS;
+};
+
+
+/**
  * Pixel attribute group (GL_PIXEL_MODE_BIT).
  */
 struct gl_pixel_attrib
 {
    GLenum ReadBuffer;		/**< source buffer for glRead/CopyPixels() */
+
+   /*--- Begin Pixel Transfer State ---*/
+   /* Fields are in the order in which they're applied... */
+
+   /* Scale & Bias (index shift, offset) */
    GLfloat RedBias, RedScale;
    GLfloat GreenBias, GreenScale;
    GLfloat BlueBias, BlueScale;
    GLfloat AlphaBias, AlphaScale;
    GLfloat DepthBias, DepthScale;
    GLint IndexShift, IndexOffset;
+
+   /* Pixel Maps */
+   /* Note: actual pixel maps are not part of this attrib group */
    GLboolean MapColorFlag;
    GLboolean MapStencilFlag;
-   GLfloat ZoomX, ZoomY;
-   /* XXX move these out of gl_pixel_attrib */
-   GLint MapStoSsize;		/**< Size of each pixel map */
-   GLint MapItoIsize;
-   GLint MapItoRsize;
-   GLint MapItoGsize;
-   GLint MapItoBsize;
-   GLint MapItoAsize;
-   GLint MapRtoRsize;
-   GLint MapGtoGsize;
-   GLint MapBtoBsize;
-   GLint MapAtoAsize;
-   GLint MapStoS[MAX_PIXEL_MAP_TABLE];	/**< Pixel map tables */
-   GLfloat MapItoI[MAX_PIXEL_MAP_TABLE];
-   GLfloat MapItoR[MAX_PIXEL_MAP_TABLE];
-   GLfloat MapItoG[MAX_PIXEL_MAP_TABLE];
-   GLfloat MapItoB[MAX_PIXEL_MAP_TABLE];
-   GLfloat MapItoA[MAX_PIXEL_MAP_TABLE];
-   GLubyte MapItoR8[MAX_PIXEL_MAP_TABLE];  /**< converted to 8-bit color */
-   GLubyte MapItoG8[MAX_PIXEL_MAP_TABLE];
-   GLubyte MapItoB8[MAX_PIXEL_MAP_TABLE];
-   GLubyte MapItoA8[MAX_PIXEL_MAP_TABLE];
-   GLfloat MapRtoR[MAX_PIXEL_MAP_TABLE];
-   GLfloat MapGtoG[MAX_PIXEL_MAP_TABLE];
-   GLfloat MapBtoB[MAX_PIXEL_MAP_TABLE];
-   GLfloat MapAtoA[MAX_PIXEL_MAP_TABLE];
-   /** GL_EXT_histogram */
-   GLboolean HistogramEnabled;
-   GLboolean MinMaxEnabled;
-   /** GL_SGI_color_matrix */
-   GLfloat PostColorMatrixScale[4];  /**< RGBA */
-   GLfloat PostColorMatrixBias[4];   /**< RGBA */
-   /** GL_SGI_color_table */
-   GLfloat ColorTableScale[4];
-   GLfloat ColorTableBias[4];
-   GLboolean ColorTableEnabled;
-   GLfloat PCCTscale[4];
-   GLfloat PCCTbias[4];
-   GLboolean PostConvolutionColorTableEnabled;
-   GLfloat PCMCTscale[4];
-   GLfloat PCMCTbias[4];
-   GLboolean PostColorMatrixColorTableEnabled;
-   /** GL_SGI_texture_color_table */
-   GLfloat TextureColorTableScale[4];
-   GLfloat TextureColorTableBias[4];
-   /** Convolution */
+
+   /* There are multiple color table stages: */
+   GLboolean ColorTableEnabled[COLORTABLE_MAX];
+   GLfloat ColorTableScale[COLORTABLE_MAX][4];  /**< RGBA */
+   GLfloat ColorTableBias[COLORTABLE_MAX][4];   /**< RGBA */
+
+   /* Convolution (GL_EXT_convolution) */
    GLboolean Convolution1DEnabled;
    GLboolean Convolution2DEnabled;
    GLboolean Separable2DEnabled;
    GLfloat ConvolutionBorderColor[3][4];
    GLenum ConvolutionBorderMode[3];
-   GLfloat ConvolutionFilterScale[3][4];
-   GLfloat ConvolutionFilterBias[3][4];
+   GLfloat ConvolutionFilterScale[3][4];  /**< RGBA */
+   GLfloat ConvolutionFilterBias[3][4];   /**< RGBA */
    GLfloat PostConvolutionScale[4];  /**< RGBA */
    GLfloat PostConvolutionBias[4];   /**< RGBA */
+
+   /* Color matrix (GL_SGI_color_matrix) */
+   /* Note: the color matrix is not part of this attrib group */
+   GLfloat PostColorMatrixScale[4];  /**< RGBA */
+   GLfloat PostColorMatrixBias[4];   /**< RGBA */
+
+   /* Histogram & minmax (GL_EXT_histogram) */
+   /* Note: histogram and minmax data are not part of this attrib group */
+   GLboolean HistogramEnabled;
+   GLboolean MinMaxEnabled;
+
+   /*--- End Pixel Transfer State ---*/
+
+   /* Pixel Zoom */
+   GLfloat ZoomX, ZoomY;
+
+   /** GL_SGI_texture_color_table */
+   GLfloat TextureColorTableScale[4];
+   GLfloat TextureColorTableBias[4];
 };
 
 
@@ -2213,7 +2235,7 @@
    GLubyte IndexBits;
    GLubyte DepthBits;
    GLubyte StencilBits;
-   GLvoid *Data;
+   GLvoid *Data;        /**< This may not be used by some kinds of RBs */
 
    /* Used to wrap one renderbuffer around another: */
    struct gl_renderbuffer *Wrapped;
@@ -2614,7 +2636,6 @@
 #define IMAGE_HISTOGRAM_BIT                       0x200
 #define IMAGE_MIN_MAX_BIT                         0x400
 #define IMAGE_CLAMP_BIT                           0x800 /* extra */
-#define IMAGE_RED_TO_LUMINANCE                    0x1000
 
 
 /** Pixel Transfer ops up to convolution */
@@ -2966,6 +2987,7 @@
 
    /** \name Other assorted state (not pushed/popped on attribute stack) */
    /*@{*/
+   struct gl_pixelmaps          PixelMaps;
    struct gl_histogram_attrib	Histogram;
    struct gl_minmax_attrib	MinMax;
    struct gl_convolution_attrib Convolution1D;
@@ -2976,12 +2998,14 @@
    struct gl_feedback   Feedback;  /**< Feedback */
    struct gl_selection  Select;    /**< Selection */
 
-   struct gl_color_table ColorTable;       /**< Pre-convolution */
-   struct gl_color_table ProxyColorTable;  /**< Pre-convolution */
+   struct gl_color_table ColorTable[COLORTABLE_MAX];
+   struct gl_color_table ProxyColorTable[COLORTABLE_MAX];
+#if 0
    struct gl_color_table PostConvolutionColorTable;
    struct gl_color_table ProxyPostConvolutionColorTable;
    struct gl_color_table PostColorMatrixColorTable;
    struct gl_color_table ProxyPostColorMatrixColorTable;
+#endif
 
    struct gl_program_state Program;        /**< for vertex or fragment progs */
    struct gl_vertex_program_state VertexProgram;   /**< GL_ARB/NV_vertex_program */
diff --git a/src/mesa/main/pixel.c b/src/mesa/main/pixel.c
index de5c7fc..eb4fd6e 100644
--- a/src/mesa/main/pixel.c
+++ b/src/mesa/main/pixel.c
@@ -252,85 +252,76 @@
 /*****                         glPixelMap                         *****/
 /**********************************************************************/
 
+/**
+ * Return pointer to a pixelmap by name.
+ */
+static struct gl_pixelmap *
+get_pixelmap(GLcontext *ctx, GLenum map)
+{
+   switch (map) {
+   case GL_PIXEL_MAP_I_TO_I:
+      return &ctx->PixelMaps.ItoI;
+   case GL_PIXEL_MAP_S_TO_S:
+      return &ctx->PixelMaps.StoS;
+   case GL_PIXEL_MAP_I_TO_R:
+      return &ctx->PixelMaps.ItoR;
+   case GL_PIXEL_MAP_I_TO_G:
+      return &ctx->PixelMaps.ItoG;
+   case GL_PIXEL_MAP_I_TO_B:
+      return &ctx->PixelMaps.ItoB;
+   case GL_PIXEL_MAP_I_TO_A:
+      return &ctx->PixelMaps.ItoA;
+   case GL_PIXEL_MAP_R_TO_R:
+      return &ctx->PixelMaps.RtoR;
+   case GL_PIXEL_MAP_G_TO_G:
+      return &ctx->PixelMaps.GtoG;
+   case GL_PIXEL_MAP_B_TO_B:
+      return &ctx->PixelMaps.BtoB;
+   case GL_PIXEL_MAP_A_TO_A:
+      return &ctx->PixelMaps.AtoA;
+   default:
+      return NULL;
+   }
+}
+
 
 /**
  * Helper routine used by the other _mesa_PixelMap() functions.
  */
 static void
-pixelmap(GLcontext *ctx, GLenum map, GLsizei mapsize, const GLfloat *values)
+store_pixelmap(GLcontext *ctx, GLenum map, GLsizei mapsize,
+               const GLfloat *values)
 {
    GLint i;
+   struct gl_pixelmap *pm = get_pixelmap(ctx, map);
+   if (!pm) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glPixelMap(map)");
+      return;
+   }
+
    switch (map) {
-      case GL_PIXEL_MAP_S_TO_S:
-         ctx->Pixel.MapStoSsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-	    ctx->Pixel.MapStoS[i] = IROUND(values[i]);
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_I:
-         ctx->Pixel.MapItoIsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-	    ctx->Pixel.MapItoI[i] = values[i];
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_R:
-         ctx->Pixel.MapItoRsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-            GLfloat val = CLAMP( values[i], 0.0F, 1.0F );
-	    ctx->Pixel.MapItoR[i] = val;
-	    ctx->Pixel.MapItoR8[i] = (GLint) (val * 255.0F);
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_G:
-         ctx->Pixel.MapItoGsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-            GLfloat val = CLAMP( values[i], 0.0F, 1.0F );
-	    ctx->Pixel.MapItoG[i] = val;
-	    ctx->Pixel.MapItoG8[i] = (GLint) (val * 255.0F);
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_B:
-         ctx->Pixel.MapItoBsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-            GLfloat val = CLAMP( values[i], 0.0F, 1.0F );
-	    ctx->Pixel.MapItoB[i] = val;
-	    ctx->Pixel.MapItoB8[i] = (GLint) (val * 255.0F);
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_A:
-         ctx->Pixel.MapItoAsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-            GLfloat val = CLAMP( values[i], 0.0F, 1.0F );
-	    ctx->Pixel.MapItoA[i] = val;
-	    ctx->Pixel.MapItoA8[i] = (GLint) (val * 255.0F);
-	 }
-	 break;
-      case GL_PIXEL_MAP_R_TO_R:
-         ctx->Pixel.MapRtoRsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-	    ctx->Pixel.MapRtoR[i] = CLAMP( values[i], 0.0F, 1.0F );
-	 }
-	 break;
-      case GL_PIXEL_MAP_G_TO_G:
-         ctx->Pixel.MapGtoGsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-	    ctx->Pixel.MapGtoG[i] = CLAMP( values[i], 0.0F, 1.0F );
-	 }
-	 break;
-      case GL_PIXEL_MAP_B_TO_B:
-         ctx->Pixel.MapBtoBsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-	    ctx->Pixel.MapBtoB[i] = CLAMP( values[i], 0.0F, 1.0F );
-	 }
-	 break;
-      case GL_PIXEL_MAP_A_TO_A:
-         ctx->Pixel.MapAtoAsize = mapsize;
-         for (i = 0; i < mapsize; i++) {
-	    ctx->Pixel.MapAtoA[i] = CLAMP( values[i], 0.0F, 1.0F );
-	 }
-	 break;
-      default:
-         _mesa_error( ctx, GL_INVALID_ENUM, "glPixelMap(map)" );
+   case GL_PIXEL_MAP_S_TO_S:
+      /* stencil map: entries pass through IROUND(), no clamping */
+      pm->Size = mapsize;
+      for (i = 0; i < mapsize; i++) {
+         pm->Map[i] = IROUND(values[i]);
+      }
+      break;
+   case GL_PIXEL_MAP_I_TO_I:
+      /* index map: entries are stored exactly as given */
+      pm->Size = mapsize;
+      for (i = 0; i < mapsize; i++) {
+         pm->Map[i] = values[i];
+      }
+      break;
+   default:
+      /* color maps: clamp to [0,1] and keep an 8-bit copy in Map8 */
+      pm->Size = mapsize;
+      for (i = 0; i < mapsize; i++) {
+         const GLfloat clamped = CLAMP(values[i], 0.0F, 1.0F);
+         pm->Map[i] = clamped;
+         pm->Map8[i] = (GLint) (clamped * 255.0F);
+      }
    }
 }
 
@@ -385,7 +376,7 @@
       return;
    }
 
-   pixelmap(ctx, map, mapsize, values);
+   store_pixelmap(ctx, map, mapsize, values);
 
    if (ctx->Unpack.BufferObj->Name) {
       ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
@@ -394,7 +385,6 @@
 }
 
 
-
 void GLAPIENTRY
 _mesa_PixelMapuiv(GLenum map, GLsizei mapsize, const GLuint *values )
 {
@@ -464,11 +454,10 @@
                               ctx->Unpack.BufferObj);
    }
 
-   pixelmap(ctx, map, mapsize, fvalues);
+   store_pixelmap(ctx, map, mapsize, fvalues);
 }
 
 
-
 void GLAPIENTRY
 _mesa_PixelMapusv(GLenum map, GLsizei mapsize, const GLushort *values )
 {
@@ -520,7 +509,7 @@
       return;
    }
 
-    /* convert to floats */
+   /* convert to floats */
    if (map == GL_PIXEL_MAP_I_TO_I || map == GL_PIXEL_MAP_S_TO_S) {
       GLint i;
       for (i = 0; i < mapsize; i++) {
@@ -539,40 +528,7 @@
                               ctx->Unpack.BufferObj);
    }
 
-   pixelmap(ctx, map, mapsize, fvalues);
-}
-
-
-/**
- * Return size of the named map.
- */
-static GLuint
-get_map_size(GLcontext *ctx, GLenum map)
-{
-   switch (map) {
-      case GL_PIXEL_MAP_I_TO_I:
-         return ctx->Pixel.MapItoIsize;
-      case GL_PIXEL_MAP_S_TO_S:
-         return ctx->Pixel.MapStoSsize;
-      case GL_PIXEL_MAP_I_TO_R:
-         return ctx->Pixel.MapItoRsize;
-      case GL_PIXEL_MAP_I_TO_G:
-         return ctx->Pixel.MapItoGsize;
-      case GL_PIXEL_MAP_I_TO_B:
-         return ctx->Pixel.MapItoBsize;
-      case GL_PIXEL_MAP_I_TO_A:
-         return ctx->Pixel.MapItoAsize;
-      case GL_PIXEL_MAP_R_TO_R:
-         return ctx->Pixel.MapRtoRsize;
-      case GL_PIXEL_MAP_G_TO_G:
-         return ctx->Pixel.MapGtoGsize;
-      case GL_PIXEL_MAP_B_TO_B:
-         return ctx->Pixel.MapBtoBsize;
-      case GL_PIXEL_MAP_A_TO_A:
-         return ctx->Pixel.MapAtoAsize;
-      default:
-         return 0;
-   }
+   store_pixelmap(ctx, map, mapsize, fvalues);
 }
 
 
@@ -581,9 +537,17 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    GLuint mapsize, i;
+   const struct gl_pixelmap *pm;
+
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   mapsize = get_map_size(ctx, map);
+   pm = get_pixelmap(ctx, map);
+   if (!pm) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetPixelMapfv(map)");
+      return;
+   }
+
+   mapsize = pm->Size;
 
    if (ctx->Pack.BufferObj->Name) {
       /* pack pixelmap into PBO */
@@ -613,41 +577,14 @@
       return;
    }
 
-   switch (map) {
-      case GL_PIXEL_MAP_I_TO_I:
-         MEMCPY(values, ctx->Pixel.MapItoI, mapsize * sizeof(GLfloat));
-	 break;
-      case GL_PIXEL_MAP_S_TO_S:
-         for (i = 0; i < mapsize; i++) {
-	    values[i] = (GLfloat) ctx->Pixel.MapStoS[i];
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_R:
-         MEMCPY(values, ctx->Pixel.MapItoR, mapsize * sizeof(GLfloat));
-	 break;
-      case GL_PIXEL_MAP_I_TO_G:
-         MEMCPY(values, ctx->Pixel.MapItoG, mapsize * sizeof(GLfloat));
-	 break;
-      case GL_PIXEL_MAP_I_TO_B:
-         MEMCPY(values, ctx->Pixel.MapItoB, mapsize * sizeof(GLfloat));
-	 break;
-      case GL_PIXEL_MAP_I_TO_A:
-         MEMCPY(values, ctx->Pixel.MapItoA, mapsize * sizeof(GLfloat));
-	 break;
-      case GL_PIXEL_MAP_R_TO_R:
-         MEMCPY(values, ctx->Pixel.MapRtoR, mapsize * sizeof(GLfloat));
-	 break;
-      case GL_PIXEL_MAP_G_TO_G:
-         MEMCPY(values, ctx->Pixel.MapGtoG, mapsize * sizeof(GLfloat));
-	 break;
-      case GL_PIXEL_MAP_B_TO_B:
-         MEMCPY(values, ctx->Pixel.MapBtoB, mapsize * sizeof(GLfloat));
-	 break;
-      case GL_PIXEL_MAP_A_TO_A:
-         MEMCPY(values, ctx->Pixel.MapAtoA, mapsize * sizeof(GLfloat));
-	 break;
-      default:
-         _mesa_error( ctx, GL_INVALID_ENUM, "glGetPixelMapfv" );
+   if (map == GL_PIXEL_MAP_S_TO_S) {
+      /* special case */
+      for (i = 0; i < mapsize; i++) {
+         values[i] = (GLfloat) ctx->PixelMaps.StoS.Map[i];
+      }
+   }
+   else {
+      MEMCPY(values, pm->Map, mapsize * sizeof(GLfloat));
    }
 
    if (ctx->Pack.BufferObj->Name) {
@@ -662,9 +599,16 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    GLint mapsize, i;
+   const struct gl_pixelmap *pm;
+
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   mapsize = get_map_size(ctx, map);
+   pm = get_pixelmap(ctx, map);
+   if (!pm) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetPixelMapuiv(map)");
+      return;
+   }
+   mapsize = pm->Size;
 
    if (ctx->Pack.BufferObj->Name) {
       /* pack pixelmap into PBO */
@@ -694,57 +638,14 @@
       return;
    }
 
-   switch (map) {
-      case GL_PIXEL_MAP_I_TO_I:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapItoI[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_S_TO_S:
-         MEMCPY(values, ctx->Pixel.MapStoS, mapsize * sizeof(GLint));
-	 break;
-      case GL_PIXEL_MAP_I_TO_R:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapItoR[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_G:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapItoG[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_B:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapItoB[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_A:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapItoA[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_R_TO_R:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapRtoR[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_G_TO_G:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapGtoG[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_B_TO_B:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapBtoB[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_A_TO_A:
-	 for (i = 0; i < mapsize; i++) {
-	    values[i] = FLOAT_TO_UINT( ctx->Pixel.MapAtoA[i] );
-	 }
-	 break;
-      default:
-         _mesa_error( ctx, GL_INVALID_ENUM, "glGetPixelMapfv" );
+   if (map == GL_PIXEL_MAP_S_TO_S) {
+      /* special case */
+      MEMCPY(values, ctx->PixelMaps.StoS.Map, mapsize * sizeof(GLint));
+   }
+   else {
+      for (i = 0; i < mapsize; i++) {
+         values[i] = FLOAT_TO_UINT( pm->Map[i] );
+      }
    }
 
    if (ctx->Pack.BufferObj->Name) {
@@ -759,9 +660,16 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    GLint mapsize, i;
+   const struct gl_pixelmap *pm;
+
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-   mapsize = get_map_size(ctx, map);
+   pm = get_pixelmap(ctx, map);
+   if (!pm) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetPixelMapusv(map)");
+      return;
+   }
+   mapsize = pm->Size;   /* pm is non-NULL: the NULL case returned above */
 
    if (ctx->Pack.BufferObj->Name) {
       /* pack pixelmap into PBO */
@@ -793,58 +701,21 @@
    }
 
    switch (map) {
-      case GL_PIXEL_MAP_I_TO_I:
-	 for (i = 0; i < mapsize; i++) {
-            values[i] = (GLushort) CLAMP(ctx->Pixel.MapItoI[i], 0.0, 65535.0);
-	 }
-	 break;
-      case GL_PIXEL_MAP_S_TO_S:
-	 for (i = 0; i < mapsize; i++) {
-            values[i] = (GLushort) CLAMP(ctx->Pixel.MapStoS[i], 0.0, 65535.0);
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_R:
-	 for (i = 0; i < mapsize; i++) {
-	    CLAMPED_FLOAT_TO_USHORT(values[i] , ctx->Pixel.MapItoR[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_G:
-	 for (i = 0; i < mapsize; i++) {
-	    CLAMPED_FLOAT_TO_USHORT(values[i] , ctx->Pixel.MapItoG[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_B:
-	 for (i = 0; i < mapsize; i++) {
-	    CLAMPED_FLOAT_TO_USHORT(values[i] , ctx->Pixel.MapItoB[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_I_TO_A:
-	 for (i = 0; i < mapsize; i++) {
-	    CLAMPED_FLOAT_TO_USHORT(values[i] , ctx->Pixel.MapItoA[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_R_TO_R:
-	 for (i = 0; i < mapsize; i++) {
-	    CLAMPED_FLOAT_TO_USHORT(values[i] , ctx->Pixel.MapRtoR[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_G_TO_G:
-	 for (i = 0; i < mapsize; i++) {
-	    CLAMPED_FLOAT_TO_USHORT(values[i] , ctx->Pixel.MapGtoG[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_B_TO_B:
-	 for (i = 0; i < mapsize; i++) {
-	    CLAMPED_FLOAT_TO_USHORT(values[i] , ctx->Pixel.MapBtoB[i] );
-	 }
-	 break;
-      case GL_PIXEL_MAP_A_TO_A:
-	 for (i = 0; i < mapsize; i++) {
-	    CLAMPED_FLOAT_TO_USHORT(values[i] , ctx->Pixel.MapAtoA[i] );
-	 }
-	 break;
-      default:
-         _mesa_error( ctx, GL_INVALID_ENUM, "glGetPixelMapfv" );
+   /* special cases */
+   case GL_PIXEL_MAP_I_TO_I:
+      for (i = 0; i < mapsize; i++) {
+         values[i] = (GLushort) CLAMP(ctx->PixelMaps.ItoI.Map[i], 0.0, 65535.);
+      }
+      break;
+   case GL_PIXEL_MAP_S_TO_S:
+      for (i = 0; i < mapsize; i++) {
+         values[i] = (GLushort) CLAMP(ctx->PixelMaps.StoS.Map[i], 0.0, 65535.);
+      }
+      break;
+   default:
+      for (i = 0; i < mapsize; i++) {
+         CLAMPED_FLOAT_TO_USHORT(values[i], pm->Map[i] );
+      }
    }
 
    if (ctx->Pack.BufferObj->Name) {
@@ -1113,14 +984,14 @@
 void
 _mesa_map_rgba( const GLcontext *ctx, GLuint n, GLfloat rgba[][4] )
 {
-   const GLfloat rscale = (GLfloat) (ctx->Pixel.MapRtoRsize - 1);
-   const GLfloat gscale = (GLfloat) (ctx->Pixel.MapGtoGsize - 1);
-   const GLfloat bscale = (GLfloat) (ctx->Pixel.MapBtoBsize - 1);
-   const GLfloat ascale = (GLfloat) (ctx->Pixel.MapAtoAsize - 1);
-   const GLfloat *rMap = ctx->Pixel.MapRtoR;
-   const GLfloat *gMap = ctx->Pixel.MapGtoG;
-   const GLfloat *bMap = ctx->Pixel.MapBtoB;
-   const GLfloat *aMap = ctx->Pixel.MapAtoA;
+   const GLfloat rscale = (GLfloat) (ctx->PixelMaps.RtoR.Size - 1);
+   const GLfloat gscale = (GLfloat) (ctx->PixelMaps.GtoG.Size - 1);
+   const GLfloat bscale = (GLfloat) (ctx->PixelMaps.BtoB.Size - 1);
+   const GLfloat ascale = (GLfloat) (ctx->PixelMaps.AtoA.Size - 1);
+   const GLfloat *rMap = ctx->PixelMaps.RtoR.Map;
+   const GLfloat *gMap = ctx->PixelMaps.GtoG.Map;
+   const GLfloat *bMap = ctx->PixelMaps.BtoB.Map;
+   const GLfloat *aMap = ctx->PixelMaps.AtoA.Map;
    GLuint i;
    for (i=0;i<n;i++) {
       GLfloat r = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F);
@@ -1413,14 +1284,14 @@
 _mesa_map_ci_to_rgba( const GLcontext *ctx, GLuint n,
                       const GLuint index[], GLfloat rgba[][4] )
 {
-   GLuint rmask = ctx->Pixel.MapItoRsize - 1;
-   GLuint gmask = ctx->Pixel.MapItoGsize - 1;
-   GLuint bmask = ctx->Pixel.MapItoBsize - 1;
-   GLuint amask = ctx->Pixel.MapItoAsize - 1;
-   const GLfloat *rMap = ctx->Pixel.MapItoR;
-   const GLfloat *gMap = ctx->Pixel.MapItoG;
-   const GLfloat *bMap = ctx->Pixel.MapItoB;
-   const GLfloat *aMap = ctx->Pixel.MapItoA;
+   GLuint rmask = ctx->PixelMaps.ItoR.Size - 1;
+   GLuint gmask = ctx->PixelMaps.ItoG.Size - 1;
+   GLuint bmask = ctx->PixelMaps.ItoB.Size - 1;
+   GLuint amask = ctx->PixelMaps.ItoA.Size - 1;
+   const GLfloat *rMap = ctx->PixelMaps.ItoR.Map;
+   const GLfloat *gMap = ctx->PixelMaps.ItoG.Map;
+   const GLfloat *bMap = ctx->PixelMaps.ItoB.Map;
+   const GLfloat *aMap = ctx->PixelMaps.ItoA.Map;
    GLuint i;
    for (i=0;i<n;i++) {
       rgba[i][RCOMP] = rMap[index[i] & rmask];
@@ -1438,14 +1309,14 @@
 _mesa_map_ci8_to_rgba8(const GLcontext *ctx, GLuint n, const GLubyte index[],
                        GLubyte rgba[][4])
 {
-   GLuint rmask = ctx->Pixel.MapItoRsize - 1;
-   GLuint gmask = ctx->Pixel.MapItoGsize - 1;
-   GLuint bmask = ctx->Pixel.MapItoBsize - 1;
-   GLuint amask = ctx->Pixel.MapItoAsize - 1;
-   const GLubyte *rMap = ctx->Pixel.MapItoR8;
-   const GLubyte *gMap = ctx->Pixel.MapItoG8;
-   const GLubyte *bMap = ctx->Pixel.MapItoB8;
-   const GLubyte *aMap = ctx->Pixel.MapItoA8;
+   GLuint rmask = ctx->PixelMaps.ItoR.Size - 1;
+   GLuint gmask = ctx->PixelMaps.ItoG.Size - 1;
+   GLuint bmask = ctx->PixelMaps.ItoB.Size - 1;
+   GLuint amask = ctx->PixelMaps.ItoA.Size - 1;
+   const GLubyte *rMap = ctx->PixelMaps.ItoR.Map8;
+   const GLubyte *gMap = ctx->PixelMaps.ItoG.Map8;
+   const GLubyte *bMap = ctx->PixelMaps.ItoB.Map8;
+   const GLubyte *aMap = ctx->PixelMaps.ItoA.Map8;
    GLuint i;
    for (i=0;i<n;i++) {
       rgba[i][RCOMP] = rMap[index[i] & rmask];
@@ -1496,7 +1367,7 @@
    if (ctx->Pixel.MapColorFlag)
       mask |= IMAGE_MAP_COLOR_BIT;
 
-   if (ctx->Pixel.ColorTableEnabled)
+   if (ctx->Pixel.ColorTableEnabled[COLORTABLE_PRECONVOLUTION])
       mask |= IMAGE_COLOR_TABLE_BIT;
 
    if (ctx->Pixel.Convolution1DEnabled ||
@@ -1515,7 +1386,7 @@
       }
    }
 
-   if (ctx->Pixel.PostConvolutionColorTableEnabled)
+   if (ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCONVOLUTION])
       mask |= IMAGE_POST_CONVOLUTION_COLOR_TABLE_BIT;
 
    if (ctx->ColorMatrixStack.Top->type != MATRIX_IDENTITY ||
@@ -1529,7 +1400,7 @@
        ctx->Pixel.PostColorMatrixBias[3]  != 0.0F)
       mask |= IMAGE_COLOR_MATRIX_BIT;
 
-   if (ctx->Pixel.PostColorMatrixColorTableEnabled)
+   if (ctx->Pixel.ColorTableEnabled[COLORTABLE_POSTCOLORMATRIX])
       mask |= IMAGE_POST_COLOR_MATRIX_COLOR_TABLE_BIT;
 
    if (ctx->Pixel.HistogramEnabled)
@@ -1558,6 +1429,14 @@
 /*****                      Initialization                        *****/
 /**********************************************************************/
 
+static void
+init_pixelmap(struct gl_pixelmap *map)
+{
+   map->Map8[0] = 0;     /* first entry of the GLubyte table */
+   map->Map[0] = 0.0;    /* first entry of the float table */
+   map->Size = 1;        /* single-entry map */
+}
+
 
 /**
  * Initialize the context's PIXEL attribute group.
@@ -1584,43 +1463,25 @@
    ctx->Pixel.ZoomY = 1.0;
    ctx->Pixel.MapColorFlag = GL_FALSE;
    ctx->Pixel.MapStencilFlag = GL_FALSE;
-   ctx->Pixel.MapStoSsize = 1;
-   ctx->Pixel.MapItoIsize = 1;
-   ctx->Pixel.MapItoRsize = 1;
-   ctx->Pixel.MapItoGsize = 1;
-   ctx->Pixel.MapItoBsize = 1;
-   ctx->Pixel.MapItoAsize = 1;
-   ctx->Pixel.MapRtoRsize = 1;
-   ctx->Pixel.MapGtoGsize = 1;
-   ctx->Pixel.MapBtoBsize = 1;
-   ctx->Pixel.MapAtoAsize = 1;
-   ctx->Pixel.MapStoS[0] = 0;
-   ctx->Pixel.MapItoI[0] = 0.0;
-   ctx->Pixel.MapItoR[0] = 0.0;
-   ctx->Pixel.MapItoG[0] = 0.0;
-   ctx->Pixel.MapItoB[0] = 0.0;
-   ctx->Pixel.MapItoA[0] = 0.0;
-   ctx->Pixel.MapItoR8[0] = 0;
-   ctx->Pixel.MapItoG8[0] = 0;
-   ctx->Pixel.MapItoB8[0] = 0;
-   ctx->Pixel.MapItoA8[0] = 0;
-   ctx->Pixel.MapRtoR[0] = 0.0;
-   ctx->Pixel.MapGtoG[0] = 0.0;
-   ctx->Pixel.MapBtoB[0] = 0.0;
-   ctx->Pixel.MapAtoA[0] = 0.0;
+   init_pixelmap(&ctx->PixelMaps.StoS);
+   init_pixelmap(&ctx->PixelMaps.ItoI);
+   init_pixelmap(&ctx->PixelMaps.ItoR);
+   init_pixelmap(&ctx->PixelMaps.ItoG);
+   init_pixelmap(&ctx->PixelMaps.ItoB);
+   init_pixelmap(&ctx->PixelMaps.ItoA);
+   init_pixelmap(&ctx->PixelMaps.RtoR);
+   init_pixelmap(&ctx->PixelMaps.GtoG);
+   init_pixelmap(&ctx->PixelMaps.BtoB);
+   init_pixelmap(&ctx->PixelMaps.AtoA);
    ctx->Pixel.HistogramEnabled = GL_FALSE;
    ctx->Pixel.MinMaxEnabled = GL_FALSE;
    ASSIGN_4V(ctx->Pixel.PostColorMatrixScale, 1.0, 1.0, 1.0, 1.0);
    ASSIGN_4V(ctx->Pixel.PostColorMatrixBias, 0.0, 0.0, 0.0, 0.0);
-   ASSIGN_4V(ctx->Pixel.ColorTableScale, 1.0, 1.0, 1.0, 1.0);
-   ASSIGN_4V(ctx->Pixel.ColorTableBias, 0.0, 0.0, 0.0, 0.0);
-   ASSIGN_4V(ctx->Pixel.PCCTscale, 1.0, 1.0, 1.0, 1.0);
-   ASSIGN_4V(ctx->Pixel.PCCTbias, 0.0, 0.0, 0.0, 0.0);
-   ASSIGN_4V(ctx->Pixel.PCMCTscale, 1.0, 1.0, 1.0, 1.0);
-   ASSIGN_4V(ctx->Pixel.PCMCTbias, 0.0, 0.0, 0.0, 0.0);
-   ctx->Pixel.ColorTableEnabled = GL_FALSE;
-   ctx->Pixel.PostConvolutionColorTableEnabled = GL_FALSE;
-   ctx->Pixel.PostColorMatrixColorTableEnabled = GL_FALSE;
+   for (i = 0; i < COLORTABLE_MAX; i++) {
+      ASSIGN_4V(ctx->Pixel.ColorTableScale[i], 1.0, 1.0, 1.0, 1.0);
+      ASSIGN_4V(ctx->Pixel.ColorTableBias[i], 0.0, 0.0, 0.0, 0.0);
+      ctx->Pixel.ColorTableEnabled[i] = GL_FALSE;
+   }
    ctx->Pixel.Convolution1DEnabled = GL_FALSE;
    ctx->Pixel.Convolution2DEnabled = GL_FALSE;
    ctx->Pixel.Separable2DEnabled = GL_FALSE;
diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c
index 1cc95a7..e387c42 100644
--- a/src/mesa/main/renderbuffer.c
+++ b/src/mesa/main/renderbuffer.c
@@ -1192,18 +1192,22 @@
    ASSERT(rb->PutMonoValues);
 
    /* free old buffer storage */
-   if (rb->Data)
+   if (rb->Data) {
       _mesa_free(rb->Data);
+      rb->Data = NULL;
+   }
 
-   /* allocate new buffer storage */
-   rb->Data = _mesa_malloc(width * height * pixelSize);
-   if (rb->Data == NULL) {
-      rb->Width = 0;
-      rb->Height = 0;
-      _mesa_error(ctx, GL_OUT_OF_MEMORY,
-                  "software renderbuffer allocation (%d x %d x %d)",
-                  width, height, pixelSize);
-      return GL_FALSE;
+   if (width > 0 && height > 0) {
+      /* allocate new buffer storage */
+      rb->Data = _mesa_malloc(width * height * pixelSize);
+      if (rb->Data == NULL) {
+         rb->Width = 0;
+         rb->Height = 0;
+         _mesa_error(ctx, GL_OUT_OF_MEMORY,
+                     "software renderbuffer allocation (%d x %d x %d)",
+                     width, height, pixelSize);
+         return GL_FALSE;
+      }
    }
 
    rb->Width = width;
diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c
index d3011ce..411d51c 100644
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -752,44 +752,55 @@
    GLint minColL = 0, maxColL = 0;
    GLint minColR = 0, maxColR = 0;
    GLint sumL = 0, sumR = 0;
-
+   GLint nn_comp;
    /* Our solution here is to find the darkest and brightest colors in
     * the 4x4 tile and use those as the two representative colors.
     * There are probably better algorithms to use (histogram-based).
     */
-   minSum = 2000; /* big enough */
-   maxSum = -1; /* small enough */
-   for (k = 0; k < N_TEXELS / 2; k++) {
-      GLint sum = 0;
-      for (i = 0; i < n_comp; i++) {
-         sum += input[k][i];
-      }
-      if (minSum > sum) {
-         minSum = sum;
-         minColL = k;
-      }
-      if (maxSum < sum) {
-         maxSum = sum;
-         maxColL = k;
-      }
-      sumL += sum;
+   nn_comp = n_comp;
+   while ((minColL == maxColL) && nn_comp) {
+       minSum = 2000; /* big enough */
+       maxSum = -1; /* small enough */
+       for (k = 0; k < N_TEXELS / 2; k++) {
+           GLint sum = 0;
+           for (i = 0; i < nn_comp; i++) {
+               sum += input[k][i];
+           }
+           if (minSum > sum) {
+               minSum = sum;
+               minColL = k;
+           }
+           if (maxSum < sum) {
+               maxSum = sum;
+               maxColL = k;
+           }
+           sumL += sum;
+       }
+       
+       nn_comp--;
    }
-   minSum = 2000; /* big enough */
-   maxSum = -1; /* small enough */
-   for (; k < N_TEXELS; k++) {
-      GLint sum = 0;
-      for (i = 0; i < n_comp; i++) {
-         sum += input[k][i];
-      }
-      if (minSum > sum) {
-         minSum = sum;
-         minColR = k;
-      }
-      if (maxSum < sum) {
-         maxSum = sum;
-         maxColR = k;
-      }
-      sumR += sum;
+
+   nn_comp = n_comp;
+   while ((minColR == maxColR) && nn_comp) {
+       minSum = 2000; /* big enough */
+       maxSum = -1; /* small enough */
+       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
+           GLint sum = 0;
+           for (i = 0; i < nn_comp; i++) {
+               sum += input[k][i];
+           }
+           if (minSum > sum) {
+               minSum = sum;
+               minColR = k;
+           }
+           if (maxSum < sum) {
+               maxSum = sum;
+               maxColR = k;
+           }
+           sumR += sum;
+       }
+
+       nn_comp--;
    }
 
    /* choose the common vector (yuck!) */
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 994fb16..a570525 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -3608,10 +3608,29 @@
                GLint col;
                for (col = 0; col < width; col++) {
                   (*texImage->FetchTexelf)(texImage, col, row, img, rgba[col]);
+                  if (texImage->TexFormat->BaseFormat == GL_ALPHA) {
+                     rgba[col][RCOMP] = 0.0;
+                     rgba[col][GCOMP] = 0.0;
+                     rgba[col][BCOMP] = 0.0;
+                  }
+                  else if (texImage->TexFormat->BaseFormat == GL_LUMINANCE) {
+                     rgba[col][GCOMP] = 0.0;
+                     rgba[col][BCOMP] = 0.0;
+                     rgba[col][ACOMP] = 1.0;
+                  }
+                  else if (texImage->TexFormat->BaseFormat == GL_LUMINANCE_ALPHA) {
+                     rgba[col][GCOMP] = 0.0;
+                     rgba[col][BCOMP] = 0.0;
+                  }
+                  else if (texImage->TexFormat->BaseFormat == GL_INTENSITY) {
+                     rgba[col][GCOMP] = 0.0;
+                     rgba[col][BCOMP] = 0.0;
+                     rgba[col][ACOMP] = 1.0;
+                  }
                }
                _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba,
                                           format, type, dest,
-                                          &ctx->Pack, IMAGE_RED_TO_LUMINANCE);
+                                          &ctx->Pack, 0x0 /*image xfer ops*/);
             } /* format */
          } /* row */
       } /* img */
diff --git a/src/mesa/shader/nvvertparse.c b/src/mesa/shader/nvvertparse.c
index fb546a0..0bc0c05 100644
--- a/src/mesa/shader/nvvertparse.c
+++ b/src/mesa/shader/nvvertparse.c
@@ -684,13 +684,13 @@
       if (token[1] == 0) {
          /* single letter swizzle */
          if (token[0] == 'x')
-            srcReg->Swizzle = MAKE_SWIZZLE4(0, 0, 0, 0);
+            srcReg->Swizzle = SWIZZLE_XXXX;
          else if (token[0] == 'y')
-            srcReg->Swizzle = MAKE_SWIZZLE4(1, 1, 1, 1);
+            srcReg->Swizzle = SWIZZLE_YYYY;
          else if (token[0] == 'z')
-            srcReg->Swizzle = MAKE_SWIZZLE4(2, 2, 2, 2);
+            srcReg->Swizzle = SWIZZLE_ZZZZ;
          else if (token[0] == 'w')
-            srcReg->Swizzle = MAKE_SWIZZLE4(3, 3, 3, 3);
+            srcReg->Swizzle = SWIZZLE_WWWW;
          else
             RETURN_ERROR1("Expected x, y, z, or w");
       }
diff --git a/src/mesa/shader/prog_instruction.c b/src/mesa/shader/prog_instruction.c
index c678313..ed479a7 100644
--- a/src/mesa/shader/prog_instruction.c
+++ b/src/mesa/shader/prog_instruction.c
@@ -97,6 +97,20 @@
 }
 
 
+/**
+ * Copy \p n program instructions from \p src into \p dest.
+ * \param dest  destination array; needs room for \p n instructions.
+ * \param src  source array holding at least \p n instructions.
+ * \param n  number of instructions (not bytes) to copy.
+ * \return result of _mesa_memcpy(); expected to be \p dest.
+ */
+struct prog_instruction *
+_mesa_copy_instructions(struct prog_instruction *dest,
+                        const struct prog_instruction *src, GLuint n)
+{
+   return _mesa_memcpy(dest, src, n * sizeof(*src));
+}
+
 
 /**
  * Basic info about each instruction
diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h
index 14305f1..66abb10 100644
--- a/src/mesa/shader/prog_instruction.h
+++ b/src/mesa/shader/prog_instruction.h
@@ -432,6 +432,10 @@
 _mesa_realloc_instructions(struct prog_instruction *oldInst,
                            GLuint numOldInst, GLuint numNewInst);
 
+extern struct prog_instruction *
+_mesa_copy_instructions(struct prog_instruction *dest,
+                        const struct prog_instruction *src, GLuint n);
+
 extern GLuint
 _mesa_num_inst_src_regs(gl_inst_opcode opcode);
 
diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c
index 2d14cd3..d427ee3 100644
--- a/src/mesa/shader/programopt.c
+++ b/src/mesa/shader/programopt.c
@@ -99,8 +99,7 @@
    }
 
    /* Append original instructions after new instructions */
-   _mesa_memcpy(newInst + 4, vprog->Base.Instructions,
-                origLen * sizeof(struct prog_instruction));
+   _mesa_copy_instructions (newInst + 4, vprog->Base.Instructions, origLen);
 
    /* free old instructions */
    _mesa_free(vprog->Base.Instructions);
diff --git a/src/mesa/swrast/s_accum.c b/src/mesa/swrast/s_accum.c
index 69e9404..f53e7f5 100644
--- a/src/mesa/swrast/s_accum.c
+++ b/src/mesa/swrast/s_accum.c
@@ -136,7 +136,9 @@
       return;
    }
 
-   assert(rb);
+   if (!rb || !rb->Data)
+      return;
+
    assert(rb->_BaseFormat == GL_RGBA);
    /* add other types in future? */
    assert(rb->DataType == GL_SHORT || rb->DataType == GL_UNSIGNED_SHORT);
diff --git a/src/mesa/swrast/s_copypix.c b/src/mesa/swrast/s_copypix.c
index afab7c4..2051e1f 100644
--- a/src/mesa/swrast/s_copypix.c
+++ b/src/mesa/swrast/s_copypix.c
@@ -199,7 +199,7 @@
    GLint sy, dy, stepy, row;
    const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
    GLint overlapping;
-   const GLuint transferOps = ctx->_ImageTransferState;
+   GLuint transferOps = ctx->_ImageTransferState;
    SWspan span;
 
    if (!ctx->ReadBuffer->_ColorReadBuffer) {
@@ -211,6 +211,11 @@
       copy_conv_rgba_pixels(ctx, srcx, srcy, width, height, destx, desty);
       return;
    }
+   else if (ctx->Pixel.Convolution1DEnabled) {
+      /* make sure we don't apply 1D convolution */
+      transferOps &= ~(IMAGE_CONVOLUTION_BIT |
+                       IMAGE_POST_CONVOLUTION_SCALE_BIAS);
+   }
 
    /* Determine if copy should be done bottom-to-top or top-to-bottom */
    if (srcy < desty) {
diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c
index 408174c..dde2b1d 100644
--- a/src/mesa/swrast/s_depth.c
+++ b/src/mesa/swrast/s_depth.c
@@ -1350,7 +1350,7 @@
    GLuint clearValue;
    GLint x, y, width, height;
 
-   if (!rb || !ctx->Depth.Mask) {
+   if (!rb || !ctx->Depth.Mask || !rb->Data) {
       /* no depth buffer, or writing to it is disabled */
       return;
    }
diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c
index 595fd5c..50147f3 100644
--- a/src/mesa/swrast/s_drawpix.c
+++ b/src/mesa/swrast/s_drawpix.c
@@ -452,7 +452,8 @@
        && !scaleOrBias
        && !zoom
        && ctx->Visual.rgbMode
-       && width <= MAX_WIDTH) {
+       && width <= MAX_WIDTH
+       && !unpack->SwapBytes) {
       /* Special case: directly write 16-bit depth values */
       GLint row;
       for (row = 0; row < height; row++) {
@@ -472,7 +473,8 @@
             && !scaleOrBias
             && !zoom
             && ctx->Visual.rgbMode
-            && width <= MAX_WIDTH) {
+            && width <= MAX_WIDTH
+            && !unpack->SwapBytes) {
       /* Special case: shift 32-bit values down to Visual.depthBits */
       const GLint shift = 32 - ctx->DrawBuffer->Visual.depthBits;
       GLint row;
@@ -617,6 +619,11 @@
       type = GL_FLOAT;
       transferOps &= IMAGE_POST_CONVOLUTION_BITS;
    }
+   else if (ctx->Pixel.Convolution1DEnabled) {
+      /* we only want to apply 1D convolution to glTexImage1D */
+      transferOps &= ~(IMAGE_CONVOLUTION_BIT |
+                       IMAGE_POST_CONVOLUTION_SCALE_BIAS);
+   }
 
    if (ctx->DrawBuffer->_NumColorDrawBuffers[0] > 0 &&
        ctx->DrawBuffer->_ColorDrawBuffers[0][0]->DataType != GL_FLOAT &&
diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c
index 383d61c..fe9a70f 100644
--- a/src/mesa/swrast/s_readpix.c
+++ b/src/mesa/swrast/s_readpix.c
@@ -119,7 +119,7 @@
             && !biasOrScale && !packing->SwapBytes) {
       /* Special case: directly read 24-bit unsigned depth values. */
       GLint j;
-      ASSERT(rb->InternalFormat == GL_DEPTH_COMPONENT32);
+      ASSERT(rb->InternalFormat == GL_DEPTH_COMPONENT24);
       ASSERT(rb->DataType == GL_UNSIGNED_INT);
       for (j = 0; j < height; j++, y++) {
          GLuint *dest = (GLuint *)
@@ -410,6 +410,10 @@
          = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
                                              format, type, 0, 0);
 
+      /* make sure we don't apply 1D convolution */
+      transferOps &= ~(IMAGE_CONVOLUTION_BIT |
+                       IMAGE_POST_CONVOLUTION_SCALE_BIAS);
+
       for (row = 0; row < height; row++, y++) {
 
          /* Get float rgba pixels */
diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c
index a8aa1d4..43475c0 100644
--- a/src/mesa/swrast/s_stencil.c
+++ b/src/mesa/swrast/s_stencil.c
@@ -1154,7 +1154,7 @@
    const GLuint stencilMax = (1 << stencilBits) - 1;
    GLint x, y, width, height;
 
-   if (!rb || mask == 0)
+   if (!rb || mask == 0 || !rb->Data)
       return;
 
    ASSERT(rb->DataType == GL_UNSIGNED_BYTE ||
diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c
index f9e5045..8b8bb3a 100644
--- a/src/mesa/tnl/t_vp_build.c
+++ b/src/mesa/tnl/t_vp_build.c
@@ -853,7 +853,7 @@
 
 
 /* Need to add some addtional parameters to allow lighting in object
- * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye
+ * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
  * space lighting.
  */
 static void build_lighting( struct tnl_program *p )
@@ -942,7 +942,14 @@
 	     */
 	    VPpli = register_param3(p, STATE_LIGHT, i, 
 				    STATE_POSITION_NORMALIZED); 
-	    half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+            if (p->state->light_local_viewer) {
+                struct ureg eye_hat = get_eye_position_normalized(p);
+                half = get_temp(p);
+                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+                emit_normalize_vec3(p, half, half);
+            } else {
+                half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+            }
 	 } 
 	 else {
 	    struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 
@@ -1325,14 +1332,16 @@
    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
    struct ureg ut = get_temp(p);
 
+   /* dist = |eyez| */
+   emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
    /* p1 + dist * (p2 + dist * p3); */
-   emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)),
+   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
 		swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
-   emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)),
+   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
 		ut, swizzle1(state_attenuation, X));
 
    /* 1 / sqrt(factor) */
-   emit_op1(p, OPCODE_RSQ, ut, 0, ut );
+   emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
 
 #if 1
    /* out = pointSize / sqrt(factor) */
@@ -1340,8 +1349,8 @@
 #else
    /* not sure, might make sense to do clamping here,
       but it's not done in t_vb_points neither */
-   emit_op2(p, OPCODE_MUL, ut, 0, ut, state_size);
-   emit_op2(p, OPCODE_MAX, ut, 0, ut, swizzle1(state_size, Y));
+   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
+   emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
 #endif
 
diff --git a/src/mesa/x86/read_rgba_span_x86.S b/src/mesa/x86/read_rgba_span_x86.S
index a690283..3cbcd71 100644
--- a/src/mesa/x86/read_rgba_span_x86.S
+++ b/src/mesa/x86/read_rgba_span_x86.S
@@ -369,7 +369,7 @@
 	movdqa	mask, %xmm1
 	movdqa	mask+16, %xmm2
  */
-	LOAD_MASK(movdqa,%xmm1,%xmm2)
+	LOAD_MASK(movdqu,%xmm1,%xmm2)
 
 	movl	12(%esp), %ebx	/* source pointer */
 	movl	20(%esp), %edx	/* number of pixels to copy */