Merge branch 'master' of git://anongit.freedesktop.org/mesa/mesa
diff --git a/Android.common.mk b/Android.common.mk
new file mode 100644
index 0000000..83177a0
--- /dev/null
+++ b/Android.common.mk
@@ -0,0 +1,48 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_C_INCLUDES += \
+	$(MESA_TOP)/include
+
+LOCAL_CFLAGS += \
+	-DPTHREADS \
+	-fvisibility=hidden \
+	-Wno-sign-compare
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+LOCAL_CFLAGS += \
+	-DUSE_X86_ASM
+endif
+endif
+
+LOCAL_CPPFLAGS += \
+	-Wno-error=non-virtual-dtor \
+	-Wno-non-virtual-dtor
+
+# uncomment to keep the debug symbols
+#LOCAL_STRIP_MODULE := false
+
+ifeq ($(strip $(LOCAL_MODULE_TAGS)),)
+LOCAL_MODULE_TAGS := optional
+endif
diff --git a/Android.mk b/Android.mk
new file mode 100644
index 0000000..53c619e
--- /dev/null
+++ b/Android.mk
@@ -0,0 +1,147 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# BOARD_GPU_DRIVERS should be defined.  The valid values are
+#
+#   classic drivers:
+#   gallium drivers: swrast r600g
+#
+# The main target is libGLES_mesa.  There is no classic drivers yet.
+
+MESA_TOP := $(call my-dir)
+MESA_COMMON_MK := $(MESA_TOP)/Android.common.mk
+MESA_PYTHON2 := python
+
+DRM_TOP := external/drm
+DRM_GRALLOC_TOP := hardware/drm_gralloc
+
+classic_drivers :=
+gallium_drivers := swrast r600g
+
+MESA_GPU_DRIVERS := $(BOARD_GPU_DRIVERS)
+
+# warn about invalid drivers
+invalid_drivers := $(filter-out \
+	$(classic_drivers) $(gallium_drivers), $(MESA_GPU_DRIVERS))
+ifneq ($(invalid_drivers),)
+$(warning invalid GPU drivers: $(invalid_drivers))
+# tidy up
+MESA_GPU_DRIVERS := $(filter-out $(invalid_drivers), $(MESA_GPU_DRIVERS))
+endif
+
+# host and target must be the same arch to generate matypes.h
+ifeq ($(TARGET_ARCH),$(HOST_ARCH))
+MESA_ENABLE_ASM := true
+else
+MESA_ENABLE_ASM := false
+endif
+
+ifneq ($(filter $(classic_drivers), $(MESA_GPU_DRIVERS)),)
+MESA_BUILD_CLASSIC := true
+else
+MESA_BUILD_CLASSIC := false
+endif
+
+ifneq ($(filter $(gallium_drivers), $(MESA_GPU_DRIVERS)),)
+MESA_BUILD_GALLIUM := true
+else
+MESA_BUILD_GALLIUM := false
+endif
+
+ifneq ($(strip $(MESA_GPU_DRIVERS)),)
+
+SUBDIRS := \
+	src/mapi \
+	src/glsl \
+	src/mesa \
+	src/egl/main
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+SUBDIRS += src/gallium
+endif
+
+# ---------------------------------------
+# Build libGLES_mesa
+# ---------------------------------------
+
+LOCAL_PATH := $(MESA_TOP)
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES :=
+LOCAL_CFLAGS :=
+LOCAL_C_INCLUDES :=
+
+LOCAL_STATIC_LIBRARIES :=
+LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_egl
+
+LOCAL_SHARED_LIBRARIES := \
+	libglapi \
+	libdrm \
+	libdl \
+	libhardware \
+	liblog \
+	libcutils
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+
+gallium_DRIVERS :=
+
+# swrast
+gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_android
+
+# r600g
+ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_winsys_radeon
+gallium_DRIVERS += libmesa_pipe_r600 libmesa_winsys_r600
+endif
+
+#
+# Notes about the order here:
+#
+#  * libmesa_st_egl depends on libmesa_winsys_sw_android in $(gallium_DRIVERS)
+#  * libmesa_st_mesa depends on libmesa_glsl
+#  * libmesa_glsl depends on libmesa_glsl_utils
+#
+LOCAL_STATIC_LIBRARIES := \
+	libmesa_egl_gallium \
+	libmesa_st_egl \
+	$(gallium_DRIVERS) \
+	libmesa_st_mesa \
+	libmesa_glsl \
+	libmesa_glsl_utils \
+	libmesa_gallium \
+	$(LOCAL_STATIC_LIBRARIES)
+
+endif # MESA_BUILD_GALLIUM
+
+LOCAL_MODULE := libGLES_mesa
+LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl
+
+include $(MESA_COMMON_MK)
+include $(BUILD_SHARED_LIBRARY)
+
+mkfiles := $(patsubst %,$(MESA_TOP)/%/Android.mk,$(SUBDIRS))
+include $(mkfiles)
+
+endif # MESA_GPU_DRIVERS
diff --git a/Makefile b/Makefile
index b0a2d80..916c498 100644
--- a/Makefile
+++ b/Makefile
@@ -21,6 +21,10 @@
 doxygen:
 	cd doxygen && $(MAKE)
 
+check:
+	cd src/glsl/tests/ && ./optimization-test
+	@echo "All tests passed."
+
 clean:
 	-@touch $(TOP)/configs/current
 	-@for dir in $(SUBDIRS) ; do \
@@ -51,7 +55,7 @@
 	done
 
 
-.PHONY: default doxygen clean realclean distclean install
+.PHONY: default doxygen clean realclean distclean install check
 
 # If there's no current configuration file
 $(TOP)/configs/current:
@@ -203,12 +207,6 @@
 IGNORE_FILES = \
 	-x autogen.sh
 
-DEPEND_FILES = \
-	src/mesa/depend		\
-	src/glx/depend		\
-	src/glw/depend		\
-	src/glu/sgi/depend
-
 
 parsers: configure
 	-@touch $(TOP)/configs/current
@@ -231,15 +229,9 @@
 AC_FLAGS =
 aclocal.m4: configure.ac acinclude.m4
 	$(ACLOCAL) $(ACLOCAL_FLAGS)
-configure: rm_depend configure.ac aclocal.m4 acinclude.m4
+configure: configure.ac aclocal.m4 acinclude.m4
 	$(AUTOCONF) $(AC_FLAGS)
 
-rm_depend:
-	@for dep in $(DEPEND_FILES) ; do \
-		rm -f $$dep ; \
-		touch $$dep ; \
-	done
-
 manifest.txt: .git
 	( \
 		ls -1 $(EXTRA_FILES) ; \
@@ -269,4 +261,4 @@
 	@-md5sum $(PACKAGE_NAME).tar.bz2
 	@-md5sum $(PACKAGE_NAME).zip
 
-.PHONY: tarballs rm_depend md5
+.PHONY: tarballs md5
diff --git a/common.py b/common.py
index 8657030..cfee1b5 100644
--- a/common.py
+++ b/common.py
@@ -88,6 +88,7 @@
 	opts.Add('toolchain', 'compiler toolchain', default_toolchain)
 	opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no'))
 	opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
+	opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp (swrast)', 'no'))
 	opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes'))
 	opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no'))
 	opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
diff --git a/configs/darwin b/configs/darwin
index 41e7ba8..83f417c 100644
--- a/configs/darwin
+++ b/configs/darwin
@@ -50,7 +50,6 @@
 GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXt
 APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXmu -lXt -lXi -lm
 
-# omit glw lib for now:
 SRC_DIRS = glsl mapi/glapi mapi/vgapi glx/apple mesa gallium glu
 GLU_DIRS = sgi
 DRIVER_DIRS = osmesa
diff --git a/configs/default b/configs/default
index e839a1e..078c85e 100644
--- a/configs/default
+++ b/configs/default
@@ -105,7 +105,7 @@
 # Directories to build
 LIB_DIR = lib
 SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \
-	gallium egl gallium/winsys gallium/targets glu glw
+	gallium egl gallium/winsys gallium/targets glu
 GLU_DIRS = sgi
 DRIVER_DIRS = x11 osmesa
 
diff --git a/configs/freebsd-dri b/configs/freebsd-dri
index fdf4b29..3c83872 100644
--- a/configs/freebsd-dri
+++ b/configs/freebsd-dri
@@ -42,7 +42,7 @@
 
 
 # Directories
-SRC_DIRS = glx gallium mesa glu glw
+SRC_DIRS = glx gallium mesa glu
 DRIVER_DIRS = dri
 
 DRM_SOURCE_PATH=$(TOP)/../drm
diff --git a/configs/linux-cell b/configs/linux-cell
index e87e69a..7f38da9 100644
--- a/configs/linux-cell
+++ b/configs/linux-cell
@@ -36,7 +36,6 @@
 CXXFLAGS = $(COMMON_C_CPP_FLAGS)
 
 
-# Omitting glw here:
 SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \
 	gallium gallium/winsys gallium/targets glu
 
diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb
index 021aa3e..15702da 100644
--- a/configs/linux-dri-xcb
+++ b/configs/linux-dri-xcb
@@ -49,7 +49,7 @@
 GL_LIB_DEPS   = $(EXTRA_LIB_PATH) -lX11 -lXext -lXxf86vm -lm -lpthread -ldl \
                 $(LIBDRM_LIB) $(shell pkg-config --libs xcb) $(shell pkg-config --libs x11-xcb) $(shell pkg-config --libs xcb-glx)
 
-SRC_DIRS = glx gallium mesa glu glw
+SRC_DIRS = glx gallium mesa glu
 
 DRIVER_DIRS = dri
 DRI_DIRS = i810 i915 mach64 mga r128 r200 r300 radeon \
diff --git a/configs/linux-indirect b/configs/linux-indirect
index 82868c4..5592a8f 100644
--- a/configs/linux-indirect
+++ b/configs/linux-indirect
@@ -48,5 +48,5 @@
 
 
 # Directories
-SRC_DIRS = glx glu glw
+SRC_DIRS = glx glu
 DRIVER_DIRS =
diff --git a/configure.ac b/configure.ac
index f72db11..c461f43 100644
--- a/configure.ac
+++ b/configure.ac
@@ -359,7 +359,6 @@
 GL_LIB_NAME='lib$(GL_LIB).'${LIB_EXTENSION}
 GLU_LIB_NAME='lib$(GLU_LIB).'${LIB_EXTENSION}
 GLUT_LIB_NAME='lib$(GLUT_LIB).'${LIB_EXTENSION}
-GLW_LIB_NAME='lib$(GLW_LIB).'${LIB_EXTENSION}
 OSMESA_LIB_NAME='lib$(OSMESA_LIB).'${LIB_EXTENSION}
 EGL_LIB_NAME='lib$(EGL_LIB).'${LIB_EXTENSION}
 GLESv1_CM_LIB_NAME='lib$(GLESv1_CM_LIB).'${LIB_EXTENSION}
@@ -372,7 +371,6 @@
 GL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 GLU_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLU_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 GLUT_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLUT_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
-GLW_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLW_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 OSMESA_LIB_GLOB=${LIB_PREFIX_GLOB}'$(OSMESA_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
@@ -386,7 +384,6 @@
 AC_SUBST([GL_LIB_NAME])
 AC_SUBST([GLU_LIB_NAME])
 AC_SUBST([GLUT_LIB_NAME])
-AC_SUBST([GLW_LIB_NAME])
 AC_SUBST([OSMESA_LIB_NAME])
 AC_SUBST([EGL_LIB_NAME])
 AC_SUBST([GLESv1_CM_LIB_NAME])
@@ -399,7 +396,6 @@
 AC_SUBST([GL_LIB_GLOB])
 AC_SUBST([GLU_LIB_GLOB])
 AC_SUBST([GLUT_LIB_GLOB])
-AC_SUBST([GLW_LIB_GLOB])
 AC_SUBST([OSMESA_LIB_GLOB])
 AC_SUBST([EGL_LIB_GLOB])
 AC_SUBST([GLESv1_CM_LIB_GLOB])
@@ -951,7 +947,7 @@
         GL_PC_LIB_PRIV="$GL_LIB_DEPS"
         GL_PC_CFLAGS="$X11_INCLUDES"
     fi
-    GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread"
+    GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread $DLOPEN_LIBS"
     GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $SELINUX_LIBS -lm -lpthread"
 
     # if static, move the external libraries to the programs
@@ -1073,11 +1069,6 @@
 
 AC_SUBST([HAVE_XF86VIDMODE])
 
-PKG_CHECK_MODULES([LIBDRM_RADEON],
-		  [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED],
-		  HAVE_LIBDRM_RADEON=yes,
-		  HAVE_LIBDRM_RADEON=no)
-
 dnl
 dnl More GLX setup
 dnl
@@ -1270,6 +1261,11 @@
 
 case $DRI_DIRS in
 *radeon*|*r200*|*r300*|*r600*)
+    PKG_CHECK_MODULES([LIBDRM_RADEON],
+		      [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED],
+		      HAVE_LIBDRM_RADEON=yes,
+		      HAVE_LIBDRM_RADEON=no)
+
     if test "x$HAVE_LIBDRM_RADEON" = xyes; then
 	RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS"
 	RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS
@@ -1326,47 +1322,6 @@
 AC_SUBST([OSMESA_PC_LIB_PRIV])
 
 dnl
-dnl EGL configuration
-dnl
-EGL_CLIENT_APIS=""
-
-if test "x$enable_egl" = xyes; then
-    SRC_DIRS="$SRC_DIRS egl"
-    EGL_LIB_DEPS="$DLOPEN_LIBS $SELINUX_LIBS -lpthread"
-    EGL_DRIVERS_DIRS=""
-
-    if test "$enable_static" != yes; then
-        # build egl_glx when libGL is built
-        if test "x$enable_glx" = xyes; then
-            EGL_DRIVERS_DIRS="glx"
-        fi
-
-        PKG_CHECK_MODULES([LIBUDEV], [libudev > 150],
-                          [have_libudev=yes],[have_libudev=no])
-        if test "$have_libudev" = yes; then
-            DEFINES="$DEFINES -DHAVE_LIBUDEV"
-        fi
-        if test "x$enable_dri" = xyes; then
-            # build egl_dri2 when xcb-dri2 is available
-            PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb-dri2 xcb-xfixes],
-            		  [have_xcb_dri2=yes],[have_xcb_dri2=no])
-            
-            if test "$have_xcb_dri2" = yes; then
-                EGL_DRIVER_DRI2=dri2
-                DEFINES="$DEFINES -DHAVE_XCB_DRI2"
-                # workaround a bug in xcb-dri2 generated by xcb-proto 1.6
-                AC_CHECK_LIB(xcb-dri2, xcb_dri2_connect_alignment_pad, [],
-                          [DEFINES="$DEFINES -DXCB_DRI2_CONNECT_DEVICE_NAME_BROKEN"])
-            fi
-	fi
-
-        EGL_DRIVERS_DIRS="$EGL_DRIVERS_DIRS $EGL_DRIVER_DRI2"
-    fi
-fi
-AC_SUBST([EGL_LIB_DEPS])
-AC_SUBST([EGL_DRIVERS_DIRS])
-
-dnl
 dnl gbm configuration
 dnl
 if test "x$enable_gbm" = xauto; then
@@ -1402,6 +1357,49 @@
 AC_SUBST([GBM_PC_CFLAGS])
 
 dnl
+dnl EGL configuration
+dnl
+EGL_CLIENT_APIS=""
+
+if test "x$enable_egl" = xyes; then
+    SRC_DIRS="$SRC_DIRS egl"
+    EGL_LIB_DEPS="$DLOPEN_LIBS $SELINUX_LIBS -lpthread"
+    EGL_DRIVERS_DIRS=""
+
+    AC_CHECK_FUNC(mincore, [DEFINES="$DEFINES -DHAVE_MINCORE"])
+
+    if test "$enable_static" != yes; then
+        # build egl_glx when libGL is built
+        if test "x$enable_glx" = xyes; then
+            EGL_DRIVERS_DIRS="glx"
+        fi
+
+        PKG_CHECK_MODULES([LIBUDEV], [libudev > 150],
+                          [have_libudev=yes],[have_libudev=no])
+        if test "$have_libudev" = yes; then
+            DEFINES="$DEFINES -DHAVE_LIBUDEV"
+        fi
+        if test "x$enable_dri" = xyes; then
+            # build egl_dri2 when xcb-dri2 is available
+            PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb-dri2 xcb-xfixes],
+            		  [have_xcb_dri2=yes],[have_xcb_dri2=no])
+            
+            if test "$have_xcb_dri2" = yes; then
+                EGL_DRIVER_DRI2=dri2
+                DEFINES="$DEFINES -DHAVE_XCB_DRI2"
+                # workaround a bug in xcb-dri2 generated by xcb-proto 1.6
+                AC_CHECK_LIB(xcb-dri2, xcb_dri2_connect_alignment_pad, [],
+                          [DEFINES="$DEFINES -DXCB_DRI2_CONNECT_DEVICE_NAME_BROKEN"])
+            fi
+	fi
+
+        EGL_DRIVERS_DIRS="$EGL_DRIVERS_DIRS $EGL_DRIVER_DRI2"
+    fi
+fi
+AC_SUBST([EGL_LIB_DEPS])
+AC_SUBST([EGL_DRIVERS_DIRS])
+
+dnl
 dnl EGL Gallium configuration
 dnl
 if test "x$enable_gallium_egl" = xyes; then
@@ -1603,77 +1601,6 @@
 AC_SUBST([GLU_PC_CFLAGS])
 
 dnl
-dnl GLw configuration
-dnl
-AC_ARG_ENABLE([glw],
-    [AS_HELP_STRING([--disable-glw],
-        [enable Xt/Motif widget library @<:@default=enabled@:>@])],
-    [enable_glw="$enableval"],
-    [enable_glw=yes])
-dnl Don't build GLw on osmesa
-if test "x$enable_glw" = xyes -a "x$enable_glx" = xno; then
-    AC_MSG_NOTICE([Disabling GLw since there is no OpenGL driver])
-    enable_glw=no
-fi
-AC_ARG_ENABLE([motif],
-    [AS_HELP_STRING([--enable-motif],
-        [use Motif widgets in GLw @<:@default=disabled@:>@])],
-    [enable_motif="$enableval"],
-    [enable_motif=no])
-
-if test "x$enable_glw" = xyes; then
-    SRC_DIRS="$SRC_DIRS glw"
-    if test "$x11_pkgconfig" = yes; then
-        PKG_CHECK_MODULES([GLW],[x11 xt])
-        GLW_PC_REQ_PRIV="x11 xt"
-        GLW_LIB_DEPS="$GLW_LIBS"
-    else
-        # should check these...
-        GLW_LIB_DEPS="$X_LIBS -lXt -lX11"
-        GLW_PC_LIB_PRIV="$GLW_LIB_DEPS"
-        GLW_PC_CFLAGS="$X11_INCLUDES"
-    fi
-
-    GLW_SOURCES="GLwDrawA.c"
-    MOTIF_CFLAGS=
-    if test "x$enable_motif" = xyes; then
-        GLW_SOURCES="$GLW_SOURCES GLwMDrawA.c"
-        AC_PATH_PROG([MOTIF_CONFIG], [motif-config], [no])
-        if test "x$MOTIF_CONFIG" != xno; then
-            MOTIF_CFLAGS=`$MOTIF_CONFIG --cflags`
-            MOTIF_LIBS=`$MOTIF_CONFIG --libs`
-        else
-            AC_CHECK_HEADER([Xm/PrimitiveP.h], [],
-                [AC_MSG_ERROR([Can't locate Motif headers])])
-            AC_CHECK_LIB([Xm], [XmGetPixmap], [MOTIF_LIBS="-lXm"],
-                [AC_MSG_ERROR([Can't locate Motif Xm library])])
-        fi
-        # MOTIF_LIBS is prepended to GLW_LIB_DEPS since Xm needs Xt/X11
-        GLW_LIB_DEPS="$MOTIF_LIBS $GLW_LIB_DEPS"
-        GLW_PC_LIB_PRIV="$MOTIF_LIBS $GLW_PC_LIB_PRIV"
-        GLW_PC_CFLAGS="$MOTIF_CFLAGS $GLW_PC_CFLAGS"
-    fi
-
-    # If static, empty GLW_LIB_DEPS and add libs for programs to link
-    GLW_PC_LIB_PRIV="$GLW_PC_LIB_PRIV"
-    if test "$enable_static" = no; then
-        GLW_MESA_DEPS='-l$(GL_LIB)'
-        GLW_LIB_DEPS="$GLW_LIB_DEPS"
-    else
-        APP_LIB_DEPS="$APP_LIB_DEPS $GLW_LIB_DEPS"
-        GLW_LIB_DEPS=""
-        GLW_MESA_DEPS=""
-    fi
-fi
-AC_SUBST([GLW_LIB_DEPS])
-AC_SUBST([GLW_MESA_DEPS])
-AC_SUBST([GLW_SOURCES])
-AC_SUBST([MOTIF_CFLAGS])
-AC_SUBST([GLW_PC_REQ_PRIV])
-AC_SUBST([GLW_PC_LIB_PRIV])
-AC_SUBST([GLW_PC_CFLAGS])
-
-dnl
 dnl Program library dependencies
 dnl    Only libm is added here if necessary as the libraries should
 dnl    be pulled in by the linker
@@ -1699,9 +1626,6 @@
 dnl
 if test "x$with_gallium_drivers" != x; then
     SRC_DIRS="$SRC_DIRS gallium gallium/winsys gallium/targets"
-    AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no])
-else
-    LLVM_CONFIG=no
 fi
 
 AC_SUBST([LLVM_CFLAGS])
@@ -1821,6 +1745,8 @@
     esac
 fi
 if test "x$enable_gallium_llvm" = xyes; then
+    AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no])
+
     if test "x$LLVM_CONFIG" != xno; then
 	LLVM_VERSION=`$LLVM_CONFIG --version`
 	LLVM_CFLAGS=`$LLVM_CONFIG --cppflags|sed 's/-DNDEBUG\>//g'`
@@ -1929,7 +1855,7 @@
             ;;
         xr600)
             GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
-            gallium_check_st "r600/drm" "dri-r600" "" "" "xvmc-r600" "vdpau-r600" "va-r600"
+            gallium_check_st "r600/drm radeon/drm" "dri-r600" "xorg-r600" "" "xvmc-r600" "vdpau-r600" "va-r600"
             ;;
         xnouveau)
             PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau >= $LIBDRM_NOUVEAU_REQUIRED])
@@ -1937,11 +1863,12 @@
             gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "" "xvmc-nouveau"
             ;;
         xswrast)
+            GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
+            if test "x$MESA_LLVM" = x1; then
+                GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe"
+            fi
+
             if test "x$HAVE_ST_DRI" = xyes; then
-                GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
-                if test "x$MESA_LLVM" = x1; then
-                    GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe"
-                fi
                 GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast"
             fi
             if test "x$HAVE_ST_VDPAU" = xyes; then
@@ -1959,9 +1886,6 @@
                if test "x$HAVE_WINSYS_XLIB" != xyes; then
                   GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib"
                fi
-               if test "x$HAVE_ST_DRI" != xyes; then
-                GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
-               fi
             fi
             ;;
         *)
@@ -2054,7 +1978,6 @@
 
 echo ""
 echo "        GLU:             $enable_glu"
-echo "        GLw:             $enable_glw (Motif: $enable_motif)"
 
 dnl EGL
 echo ""
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 135bc4b..ff1f502 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -114,6 +114,19 @@
 GL_ARB_viewport_array                                not started
 
 
+GL 4.2:
+GLSL 4.2                                             not started
+GL_ARB_texture_compression_bptc                      not started
+GL_ARB_compressed_texture_pixel_storage              not started
+GL_ARB_shader_atomic_counters                        not started
+GL_ARB_texture_storage                               not started
+GL_ARB_transform_feedback_instanced                  not started
+GL_ARB_base_instance                                 not started
+GL_ARB_shader_image_load_store                       not started
+GL_ARB_conservative_depth                            DONE (compiler)
+GL_ARB_shading_language_420pack                      not started
+GL_ARB_internalformat_query                          not started
+GL_ARB_map_buffer_alignment                          not started
 
 
 More info about these features and the work involved can be found at
diff --git a/docs/autoconf.html b/docs/autoconf.html
index 64bcbd4..895cf66 100644
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -20,7 +20,6 @@
 <li><a href="#library">Library Options</a></li>
   <ul>
   <li><a href="#glu">GLU</a></li>
-  <li><a href="#glw">GLw</a></li>
   </ul>
 <li><a href="#demos">Demo Program Options</a></li>
 </ol>
@@ -245,12 +244,6 @@
 on all drivers. This can be disable with the option
 <code>--disable-glu</code>.
 </li>
-
-<a name="glw">
-<li><b><em>GLw</em></b> - The libGLw library will be built by default
-if libGLU has been enabled. This can be disable with the option
-<code>--disable-glw</code>.
-</li>
 </ul>
 </p>
 
diff --git a/docs/contents.html b/docs/contents.html
index 8fc2ac0..46e458e 100644
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -63,6 +63,7 @@
 <LI><A HREF="extensions.html" target="MainFrame">Mesa Extensions</A>
 <LI><A HREF="mangling.html" target="MainFrame">Function Name Mangling</A>
 <LI><A href="llvmpipe.html" target="MainFrame">Gallium llvmpipe driver</A>
+<LI><A href="postprocess.html" target="MainFrame">Gallium post-processing</A>
 </ul>
 
 <b>Developer Topics</b>
diff --git a/docs/devinfo.html b/docs/devinfo.html
index 8887dd0..d9e82e2 100644
--- a/docs/devinfo.html
+++ b/docs/devinfo.html
@@ -72,6 +72,13 @@
 </p>
 
 <p>
+Line width: the preferred width to fill comments and code in Mesa is 78
+columns.  Exceptions are sometimes made for clarity (e.g. tabular data is
+sometimes filled to a much larger width so that extraneous carriage returns
+don't obscure the table).
+</p>
+
+<p>
 Brace example:
 </p>
 <pre>
@@ -81,10 +88,26 @@
 	else {
 	   bar;
 	}
+
+	switch (condition) {
+	case 0:
+	   foo();
+	   break;
+
+	case 1: {
+	   ...
+	   break;
+	}
+
+	default:
+	   ...
+	   break;
+	}
 </pre>
 
 <p>
 Here's the GNU indent command which will best approximate my preferred style:
+(Note that it won't format switch statements in the preferred way)
 </p>
 <pre>
 	indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
@@ -114,6 +137,16 @@
 	_mesa_foo_bar()  - an internal non-static Mesa function
 </pre>
 
+<p>
+Places that are not directly visible to the GL API should prefer the use
+of <tt>bool</tt>, <tt>true</tt>, and
+<tt>false</tt> over <tt>GLboolean</tt>, <tt>GL_TRUE</tt>, and
+<tt>GL_FALSE</tt>.  In C code, this may mean that
+<tt>#include &lt;stdbool.h&gt;</tt> need to be added.  The
+<tt>try_emit_</tt>* methods in src/mesa/program/ir_to_mesa.cpp and
+src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as an example.
+</p>
+
 
 <H2>Making a New Mesa Release</H2>
 
diff --git a/docs/download.html b/docs/download.html
index 3c4d597..4e8fc2f 100644
--- a/docs/download.html
+++ b/docs/download.html
@@ -84,7 +84,6 @@
 src/gallium     - sources for Gallium and Gallium drivers
 src/glu		- libGLU source code
 src/glx		- sources for building libGL with full GLX and DRI support
-src/glw		- Xt/Motif/OpenGL widget code
 </pre>
 
 If you downloaded and unpacked the MesaGLUT.x.y.z package:
diff --git a/docs/faq.html b/docs/faq.html
index 071381c..bf6545f 100644
--- a/docs/faq.html
+++ b/docs/faq.html
@@ -204,8 +204,13 @@
 </a></p>
 
 
+<h2><a name="part2">2.4 Where is the GLw library?</a></h2>
+<p>
+<a name="part2">GLw (OpenGL widget library) is now available from a separate <a href="http://cgit.freedesktop.org/mesa/glw/">git repository</a>.  Unless you're using very old Xt/Motif applications with OpenGL, you shouldn't need it.
+</a></p>
 
-<h2><a name="part2">2.4 What's the proper place for the libraries and headers?</a></h2>
+
+<h2><a name="part2">2.5 What's the proper place for the libraries and headers?</a></h2>
 <p>
 <a name="part2">On Linux-based systems you'll want to follow the
 </a><a href="http://oss.sgi.com/projects/ogl-sample/ABI/index.html"
diff --git a/docs/install.html b/docs/install.html
index e101811..c86a755 100644
--- a/docs/install.html
+++ b/docs/install.html
@@ -157,9 +157,6 @@
 lrwxrwxrwx    1 brian    users          12 Mar 26 07:53 libglut.so -> libglut.so.3*
 lrwxrwxrwx    1 brian    users          16 Mar 26 07:53 libglut.so.3 -> libglut.so.3.7.1*
 -rwxr-xr-x    1 brian    users      597754 Mar 26 07:53 libglut.so.3.7.1*
-lrwxrwxrwx    1 brian    users          11 Mar 26 08:04 libGLw.so -> libGLw.so.1*
-lrwxrwxrwx    1 brian    users          15 Mar 26 08:04 libGLw.so.1 -> libGLw.so.1.0.0*
--rwxr-xr-x    1 brian    users       20750 Mar 26 08:04 libGLw.so.1.0.0*
 lrwxrwxrwx    1 brian    users          14 Mar 26 07:53 libOSMesa.so -> libOSMesa.so.6*
 lrwxrwxrwx    1 brian    users          23 Mar 26 07:53 libOSMesa.so.6 -> libOSMesa.so.6.1.060100*
 -rwxr-xr-x    1 brian    users       23871 Mar 26 07:53 libOSMesa.so.6.1.060100*
@@ -172,8 +169,6 @@
 <br>
 <b>libglut</b> is the GLUT library.
 <br>
-<b>libGLw</b> is the Xt/Motif OpenGL drawing area widget library.
-<br>
 <b>libOSMesa</b> is the OSMesa (Off-Screen) interface library.
 </p>
 
diff --git a/docs/postprocess.html b/docs/postprocess.html
new file mode 100644
index 0000000..2a37969
--- /dev/null
+++ b/docs/postprocess.html
@@ -0,0 +1,56 @@
+<HTML>
+
+<TITLE>Gallium Post-processing</TITLE>
+
+<link rel="stylesheet" type="text/css" href="mesa.css"></head>
+
+<BODY>
+
+<H1>Gallium Post-processing</H1>
+
+<p>
+The Gallium drivers support user-defined image post-processing.
+At the end of drawing a frame a post-processing filter can be applied to
+the rendered image.
+Example filters include morphological antialiasing and cell shading.
+</p>
+
+<p>
+The filters can be toggled per-app via driconf, or per-session via the
+corresponding environment variables.
+</p>
+
+<p>
+Multiple filters can be used together.
+</p>
+
+
+<H2>PP environment variables</H2>
+
+<ul>
+<li>PP_DEBUG - If defined debug information will be printed to stderr.
+</ul>
+
+<h2>Current filters</h2>
+
+<ul>
+<li>pp_nored, pp_nogreen, pp_noblue - set to 1 to remove the corresponding color channel.
+These are basic filters for easy testing of the PP queue.
+<li>pp_jimenezmlaa, pp_jimenezmlaa_color -
+<a href="http://www.iryokufx.com/mlaa/" target=_blank>Jimenez's MLAA</a>
+is a morphological antialiasing filter.
+The two versions use depth and color data, respectively.
+Which works better depends on the app - depth will not blur text, but it will
+miss transparent textures for example.
+Set to a number from 2 to 32, roughly corresponding to quality.
+Numbers higher than 8 see minimizing gains.
+<li>pp_celshade - set to 1 to enable cell shading (a more complex color filter).
+</ul>
+
+
+<br>
+<br>
+
+
+</BODY>
+</HTML>
diff --git a/docs/sourcetree.html b/docs/sourcetree.html
index 2e2d1d3..713e25b 100644
--- a/docs/sourcetree.html
+++ b/docs/sourcetree.html
@@ -153,8 +153,6 @@
   <li><b>glx</b> - The GLX library code for building libGL.  This is used for
          direct rendering drivers.  It will dynamically load one of the 
          xxx_dri.so drivers.
-  <li><b>glw</b> - Widgets for Xt/Motif.
-  <li><b>glew</b> - OpenGL Extension Wrangler library (used by demo programs)
   </ul>
 <li><b>progs</b> - OpenGL test and demonstration programs
 <li><b>lib</b> - where the GL libraries are placed
diff --git a/include/EGL/eglext.h b/include/EGL/eglext.h
index 9915b8c..0449ae2 100644
--- a/include/EGL/eglext.h
+++ b/include/EGL/eglext.h
@@ -390,6 +390,20 @@
 #define EGL_Y_INVERTED_NOK			0x307F
 #endif /* EGL_NOK_texture_from_pixmap */
 
+#ifndef EGL_ANDROID_image_native_buffer
+#define EGL_ANDROID_image_native_buffer 1
+struct android_native_buffer_t;
+#define EGL_NATIVE_BUFFER_ANDROID       0x3140  /* eglCreateImageKHR target */
+#endif
+
+#ifndef EGL_ANDROID_swap_rectangle
+#define EGL_ANDROID_swap_rectangle 1
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSetSwapRectangleANDROID (EGLDisplay dpy, EGLSurface draw, EGLint left, EGLint top, EGLint width, EGLint height);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSETSWAPRECTANGLEANDROIDPROC) (EGLDisplay dpy, EGLSurface draw, EGLint left, EGLint top, EGLint width, EGLint height);
+#endif
+
 
 #ifdef __cplusplus
 }
diff --git a/include/EGL/eglplatform.h b/include/EGL/eglplatform.h
index fbfdce3..8d3f72f 100644
--- a/include/EGL/eglplatform.h
+++ b/include/EGL/eglplatform.h
@@ -90,6 +90,15 @@
 typedef struct gbm_bo      *EGLNativePixmapType;
 typedef void               *EGLNativeWindowType;
 
+#elif defined(ANDROID) /* Android */
+
+struct ANativeWindow;
+struct egl_native_pixmap_t;
+
+typedef struct ANativeWindow        *EGLNativeWindowType;
+typedef struct egl_native_pixmap_t  *EGLNativePixmapType;
+typedef void                        *EGLNativeDisplayType;
+
 #elif defined(__unix__) || defined(__unix)
 
 #ifdef MESA_EGL_NO_X11_HEADERS
diff --git a/include/GL/glext.h b/include/GL/glext.h
index 9048515..0940021 100644
--- a/include/GL/glext.h
+++ b/include/GL/glext.h
@@ -29,9 +29,9 @@
 */
 
 /* Header file version number, required by OpenGL ABI for Linux */
-/* glext.h last updated $Date: 2011-07-06 02:49:14 -0700 (Wed, 06 Jul 2011) $ */
+/* glext.h last updated $Date: 2011-08-08 00:34:29 -0700 (Mon, 08 Aug 2011) $ */
 /* Current version at http://www.opengl.org/registry/ */
-#define GL_GLEXT_VERSION 71
+#define GL_GLEXT_VERSION 72
 /* Function declaration macros - to move into glplatform.h */
 
 #if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__)
@@ -1047,6 +1047,124 @@
 /* reuse GL_UNDEFINED_VERTEX */
 #endif
 
+#ifndef GL_VERSION_4_2
+/* Reuse tokens from ARB_base_instance (none) */
+/* Reuse tokens from ARB_shading_language_420pack (none) */
+/* Reuse tokens from ARB_transform_feedback_instanced (none) */
+/* Reuse tokens from ARB_compressed_texture_pixel_storage */
+/* reuse GL_UNPACK_COMPRESSED_BLOCK_WIDTH */
+/* reuse GL_UNPACK_COMPRESSED_BLOCK_HEIGHT */
+/* reuse GL_UNPACK_COMPRESSED_BLOCK_DEPTH */
+/* reuse GL_UNPACK_COMPRESSED_BLOCK_SIZE */
+/* reuse GL_PACK_COMPRESSED_BLOCK_WIDTH */
+/* reuse GL_PACK_COMPRESSED_BLOCK_HEIGHT */
+/* reuse GL_PACK_COMPRESSED_BLOCK_DEPTH */
+/* reuse GL_PACK_COMPRESSED_BLOCK_SIZE */
+/* Reuse tokens from ARB_conservative_depth (none) */
+/* Reuse tokens from ARB_internalformat_query */
+/* reuse GL_NUM_SAMPLE_COUNTS */
+/* Reuse tokens from ARB_map_buffer_alignment */
+/* reuse GL_MIN_MAP_BUFFER_ALIGNMENT */
+/* Reuse tokens from ARB_shader_atomic_counters */
+/* reuse GL_ATOMIC_COUNTER_BUFFER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_BINDING */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_START */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_SIZE */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_DATA_SIZE */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTERS */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTER_INDICES */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_VERTEX_SHADER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_FRAGMENT_SHADER */
+/* reuse GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_VERTEX_ATOMIC_COUNTERS */
+/* reuse GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS */
+/* reuse GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS */
+/* reuse GL_MAX_GEOMETRY_ATOMIC_COUNTERS */
+/* reuse GL_MAX_FRAGMENT_ATOMIC_COUNTERS */
+/* reuse GL_MAX_COMBINED_ATOMIC_COUNTERS */
+/* reuse GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE */
+/* reuse GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS */
+/* reuse GL_ACTIVE_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX */
+/* reuse GL_UNSIGNED_INT_ATOMIC_COUNTER */
+/* Reuse tokens from ARB_shader_image_load_store */
+/* reuse GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT */
+/* reuse GL_ELEMENT_ARRAY_BARRIER_BIT */
+/* reuse GL_UNIFORM_BARRIER_BIT */
+/* reuse GL_TEXTURE_FETCH_BARRIER_BIT */
+/* reuse GL_SHADER_IMAGE_ACCESS_BARRIER_BIT */
+/* reuse GL_COMMAND_BARRIER_BIT */
+/* reuse GL_PIXEL_BUFFER_BARRIER_BIT */
+/* reuse GL_TEXTURE_UPDATE_BARRIER_BIT */
+/* reuse GL_BUFFER_UPDATE_BARRIER_BIT */
+/* reuse GL_FRAMEBUFFER_BARRIER_BIT */
+/* reuse GL_TRANSFORM_FEEDBACK_BARRIER_BIT */
+/* reuse GL_ATOMIC_COUNTER_BARRIER_BIT */
+/* reuse GL_ALL_BARRIER_BITS */
+/* reuse GL_MAX_IMAGE_UNITS */
+/* reuse GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS */
+/* reuse GL_IMAGE_BINDING_NAME */
+/* reuse GL_IMAGE_BINDING_LEVEL */
+/* reuse GL_IMAGE_BINDING_LAYERED */
+/* reuse GL_IMAGE_BINDING_LAYER */
+/* reuse GL_IMAGE_BINDING_ACCESS */
+/* reuse GL_IMAGE_1D */
+/* reuse GL_IMAGE_2D */
+/* reuse GL_IMAGE_3D */
+/* reuse GL_IMAGE_2D_RECT */
+/* reuse GL_IMAGE_CUBE */
+/* reuse GL_IMAGE_BUFFER */
+/* reuse GL_IMAGE_1D_ARRAY */
+/* reuse GL_IMAGE_2D_ARRAY */
+/* reuse GL_IMAGE_CUBE_MAP_ARRAY */
+/* reuse GL_IMAGE_2D_MULTISAMPLE */
+/* reuse GL_IMAGE_2D_MULTISAMPLE_ARRAY */
+/* reuse GL_INT_IMAGE_1D */
+/* reuse GL_INT_IMAGE_2D */
+/* reuse GL_INT_IMAGE_3D */
+/* reuse GL_INT_IMAGE_2D_RECT */
+/* reuse GL_INT_IMAGE_CUBE */
+/* reuse GL_INT_IMAGE_BUFFER */
+/* reuse GL_INT_IMAGE_1D_ARRAY */
+/* reuse GL_INT_IMAGE_2D_ARRAY */
+/* reuse GL_INT_IMAGE_CUBE_MAP_ARRAY */
+/* reuse GL_INT_IMAGE_2D_MULTISAMPLE */
+/* reuse GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY */
+/* reuse GL_UNSIGNED_INT_IMAGE_1D */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D */
+/* reuse GL_UNSIGNED_INT_IMAGE_3D */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D_RECT */
+/* reuse GL_UNSIGNED_INT_IMAGE_CUBE */
+/* reuse GL_UNSIGNED_INT_IMAGE_BUFFER */
+/* reuse GL_UNSIGNED_INT_IMAGE_1D_ARRAY */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D_ARRAY */
+/* reuse GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY */
+/* reuse GL_MAX_IMAGE_SAMPLES */
+/* reuse GL_IMAGE_BINDING_FORMAT */
+/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_TYPE */
+/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE */
+/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS */
+/* reuse GL_MAX_VERTEX_IMAGE_UNIFORMS */
+/* reuse GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS */
+/* reuse GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS */
+/* reuse GL_MAX_GEOMETRY_IMAGE_UNIFORMS */
+/* reuse GL_MAX_FRAGMENT_IMAGE_UNIFORMS */
+/* reuse GL_MAX_COMBINED_IMAGE_UNIFORMS */
+/* Reuse tokens from ARB_shading_language_packing (none) */
+/* Reuse tokens from ARB_texture_storage */
+/* reuse GL_TEXTURE_IMMUTABLE_FORMAT */
+#endif
+
 #ifndef GL_ARB_multitexture
 #define GL_TEXTURE0_ARB                   0x84C0
 #define GL_TEXTURE1_ARB                   0x84C1
@@ -2140,6 +2258,143 @@
 #ifndef GL_ARB_shader_stencil_export
 #endif
 
+#ifndef GL_ARB_base_instance
+#endif
+
+#ifndef GL_ARB_shading_language_420pack
+#endif
+
+#ifndef GL_ARB_transform_feedback_instanced
+#endif
+
+#ifndef GL_ARB_compressed_texture_pixel_storage
+#define GL_UNPACK_COMPRESSED_BLOCK_WIDTH  0x9127
+#define GL_UNPACK_COMPRESSED_BLOCK_HEIGHT 0x9128
+#define GL_UNPACK_COMPRESSED_BLOCK_DEPTH  0x9129
+#define GL_UNPACK_COMPRESSED_BLOCK_SIZE   0x912A
+#define GL_PACK_COMPRESSED_BLOCK_WIDTH    0x912B
+#define GL_PACK_COMPRESSED_BLOCK_HEIGHT   0x912C
+#define GL_PACK_COMPRESSED_BLOCK_DEPTH    0x912D
+#define GL_PACK_COMPRESSED_BLOCK_SIZE     0x912E
+#endif
+
+#ifndef GL_ARB_conservative_depth
+#endif
+
+#ifndef GL_ARB_internalformat_query
+#define GL_NUM_SAMPLE_COUNTS              0x9380
+#endif
+
+#ifndef GL_ARB_map_buffer_alignment
+#define GL_MIN_MAP_BUFFER_ALIGNMENT       0x90BC
+#endif
+
+#ifndef GL_ARB_shader_atomic_counters
+#define GL_ATOMIC_COUNTER_BUFFER          0x92C0
+#define GL_ATOMIC_COUNTER_BUFFER_BINDING  0x92C1
+#define GL_ATOMIC_COUNTER_BUFFER_START    0x92C2
+#define GL_ATOMIC_COUNTER_BUFFER_SIZE     0x92C3
+#define GL_ATOMIC_COUNTER_BUFFER_DATA_SIZE 0x92C4
+#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTERS 0x92C5
+#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTER_INDICES 0x92C6
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_VERTEX_SHADER 0x92C7
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER 0x92C8
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER 0x92C9
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER 0x92CA
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_FRAGMENT_SHADER 0x92CB
+#define GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS 0x92CC
+#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS 0x92CD
+#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS 0x92CE
+#define GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS 0x92CF
+#define GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS 0x92D0
+#define GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS 0x92D1
+#define GL_MAX_VERTEX_ATOMIC_COUNTERS     0x92D2
+#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS 0x92D3
+#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS 0x92D4
+#define GL_MAX_GEOMETRY_ATOMIC_COUNTERS   0x92D5
+#define GL_MAX_FRAGMENT_ATOMIC_COUNTERS   0x92D6
+#define GL_MAX_COMBINED_ATOMIC_COUNTERS   0x92D7
+#define GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE 0x92D8
+#define GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS 0x92DC
+#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS  0x92D9
+#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA
+#define GL_UNSIGNED_INT_ATOMIC_COUNTER    0x92DB
+#endif
+
+#ifndef GL_ARB_shader_image_load_store
+#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
+#define GL_ELEMENT_ARRAY_BARRIER_BIT      0x00000002
+#define GL_UNIFORM_BARRIER_BIT            0x00000004
+#define GL_TEXTURE_FETCH_BARRIER_BIT      0x00000008
+#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
+#define GL_COMMAND_BARRIER_BIT            0x00000040
+#define GL_PIXEL_BUFFER_BARRIER_BIT       0x00000080
+#define GL_TEXTURE_UPDATE_BARRIER_BIT     0x00000100
+#define GL_BUFFER_UPDATE_BARRIER_BIT      0x00000200
+#define GL_FRAMEBUFFER_BARRIER_BIT        0x00000400
+#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
+#define GL_ATOMIC_COUNTER_BARRIER_BIT     0x00001000
+#define GL_ALL_BARRIER_BITS               0xFFFFFFFF
+#define GL_MAX_IMAGE_UNITS                0x8F38
+#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
+#define GL_IMAGE_BINDING_NAME             0x8F3A
+#define GL_IMAGE_BINDING_LEVEL            0x8F3B
+#define GL_IMAGE_BINDING_LAYERED          0x8F3C
+#define GL_IMAGE_BINDING_LAYER            0x8F3D
+#define GL_IMAGE_BINDING_ACCESS           0x8F3E
+#define GL_IMAGE_1D                       0x904C
+#define GL_IMAGE_2D                       0x904D
+#define GL_IMAGE_3D                       0x904E
+#define GL_IMAGE_2D_RECT                  0x904F
+#define GL_IMAGE_CUBE                     0x9050
+#define GL_IMAGE_BUFFER                   0x9051
+#define GL_IMAGE_1D_ARRAY                 0x9052
+#define GL_IMAGE_2D_ARRAY                 0x9053
+#define GL_IMAGE_CUBE_MAP_ARRAY           0x9054
+#define GL_IMAGE_2D_MULTISAMPLE           0x9055
+#define GL_IMAGE_2D_MULTISAMPLE_ARRAY     0x9056
+#define GL_INT_IMAGE_1D                   0x9057
+#define GL_INT_IMAGE_2D                   0x9058
+#define GL_INT_IMAGE_3D                   0x9059
+#define GL_INT_IMAGE_2D_RECT              0x905A
+#define GL_INT_IMAGE_CUBE                 0x905B
+#define GL_INT_IMAGE_BUFFER               0x905C
+#define GL_INT_IMAGE_1D_ARRAY             0x905D
+#define GL_INT_IMAGE_2D_ARRAY             0x905E
+#define GL_INT_IMAGE_CUBE_MAP_ARRAY       0x905F
+#define GL_INT_IMAGE_2D_MULTISAMPLE       0x9060
+#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
+#define GL_UNSIGNED_INT_IMAGE_1D          0x9062
+#define GL_UNSIGNED_INT_IMAGE_2D          0x9063
+#define GL_UNSIGNED_INT_IMAGE_3D          0x9064
+#define GL_UNSIGNED_INT_IMAGE_2D_RECT     0x9065
+#define GL_UNSIGNED_INT_IMAGE_CUBE        0x9066
+#define GL_UNSIGNED_INT_IMAGE_BUFFER      0x9067
+#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY    0x9068
+#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY    0x9069
+#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
+#define GL_MAX_IMAGE_SAMPLES              0x906D
+#define GL_IMAGE_BINDING_FORMAT           0x906E
+#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
+#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
+#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
+#define GL_MAX_VERTEX_IMAGE_UNIFORMS      0x90CA
+#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
+#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
+#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS    0x90CD
+#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS    0x90CE
+#define GL_MAX_COMBINED_IMAGE_UNIFORMS    0x90CF
+#endif
+
+#ifndef GL_ARB_shading_language_packing
+#endif
+
+#ifndef GL_ARB_texture_storage
+#define GL_TEXTURE_IMMUTABLE_FORMAT       0x912F
+#endif
+
 #ifndef GL_EXT_abgr
 #define GL_ABGR_EXT                       0x8000
 #endif
@@ -5917,7 +6172,7 @@
 
 #ifndef GL_VERSION_4_1
 #define GL_VERSION_4_1 1
-/* OpenGL 4.1 also reuses entry points from these extensions: */
+/* OpenGL 4.1 reuses entry points from these extensions: */
 /* ARB_ES2_compatibility */
 /* ARB_get_program_binary */
 /* ARB_separate_shader_objects */
@@ -5926,6 +6181,22 @@
 /* ARB_viewport_array */
 #endif
 
+#ifndef GL_VERSION_4_2
+#define GL_VERSION_4_2 1
+/* OpenGL 4.2 reuses entry points from these extensions: */
+/* ARB_base_instance */
+/* ARB_shading_language_420pack (no entry points) */
+/* ARB_transform_feedback_instanced */
+/* ARB_compressed_texture_pixel_storage (no entry points) */
+/* ARB_conservative_depth (no entry points) */
+/* ARB_internalformat_query */
+/* ARB_map_buffer_alignment (no entry points) */
+/* ARB_shader_atomic_counters */
+/* ARB_shader_image_load_store */
+/* ARB_shading_language_packing (no entry points) */
+/* ARB_texture_storage */
+#endif
+
 #ifndef GL_ARB_multitexture
 #define GL_ARB_multitexture 1
 #ifdef GL_GLEXT_PROTOTYPES
@@ -6851,6 +7122,10 @@
 typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIUIVPROC) (GLuint sampler, GLenum pname, GLuint *params);
 #endif
 
+#ifndef GL_ARB_shader_bit_encoding
+#define GL_ARB_shader_bit_encoding 1
+#endif
+
 #ifndef GL_ARB_texture_rgb10_a2ui
 #define GL_ARB_texture_rgb10_a2ui 1
 #endif
@@ -7357,6 +7632,92 @@
 #define GL_ARB_shader_stencil_export 1
 #endif
 
+#ifndef GL_ARB_base_instance
+#define GL_ARB_base_instance 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawArraysInstancedBaseInstance (GLenum mode, GLint first, GLsizei count, GLsizei primcount, GLuint baseinstance);
+GLAPI void APIENTRY glDrawElementsInstancedBaseInstance (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLuint baseinstance);
+GLAPI void APIENTRY glDrawElementsInstancedBaseVertexBaseInstance (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLint basevertex, GLuint baseinstance);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEPROC) (GLenum mode, GLint first, GLsizei count, GLsizei primcount, GLuint baseinstance);
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLuint baseinstance);
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLint basevertex, GLuint baseinstance);
+#endif
+
+#ifndef GL_ARB_shading_language_420pack
+#define GL_ARB_shading_language_420pack 1
+#endif
+
+#ifndef GL_ARB_transform_feedback_instanced
+#define GL_ARB_transform_feedback_instanced 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawTransformFeedbackInstanced (GLenum mode, GLuint id, GLsizei primcount);
+GLAPI void APIENTRY glDrawTransformFeedbackStreamInstanced (GLenum mode, GLuint id, GLuint stream, GLsizei primcount);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC) (GLenum mode, GLuint id, GLsizei primcount);
+typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC) (GLenum mode, GLuint id, GLuint stream, GLsizei primcount);
+#endif
+
+#ifndef GL_ARB_compressed_texture_pixel_storage
+#define GL_ARB_compressed_texture_pixel_storage 1
+#endif
+
+#ifndef GL_ARB_conservative_depth
+#define GL_ARB_conservative_depth 1
+#endif
+
+#ifndef GL_ARB_internalformat_query
+#define GL_ARB_internalformat_query 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetInternalformativ (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params);
+#endif
+
+#ifndef GL_ARB_map_buffer_alignment
+#define GL_ARB_map_buffer_alignment 1
+#endif
+
+#ifndef GL_ARB_shader_atomic_counters
+#define GL_ARB_shader_atomic_counters 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetActiveAtomicCounterBufferiv (GLuint program, GLuint bufferIndex, GLenum pname, GLint *params);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC) (GLuint program, GLuint bufferIndex, GLenum pname, GLint *params);
+#endif
+
+#ifndef GL_ARB_shader_image_load_store
+#define GL_ARB_shader_image_load_store 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBindImageTexture (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
+GLAPI void APIENTRY glMemoryBarrier (GLbitfield barriers);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
+typedef void (APIENTRYP PFNGLMEMORYBARRIERPROC) (GLbitfield barriers);
+#endif
+
+#ifndef GL_ARB_shading_language_packing
+#define GL_ARB_shading_language_packing 1
+#endif
+
+#ifndef GL_ARB_texture_storage
+#define GL_ARB_texture_storage 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexStorage1D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
+GLAPI void APIENTRY glTexStorage2D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+GLAPI void APIENTRY glTexStorage3D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+GLAPI void APIENTRY glTextureStorage1DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
+GLAPI void APIENTRY glTextureStorage2DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+GLAPI void APIENTRY glTextureStorage3DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXSTORAGE1DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
+typedef void (APIENTRYP PFNGLTEXSTORAGE2DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLTEXSTORAGE3DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+typedef void (APIENTRYP PFNGLTEXTURESTORAGE1DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
+typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+#endif
+
 #ifndef GL_EXT_abgr
 #define GL_EXT_abgr 1
 #endif
diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h
index 4fe9e94..eed159e 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -694,6 +694,9 @@
 #define __DRI_BUFFER_DEPTH_STENCIL	9  /**< Only available with DRI2 1.1 */
 #define __DRI_BUFFER_HIZ		10
 
+/* Inofficial and for internal use. Increase when adding a new buffer token. */
+#define __DRI_BUFFER_COUNT		11
+
 struct __DRIbufferRec {
     unsigned int attachment;
     unsigned int name;
diff --git a/scons/crossmingw.py b/scons/crossmingw.py
index cc04622..4a695a4 100644
--- a/scons/crossmingw.py
+++ b/scons/crossmingw.py
@@ -128,6 +128,42 @@
                                     source_scanner=SCons.Tool.SourceFileScanner)
 SCons.Tool.SourceFileScanner.add_scanner('.rc', SCons.Defaults.CScan)
 
+
+
+def compile_without_gstabs(env, sources, c_file):
+    '''This is a hack used to compile some source files without the
+    -gstabs option.
+
+    It seems that some versions of mingw32's gcc (4.4.2 at least) die
+    when compiling large files with the -gstabs option.  -gstabs is
+    related to debug symbols and can be omitted from the effected
+    files.
+
+    This function compiles the given c_file without -gstabs, removes
+    the c_file from the sources list, then appends the new .o file to
+    sources.  Then return the new sources list.
+    '''
+
+    # Modify CCFLAGS to not have -gstabs option:
+    env2 = env.Clone()
+    flags = str(env2['CCFLAGS'])
+    flags = flags.replace("-gstabs", "")
+    env2['CCFLAGS'] = SCons.Util.CLVar(flags)
+    
+    # Build the special-case files:
+    obj_file = env2.SharedObject(c_file)
+
+    # Replace ".cpp" or ".c" with ".o"
+    o_file = c_file.replace(".cpp", ".o")
+    o_file = o_file.replace(".c", ".o")
+
+    # Replace the .c files with the specially-compiled .o file
+    sources.remove(c_file)
+    sources.append(o_file)
+
+    return sources
+
+
 def generate(env):
     mingw_prefix = find(env)
 
@@ -197,5 +233,7 @@
     # Avoid depending on gcc runtime DLLs
     env.AppendUnique(LINKFLAGS = ['-static-libgcc'])
 
+    env.AddMethod(compile_without_gstabs, 'compile_without_gstabs')
+
 def exists(env):
     return find(env)
diff --git a/scons/custom.py b/scons/custom.py
index df7ac93..b6d716c 100644
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -42,6 +42,7 @@
 
 import fixes
 
+import source_list
 
 def quietCommandLines(env):
     # Quiet command lines
@@ -229,6 +230,40 @@
     env.AddMethod(pkg_use_modules, 'PkgUseModules')
 
 
+def parse_source_list(env, filename, names=None):
+    # parse the source list file
+    parser = source_list.SourceListParser()
+    src = env.File(filename).srcnode()
+    sym_table = parser.parse(src.abspath)
+
+    if names:
+        if isinstance(names, basestring):
+            names = [names]
+
+        symbols = names
+    else:
+        symbols = sym_table.keys()
+
+    # convert the symbol table to source lists
+    src_lists = {}
+    for sym in symbols:
+        val = sym_table[sym]
+        src_lists[sym] = [f for f in val.split(' ') if f]
+
+    # if names are given, concatenate the lists
+    if names:
+        srcs = []
+        for name in names:
+            srcs.extend(src_lists[name])
+
+        return srcs
+    else:
+        return src_lists
+
+def createParseSourceListMethod(env):
+    env.AddMethod(parse_source_list, 'ParseSourceList')
+
+
 def generate(env):
     """Common environment generation code"""
 
@@ -240,6 +275,7 @@
     createConvenienceLibBuilder(env)
     createCodeGenerateMethod(env)
     createPkgConfigMethods(env)
+    createParseSourceListMethod(env)
 
     # for debugging
     #print env.Dump()
diff --git a/scons/gallium.py b/scons/gallium.py
index 8cd3bc7..7135251 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -596,6 +596,18 @@
         libs += ['m', 'pthread', 'dl']
     env.Append(LIBS = libs)
 
+    # OpenMP
+    if env['openmp']:
+        if env['msvc']:
+            env.Append(CCFLAGS = ['/openmp'])
+            # When building openmp release VS2008 link.exe crashes with LNK1103 error.
+            # Workaround: overwrite PDB flags with empty value as it isn't required anyways
+            if env['build'] == 'release':
+                env['PDB'] = ''
+        if env['gcc']:
+            env.Append(CCFLAGS = ['-fopenmp'])
+            env.Append(LIBS = ['gomp'])
+
     # Load tools
     env.Tool('lex')
     env.Tool('yacc')
diff --git a/scons/llvm.py b/scons/llvm.py
index 66f972d..57fe922 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -92,7 +92,21 @@
             'HAVE_STDINT_H',
         ])
         env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
-        if llvm_version >= distutils.version.LooseVersion('2.7'):
+        if llvm_version >= distutils.version.LooseVersion('2.9'):
+            # 2.9
+            env.Prepend(LIBS = [
+                'LLVMObject', 'LLVMMCJIT', 'LLVMMCDisassembler',
+                'LLVMLinker', 'LLVMipo', 'LLVMInterpreter',
+                'LLVMInstrumentation', 'LLVMJIT', 'LLVMExecutionEngine',
+                'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
+                'LLVMMCParser', 'LLVMX86AsmPrinter', 'LLVMX86CodeGen',
+                'LLVMSelectionDAG', 'LLVMX86Utils', 'LLVMX86Info', 'LLVMAsmPrinter',
+                'LLVMCodeGen', 'LLVMScalarOpts', 'LLVMInstCombine',
+                'LLVMTransformUtils', 'LLVMipa', 'LLVMAsmParser',
+                'LLVMArchive', 'LLVMBitReader', 'LLVMAnalysis', 'LLVMTarget',
+                'LLVMCore', 'LLVMMC', 'LLVMSupport',
+            ])
+        elif llvm_version >= distutils.version.LooseVersion('2.7'):
             # 2.7
             env.Prepend(LIBS = [
                 'LLVMLinker', 'LLVMipo', 'LLVMInterpreter',
@@ -121,6 +135,8 @@
         env.Append(LIBS = [
             'imagehlp',
             'psapi',
+            'shell32',
+            'advapi32'
         ])
         if env['msvc']:
             # Some of the LLVM C headers use the inline keyword without
diff --git a/scons/source_list.py b/scons/source_list.py
new file mode 100644
index 0000000..fbd3ef7
--- /dev/null
+++ b/scons/source_list.py
@@ -0,0 +1,123 @@
+"""Source List Parser
+
+The syntax of a source list file is a very small subset of GNU Make.  These
+features are supported
+
+ operators: +=, :=
+ line continuation
+ non-nested variable expansion
+ comment
+
+The goal is to allow Makefile's and SConscript's to share source listing.
+"""
+
+class SourceListParser(object):
+    def __init__(self):
+        self._reset()
+
+    def _reset(self, filename=None):
+        self.filename = filename
+
+        self.line_no = 1
+        self.line_cont = ''
+        self.symbol_table = {}
+
+    def _error(self, msg):
+        raise RuntimeError('%s:%d: %s' % (self.filename, self.line_no, msg))
+
+    def _next_dereference(self, val, cur):
+        """Locate the next $(...) in value."""
+        deref_pos = val.find('$', cur)
+        if deref_pos < 0:
+            return (-1, -1)
+        elif val[deref_pos + 1] != '(':
+            self._error('non-variable dereference')
+
+        deref_end = val.find(')', deref_pos + 2)
+        if deref_end < 0:
+            self._error('unterminated variable dereference')
+
+        return (deref_pos, deref_end + 1)
+
+    def _expand_value(self, val):
+        """Perform variable expansion."""
+        expanded = ''
+        cur = 0
+        while True:
+            deref_pos, deref_end = self._next_dereference(val, cur)
+            if deref_pos < 0:
+                expanded += val[cur:]
+                break
+
+            sym = val[(deref_pos + 2):(deref_end - 1)]
+            expanded += val[cur:deref_pos] + self.symbol_table[sym]
+            cur = deref_end
+
+        return expanded
+
+    def _parse_definition(self, line):
+        """Parse a variable definition line."""
+        op_pos = line.find('=')
+        op_end = op_pos + 1
+        if op_pos < 0:
+            self._error('not a variable definition')
+
+        if op_pos > 0 and line[op_pos - 1] in [':', '+']:
+            op_pos -= 1
+        else:
+            self._error('only := and += are supported')
+
+        # set op, sym, and val
+        op = line[op_pos:op_end]
+        sym = line[:op_pos].strip()
+        val = self._expand_value(line[op_end:].lstrip())
+
+        if op == ':=':
+            self.symbol_table[sym] = val
+        elif op == '+=':
+            self.symbol_table[sym] += ' ' + val
+
+    def _parse_line(self, line):
+        """Parse a source list line."""
+        # more lines to come
+        if line and line[-1] == '\\':
+            # spaces around "\\\n" are replaced by a single space
+            if self.line_cont:
+                self.line_cont += line[:-1].strip() + ' '
+            else:
+                self.line_cont = line[:-1].rstrip() + ' '
+            return 0
+
+        # combine with previous lines
+        if self.line_cont:
+            line = self.line_cont + line.lstrip()
+            self.line_cont = ''
+
+        if line:
+            begins_with_tab = (line[0] == '\t')
+
+            line = line.lstrip()
+            if line[0] != '#':
+                if begins_with_tab:
+                    self._error('recipe line not supported')
+                else:
+                    self._parse_definition(line)
+
+        return 1
+
+    def parse(self, filename):
+        """Parse a source list file."""
+        if self.filename != filename:
+            fp = open(filename)
+            lines = fp.read().splitlines()
+            fp.close()
+
+            try:
+                self._reset(filename)
+                for line in lines:
+                    self.line_no += self._parse_line(line)
+            except:
+                self._reset()
+                raise
+
+        return self.symbol_table
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 0aca929..f09ae14 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -592,6 +592,13 @@
          wl_display_destroy(dri2_dpy->wl_dpy);
          break;
 #endif
+#ifdef HAVE_DRM_PLATFORM
+      case _EGL_PLATFORM_DRM:
+         if (dri2_dpy->own_gbm_device) {
+            gbm_device_destroy(&dri2_dpy->gbm_dri->base.base);
+         }
+         break;
+#endif
       default:
          break;
       }
@@ -719,6 +726,23 @@
 }
 
 /**
+ * Called via eglDestroyContext(), drv->API.DestroyContext().
+ */
+static EGLBoolean
+dri2_destroy_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
+{
+   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
+   if (_eglPutContext(ctx)) {
+      dri2_dpy->core->destroyContext(dri2_ctx->dri_context);
+      free(dri2_ctx);
+   }
+
+   return EGL_TRUE;
+}
+
+/**
  * Called via eglMakeCurrent(), drv->API.MakeCurrent().
  */
 static EGLBoolean
@@ -758,9 +782,8 @@
          drv->API.DestroySurface(drv, disp, old_dsurf);
       if (old_rsurf)
          drv->API.DestroySurface(drv, disp, old_rsurf);
-      /* no destroy? */
       if (old_ctx)
-         _eglPutContext(old_ctx);
+         drv->API.DestroyContext(drv, disp, old_ctx);
 
       return EGL_TRUE;
    } else {
@@ -1341,6 +1364,7 @@
    dri2_drv->base.API.Initialize = dri2_initialize;
    dri2_drv->base.API.Terminate = dri2_terminate;
    dri2_drv->base.API.CreateContext = dri2_create_context;
+   dri2_drv->base.API.DestroyContext = dri2_destroy_context;
    dri2_drv->base.API.MakeCurrent = dri2_make_current;
    dri2_drv->base.API.GetProcAddress = dri2_get_proc_address;
    dri2_drv->base.API.WaitClient = dri2_wait_client;
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 3854200..db93eec 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -86,6 +86,7 @@
 
 #ifdef HAVE_DRM_PLATFORM
    struct gbm_dri_device    *gbm_dri;
+   int                       own_gbm_device;
 #endif
 
    char                     *device_name;
@@ -122,8 +123,6 @@
    WL_BUFFER_THIRD,
    WL_BUFFER_COUNT
 };
-
-#define __DRI_BUFFER_COUNT 10
 #endif
 
 enum dri2_surface_type {
diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index 579baf9..04b10e2 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -30,6 +30,10 @@
 #include <string.h>
 #include <xf86drm.h>
 #include <dlfcn.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #include "egl_dri2.h"
 
@@ -90,6 +94,7 @@
 {
    struct dri2_egl_display *dri2_dpy;
    struct gbm_device *gbm;
+   int fd = -1;
    int i;
 
    dri2_dpy = malloc(sizeof *dri2_dpy);
@@ -100,7 +105,15 @@
 
    disp->DriverData = (void *) dri2_dpy;
 
-   gbm = (struct gbm_device *) disp->PlatformDisplay;
+   gbm = disp->PlatformDisplay;
+   if (gbm == NULL) {
+      fd = open("/dev/dri/card0", O_RDWR);
+      dri2_dpy->own_gbm_device = 1;
+      gbm = gbm_create_device(fd);
+      if (gbm == NULL)
+         return EGL_FALSE;
+   }
+
    if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
       free(dri2_dpy);
       return EGL_FALSE;
@@ -112,7 +125,15 @@
       return EGL_FALSE;
    }
 
-   dri2_dpy->fd = gbm_device_get_fd(gbm);
+   if (fd < 0) {
+      fd = dup(gbm_device_get_fd(gbm));
+      if (fd < 0) {
+         free(dri2_dpy);
+         return EGL_FALSE;
+      }
+   }
+
+   dri2_dpy->fd = fd;
    dri2_dpy->device_name = dri2_get_device_name_for_fd(dri2_dpy->fd);
    dri2_dpy->driver_name = dri2_dpy->gbm_dri->base.driver_name;
 
diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c
index 7cf8f4d..042936f 100644
--- a/src/egl/drivers/glx/egl_glx.c
+++ b/src/egl/drivers/glx/egl_glx.c
@@ -713,6 +713,24 @@
    return &GLX_ctx->Base;
 }
 
+/**
+ * Called via eglDestroyContext(), drv->API.DestroyContext().
+ */
+static EGLBoolean
+GLX_eglDestroyContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
+{
+   struct GLX_egl_driver *GLX_drv = GLX_egl_driver(drv);
+   struct GLX_egl_context *GLX_ctx = GLX_egl_context(ctx);
+
+   if (_eglPutContext(ctx)) {
+      assert(GLX_ctx);
+      GLX_drv->glXDestroyContext(disp, ctx);
+
+      free(GLX_ctx);
+   }
+
+   return EGL_TRUE;
+}
 
 /**
  * Destroy a surface.  The display is allowed to be uninitialized.
@@ -1142,6 +1160,7 @@
    GLX_drv->Base.API.Initialize = GLX_eglInitialize;
    GLX_drv->Base.API.Terminate = GLX_eglTerminate;
    GLX_drv->Base.API.CreateContext = GLX_eglCreateContext;
+   GLX_drv->Base.API.DestroyContext = GLX_eglDestroyContext;
    GLX_drv->Base.API.MakeCurrent = GLX_eglMakeCurrent;
    GLX_drv->Base.API.CreateWindowSurface = GLX_eglCreateWindowSurface;
    GLX_drv->Base.API.CreatePixmapSurface = GLX_eglCreatePixmapSurface;
diff --git a/src/egl/main/Android.mk b/src/egl/main/Android.mk
new file mode 100644
index 0000000..25a7c65
--- /dev/null
+++ b/src/egl/main/Android.mk
@@ -0,0 +1,64 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for core EGL
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+SOURCES = \
+	eglapi.c \
+	eglarray.c \
+	eglconfig.c \
+	eglcontext.c \
+	eglcurrent.c \
+	egldisplay.c \
+	egldriver.c \
+	eglfallbacks.c \
+	eglglobals.c \
+	eglimage.c \
+	egllog.c \
+	eglmisc.c \
+	eglmode.c \
+	eglscreen.c \
+	eglstring.c \
+	eglsurface.c \
+	eglsync.c
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(SOURCES)
+
+LOCAL_CFLAGS := \
+	-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_ANDROID \
+	-D_EGL_DRIVER_SEARCH_DIR=\"/system/lib/egl\" \
+	-D_EGL_OS_UNIX=1
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+LOCAL_CFLAGS += -D_EGL_BUILT_IN_DRIVER_GALLIUM
+endif
+
+LOCAL_MODULE := libmesa_egl
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile
index 775fbbe..3172ad2 100644
--- a/src/egl/main/Makefile
+++ b/src/egl/main/Makefile
@@ -63,6 +63,7 @@
 endif
 ifneq ($(findstring drm, $(EGL_PLATFORMS)),)
 EGL_LIB_DEPS += -lgbm
+INCLUDE_DIRS += -I$(TOP)/src/gbm/main
 endif
 EGL_LIB_DEPS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) $(LIBDRM_LIB) $(WAYLAND_LIBS)
 endif
@@ -70,6 +71,7 @@
 
 ifneq ($(findstring wayland, $(EGL_PLATFORMS)),)
 LOCAL_LIBS += $(TOP)/src/egl/wayland/wayland-drm/libwayland-drm.a
+INCLUDE_DIRS += $(WAYLAND_CFLAGS)
 endif
 
 ifeq ($(filter glx, $(EGL_DRIVERS_DIRS)),glx)
@@ -93,6 +95,19 @@
 EGL_NATIVE_PLATFORM=_EGL_PLATFORM_FBDEV
 endif
 
+ifneq ($(findstring x11, $(EGL_PLATFORMS)),)
+LOCAL_CFLAGS += -DHAVE_X11_PLATFORM
+endif
+ifneq ($(findstring wayland, $(EGL_PLATFORMS)),)
+LOCAL_CFLAGS += -DHAVE_WAYLAND_PLATFORM
+endif
+ifneq ($(findstring drm, $(EGL_PLATFORMS)),)
+LOCAL_CFLAGS += -DHAVE_DRM_PLATFORM
+endif
+ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),)
+LOCAL_CFLAGS += -DHAVE_FBDEV_PLATFORM
+endif
+
 LOCAL_CFLAGS += \
 	-D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \
 	-D_EGL_DRIVER_SEARCH_DIR=\"$(EGL_DRIVER_INSTALL_DIR)\"
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 0ba7794..3cb1a5b 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -301,7 +301,7 @@
 EGLDisplay EGLAPIENTRY
 eglGetDisplay(EGLNativeDisplayType nativeDisplay)
 {
-   _EGLPlatformType plat = _eglGetNativePlatform();
+   _EGLPlatformType plat = _eglGetNativePlatform(nativeDisplay);
    _EGLDisplay *dpy = _eglFindDisplay(plat, (void *) nativeDisplay);
    return _eglGetDisplayHandle(dpy);
 }
@@ -538,7 +538,7 @@
    EGLSurface ret;
 
    _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
-   if (disp->Platform != _eglGetNativePlatform())
+   if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay))
       RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE);
 
    surf = drv->API.CreateWindowSurface(drv, disp, conf, window, attrib_list);
@@ -559,7 +559,7 @@
    EGLSurface ret;
 
    _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
-   if (disp->Platform != _eglGetNativePlatform())
+   if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay))
       RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_NO_SURFACE);
 
    surf = drv->API.CreatePixmapSurface(drv, disp, conf, pixmap, attrib_list);
@@ -720,7 +720,7 @@
    EGLBoolean ret;
 
    _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv);
-   if (disp->Platform != _eglGetNativePlatform())
+   if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay))
       RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_FALSE);
    ret = drv->API.CopyBuffers(drv, disp, surf, target);
 
@@ -948,6 +948,9 @@
       { "eglBindWaylandDisplayWL", (_EGLProc) eglBindWaylandDisplayWL },
       { "eglUnbindWaylandDisplayWL", (_EGLProc) eglUnbindWaylandDisplayWL },
 #endif
+#ifdef EGL_ANDROID_swap_rectangle
+      { "eglSetSwapRectangleANDROID", (_EGLProc) eglSetSwapRectangleANDROID },
+#endif
       { NULL, NULL }
    };
    EGLint i;
@@ -1565,3 +1568,25 @@
    RETURN_EGL_EVAL(disp, ret);
 }
 #endif
+
+#ifdef EGL_ANDROID_swap_rectangle
+EGLBoolean EGLAPIENTRY
+eglSetSwapRectangleANDROID(EGLDisplay dpy, EGLSurface draw,
+                           EGLint left, EGLint top,
+                           EGLint width, EGLint height)
+{
+   _EGLDisplay *disp = _eglLockDisplay(dpy);
+   _EGLSurface *surf = _eglLookupSurface(draw, disp);
+   _EGLDriver *drv;
+   EGLBoolean ret;
+
+   _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv);
+
+   if (!disp->Extensions.ANDROID_swap_rectangle)
+      RETURN_EGL_EVAL(disp, EGL_FALSE);
+
+   ret = drv->API.SetSwapRectangleANDROID(drv, disp, surf, left, top, width, height);
+
+   RETURN_EGL_EVAL(disp, ret);
+}
+#endif
diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h
index 4fcbe40..1e0aef6 100644
--- a/src/egl/main/eglapi.h
+++ b/src/egl/main/eglapi.h
@@ -131,6 +131,10 @@
 typedef EGLBoolean (*UnbindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp, struct wl_display *display);
 #endif
 
+#ifdef EGL_ANDROID_swap_rectangle
+typedef EGLBoolean (*SetSwapRectangleANDROID_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw, EGLint left, EGLint top, EGLint width, EGLint height);
+#endif
+
 /**
  * The API dispatcher jumps through these functions
  */
@@ -210,6 +214,10 @@
    BindWaylandDisplayWL_t BindWaylandDisplayWL;
    UnbindWaylandDisplayWL_t UnbindWaylandDisplayWL;
 #endif
+
+#ifdef EGL_ANDROID_swap_rectangle
+   SetSwapRectangleANDROID_t SetSwapRectangleANDROID;
+#endif
 };
 
 #endif /* EGLAPI_INCLUDED */
diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c
index 483d980..e1d53da 100644
--- a/src/egl/main/eglconfig.c
+++ b/src/egl/main/eglconfig.c
@@ -529,8 +529,9 @@
    if (!_eglValidateConfig(conf, EGL_TRUE))
       return EGL_FALSE;
 
-   /* the spec says that EGL_LEVEL cannot be EGL_DONT_CARE */
-   if (conf->Level == EGL_DONT_CARE)
+   /* EGL_LEVEL and EGL_MATCH_NATIVE_PIXMAP cannot be EGL_DONT_CARE */
+   if (conf->Level == EGL_DONT_CARE ||
+       conf->MatchNativePixmap == EGL_DONT_CARE)
       return EGL_FALSE;
 
    /* ignore other attributes when EGL_CONFIG_ID is given */
diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c
index 60f3177..1d05e57 100644
--- a/src/egl/main/egldisplay.c
+++ b/src/egl/main/egldisplay.c
@@ -43,6 +43,39 @@
 #include "eglmutex.h"
 #include "egllog.h"
 
+/* Includes for _eglNativePlatformDetectNativeDisplay */
+#ifdef HAVE_MINCORE
+#include <unistd.h>
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_WAYLAND_PLATFORM
+#include <wayland-client.h>
+#endif
+#ifdef HAVE_DRM_PLATFORM
+#include <gbm.h>
+#endif
+#ifdef HAVE_FBDEV_PLATFORM
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#endif
+
+
+/**
+ * Map --with-egl-platforms names to platform types.
+ */
+static const struct {
+   _EGLPlatformType platform;
+   const char *name;
+} egl_platforms[_EGL_NUM_PLATFORMS] = {
+   { _EGL_PLATFORM_WINDOWS, "gdi" },
+   { _EGL_PLATFORM_X11, "x11" },
+   { _EGL_PLATFORM_WAYLAND, "wayland" },
+   { _EGL_PLATFORM_DRM, "drm" },
+   { _EGL_PLATFORM_FBDEV, "fbdev" },
+   { _EGL_PLATFORM_ANDROID, "android" }
+};
+
 
 /**
  * Return the native platform by parsing EGL_PLATFORM.
@@ -50,17 +83,6 @@
 static _EGLPlatformType
 _eglGetNativePlatformFromEnv(void)
 {
-   /* map --with-egl-platforms names to platform types */
-   static const struct {
-      _EGLPlatformType platform;
-      const char *name;
-   } egl_platforms[_EGL_NUM_PLATFORMS] = {
-      { _EGL_PLATFORM_WINDOWS, "gdi" },
-      { _EGL_PLATFORM_X11, "x11" },
-      { _EGL_PLATFORM_WAYLAND, "wayland" },
-      { _EGL_PLATFORM_DRM, "drm" },
-      { _EGL_PLATFORM_FBDEV, "fbdev" }
-   };
    _EGLPlatformType plat = _EGL_INVALID_PLATFORM;
    const char *plat_name;
    EGLint i;
@@ -84,19 +106,105 @@
 
 
 /**
+ * Perform validity checks on a generic pointer.
+ */
+static EGLBoolean
+_eglPointerIsDereferencable(void *p)
+{
+#ifdef HAVE_MINCORE
+   uintptr_t addr = (uintptr_t) p;
+   unsigned char valid = 0;
+   const long page_size = getpagesize();
+
+   if (p == NULL)
+      return EGL_FALSE;
+
+   /* align addr to page_size */
+   addr &= ~(page_size - 1);
+
+   if (mincore((void *) addr, page_size, &valid) < 0) {
+      _eglLog(_EGL_DEBUG, "mincore failed: %m");
+      return EGL_FALSE;
+   }
+
+   return (valid & 0x01) == 0x01;
+#else
+   return p != NULL;
+#endif
+}
+
+
+/**
+ * Try detecting native platform with the help of native display characteristcs.
+ */
+static _EGLPlatformType
+_eglNativePlatformDetectNativeDisplay(EGLNativeDisplayType nativeDisplay)
+{
+#ifdef HAVE_FBDEV_PLATFORM
+   struct stat buf;
+#endif
+
+   if (nativeDisplay == EGL_DEFAULT_DISPLAY)
+      return _EGL_INVALID_PLATFORM;
+
+#ifdef HAVE_FBDEV_PLATFORM
+   /* fbdev is the only platform that can be a file descriptor. */
+   if (fstat((intptr_t) nativeDisplay, &buf) == 0 && S_ISCHR(buf.st_mode))
+      return _EGL_PLATFORM_FBDEV;
+#endif
+
+   if (_eglPointerIsDereferencable(nativeDisplay)) {
+      void *first_pointer = *(void **) nativeDisplay;
+
+#ifdef HAVE_WAYLAND_PLATFORM
+      /* wl_display is a wl_proxy, which is a wl_object.
+       * wl_object's first element points to the interfacetype. */
+      if (first_pointer == &wl_display_interface)
+         return _EGL_PLATFORM_WAYLAND;
+#endif
+
+#ifdef HAVE_DRM_PLATFORM
+      /* gbm has a pointer to its constructor as first element. */
+      if (first_pointer == gbm_create_device)
+         return _EGL_PLATFORM_DRM;
+#endif
+
+#ifdef HAVE_X11_PLATFORM
+      /* If not matched to any other platform, fallback to x11. */
+      return _EGL_PLATFORM_X11;
+#endif
+   }
+
+   return _EGL_INVALID_PLATFORM;
+}
+
+
+/**
  * Return the native platform.  It is the platform of the EGL native types.
  */
 _EGLPlatformType
-_eglGetNativePlatform(void)
+_eglGetNativePlatform(EGLNativeDisplayType nativeDisplay)
 {
    static _EGLPlatformType native_platform = _EGL_INVALID_PLATFORM;
+   char *detection_method = NULL;
 
    if (native_platform == _EGL_INVALID_PLATFORM) {
       native_platform = _eglGetNativePlatformFromEnv();
-      if (native_platform == _EGL_INVALID_PLATFORM)
-         native_platform = _EGL_NATIVE_PLATFORM;
+      detection_method = "environment overwrite";
+      if (native_platform == _EGL_INVALID_PLATFORM) {
+         native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay);
+         detection_method = "autodetected";
+         if (native_platform == _EGL_INVALID_PLATFORM) {
+            native_platform = _EGL_NATIVE_PLATFORM;
+            detection_method = "build-time configuration";
+         }
+      }
    }
 
+   if (detection_method != NULL)
+      _eglLog(_EGL_DEBUG, "Native platform type: %s (%s)",
+              egl_platforms[native_platform].name, detection_method);
+
    return native_platform;
 }
 
diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h
index 9cd4dbf..cddea80 100644
--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -44,6 +44,7 @@
    _EGL_PLATFORM_WAYLAND,
    _EGL_PLATFORM_DRM,
    _EGL_PLATFORM_FBDEV,
+   _EGL_PLATFORM_ANDROID,
 
    _EGL_NUM_PLATFORMS,
    _EGL_INVALID_PLATFORM = -1
@@ -107,6 +108,9 @@
 
    EGLBoolean NOK_swap_region;
    EGLBoolean NOK_texture_from_pixmap;
+
+   EGLBoolean ANDROID_image_native_buffer;
+   EGLBoolean ANDROID_swap_rectangle;
 };
 
 
@@ -150,7 +154,7 @@
 
 
 extern _EGLPlatformType
-_eglGetNativePlatform(void);
+_eglGetNativePlatform(EGLNativeDisplayType nativeDisplay);
 
 
 extern void
diff --git a/src/egl/main/eglmisc.c b/src/egl/main/eglmisc.c
index da189b6..ab48bc6 100644
--- a/src/egl/main/eglmisc.c
+++ b/src/egl/main/eglmisc.c
@@ -113,6 +113,9 @@
 
    _EGL_CHECK_EXTENSION(NOK_swap_region);
    _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap);
+
+   _EGL_CHECK_EXTENSION(ANDROID_image_native_buffer);
+   _EGL_CHECK_EXTENSION(ANDROID_swap_rectangle);
 #undef _EGL_CHECK_EXTENSION
 }
 
diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c
index c9cfb01..3564ecd 100644
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -269,11 +269,13 @@
 {
    const char *func;
    EGLint renderBuffer = EGL_BACK_BUFFER;
+   EGLint swapBehavior = EGL_BUFFER_PRESERVED;
    EGLint err;
 
    switch (type) {
    case EGL_WINDOW_BIT:
       func = "eglCreateWindowSurface";
+      swapBehavior = EGL_BUFFER_DESTROYED;
       break;
    case EGL_PIXMAP_BIT:
       func = "eglCreatePixmapSurface";
@@ -315,7 +317,7 @@
 
    surf->MipmapLevel = 0;
    surf->MultisampleResolve = EGL_MULTISAMPLE_RESOLVE_DEFAULT;
-   surf->SwapBehavior = EGL_BUFFER_DESTROYED;
+   surf->SwapBehavior = swapBehavior;
 
    surf->HorizontalResolution = EGL_UNKNOWN;
    surf->VerticalResolution = EGL_UNKNOWN;
diff --git a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
index 0331f12..cde9430 100644
--- a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
+++ b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
@@ -1,5 +1,32 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <protocol name="drm">
+
+  <copyright>
+    Copyright © 2008-2011 Kristian Høgsberg
+    Copyright © 2010-2011 Intel Corporation
+
+    Permission to use, copy, modify, distribute, and sell this
+    software and its documentation for any purpose is hereby granted
+    without fee, provided that\n the above copyright notice appear in
+    all copies and that both that copyright notice and this permission
+    notice appear in supporting documentation, and that the name of
+    the copyright holders not be used in advertising or publicity
+    pertaining to distribution of the software without specific,
+    written prior permission.  The copyright holders make no
+    representations about the suitability of this software for any
+    purpose.  It is provided "as is" without express or implied
+    warranty.
+
+    THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+    SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+    FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+    AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+    ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+    THIS SOFTWARE.
+  </copyright>
+
   <!-- drm support. This object is created by the server and published
        using the display's global event. -->
   <interface name="wl_drm" version="1">
diff --git a/src/gallium/Android.common.mk b/src/gallium/Android.common.mk
new file mode 100644
index 0000000..782510f
--- /dev/null
+++ b/src/gallium/Android.common.mk
@@ -0,0 +1,32 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# src/gallium/Android.common.mk
+
+LOCAL_C_INCLUDES += \
+	$(GALLIUM_TOP)/include \
+	$(GALLIUM_TOP)/auxiliary \
+	$(GALLIUM_TOP)/winsys \
+	$(GALLIUM_TOP)/drivers
+
+include $(MESA_COMMON_MK)
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
new file mode 100644
index 0000000..b49a61b
--- /dev/null
+++ b/src/gallium/Android.mk
@@ -0,0 +1,44 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# src/gallium/Android.mk
+
+GALLIUM_TOP := $(call my-dir)
+GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk
+
+SUBDIRS := \
+	targets/egl-static \
+	state_trackers/egl \
+	auxiliary
+
+# swrast
+SUBDIRS += winsys/sw/android drivers/softpipe
+
+# r600g
+ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
+SUBDIRS += winsys/radeon/drm
+SUBDIRS += winsys/r600/drm drivers/r600
+endif
+
+mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS))
+include $(mkfiles)
diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk
new file mode 100644
index 0000000..0c37dd3
--- /dev/null
+++ b/src/gallium/auxiliary/Android.mk
@@ -0,0 +1,55 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES and GENERATED_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_C_INCLUDES := $(GALLIUM_TOP)/auxiliary/util
+
+LOCAL_MODULE := libmesa_gallium
+
+# generate sources
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+intermediates := $(call local-intermediates-dir)
+LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(GENERATED_SOURCES))
+
+$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2)
+$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@
+
+$(intermediates)/indices/u_indices_gen.c \
+$(intermediates)/indices/u_unfilled_gen.c \
+$(intermediates)/util/u_format_srgb.c \
+$(intermediates)/util/u_half.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py
+	$(transform-generated-source)
+
+$(intermediates)/util/u_format_table.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/util/u_format.csv
+	$(transform-generated-source)
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 7dae7bc..896c058 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -3,205 +3,10 @@
 
 LIBNAME = gallium
 
-C_SOURCES = \
-	cso_cache/cso_cache.c \
-	cso_cache/cso_context.c \
-	cso_cache/cso_hash.c \
-	draw/draw_context.c \
-	draw/draw_fs.c \
-	draw/draw_gs.c \
-	draw/draw_pipe.c \
-	draw/draw_pipe_aaline.c \
-	draw/draw_pipe_aapoint.c \
-	draw/draw_pipe_clip.c \
-	draw/draw_pipe_cull.c \
-	draw/draw_pipe_flatshade.c \
-	draw/draw_pipe_offset.c \
-	draw/draw_pipe_pstipple.c \
-	draw/draw_pipe_stipple.c \
-	draw/draw_pipe_twoside.c \
-	draw/draw_pipe_unfilled.c \
-	draw/draw_pipe_util.c \
-	draw/draw_pipe_validate.c \
-	draw/draw_pipe_vbuf.c \
-	draw/draw_pipe_wide_line.c \
-	draw/draw_pipe_wide_point.c \
-	draw/draw_pt.c \
-	draw/draw_pt_emit.c \
-	draw/draw_pt_fetch.c \
-	draw/draw_pt_fetch_emit.c \
-	draw/draw_pt_fetch_shade_emit.c \
-	draw/draw_pt_fetch_shade_pipeline.c \
-	draw/draw_pt_post_vs.c \
-	draw/draw_pt_so_emit.c \
-	draw/draw_pt_util.c \
-	draw/draw_pt_vsplit.c \
-	draw/draw_vertex.c \
-	draw/draw_vs.c \
-	draw/draw_vs_aos.c \
-	draw/draw_vs_aos_io.c \
-	draw/draw_vs_aos_machine.c \
-	draw/draw_vs_exec.c \
-	draw/draw_vs_ppc.c \
-	draw/draw_vs_sse.c \
-	draw/draw_vs_variant.c \
-	indices/u_indices_gen.c \
-	indices/u_unfilled_gen.c \
-	os/os_misc.c \
-	os/os_stream.c \
-	os/os_stream_log.c \
-	os/os_stream_null.c \
-	os/os_stream_stdc.c \
-	os/os_stream_str.c \
-	os/os_time.c \
-	pipebuffer/pb_buffer_fenced.c \
-	pipebuffer/pb_buffer_malloc.c \
-	pipebuffer/pb_bufmgr_alt.c \
-	pipebuffer/pb_bufmgr_cache.c \
-	pipebuffer/pb_bufmgr_debug.c \
-	pipebuffer/pb_bufmgr_mm.c \
-	pipebuffer/pb_bufmgr_ondemand.c \
-	pipebuffer/pb_bufmgr_pool.c \
-	pipebuffer/pb_bufmgr_slab.c \
-	pipebuffer/pb_validate.c \
-	rbug/rbug_connection.c \
-	rbug/rbug_context.c \
-	rbug/rbug_core.c \
-	rbug/rbug_demarshal.c \
-	rbug/rbug_texture.c \
-	rbug/rbug_shader.c \
-	rtasm/rtasm_cpu.c \
-	rtasm/rtasm_execmem.c \
-	rtasm/rtasm_ppc.c \
-	rtasm/rtasm_ppc_spe.c \
-	rtasm/rtasm_x86sse.c \
-	tgsi/tgsi_build.c \
-	tgsi/tgsi_dump.c \
-	tgsi/tgsi_exec.c \
-	tgsi/tgsi_info.c \
-	tgsi/tgsi_iterate.c \
-	tgsi/tgsi_parse.c \
-	tgsi/tgsi_ppc.c \
-	tgsi/tgsi_sanity.c \
-	tgsi/tgsi_scan.c \
-	tgsi/tgsi_sse2.c \
-	tgsi/tgsi_text.c \
-	tgsi/tgsi_transform.c \
-	tgsi/tgsi_ureg.c \
-	tgsi/tgsi_util.c \
-	translate/translate.c \
-	translate/translate_cache.c \
-	translate/translate_generic.c \
-	translate/translate_sse.c \
-	util/u_debug.c \
-	util/u_debug_describe.c \
-	util/u_debug_refcnt.c \
-	util/u_debug_stack.c \
-	util/u_debug_symbol.c \
-	util/u_dump_defines.c \
-	util/u_dump_state.c \
-	util/u_bitmask.c \
-	util/u_blit.c \
-	util/u_blitter.c \
-	util/u_cache.c \
-	util/u_caps.c \
-	util/u_cpu_detect.c \
-	util/u_dl.c \
-	util/u_draw.c \
-	util/u_draw_quad.c \
-	util/u_format.c \
-	util/u_format_other.c \
-	util/u_format_latc.c \
-	util/u_format_s3tc.c \
-	util/u_format_rgtc.c \
-	util/u_format_srgb.c \
-	util/u_format_table.c \
-	util/u_format_tests.c \
-	util/u_format_yuv.c \
-	util/u_format_zs.c \
-	util/u_framebuffer.c \
-	util/u_gen_mipmap.c \
-	util/u_half.c \
-	util/u_handle_table.c \
-	util/u_hash.c \
-	util/u_hash_table.c \
-	util/u_index_modify.c \
-	util/u_keymap.c \
-	util/u_linear.c \
-	util/u_linkage.c \
-	util/u_network.c \
-	util/u_math.c \
-	util/u_mm.c \
-	util/u_pstipple.c \
-	util/u_rect.c \
-	util/u_ringbuffer.c \
-	util/u_sampler.c \
-	util/u_simple_shaders.c \
-	util/u_slab.c \
-	util/u_snprintf.c \
-	util/u_staging.c \
-	util/u_surface.c \
-	util/u_surfaces.c \
-	util/u_texture.c \
-	util/u_tile.c \
-	util/u_transfer.c \
-	util/u_resource.c \
-	util/u_upload_mgr.c \
-	util/u_vbuf_mgr.c \
-	vl/vl_csc.c \
-	vl/vl_compositor.c \
-	vl/vl_decoder.c \
-	vl/vl_mpeg12_decoder.c \
-	vl/vl_mpeg12_bitstream.c \
-	vl/vl_zscan.c \
-        vl/vl_idct.c \
-	vl/vl_mc.c \
-        vl/vl_vertex_buffers.c \
-        vl/vl_video_buffer.c
+# get source lists
+include Makefile.sources
 
-GALLIVM_SOURCES = \
-        gallivm/lp_bld_arit.c \
-        gallivm/lp_bld_assert.c \
-        gallivm/lp_bld_bitarit.c \
-        gallivm/lp_bld_const.c \
-        gallivm/lp_bld_conv.c \
-        gallivm/lp_bld_flow.c \
-        gallivm/lp_bld_format_aos.c \
-        gallivm/lp_bld_format_soa.c \
-        gallivm/lp_bld_format_yuv.c \
-        gallivm/lp_bld_gather.c \
-        gallivm/lp_bld_init.c \
-        gallivm/lp_bld_intr.c \
-        gallivm/lp_bld_logic.c \
-        gallivm/lp_bld_pack.c \
-        gallivm/lp_bld_printf.c \
-        gallivm/lp_bld_quad.c \
-        gallivm/lp_bld_sample.c \
-        gallivm/lp_bld_sample_aos.c \
-        gallivm/lp_bld_sample_soa.c \
-        gallivm/lp_bld_struct.c \
-        gallivm/lp_bld_swizzle.c \
-        gallivm/lp_bld_tgsi_aos.c \
-        gallivm/lp_bld_tgsi_info.c \
-        gallivm/lp_bld_tgsi_soa.c \
-        gallivm/lp_bld_type.c \
-        draw/draw_llvm.c \
-        draw/draw_llvm_sample.c \
-        draw/draw_llvm_translate.c \
-        draw/draw_vs_llvm.c \
-        draw/draw_pt_fetch_shade_pipeline_llvm.c
-
-GALLIVM_CPP_SOURCES = \
-	gallivm/lp_bld_debug.cpp \
-	gallivm/lp_bld_misc.cpp
-
-GENERATED_SOURCES = \
-	indices/u_indices_gen.c \
-	indices/u_unfilled_gen.c \
-	util/u_format_srgb.c \
-	util/u_format_table.c \
-	util/u_half.c
-
+C_SOURCES += $(GENERATED_SOURCES)
 
 ifeq ($(MESA_LLVM),1)
 C_SOURCES += \
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
new file mode 100644
index 0000000..766beb0
--- /dev/null
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -0,0 +1,200 @@
+C_SOURCES := \
+	cso_cache/cso_cache.c \
+	cso_cache/cso_context.c \
+	cso_cache/cso_hash.c \
+	draw/draw_context.c \
+	draw/draw_fs.c \
+	draw/draw_gs.c \
+	draw/draw_pipe.c \
+	draw/draw_pipe_aaline.c \
+	draw/draw_pipe_aapoint.c \
+	draw/draw_pipe_clip.c \
+	draw/draw_pipe_cull.c \
+	draw/draw_pipe_flatshade.c \
+	draw/draw_pipe_offset.c \
+	draw/draw_pipe_pstipple.c \
+	draw/draw_pipe_stipple.c \
+	draw/draw_pipe_twoside.c \
+	draw/draw_pipe_unfilled.c \
+	draw/draw_pipe_util.c \
+	draw/draw_pipe_validate.c \
+	draw/draw_pipe_vbuf.c \
+	draw/draw_pipe_wide_line.c \
+	draw/draw_pipe_wide_point.c \
+	draw/draw_pt.c \
+	draw/draw_pt_emit.c \
+	draw/draw_pt_fetch.c \
+	draw/draw_pt_fetch_emit.c \
+	draw/draw_pt_fetch_shade_emit.c \
+	draw/draw_pt_fetch_shade_pipeline.c \
+	draw/draw_pt_post_vs.c \
+	draw/draw_pt_so_emit.c \
+	draw/draw_pt_util.c \
+	draw/draw_pt_vsplit.c \
+	draw/draw_vertex.c \
+	draw/draw_vs.c \
+	draw/draw_vs_aos.c \
+	draw/draw_vs_aos_io.c \
+	draw/draw_vs_aos_machine.c \
+	draw/draw_vs_exec.c \
+	draw/draw_vs_ppc.c \
+	draw/draw_vs_sse.c \
+	draw/draw_vs_variant.c \
+	os/os_misc.c \
+	os/os_stream.c \
+	os/os_stream_log.c \
+	os/os_stream_null.c \
+	os/os_stream_stdc.c \
+	os/os_stream_str.c \
+	os/os_time.c \
+	pipebuffer/pb_buffer_fenced.c \
+	pipebuffer/pb_buffer_malloc.c \
+	pipebuffer/pb_bufmgr_alt.c \
+	pipebuffer/pb_bufmgr_cache.c \
+	pipebuffer/pb_bufmgr_debug.c \
+	pipebuffer/pb_bufmgr_mm.c \
+	pipebuffer/pb_bufmgr_ondemand.c \
+	pipebuffer/pb_bufmgr_pool.c \
+	pipebuffer/pb_bufmgr_slab.c \
+	pipebuffer/pb_validate.c \
+	postprocess/pp_celshade.c \
+	postprocess/pp_colors.c \
+	postprocess/pp_init.c \
+	postprocess/pp_mlaa.c \
+	postprocess/pp_run.c \
+	postprocess/pp_program.c \
+	rbug/rbug_connection.c \
+	rbug/rbug_context.c \
+	rbug/rbug_core.c \
+	rbug/rbug_demarshal.c \
+	rbug/rbug_texture.c \
+	rbug/rbug_shader.c \
+	rtasm/rtasm_cpu.c \
+	rtasm/rtasm_execmem.c \
+	rtasm/rtasm_ppc.c \
+	rtasm/rtasm_ppc_spe.c \
+	rtasm/rtasm_x86sse.c \
+	tgsi/tgsi_build.c \
+	tgsi/tgsi_dump.c \
+	tgsi/tgsi_exec.c \
+	tgsi/tgsi_info.c \
+	tgsi/tgsi_iterate.c \
+	tgsi/tgsi_parse.c \
+	tgsi/tgsi_ppc.c \
+	tgsi/tgsi_sanity.c \
+	tgsi/tgsi_scan.c \
+	tgsi/tgsi_sse2.c \
+	tgsi/tgsi_text.c \
+	tgsi/tgsi_transform.c \
+	tgsi/tgsi_ureg.c \
+	tgsi/tgsi_util.c \
+	translate/translate.c \
+	translate/translate_cache.c \
+	translate/translate_generic.c \
+	translate/translate_sse.c \
+	util/u_debug.c \
+	util/u_debug_describe.c \
+	util/u_debug_memory.c \
+	util/u_debug_refcnt.c \
+	util/u_debug_stack.c \
+	util/u_debug_symbol.c \
+	util/u_dump_defines.c \
+	util/u_dump_state.c \
+	util/u_bitmask.c \
+	util/u_blit.c \
+	util/u_blitter.c \
+	util/u_cache.c \
+	util/u_caps.c \
+	util/u_cpu_detect.c \
+	util/u_dl.c \
+	util/u_draw.c \
+	util/u_draw_quad.c \
+	util/u_format.c \
+	util/u_format_other.c \
+	util/u_format_latc.c \
+	util/u_format_s3tc.c \
+	util/u_format_rgtc.c \
+	util/u_format_tests.c \
+	util/u_format_yuv.c \
+	util/u_format_zs.c \
+	util/u_framebuffer.c \
+	util/u_gen_mipmap.c \
+	util/u_handle_table.c \
+	util/u_hash.c \
+	util/u_hash_table.c \
+	util/u_index_modify.c \
+	util/u_keymap.c \
+	util/u_linear.c \
+	util/u_linkage.c \
+	util/u_network.c \
+	util/u_math.c \
+	util/u_mm.c \
+	util/u_pstipple.c \
+	util/u_rect.c \
+	util/u_ringbuffer.c \
+	util/u_sampler.c \
+	util/u_simple_shaders.c \
+	util/u_slab.c \
+	util/u_snprintf.c \
+	util/u_staging.c \
+	util/u_surface.c \
+	util/u_surfaces.c \
+	util/u_texture.c \
+	util/u_tile.c \
+	util/u_transfer.c \
+	util/u_resource.c \
+	util/u_upload_mgr.c \
+	util/u_vbuf_mgr.c \
+	vl/vl_csc.c \
+	vl/vl_compositor.c \
+	vl/vl_decoder.c \
+	vl/vl_mpeg12_decoder.c \
+	vl/vl_mpeg12_bitstream.c \
+	vl/vl_zscan.c \
+        vl/vl_idct.c \
+	vl/vl_mc.c \
+        vl/vl_vertex_buffers.c \
+        vl/vl_video_buffer.c
+
+GENERATED_SOURCES := \
+	indices/u_indices_gen.c \
+	indices/u_unfilled_gen.c \
+	util/u_format_srgb.c \
+	util/u_format_table.c \
+	util/u_half.c
+
+GALLIVM_SOURCES := \
+        gallivm/lp_bld_arit.c \
+        gallivm/lp_bld_assert.c \
+        gallivm/lp_bld_bitarit.c \
+        gallivm/lp_bld_const.c \
+        gallivm/lp_bld_conv.c \
+        gallivm/lp_bld_flow.c \
+        gallivm/lp_bld_format_aos.c \
+        gallivm/lp_bld_format_soa.c \
+        gallivm/lp_bld_format_yuv.c \
+        gallivm/lp_bld_gather.c \
+        gallivm/lp_bld_init.c \
+        gallivm/lp_bld_intr.c \
+        gallivm/lp_bld_logic.c \
+        gallivm/lp_bld_pack.c \
+        gallivm/lp_bld_printf.c \
+        gallivm/lp_bld_quad.c \
+        gallivm/lp_bld_sample.c \
+        gallivm/lp_bld_sample_aos.c \
+        gallivm/lp_bld_sample_soa.c \
+        gallivm/lp_bld_struct.c \
+        gallivm/lp_bld_swizzle.c \
+        gallivm/lp_bld_tgsi_aos.c \
+        gallivm/lp_bld_tgsi_info.c \
+        gallivm/lp_bld_tgsi_soa.c \
+        gallivm/lp_bld_type.c \
+        draw/draw_llvm.c \
+        draw/draw_llvm_sample.c \
+        draw/draw_llvm_translate.c \
+        draw/draw_vs_llvm.c \
+        draw/draw_pt_fetch_shade_pipeline_llvm.c
+
+GALLIVM_CPP_SOURCES := \
+	gallivm/lp_bld_debug.cpp \
+	gallivm/lp_bld_misc.cpp
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index d18f55f..07c420e 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -47,201 +47,20 @@
     'util/u_format_pack.py', 
 ])
 
-source = [
-    'cso_cache/cso_cache.c',
-    'cso_cache/cso_context.c',
-    'cso_cache/cso_hash.c',
-    'draw/draw_context.c',
-    'draw/draw_fs.c',
-    'draw/draw_gs.c',
-    'draw/draw_pipe.c',
-    'draw/draw_pipe_aaline.c',
-    'draw/draw_pipe_aapoint.c',
-    'draw/draw_pipe_clip.c',
-    'draw/draw_pipe_cull.c',
-    'draw/draw_pipe_flatshade.c',
-    'draw/draw_pipe_offset.c',
-    'draw/draw_pipe_pstipple.c',
-    'draw/draw_pipe_stipple.c',
-    'draw/draw_pipe_twoside.c',
-    'draw/draw_pipe_unfilled.c',
-    'draw/draw_pipe_util.c',
-    'draw/draw_pipe_validate.c',
-    'draw/draw_pipe_vbuf.c',
-    'draw/draw_pipe_wide_line.c',
-    'draw/draw_pipe_wide_point.c',
-    'draw/draw_pt.c',
-    'draw/draw_pt_emit.c',
-    'draw/draw_pt_fetch.c',
-    'draw/draw_pt_fetch_emit.c',
-    'draw/draw_pt_fetch_shade_emit.c',
-    'draw/draw_pt_fetch_shade_pipeline.c',
-    'draw/draw_pt_post_vs.c',
-    'draw/draw_pt_so_emit.c',
-    'draw/draw_pt_util.c',
-    'draw/draw_pt_vsplit.c',
-    'draw/draw_vertex.c',
-    'draw/draw_vs.c',
-    'draw/draw_vs_aos.c',
-    'draw/draw_vs_aos_io.c',
-    'draw/draw_vs_aos_machine.c',
-    'draw/draw_vs_exec.c',
-    'draw/draw_vs_ppc.c',
-    'draw/draw_vs_sse.c',
-    'draw/draw_vs_variant.c',
-    #'indices/u_indices.c',
-    #'indices/u_unfilled_indices.c',
-    'indices/u_indices_gen.c',
-    'indices/u_unfilled_gen.c',
-    'os/os_misc.c',
-    'os/os_stream.c',
-    'os/os_stream_log.c',
-    'os/os_stream_null.c',
-    'os/os_stream_stdc.c',
-    'os/os_stream_str.c',
-    'os/os_time.c',
-    'pipebuffer/pb_buffer_fenced.c',
-    'pipebuffer/pb_buffer_malloc.c',
-    'pipebuffer/pb_bufmgr_alt.c',
-    'pipebuffer/pb_bufmgr_cache.c',
-    'pipebuffer/pb_bufmgr_debug.c',
-    'pipebuffer/pb_bufmgr_mm.c',
-    'pipebuffer/pb_bufmgr_ondemand.c',
-    'pipebuffer/pb_bufmgr_pool.c',
-    'pipebuffer/pb_bufmgr_slab.c',
-    'pipebuffer/pb_validate.c',
-    'rbug/rbug_connection.c',
-    'rbug/rbug_context.c',
-    'rbug/rbug_core.c',
-    'rbug/rbug_demarshal.c',
-    'rbug/rbug_shader.c',
-    'rbug/rbug_texture.c',
-    'rtasm/rtasm_cpu.c',
-    'rtasm/rtasm_execmem.c',
-    'rtasm/rtasm_ppc.c',
-    'rtasm/rtasm_ppc_spe.c',
-    'rtasm/rtasm_x86sse.c',
-    'tgsi/tgsi_build.c',
-    'tgsi/tgsi_dump.c',
-    'tgsi/tgsi_exec.c',
-    'tgsi/tgsi_info.c',
-    'tgsi/tgsi_iterate.c',
-    'tgsi/tgsi_parse.c',
-    'tgsi/tgsi_ppc.c',
-    'tgsi/tgsi_sanity.c',
-    'tgsi/tgsi_scan.c',
-    'tgsi/tgsi_sse2.c',
-    'tgsi/tgsi_text.c',
-    'tgsi/tgsi_transform.c',
-    'tgsi/tgsi_ureg.c',
-    'tgsi/tgsi_util.c',
-    'translate/translate.c',
-    'translate/translate_cache.c',
-    'translate/translate_generic.c',
-    'translate/translate_sse.c',
-    'util/u_bitmask.c',
-    'util/u_blit.c',
-    'util/u_blitter.c',
-    'util/u_cache.c',
-    'util/u_caps.c',
-    'util/u_cpu_detect.c',
-    'util/u_debug.c',
-    'util/u_debug_describe.c',
-    'util/u_debug_memory.c',
-    'util/u_debug_refcnt.c',
-    'util/u_debug_stack.c',
-    'util/u_debug_symbol.c',
-    'util/u_dump_defines.c',
-    'util/u_dump_state.c',
-    'util/u_dl.c',
-    'util/u_draw.c',
-    'util/u_draw_quad.c',
-    'util/u_format.c',
-    'util/u_format_other.c',
-    'util/u_format_latc.c',
-    'util/u_format_s3tc.c',
-    'util/u_format_rgtc.c',
-    'util/u_format_srgb.c',
-    'util/u_format_table.c',
-    'util/u_format_tests.c',
-    'util/u_format_yuv.c',
-    'util/u_format_zs.c',
-    'util/u_framebuffer.c',
-    'util/u_gen_mipmap.c',
-    'util/u_half.c',
-    'util/u_handle_table.c',
-    'util/u_hash.c',
-    'util/u_hash_table.c',
-    'util/u_index_modify.c',
-    'util/u_keymap.c',
-    'util/u_linear.c',
-    'util/u_linkage.c',
-    'util/u_network.c',
-    'util/u_math.c',
-    'util/u_mm.c',
-    'util/u_pstipple.c',
-    'util/u_rect.c',
-    'util/u_resource.c',
-    'util/u_ringbuffer.c',
-    'util/u_sampler.c',
-    'util/u_simple_shaders.c',
-    'util/u_slab.c',
-    'util/u_snprintf.c',
-    'util/u_staging.c',
-    'util/u_surface.c',
-    'util/u_surfaces.c',
-    'util/u_texture.c',
-    'util/u_tile.c',
-    'util/u_transfer.c',
-    'util/u_upload_mgr.c',
-    'util/u_vbuf_mgr.c',
-    'vl/vl_csc.c',
-    'vl/vl_compositor.c',
-    'vl/vl_decoder.c',
-    'vl/vl_mpeg12_decoder.c',
-    'vl/vl_mpeg12_bitstream.c',
-    'vl/vl_zscan.c',
-    'vl/vl_idct.c',
-    'vl/vl_mc.c',
-    'vl/vl_vertex_buffers.c',
-    'vl/vl_video_buffer.c',
-]
+source = env.ParseSourceList('Makefile.sources', [
+    'C_SOURCES',
+    'GENERATED_SOURCES'
+])
 
 if env['llvm']:
-    source += [
-        'gallivm/lp_bld_arit.c',
-        'gallivm/lp_bld_assert.c',
-        'gallivm/lp_bld_bitarit.c',
-        'gallivm/lp_bld_const.c',
-        'gallivm/lp_bld_conv.c',
-        'gallivm/lp_bld_debug.cpp',
-        'gallivm/lp_bld_flow.c',
-        'gallivm/lp_bld_format_aos.c',
-        'gallivm/lp_bld_format_soa.c',
-        'gallivm/lp_bld_format_yuv.c',
-        'gallivm/lp_bld_gather.c',
-        'gallivm/lp_bld_init.c',
-        'gallivm/lp_bld_intr.c',
-        'gallivm/lp_bld_logic.c',
-        'gallivm/lp_bld_misc.cpp',
-        'gallivm/lp_bld_pack.c',
-        'gallivm/lp_bld_printf.c',
-        'gallivm/lp_bld_quad.c',
-        'gallivm/lp_bld_sample.c',
-        'gallivm/lp_bld_sample_aos.c',
-        'gallivm/lp_bld_sample_soa.c',
-        'gallivm/lp_bld_struct.c',
-        'gallivm/lp_bld_swizzle.c',
-        'gallivm/lp_bld_tgsi_aos.c',
-        'gallivm/lp_bld_tgsi_info.c',
-        'gallivm/lp_bld_tgsi_soa.c',
-        'gallivm/lp_bld_type.c',
-        'draw/draw_llvm.c',
-        'draw/draw_llvm_sample.c',
-        'draw/draw_llvm_translate.c',
-        'draw/draw_pt_fetch_shade_pipeline_llvm.c',
-        'draw/draw_vs_llvm.c'
-    ]
+    source += env.ParseSourceList('Makefile.sources', [
+        'GALLIVM_SOURCES',
+        'GALLIVM_CPP_SOURCES'
+    ])
+
+    if env['toolchain'] == 'crossmingw':
+        # compile lp_bld_misc.cpp without -gstabs option
+        source = env.compile_without_gstabs(source, "gallivm/lp_bld_misc.cpp")
 
 gallium = env.ConvenienceLibrary(
     target = 'gallium',
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 8bb8744..996e295 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -96,7 +96,7 @@
  * Create LLVM type for struct draw_jit_texture
  */
 static LLVMTypeRef
-create_jit_texture_type(struct gallivm_state *gallivm)
+create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
    LLVMTypeRef texture_type;
@@ -120,13 +120,21 @@
    elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = 
       LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
 
+#if HAVE_LLVM >= 0x0300
+   texture_type = LLVMStructCreateNamed(gallivm->context, struct_name);
+   LLVMStructSetBody(texture_type, elem_types,
+                     Elements(elem_types), 0);
+#else
    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                           Elements(elem_types), 0);
 
+   LLVMAddTypeName(gallivm->module, struct_name, texture_type);
+
    /* Make sure the target's struct layout cache doesn't return
     * stale/invalid data.
     */
    LLVMInvalidateStructLayout(gallivm->target, texture_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
                           target, texture_type,
@@ -176,7 +184,7 @@
  */
 static LLVMTypeRef
 create_jit_context_type(struct gallivm_state *gallivm,
-                        LLVMTypeRef texture_type)
+                        LLVMTypeRef texture_type, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
@@ -189,11 +197,17 @@
    elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
    elem_types[4] = LLVMArrayType(texture_type,
                                  PIPE_MAX_VERTEX_SAMPLERS); /* textures */
-
+#if HAVE_LLVM >= 0x0300
+   context_type = LLVMStructCreateNamed(gallivm->context, struct_name);
+   LLVMStructSetBody(context_type, elem_types,
+                     Elements(elem_types), 0);
+#else
    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                           Elements(elem_types), 0);
+   LLVMAddTypeName(gallivm->module, struct_name, context_type);
 
    LLVMInvalidateStructLayout(gallivm->target, context_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
                           target, context_type, 0);
@@ -215,7 +229,7 @@
  * Create LLVM type for struct pipe_vertex_buffer
  */
 static LLVMTypeRef
-create_jit_vertex_buffer_type(struct gallivm_state *gallivm)
+create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
    LLVMTypeRef elem_types[3];
@@ -225,10 +239,17 @@
    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
    elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */
 
+#if HAVE_LLVM >= 0x0300
+   vb_type = LLVMStructCreateNamed(gallivm->context, struct_name);
+   LLVMStructSetBody(vb_type, elem_types,
+                     Elements(elem_types), 0);
+#else
    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                      Elements(elem_types), 0);
+   LLVMAddTypeName(gallivm->module, struct_name, vb_type);
 
    LLVMInvalidateStructLayout(gallivm->target, vb_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
                           target, vb_type, 0);
@@ -258,10 +279,17 @@
    elem_types[1]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
    elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
 
+#if HAVE_LLVM >= 0x0300
+   vertex_header = LLVMStructCreateNamed(gallivm->context, struct_name);
+   LLVMStructSetBody(vertex_header, elem_types,
+                     Elements(elem_types), 0);
+#else
    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
                                            Elements(elem_types), 0);
+   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
 
    LLVMInvalidateStructLayout(gallivm->target, vertex_header);
+#endif
 
    /* these are bit-fields and we can't take address of them
       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
@@ -284,8 +312,6 @@
                           target, vertex_header,
                           DRAW_JIT_VERTEX_DATA);
 
-   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
-
    return vertex_header;
 }
 
@@ -299,19 +325,15 @@
    struct gallivm_state *gallivm = llvm->gallivm;
    LLVMTypeRef texture_type, context_type, buffer_type, vb_type;
 
-   texture_type = create_jit_texture_type(gallivm);
-   LLVMAddTypeName(gallivm->module, "texture", texture_type);
+   texture_type = create_jit_texture_type(gallivm, "texture");
 
-   context_type = create_jit_context_type(gallivm, texture_type);
-   LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type);
+   context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context");
    llvm->context_ptr_type = LLVMPointerType(context_type, 0);
 
    buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
-   LLVMAddTypeName(gallivm->module, "buffer", buffer_type);
    llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
 
-   vb_type = create_jit_vertex_buffer_type(gallivm);
-   LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type);
+   vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
    llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
 }
 
diff --git a/src/gallium/auxiliary/gallivm/f.cpp b/src/gallium/auxiliary/gallivm/f.cpp
index 5eb09c0..6b9c35b 100644
--- a/src/gallium/auxiliary/gallivm/f.cpp
+++ b/src/gallium/auxiliary/gallivm/f.cpp
@@ -15,8 +15,9 @@
  *
  * How to use this source:
  *
- * - Download and abuild the NTL library from
- *   http://shoup.net/ntl/download.html
+ * - Download and build the NTL library from
+ *   http://shoup.net/ntl/download.html , or install libntl-dev package if on
+ *   Debian.
  *
  * - Download boost source code matching to your distro. 
  *
@@ -24,22 +25,32 @@
  *
  * - Build as
  *
- *   g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a -lboost_math_tr1
+ *   g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a
  *
  * - Run as 
  *
  *    ./minimax
  *
- * - For example, to compute exp2 5th order polynomial between [0, 1] do:
+ * - For example, to compute log2 5th order polynomial between [1, 2] do:
+ *
+ *    variant 0
+ *    range 1 2
+ *    order 5 0
+ *    step 200
+ *    info
+ *
+ *  and take the coefficients from the P = { ... } array.
+ *
+ * - To compute exp2 5th order polynomial between [0, 1] do:
  *
  *    variant 1
  *    range 0 1
  *    order 5 0
- *    steps 200
+ *    step 200
  *    info
  *
  * - For more info see
- * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
+ * http://www.boost.org/doc/libs/1_47_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
  */
 
 #define L22
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 02b3bde..2be8598 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -61,7 +61,7 @@
 #include "lp_bld_arit.h"
 
 
-#define EXP_POLY_DEGREE 3
+#define EXP_POLY_DEGREE 5
 
 #define LOG_POLY_DEGREE 5
 
@@ -1645,7 +1645,7 @@
    assert(type.floating);
 
    if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
-      const unsigned num_iterations = 0;
+      const unsigned num_iterations = 1;
       LLVMValueRef res;
       unsigned i;
 
@@ -2151,7 +2151,7 @@
 
    assert(lp_check_value(bld->type, x));
 
-   return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
+   return lp_build_exp2(bld, lp_build_mul(bld, log2e, x));
 }
 
 
@@ -2168,7 +2168,7 @@
 
    assert(lp_check_value(bld->type, x));
 
-   return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
+   return lp_build_mul(bld, log2, lp_build_log2(bld, x));
 }
 
 
@@ -2218,18 +2218,18 @@
  */
 const double lp_build_exp2_polynomial[] = {
 #if EXP_POLY_DEGREE == 5
-   0.999999999690134838155,
-   0.583974334321735217258,
-   0.164553105719676828492,
-   0.0292811063701710962255,
-   0.00354944426657875141846,
-   0.000296253726543423377365
+   0.999999925063526176901,
+   0.693153073200168932794,
+   0.240153617044375388211,
+   0.0558263180532956664775,
+   0.00898934009049466391101,
+   0.00187757667519147912699
 #elif EXP_POLY_DEGREE == 4
-   1.00000001502262084505,
-   0.563586057338685991394,
-   0.150436017652442413623,
-   0.0243220604213317927308,
-   0.0025359088446580436489
+   1.00000259337069434683,
+   0.693003834469974940458,
+   0.24144275689150793076,
+   0.0520114606103070150235,
+   0.0135341679161270268764
 #elif EXP_POLY_DEGREE == 3
    0.999925218562710312959,
    0.695833540494823811697,
@@ -2465,6 +2465,12 @@
 
       assert(type.floating && type.width == 32);
 
+      /* 
+       * We don't explicitly handle denormalized numbers. They will yield a
+       * result in the neighbourhood of -127, which appears to be adequate
+       * enough.
+       */
+
       i = LLVMBuildBitCast(builder, x, int_vec_type, "");
 
       /* exp = (float) exponent(x) */
diff --git a/src/gallium/auxiliary/os/os_mman.h b/src/gallium/auxiliary/os/os_mman.h
new file mode 100644
index 0000000..b48eb053
--- /dev/null
+++ b/src/gallium/auxiliary/os/os_mman.h
@@ -0,0 +1,87 @@
+/**************************************************************************
+ *
+ * Copyright 2011 LunarG, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * OS independent memory mapping (with large file support).
+ *
+ * @author Chia-I Wu <olvaffe@gmail.com>
+ */
+
+#ifndef _OS_MMAN_H_
+#define _OS_MMAN_H_
+
+
+#include "pipe/p_config.h"
+#include "pipe/p_compiler.h"
+
+#if defined(PIPE_OS_UNIX)
+#  ifndef _FILE_OFFSET_BITS
+#    error _FILE_OFFSET_BITS must be defined to 64
+#  endif
+#  include <sys/mman.h>
+#else
+#  error Unsupported OS
+#endif
+
+#if defined(PIPE_OS_ANDROID)
+#  include <errno.h> /* for EINVAL */
+#endif
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+
+#if defined(PIPE_OS_ANDROID)
+
+extern void *__mmap2(void *, size_t, int, int, int, size_t);
+
+static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags, int fd, loff_t offset)
+{
+   /* offset must be aligned to 4096 (not necessarily the page size) */
+   if (unlikely(offset & 4095)) {
+      errno = EINVAL;
+      return MAP_FAILED;
+   }
+
+   return __mmap2(addr, length, prot, flags, fd, (size_t) (offset >> 12));
+}
+
+#else
+/* assume large file support exists */
+#  define os_mmap(addr, length, prot, flags, fd, offset) mmap(addr, length, prot, flags, fd, offset)
+#endif
+
+#define os_munmap(addr, length) munmap(addr, length)
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _OS_MMAN_H_ */
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index 8f1245b..d830129 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -314,7 +314,7 @@
  * pipe_barrier
  */
 
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)
+#if (defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)) && !defined(PIPE_OS_ANDROID)
 
 typedef pthread_barrier_t pipe_barrier;
 
diff --git a/src/gallium/auxiliary/postprocess/ADDING b/src/gallium/auxiliary/postprocess/ADDING
new file mode 100644
index 0000000..3735835
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/ADDING
@@ -0,0 +1,87 @@
+How to add a new post-processing filter
+=======================================
+
+The Gallium post-processing queue works by passing the current screen to a fragment shader.
+These shaders may be written in any supported language, but are added here in TGSI text
+assembly.
+
+You can translate GLSL/ARB fairly easily via llvmpipe (LP_DEBUG=tgsi). I don't know the
+status of the D3D state tracker, but if/when that works, I'd assume HLSL would be possible
+too.
+
+
+
+Steps
+=====
+
+1. Add it to PP
+2. Make it known to PP
+3. Make it known to driconf
+4. ????
+5. Profit
+
+
+
+
+1. Add it to PP
+---------------
+
+Once you have the shader(s) in TGSI asm, put them to static const char arrays in a header
+file (see pp_colors.h).
+
+Add the filter's prototypes (main and init functions) to postprocess.h. This is mostly a
+copy-paste job with only changing the name.
+
+Then create a file containing empty main and init functions, named as you specified above.
+See pp_colors.c for an example.
+
+
+
+2. Make it known to PP
+----------------------
+
+Add your filter to filters.h, in a correct place. Placement is important, AA should usually
+be the last effect in the queue for example.
+
+Name is the config option your filter will be enabled by, both in driconf and as an env var.
+
+Inner temp means an intermediate framebuffer you may use in your filter to store
+results between passes. If you have a single-pass filter, request 0 of those.
+
+Shaders is the number of shaders your filter needs. The minimum is 2.
+
+
+You could also write the init and main functions now. If your filter is single-pass without
+a vertex shader and any other input than the main screen, you can use pp_nocolor as your
+main function as is.
+
+
+
+3. Make it known to driconf
+---------------------------
+
+First time outside of auxiliary/postprocess. First, add a suitable description to
+drivers/dri/common/xmlpool/t_options.h, and regenerate options.h by running make in that
+directory. Use the name you put into filters.h as the config option name.
+
+With driconf aware of the option, make Gallium aware of it too. Add it to
+state_trackers/dri/common/dri_screen.c in a proper section, specifying its default value and
+the accepted range (if applicable).
+
+Do check that __driNConfigOptions is still correct after the addition.
+
+
+
+4. ????
+-------
+
+Testing, praying, hookers, blow, sacrificial lambs...
+
+
+
+5. Profit
+---------
+
+Assuming you got here, sharing is caring. Send your filter to mesa-dev.
+
+
diff --git a/src/gallium/auxiliary/postprocess/filters.h b/src/gallium/auxiliary/postprocess/filters.h
new file mode 100644
index 0000000..2454088
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/filters.h
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PP_EXTERNAL_FILTERS_H
+#define PP_EXTERNAL_FILTERS_H
+
+#include "postprocess/postprocess.h"
+
+typedef void (*pp_init_func) (struct pp_queue_t *, unsigned int,
+                              unsigned int);
+
+struct pp_filter_t
+{
+   const char *name;            /* Config name */
+   unsigned int inner_tmps;     /* Request how many inner temps */
+   unsigned int shaders;        /* Request how many shaders */
+   unsigned int verts;          /* How many are vertex shaders */
+   pp_init_func init;           /* Init function */
+   pp_func main;                /* Run function */
+};
+
+/*	Order matters. Put new filters in a suitable place. */
+
+static const struct pp_filter_t pp_filters[PP_FILTERS] = {
+/*    name			inner	shaders	verts	init			run */
+   { "pp_noblue",		0,	2,	1,	pp_noblue_init,		pp_nocolor },
+   { "pp_nogreen",		0,	2,	1,	pp_nogreen_init,	pp_nocolor },
+   { "pp_nored",		0,	2,	1,	pp_nored_init,		pp_nocolor },
+   { "pp_celshade",		0,	2,	1,	pp_celshade_init,	pp_nocolor },
+   { "pp_jimenezmlaa",		2,	5,	2,	pp_jimenezmlaa_init,	pp_jimenezmlaa },
+   { "pp_jimenezmlaa_color",	2,	5,	2,	pp_jimenezmlaa_init_color, pp_jimenezmlaa_color },
+};
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/postprocess.h b/src/gallium/auxiliary/postprocess/postprocess.h
new file mode 100644
index 0000000..ef94f79
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/postprocess.h
@@ -0,0 +1,100 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef POSTPROCESS_H
+#define POSTPROCESS_H
+
+#include "postprocess/pp_program.h"
+
+#define PP_FILTERS 6            /* Increment this if you add filters */
+#define PP_MAX_PASSES 6
+
+struct pp_queue_t;              /* Forward definition */
+
+/* Less typing later on */
+typedef void (*pp_func) (struct pp_queue_t *, struct pipe_resource *,
+                         struct pipe_resource *, unsigned int);
+/**
+*	The main post-processing queue.
+*/
+struct pp_queue_t
+{
+   pp_func *pp_queue;           /* An array of pp_funcs */
+   unsigned int n_filters;      /* Number of enabled filters */
+
+   struct pipe_resource *tmp[2];        /* Two temp FBOs for the queue */
+   struct pipe_resource *inner_tmp[3];  /* Three for filter use */
+
+   unsigned int n_tmp, n_inner_tmp;
+
+   struct pipe_resource *depth; /* depth of original input */
+   struct pipe_resource *stencil;       /* stencil shared by inner_tmps */
+
+   struct pipe_surface *tmps[2], *inner_tmps[3], *stencils;
+
+   void ***shaders;             /* Shaders in TGSI form */
+   unsigned int *verts;
+   struct program *p;
+
+   bool fbos_init;
+};
+
+/* Main functions */
+
+struct pp_queue_t *pp_init(struct pipe_screen *, const unsigned int *);
+void pp_run(struct pp_queue_t *, struct pipe_resource *,
+            struct pipe_resource *, struct pipe_resource *);
+void pp_free(struct pp_queue_t *);
+void pp_free_fbos(struct pp_queue_t *);
+void pp_debug(const char *, ...);
+struct program *pp_init_prog(struct pp_queue_t *, struct pipe_screen *);
+void pp_init_fbos(struct pp_queue_t *, unsigned int, unsigned int,
+                  struct pipe_resource *);
+
+/* The filters */
+
+void pp_nocolor(struct pp_queue_t *, struct pipe_resource *,
+                struct pipe_resource *, unsigned int);
+
+void pp_jimenezmlaa(struct pp_queue_t *, struct pipe_resource *,
+                    struct pipe_resource *, unsigned int);
+void pp_jimenezmlaa_color(struct pp_queue_t *, struct pipe_resource *,
+                          struct pipe_resource *, unsigned int);
+
+/* The filter init functions */
+
+void pp_celshade_init(struct pp_queue_t *, unsigned int, unsigned int);
+
+void pp_nored_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_nogreen_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_noblue_init(struct pp_queue_t *, unsigned int, unsigned int);
+
+void pp_jimenezmlaa_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_jimenezmlaa_init_color(struct pp_queue_t *, unsigned int,
+                               unsigned int);
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/pp_celshade.c b/src/gallium/auxiliary/postprocess/pp_celshade.c
new file mode 100644
index 0000000..4454764
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_celshade.c
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "postprocess/postprocess.h"
+#include "postprocess/pp_celshade.h"
+#include "postprocess/pp_filters.h"
+
+/** Init function */
+void
+pp_celshade_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+   ppq->shaders[n][1] =
+      pp_tgsi_to_state(ppq->p->pipe, celshade, false, "celshade");
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_celshade.h b/src/gallium/auxiliary/postprocess/pp_celshade.h
new file mode 100644
index 0000000..536ac7f
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_celshade.h
@@ -0,0 +1,79 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef CELSHADE_H
+#define CELSHADE_H
+
+static const char celshade[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0..4]\n"
+   "IMM FLT32 {    0.2126,     0.7152,     0.0722,     4.0000}\n"
+   "IMM FLT32 {    0.5000,     2.0000,     1.0000,    -0.1250}\n"
+   "IMM FLT32 {    0.2500,     0.1000,     0.1250,     3.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
+   "  1: DP3 TEMP[1].x, TEMP[0].xyzz, IMM[0]\n"
+   "  2: MUL TEMP[3].x, TEMP[1].xxxx, IMM[0].wwww\n"
+   "  3: ROUND TEMP[2].x, TEMP[3].xxxx\n"
+   "  4: MUL TEMP[3].x, TEMP[2].xxxx, IMM[2].xxxx\n"
+   "  5: MOV TEMP[2].x, TEMP[3].xxxx\n"
+   "  6: ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[3].xxxx\n"
+   "  7: SGT TEMP[1].w, TEMP[4].xxxx, IMM[2].yyyy\n"
+   "  8: IF TEMP[1].wwww :19\n"
+   "  9:   ADD TEMP[4].y, TEMP[3].xxxx, IMM[2].yyyy\n"
+   " 10:   ADD TEMP[1].z, TEMP[1].xxxx, -TEMP[4].yyyy\n"
+   " 11:   ADD TEMP[1].y, TEMP[3].xxxx, IMM[2].zzzz\n"
+   " 12:   ADD TEMP[2].x, TEMP[1].yyyy, -TEMP[4].yyyy\n"
+   " 13:   RCP TEMP[4].y, TEMP[2].xxxx\n"
+   " 14:   MUL TEMP[2].x, TEMP[1].zzzz, TEMP[4].yyyy\n"
+   " 15:   MAD TEMP[1].y, -IMM[1].yyyy, TEMP[2].xxxx, IMM[2].wwww\n"
+   " 16:   MUL TEMP[1].z, TEMP[2].xxxx, TEMP[1].yyyy\n"
+   " 17:   MUL TEMP[1].y, TEMP[2].xxxx, TEMP[1].zzzz\n"
+   " 18:   MAD TEMP[2].x, TEMP[1].yyyy, IMM[2].zzzz, TEMP[3].xxxx\n"
+   " 19: ENDIF\n"
+   " 20: SLT TEMP[3].x, TEMP[4].xxxx, -IMM[2].yyyy\n"
+   " 21: IF TEMP[3].xxxx :34\n"
+   " 22:   ADD TEMP[3].x, TEMP[2].xxxx, -IMM[2].zzzz\n"
+   " 23:   ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[3].xxxx\n"
+   " 24:   ADD TEMP[1].x, TEMP[2].xxxx, -IMM[2].yyyy\n"
+   " 25:   ADD TEMP[4].y, TEMP[1].xxxx, -TEMP[3].xxxx\n"
+   " 26:   RCP TEMP[3].x, TEMP[4].yyyy\n"
+   " 27:   MUL TEMP[1].x, TEMP[4].xxxx, TEMP[3].xxxx\n"
+   " 28:   MAD TEMP[4].x, -IMM[1].yyyy, TEMP[1].xxxx, IMM[2].wwww\n"
+   " 29:   MUL TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx\n"
+   " 30:   MUL TEMP[4].x, TEMP[1].xxxx, TEMP[3].xxxx\n"
+   " 31:   ADD TEMP[3].x, IMM[1].zzzz, -TEMP[4].xxxx\n"
+   " 32:   MAD TEMP[1].x, TEMP[3].xxxx, -IMM[2].zzzz, TEMP[2].xxxx\n"
+   " 33:   MOV TEMP[2].x, TEMP[1].xxxx\n"
+   " 34: ENDIF\n"
+   " 35: MAD TEMP[1].x, TEMP[2].xxxx, IMM[1].yyyy, IMM[2].yyyy\n"
+   " 36: MUL OUT[0], TEMP[0], TEMP[1].xxxx\n"
+   " 37: END\n";
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/pp_colors.c b/src/gallium/auxiliary/postprocess/pp_colors.c
new file mode 100644
index 0000000..36bb1f5
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_colors.c
@@ -0,0 +1,80 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "postprocess/postprocess.h"
+#include "postprocess/pp_colors.h"
+#include "postprocess/pp_filters.h"
+
+/** The run function of the color filters */
+void
+pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,
+           struct pipe_resource *out, unsigned int n)
+{
+
+   struct program *p = ppq->p;
+
+   pp_filter_setup_in(p, in);
+   pp_filter_setup_out(p, out);
+
+   pp_filter_set_fb(p);
+   pp_filter_misc_state(p);
+
+   cso_single_sampler(p->cso, 0, &p->sampler_point);
+   cso_single_sampler_done(p->cso);
+   cso_set_fragment_sampler_views(p->cso, 1, &p->view);
+
+   cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]);
+   cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][1]);
+
+   pp_filter_draw(p);
+   pp_filter_end_pass(p);
+}
+
+
+/* Init functions */
+
+void
+pp_nored_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+   ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, nored, false, "nored");
+}
+
+
+void
+pp_nogreen_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+   ppq->shaders[n][1] =
+      pp_tgsi_to_state(ppq->p->pipe, nogreen, false, "nogreen");
+}
+
+
+void
+pp_noblue_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+   ppq->shaders[n][1] =
+      pp_tgsi_to_state(ppq->p->pipe, noblue, false, "noblue");
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_colors.h b/src/gallium/auxiliary/postprocess/pp_colors.h
new file mode 100644
index 0000000..588cd2f
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_colors.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PP_COLORS_H
+#define PP_COLORS_H
+
+static const char nored[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0]\n"
+   "IMM FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
+   "  1: MOV TEMP[0].x, IMM[0].xxxx\n"
+   "  2: MOV OUT[0], TEMP[0]\n"
+   "  3: END\n";
+
+
+static const char nogreen[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0]\n"
+   "IMM FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
+   "  1: MOV TEMP[0].y, IMM[0].xxxx\n"
+   "  2: MOV OUT[0], TEMP[0]\n"
+   "  3: END\n";
+
+
+static const char noblue[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0]\n"
+   "IMM FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
+   "  1: MOV TEMP[0].z, IMM[0].xxxx\n"
+   "  2: MOV OUT[0], TEMP[0]\n"
+   "  3: END\n";
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/pp_filters.h b/src/gallium/auxiliary/postprocess/pp_filters.h
new file mode 100644
index 0000000..0e34bb6
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_filters.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PP_FILTERS_H
+#define PP_FILTERS_H
+
+/* Internal include, mainly for the filters */
+
+#include "cso_cache/cso_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_text.h"
+#include "util/u_memory.h"
+#include "util/u_draw_quad.h"
+
+#define PP_MAX_TOKENS 2048
+
+
+/* Helper functions for the filters */
+
+void pp_filter_setup_in(struct program *, struct pipe_resource *);
+void pp_filter_setup_out(struct program *, struct pipe_resource *);
+void pp_filter_end_pass(struct program *);
+void *pp_tgsi_to_state(struct pipe_context *, const char *, bool,
+                       const char *);
+void pp_filter_misc_state(struct program *);
+void pp_filter_draw(struct program *);
+void pp_filter_set_fb(struct program *);
+void pp_filter_set_clear_fb(struct program *);
+
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/pp_init.c b/src/gallium/auxiliary/postprocess/pp_init.c
new file mode 100644
index 0000000..7541799
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_init.c
@@ -0,0 +1,283 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "postprocess/filters.h"
+
+#include "pipe/p_screen.h"
+#include "util/u_inlines.h"
+#include "util/u_blit.h"
+#include "util/u_math.h"
+#include "cso_cache/cso_context.h"
+
+/** Initialize the post-processing queue. */
+struct pp_queue_t *
+pp_init(struct pipe_screen *pscreen, const unsigned int *enabled)
+{
+
+   unsigned int curpos = 0, i, tmp_req = 0;
+   struct pp_queue_t *ppq;
+   pp_func *tmp_q;
+
+   pp_debug("Initializing the post-processing queue.\n");
+
+   /* How many filters were requested? */
+   for (i = 0; i < PP_FILTERS; i++) {
+      if (enabled[i])
+         curpos++;
+   }
+   if (!curpos)
+      return NULL;
+
+   ppq = calloc(1, sizeof(struct pp_queue_t));
+   tmp_q = calloc(curpos, sizeof(pp_func));
+   ppq->shaders = calloc(curpos, sizeof(void *));
+   ppq->verts = calloc(curpos, sizeof(unsigned int));
+
+   if (!tmp_q || !ppq || !ppq->shaders || !ppq->verts)
+      goto error;
+
+   ppq->p = pp_init_prog(ppq, pscreen);
+   if (!ppq->p)
+      goto error;
+
+   /* Add the enabled filters to the queue, in order */
+   curpos = 0;
+   ppq->pp_queue = tmp_q;
+   for (i = 0; i < PP_FILTERS; i++) {
+      if (enabled[i]) {
+         ppq->pp_queue[curpos] = pp_filters[i].main;
+         tmp_req = MAX2(tmp_req, pp_filters[i].inner_tmps);
+
+         if (pp_filters[i].shaders) {
+            ppq->shaders[curpos] =
+               calloc(pp_filters[i].shaders + 1, sizeof(void *));
+            ppq->verts[curpos] = pp_filters[i].verts;
+            if (!ppq->shaders[curpos])
+               goto error;
+         }
+         pp_filters[i].init(ppq, curpos, enabled[i]);
+
+         curpos++;
+      }
+   }
+
+   ppq->p->blitctx = util_create_blit(ppq->p->pipe, ppq->p->cso);
+   if (!ppq->p->blitctx)
+      goto error;
+
+   ppq->n_filters = curpos;
+   ppq->n_tmp = (curpos > 2 ? 2 : 1);
+   ppq->n_inner_tmp = tmp_req;
+
+   ppq->fbos_init = false;
+
+   for (i = 0; i < curpos; i++)
+      ppq->shaders[i][0] = ppq->p->passvs;
+
+   pp_debug("Queue successfully allocated. %u filter(s).\n", curpos);
+
+   return ppq;
+
+ error:
+   pp_debug("Error setting up pp\n");
+
+   if (ppq)
+      free(ppq->p);
+   free(ppq);
+   free(tmp_q);
+
+   return NULL;
+}
+
+/** Free any allocated FBOs (temp buffers). Called after resizing for example. */
+void
+pp_free_fbos(struct pp_queue_t *ppq)
+{
+
+   unsigned int i;
+
+   if (!ppq->fbos_init)
+      return;
+
+   for (i = 0; i < ppq->n_tmp; i++) {
+      pipe_surface_reference(&ppq->tmps[i], NULL);
+      pipe_resource_reference(&ppq->tmp[i], NULL);
+   }
+   for (i = 0; i < ppq->n_inner_tmp; i++) {
+      pipe_surface_reference(&ppq->inner_tmps[i], NULL);
+      pipe_resource_reference(&ppq->inner_tmp[i], NULL);
+   }
+   pipe_surface_reference(&ppq->stencils, NULL);
+   pipe_resource_reference(&ppq->stencil, NULL);
+
+   ppq->fbos_init = false;
+}
+
+/** Free the pp queue. Called on context termination. */
+void
+pp_free(struct pp_queue_t *ppq)
+{
+
+   unsigned int i, j;
+
+   pp_free_fbos(ppq);
+
+   util_destroy_blit(ppq->p->blitctx);
+
+   cso_set_fragment_sampler_views(ppq->p->cso, 0, NULL);
+   cso_release_all(ppq->p->cso);
+
+   for (i = 0; i < ppq->n_filters; i++) {
+      for (j = 0; j < PP_MAX_PASSES && ppq->shaders[i][j]; j++) {
+         if (j >= ppq->verts[i]) {
+            ppq->p->pipe->delete_fs_state(ppq->p->pipe, ppq->shaders[i][j]);
+            ppq->shaders[i][j] = NULL;
+         }
+         else if (ppq->shaders[i][j] != ppq->p->passvs) {
+            ppq->p->pipe->delete_vs_state(ppq->p->pipe, ppq->shaders[i][j]);
+            ppq->shaders[i][j] = NULL;
+         }
+      }
+   }
+
+   cso_destroy_context(ppq->p->cso);
+   ppq->p->pipe->destroy(ppq->p->pipe);
+
+   free(ppq->p);
+   free(ppq->pp_queue);
+   free(ppq);
+
+   pp_debug("Queue taken down.\n");
+}
+
+/** Internal debug function. Should be available to final users. */
+void
+pp_debug(const char *fmt, ...)
+{
+   va_list ap;
+
+   if (!getenv("PP_DEBUG"))
+      return;
+
+   va_start(ap, fmt);
+   vfprintf(stderr, fmt, ap);
+   va_end(ap);
+}
+
+/** Allocate the temp FBOs. Called on makecurrent and resize. */
+void
+pp_init_fbos(struct pp_queue_t *ppq, const unsigned int w,
+             const unsigned int h, struct pipe_resource *indepth)
+{
+
+   struct program *p = ppq->p;  /* The lazy will inherit the earth */
+
+   unsigned int i;
+   struct pipe_resource tmp_res;
+
+   if (ppq->fbos_init)
+      return;
+
+   pp_debug("Initializing FBOs, size %ux%u\n", w, h);
+   pp_debug("Requesting %u temps and %u inner temps\n", ppq->n_tmp,
+            ppq->n_inner_tmp);
+
+   memset(&tmp_res, 0, sizeof(tmp_res));
+   tmp_res.target = PIPE_TEXTURE_2D;
+   tmp_res.format = p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+   tmp_res.width0 = w;
+   tmp_res.height0 = h;
+   tmp_res.depth0 = 1;
+   tmp_res.array_size = 1;
+   tmp_res.last_level = 0;
+   tmp_res.bind = p->surf.usage = PIPE_BIND_RENDER_TARGET;
+
+   if (!p->screen->is_format_supported(p->screen, tmp_res.format,
+                                       tmp_res.target, 1, tmp_res.bind))
+      pp_debug("Temp buffers' format fail\n");
+
+   for (i = 0; i < ppq->n_tmp; i++) {
+      ppq->tmp[i] = p->screen->resource_create(p->screen, &tmp_res);
+      ppq->tmps[i] = p->pipe->create_surface(p->pipe, ppq->tmp[i], &p->surf);
+
+      if (!ppq->tmp[i] || !ppq->tmps[i])
+         goto error;
+   }
+
+   for (i = 0; i < ppq->n_inner_tmp; i++) {
+      ppq->inner_tmp[i] = p->screen->resource_create(p->screen, &tmp_res);
+      ppq->inner_tmps[i] = p->pipe->create_surface(p->pipe,
+                                                   ppq->inner_tmp[i],
+                                                   &p->surf);
+
+      if (!ppq->inner_tmp[i] || !ppq->inner_tmps[i])
+         goto error;
+   }
+
+   tmp_res.format = p->surf.format = indepth->format;
+   tmp_res.bind = p->surf.usage = PIPE_BIND_DEPTH_STENCIL;
+   ppq->depth = indepth;
+   if (!ppq->depth)
+      goto error;
+
+   tmp_res.format = p->surf.format = PIPE_FORMAT_S8_USCALED_Z24_UNORM;
+
+   if (!p->screen->is_format_supported(p->screen, tmp_res.format,
+                                       tmp_res.target, 1, tmp_res.bind)) {
+
+      tmp_res.format = p->surf.format = PIPE_FORMAT_Z24_UNORM_S8_USCALED;
+
+      if (!p->screen->is_format_supported(p->screen, tmp_res.format,
+                                          tmp_res.target, 1, tmp_res.bind))
+         pp_debug("Temp Sbuffer format fail\n");
+   }
+
+   ppq->stencil = p->screen->resource_create(p->screen, &tmp_res);
+   ppq->stencils = p->pipe->create_surface(p->pipe, ppq->stencil, &p->surf);
+   if (!ppq->stencil || !ppq->stencils)
+      goto error;
+
+
+   p->framebuffer.width = w;
+   p->framebuffer.height = h;
+
+   p->viewport.scale[0] = p->viewport.translate[0] = (float) w / 2.0;
+   p->viewport.scale[1] = p->viewport.translate[1] = (float) h / 2.0;
+   p->viewport.scale[3] = 1.0f;
+   p->viewport.translate[3] = 0.0f;
+
+   ppq->fbos_init = true;
+
+   return;
+
+ error:
+   pp_debug("Failed to allocate temp buffers!\n");
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.c b/src/gallium/auxiliary/postprocess/pp_mlaa.c
new file mode 100644
index 0000000..476502f
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c
@@ -0,0 +1,304 @@
+/**
+ * Copyright (C) 2010 Jorge Jimenez (jorge@iryoku.com)
+ * Copyright (C) 2010 Belen Masia (bmasia@unizar.es)
+ * Copyright (C) 2010 Jose I. Echevarria (joseignacioechevarria@gmail.com)
+ * Copyright (C) 2010 Fernando Navarro (fernandn@microsoft.com)
+ * Copyright (C) 2010 Diego Gutierrez (diegog@unizar.es)
+ * Copyright (C) 2011 Lauri Kasanen (cand@gmx.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the following statement:
+ *
+ *       "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia,
+ *        Jose I. Echevarria, Fernando Navarro and Diego Gutierrez."
+ *
+ *       Only for use in the Mesa project, this point 2 is filled by naming the
+ *       technique Jimenez's MLAA in the Mesa config options.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of the copyright holders.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "postprocess/postprocess.h"
+#include "postprocess/pp_mlaa.h"
+#include "postprocess/pp_filters.h"
+#include "util/u_blit.h"
+#include "util/u_box.h"
+#include "util/u_sampler.h"
+#include "util/u_inlines.h"
+#include "pipe/p_screen.h"
+
+#define IMM_SPACE 80
+
+static float constants[] = { 1, 1, 0, 0 };
+static unsigned int dimensions[2] = { 0, 0 };
+
+static struct pipe_resource *constbuf, *areamaptex;
+
+/** Upload the constants. */
+static void
+up_consts(struct pipe_context *pipe)
+{
+   struct pipe_box box;
+
+   u_box_2d(0, 0, sizeof(constants), 1, &box);
+   pipe->transfer_inline_write(pipe, constbuf, 0, PIPE_TRANSFER_WRITE,
+                               &box, constants, sizeof(constants),
+                               sizeof(constants));
+}
+
+/** Run function of the MLAA filter. */
+static void
+pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
+                   struct pipe_resource *out, unsigned int n, bool iscolor)
+{
+
+   struct program *p = ppq->p;
+
+   struct pipe_depth_stencil_alpha_state mstencil;
+   struct pipe_sampler_view v_tmp, *arr[3];
+
+   unsigned int w = p->framebuffer.width;
+   unsigned int h = p->framebuffer.height;
+
+   const struct pipe_stencil_ref ref = { {1} };
+   memset(&mstencil, 0, sizeof(mstencil));
+   cso_set_stencil_ref(p->cso, &ref);
+
+   /* Init the pixel size constant */
+   if (dimensions[0] != p->framebuffer.width ||
+       dimensions[1] != p->framebuffer.height) {
+      constants[0] = 1.0 / p->framebuffer.width;
+      constants[1] = 1.0 / p->framebuffer.height;
+
+      up_consts(p->pipe);
+      dimensions[0] = p->framebuffer.width;
+      dimensions[1] = p->framebuffer.height;
+   }
+
+   p->pipe->set_constant_buffer(p->pipe, PIPE_SHADER_VERTEX, 0, constbuf);
+   p->pipe->set_constant_buffer(p->pipe, PIPE_SHADER_FRAGMENT, 0, constbuf);
+
+   mstencil.stencil[0].enabled = 1;
+   mstencil.stencil[0].valuemask = mstencil.stencil[0].writemask = ~0;
+   mstencil.stencil[0].func = PIPE_FUNC_ALWAYS;
+   mstencil.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP;
+   mstencil.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP;
+   mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
+
+   p->framebuffer.zsbuf = ppq->stencils;
+
+   /* First pass: depth edge detection */
+   if (iscolor)
+      pp_filter_setup_in(p, in);
+   else
+      pp_filter_setup_in(p, ppq->depth);
+
+   pp_filter_setup_out(p, ppq->inner_tmp[0]);
+
+   pp_filter_set_fb(p);
+   pp_filter_misc_state(p);
+   cso_set_depth_stencil_alpha(p->cso, &mstencil);
+   p->pipe->clear(p->pipe, PIPE_CLEAR_STENCIL | PIPE_CLEAR_COLOR,
+                  p->clear_color, 0, 0);
+
+   cso_single_sampler(p->cso, 0, &p->sampler_point);
+   cso_single_sampler_done(p->cso);
+   cso_set_fragment_sampler_views(p->cso, 1, &p->view);
+
+   cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]);    /* offsetvs */
+   cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][2]);
+
+   pp_filter_draw(p);
+   pp_filter_end_pass(p);
+
+
+   /* Second pass: blend weights */
+   /* Sampler order: areamap, edgesmap, edgesmapL (reversed, thx compiler) */
+   mstencil.stencil[0].func = PIPE_FUNC_EQUAL;
+   mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP;
+   cso_set_depth_stencil_alpha(p->cso, &mstencil);
+
+   pp_filter_setup_in(p, areamaptex);
+   pp_filter_setup_out(p, ppq->inner_tmp[1]);
+
+   u_sampler_view_default_template(&v_tmp, ppq->inner_tmp[0],
+                                   ppq->inner_tmp[0]->format);
+   arr[1] = arr[2] = p->pipe->create_sampler_view(p->pipe,
+                                                  ppq->inner_tmp[0], &v_tmp);
+
+   pp_filter_set_clear_fb(p);
+
+   cso_single_sampler(p->cso, 0, &p->sampler_point);
+   cso_single_sampler(p->cso, 1, &p->sampler_point);
+   cso_single_sampler(p->cso, 2, &p->sampler);
+   cso_single_sampler_done(p->cso);
+
+   arr[0] = p->view;
+   cso_set_fragment_sampler_views(p->cso, 3, arr);
+
+   cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]);    /* passvs */
+   cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][3]);
+
+   pp_filter_draw(p);
+   pp_filter_end_pass(p);
+   pipe_sampler_view_reference(&arr[1], NULL);
+
+
+   /* Third pass: smoothed edges */
+   /* Sampler order: colormap, blendmap (wtf compiler) */
+   pp_filter_setup_in(p, ppq->inner_tmp[1]);
+   pp_filter_setup_out(p, out);
+
+   pp_filter_set_fb(p);
+
+   /* Blit the input to the output */
+   util_blit_pixels(p->blitctx, in, 0, 0, 0,
+                    w, h, 0, p->framebuffer.cbufs[0],
+                    0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST);
+
+   u_sampler_view_default_template(&v_tmp, in, in->format);
+   arr[0] = p->pipe->create_sampler_view(p->pipe, in, &v_tmp);
+
+   cso_single_sampler(p->cso, 0, &p->sampler_point);
+   cso_single_sampler(p->cso, 1, &p->sampler_point);
+   cso_single_sampler_done(p->cso);
+
+   arr[1] = p->view;
+   cso_set_fragment_sampler_views(p->cso, 2, arr);
+
+   cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]);    /* offsetvs */
+   cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][4]);
+
+   p->blend.rt[0].blend_enable = 1;
+   cso_set_blend(p->cso, &p->blend);
+
+   pp_filter_draw(p);
+   pp_filter_end_pass(p);
+   pipe_sampler_view_reference(&arr[0], NULL);
+
+   p->blend.rt[0].blend_enable = 0;
+   p->framebuffer.zsbuf = NULL;
+}
+
+/** The init function of the MLAA filter. */
+static void
+pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned int n,
+                        unsigned int val, bool iscolor)
+{
+
+   struct pipe_box box;
+   struct pipe_resource res;
+
+   char *tmp_text = calloc(sizeof(blend2fs_1) + sizeof(blend2fs_2) +
+                           IMM_SPACE, sizeof(char));
+
+   constbuf = pipe_buffer_create(ppq->p->screen, PIPE_BIND_CONSTANT_BUFFER,
+                                 PIPE_USAGE_STATIC, sizeof(constants));
+   if (!constbuf) {
+      pp_debug("Failed to allocate constant buffer\n");
+      return;
+   }
+
+
+   pp_debug("mlaa: using %u max search steps\n", val);
+
+   if (!tmp_text) {
+      pp_debug("Failed to allocate shader space\n");
+      return;
+   }
+   sprintf(tmp_text, "%s"
+           "IMM FLT32 {    %.8f,     0.0000,     0.0000,     0.0000}\n"
+           "%s\n", blend2fs_1, (float) val, blend2fs_2);
+
+   memset(&res, 0, sizeof(res));
+
+   res.target = PIPE_TEXTURE_2D;
+   res.format = PIPE_FORMAT_R8G8_UNORM;
+   res.width0 = res.height0 = 165;
+   res.bind = PIPE_BIND_SAMPLER_VIEW;
+   res.usage = PIPE_USAGE_STATIC;
+   res.depth0 = res.array_size = res.nr_samples = 1;
+
+   if (!ppq->p->screen->is_format_supported(ppq->p->screen, res.format,
+                                            res.target, 1, res.bind))
+      pp_debug("Areamap format not supported\n");
+
+   areamaptex = ppq->p->screen->resource_create(ppq->p->screen, &res);
+   u_box_2d(0, 0, 165, 165, &box);
+
+   ppq->p->pipe->transfer_inline_write(ppq->p->pipe, areamaptex, 0,
+                                       PIPE_TRANSFER_WRITE, &box,
+                                       areamap, 165 * 2, sizeof(areamap));
+
+
+
+   ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, offsetvs, true,
+                                         "offsetvs");
+   if (iscolor)
+      ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, color1fs,
+                                            false, "color1fs");
+   else
+      ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, depth1fs,
+                                            false, "depth1fs");
+   ppq->shaders[n][3] = pp_tgsi_to_state(ppq->p->pipe, tmp_text, false,
+                                         "blend2fs");
+   ppq->shaders[n][4] = pp_tgsi_to_state(ppq->p->pipe, neigh3fs, false,
+                                         "neigh3fs");
+
+   free(tmp_text);
+}
+
+/** Short wrapper to init the depth version. */
+void
+pp_jimenezmlaa_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+
+   pp_jimenezmlaa_init_run(ppq, n, val, false);
+}
+
+/** Short wrapper to init the color version. */
+void
+pp_jimenezmlaa_init_color(struct pp_queue_t *ppq, unsigned int n,
+                          unsigned int val)
+{
+
+   pp_jimenezmlaa_init_run(ppq, n, val, true);
+}
+
+/** Short wrapper to run the depth version. */
+void
+pp_jimenezmlaa(struct pp_queue_t *ppq, struct pipe_resource *in,
+               struct pipe_resource *out, unsigned int n)
+{
+   pp_jimenezmlaa_run(ppq, in, out, n, false);
+}
+
+/** Short wrapper to run the color version. */
+void
+pp_jimenezmlaa_color(struct pp_queue_t *ppq, struct pipe_resource *in,
+                     struct pipe_resource *out, unsigned int n)
+{
+   pp_jimenezmlaa_run(ppq, in, out, n, true);
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.h b/src/gallium/auxiliary/postprocess/pp_mlaa.h
new file mode 100644
index 0000000..9972d59
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.h
@@ -0,0 +1,342 @@
+/**
+ * Copyright (C) 2010 Jorge Jimenez (jorge@iryoku.com)
+ * Copyright (C) 2010 Belen Masia (bmasia@unizar.es)
+ * Copyright (C) 2010 Jose I. Echevarria (joseignacioechevarria@gmail.com)
+ * Copyright (C) 2010 Fernando Navarro (fernandn@microsoft.com)
+ * Copyright (C) 2010 Diego Gutierrez (diegog@unizar.es)
+ * Copyright (C) 2011 Lauri Kasanen (cand@gmx.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the following statement:
+ *
+ *       "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia,
+ *        Jose I. Echevarria, Fernando Navarro and Diego Gutierrez."
+ *
+ *       Only for use in the Mesa project, this point 2 is filled by naming the
+ *       technique Jimenez's MLAA in the Mesa config options.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of the copyright holders.
+ */
+
+#ifndef PP_MLAA_H
+#define PP_MLAA_H
+
+#include "postprocess/pp_mlaa_areamap.h"
+
+static const char depth1fs[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL IN[1], GENERIC[10], PERSPECTIVE\n"
+   "DCL IN[2], GENERIC[11], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0..2]\n"
+   "IMM FLT32 {    0.0030,     0.0000,     1.0000,     0.0000}\n"
+   "  0: TEX TEMP[0].x, IN[1].xyyy, SAMP[0], 2D\n"
+   "  1: MOV TEMP[1].x, TEMP[0].xxxx\n"
+   "  2: TEX TEMP[0].x, IN[1].zwww, SAMP[0], 2D\n"
+   "  3: MOV TEMP[1].y, TEMP[0].xxxx\n"
+   "  4: TEX TEMP[0].x, IN[2].xyyy, SAMP[0], 2D\n"
+   "  5: MOV TEMP[1].z, TEMP[0].xxxx\n"
+   "  6: TEX TEMP[0].x, IN[2].zwww, SAMP[0], 2D\n"
+   "  7: MOV TEMP[1].w, TEMP[0].xxxx\n"
+   "  8: TEX TEMP[0].x, IN[0].xyyy, SAMP[0], 2D\n"
+   "  9: ADD TEMP[2], TEMP[0].xxxx, -TEMP[1]\n"
+   " 10: ABS TEMP[0], TEMP[2]\n"
+   " 11: SGE TEMP[2], TEMP[0], IMM[0].xxxx\n"
+   " 12: DP4 TEMP[0].x, TEMP[2], IMM[0].zzzz\n"
+   " 13: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy\n"
+   " 14: IF TEMP[1].xxxx :16\n"
+   " 15:   KILP\n"
+   " 16: ENDIF\n"
+   " 17: MOV OUT[0], TEMP[2]\n"
+   " 18: END\n";
+
+
+static const char color1fs[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL IN[1], GENERIC[10], PERSPECTIVE\n"
+   "DCL IN[2], GENERIC[11], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0..2]\n"
+   "IMM FLT32 {    0.2126,     0.7152,     0.0722,     0.1000}\n"
+   "IMM FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[1].xyz, IN[1].xyyy, SAMP[0], 2D\n"
+   "  1: DP3 TEMP[0].x, TEMP[1].xyzz, IMM[0]\n"
+   "  2: TEX TEMP[1].xyz, IN[1].zwww, SAMP[0], 2D\n"
+   "  3: DP3 TEMP[0].y, TEMP[1].xyzz, IMM[0].xyzz\n"
+   "  4: TEX TEMP[1].xyz, IN[2].xyyy, SAMP[0], 2D\n"
+   "  5: DP3 TEMP[0].z, TEMP[1].xyzz, IMM[0].xyzz\n"
+   "  6: TEX TEMP[1].xyz, IN[2].zwww, SAMP[0], 2D\n"
+   "  7: DP3 TEMP[0].w, TEMP[1].xyzz, IMM[0].xyzz\n"
+   "  8: TEX TEMP[1].xyz, IN[0].xyyy, SAMP[0], 2D\n"
+   "  9: DP3 TEMP[2].x, TEMP[1].xyzz, IMM[0].xyzz\n"
+   " 10: ADD TEMP[1], TEMP[2].xxxx, -TEMP[0]\n"
+   " 11: ABS TEMP[0], TEMP[1]\n"
+   " 12: SGE TEMP[2], TEMP[0], IMM[0].wwww\n"
+   " 13: DP4 TEMP[0].x, TEMP[2], IMM[1].xxxx\n"
+   " 14: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy\n"
+   " 15: IF TEMP[1].xxxx :17\n"
+   " 16:   KILP\n"
+   " 17: ENDIF\n"
+   " 18: MOV OUT[0], TEMP[2]\n"
+   " 19: END\n";
+
+
+static const char neigh3fs[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL IN[1], GENERIC[10], PERSPECTIVE\n"
+   "DCL IN[2], GENERIC[11], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL SAMP[1]\n"
+   "DCL TEMP[0..8]\n"
+   "IMM FLT32 {    1.0000,     0.00001,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[1], 2D\n"
+   "  1: MOV TEMP[1].x, TEMP[0].xxxx\n"
+   "  2: TEX TEMP[2].y, IN[2].zwww, SAMP[1], 2D\n"
+   "  3: MOV TEMP[1].y, TEMP[2].yyyy\n"
+   "  4: MOV TEMP[1].z, TEMP[0].zzzz\n"
+   "  5: TEX TEMP[1].w, IN[2].xyyy, SAMP[1], 2D\n"
+   "  6: MUL TEMP[4], TEMP[1], TEMP[1]\n"
+   "  7: MUL TEMP[5], TEMP[4], TEMP[1]\n"
+   "  8: DP4 TEMP[1].x, TEMP[5], IMM[0].xxxx\n"
+   "  9: SLT TEMP[4].x, TEMP[1].xxxx, IMM[0].yyyy\n"
+   " 10: IF TEMP[4].xxxx :12\n"
+   " 11:   KILP\n"
+   " 12: ENDIF\n"
+   " 13: TEX TEMP[4], IN[0].xyyy, SAMP[0], 2D\n"
+   " 14: TEX TEMP[6], IN[1].zwww, SAMP[0], 2D\n"
+   " 15: ADD TEMP[7].x, IMM[0].xxxx, -TEMP[0].xxxx\n"
+   " 16: MUL TEMP[8], TEMP[4], TEMP[7].xxxx\n"
+   " 17: MAD TEMP[7], TEMP[6], TEMP[0].xxxx, TEMP[8]\n"
+   " 18: MUL TEMP[6], TEMP[7], TEMP[5].xxxx\n"
+   " 19: TEX TEMP[7], IN[2].zwww, SAMP[0], 2D\n"
+   " 20: ADD TEMP[8].x, IMM[0].xxxx, -TEMP[2].yyyy\n"
+   " 21: MUL TEMP[3], TEMP[4], TEMP[8].xxxx\n"
+   " 22: MAD TEMP[8], TEMP[7], TEMP[2].yyyy, TEMP[3]\n"
+   " 23: MAD TEMP[2], TEMP[8], TEMP[5].yyyy, TEMP[6]\n"
+   " 24: TEX TEMP[6], IN[1].xyyy, SAMP[0], 2D\n"
+   " 25: ADD TEMP[7].x, IMM[0].xxxx, -TEMP[0].zzzz\n"
+   " 26: MUL TEMP[8], TEMP[4], TEMP[7].xxxx\n"
+   " 27: MAD TEMP[7], TEMP[6], TEMP[0].zzzz, TEMP[8]\n"
+   " 28: MAD TEMP[0], TEMP[7], TEMP[5].zzzz, TEMP[2]\n"
+   " 29: TEX TEMP[2], IN[2].xyyy, SAMP[0], 2D\n"
+   " 30: ADD TEMP[6].x, IMM[0].xxxx, -TEMP[1].wwww\n"
+   " 31: MUL TEMP[7], TEMP[4], TEMP[6].xxxx\n"
+   " 32: MAD TEMP[4], TEMP[2], TEMP[1].wwww, TEMP[7]\n"
+   " 33: MAD TEMP[2], TEMP[4], TEMP[5].wwww, TEMP[0]\n"
+   " 34: RCP TEMP[0].x, TEMP[1].xxxx\n"
+   " 35: MUL OUT[0], TEMP[2], TEMP[0].xxxx\n"
+   " 36: END\n";
+
+
+static const char offsetvs[] = "VERT\n"
+   "DCL IN[0]\n"
+   "DCL IN[1]\n"
+   "DCL OUT[0], POSITION\n"
+   "DCL OUT[1], GENERIC[0]\n"
+   "DCL OUT[2], GENERIC[10]\n"
+   "DCL OUT[3], GENERIC[11]\n"
+   "DCL CONST[0]\n"
+   "IMM FLT32 {    1.0000,     0.0000,    -1.0000,     0.0000}\n"
+   "  0: MOV OUT[0], IN[0]\n"
+   "  1: MOV OUT[1], IN[1]\n"
+   "  2: MAD OUT[2], CONST[0].xyxy, IMM[0].zyyz, IN[1].xyxy\n"
+   "  3: MAD OUT[3], CONST[0].xyxy, IMM[0].xyyx, IN[1].xyxy\n"
+   "  4: END\n";
+
+
+static const char blend2fs_1[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL SAMP[1]\n"
+   "DCL SAMP[2]\n"
+   "DCL CONST[0]\n"
+   "DCL TEMP[0..6]\n"
+   "IMM FLT32 {    0.0000,    -0.2500,     0.00609756,     0.5000}\n"
+   "IMM FLT32 {   -1.5000,    -2.0000,     0.9000,     1.5000}\n"
+   "IMM FLT32 {    2.0000,     1.0000,     4.0000,    33.0000}\n";
+
+static const char blend2fs_2[] =
+   "  0: MOV TEMP[0], IMM[0].xxxx\n"
+   "  1: TEX TEMP[1], IN[0].xyyy, SAMP[1], 2D\n"
+   "  2: MOV TEMP[2].x, TEMP[1]\n"
+   "  3: SNE TEMP[3].x, TEMP[1].yyyy, IMM[0].xxxx\n"
+   "  4: IF TEMP[3].xxxx :76\n"
+   "  5:   MOV TEMP[1].xy, IN[0].xyxx\n"
+   "  6:   MOV TEMP[4].x, IMM[1].xxxx\n"
+   "  7:   BGNLOOP :24\n"
+   "  8:     MUL TEMP[5].x, IMM[1].yyyy, IMM[3].xxxx\n"
+   "  9:     SLE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx\n"
+   " 10:     IF TEMP[6].xxxx :12\n"
+   " 11:       BRK\n"
+   " 12:     ENDIF\n"
+   " 13:     MOV TEMP[4].y, IMM[0].xxxx\n"
+   " 14:     MAD TEMP[3].xyz, CONST[0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n"
+   " 15:     MOV TEMP[3].w, IMM[0].xxxx\n"
+   " 16:     TXL TEMP[5], TEMP[3], SAMP[2], 2D\n"
+   " 17:     MOV TEMP[3].x, TEMP[5].yyyy\n"
+   " 18:     SLT TEMP[6].x, TEMP[5].yyyy, IMM[1].zzzz\n"
+   " 19:     IF TEMP[6].xxxx :21\n"
+   " 20:       BRK\n"
+   " 21:     ENDIF\n"
+   " 22:     ADD TEMP[6].x, TEMP[4].xxxx, IMM[1].yyyy\n"
+   " 23:     MOV TEMP[4].x, TEMP[6].xxxx\n"
+   " 24:   ENDLOOP :7\n"
+   " 25:   ADD TEMP[1].x, TEMP[4].xxxx, IMM[1].wwww\n"
+   " 26:   MAD TEMP[6].x, -IMM[2].xxxx, TEMP[3].xxxx, TEMP[1].xxxx\n"
+   " 27:   MUL TEMP[1].x, IMM[1].yyyy, IMM[3].xxxx\n"
+   " 28:   MAX TEMP[4].x, TEMP[6].xxxx, TEMP[1].xxxx\n"
+   " 29:   MOV TEMP[1].x, TEMP[4].xxxx\n"
+   " 30:   MOV TEMP[3].xy, IN[0].xyxx\n"
+   " 31:   MOV TEMP[5].x, IMM[1].wwww\n"
+   " 32:   BGNLOOP :49\n"
+   " 33:     MUL TEMP[6].x, IMM[2].xxxx, IMM[3].xxxx\n"
+   " 34:     SGE TEMP[4].x, TEMP[5].xxxx, TEMP[6].xxxx\n"
+   " 35:     IF TEMP[4].xxxx :37\n"
+   " 36:       BRK\n"
+   " 37:     ENDIF\n"
+   " 38:     MOV TEMP[5].y, IMM[0].xxxx\n"
+   " 39:     MAD TEMP[4].xyz, CONST[0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n"
+   " 40:     MOV TEMP[4].w, IMM[0].xxxx\n"
+   " 41:     TXL TEMP[6].xy, TEMP[4], SAMP[2], 2D\n"
+   " 42:     MOV TEMP[4].x, TEMP[6].yyyy\n"
+   " 43:     SLT TEMP[0].x, TEMP[6].yyyy, IMM[1].zzzz\n"
+   " 44:     IF TEMP[0].xxxx :46\n"
+   " 45:       BRK\n"
+   " 46:     ENDIF\n"
+   " 47:     ADD TEMP[6].x, TEMP[5].xxxx, IMM[2].xxxx\n"
+   " 48:     MOV TEMP[5].x, TEMP[6].xxxx\n"
+   " 49:   ENDLOOP :32\n"
+   " 50:   ADD TEMP[3].x, TEMP[5].xxxx, IMM[1].xxxx\n"
+   " 51:   MAD TEMP[5].x, IMM[2].xxxx, TEMP[4].xxxx, TEMP[3].xxxx\n"
+   " 52:   MUL TEMP[3].x, IMM[2].xxxx, IMM[3].xxxx\n"
+   " 53:   MIN TEMP[4].x, TEMP[5].xxxx, TEMP[3].xxxx\n"
+   " 54:   MOV TEMP[3].x, TEMP[1].xxxx\n"
+   " 55:   MOV TEMP[3].y, TEMP[4].xxxx\n"
+   " 56:   MOV TEMP[5].yw, IMM[0].yyyy\n"
+   " 57:   MOV TEMP[5].x, TEMP[1].xxxx\n"
+   " 58:   ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].yyyy\n"
+   " 59:   MOV TEMP[5].z, TEMP[1].xxxx\n"
+   " 60:   MAD TEMP[1], TEMP[5], CONST[0].xyxy, IN[0].xyxy\n"
+   " 61:   MOV TEMP[4], TEMP[1].xyyy\n"
+   " 62:   MOV TEMP[4].w, IMM[0].xxxx\n"
+   " 63:   TXL TEMP[5].x, TEMP[4], SAMP[2], 2D\n"
+   " 64:   MOV TEMP[4].x, TEMP[5].xxxx\n"
+   " 65:   MOV TEMP[5], TEMP[1].zwww\n"
+   " 66:   MOV TEMP[5].w, IMM[0].xxxx\n"
+   " 67:   TXL TEMP[1].x, TEMP[5], SAMP[2], 2D\n"
+   " 68:   MOV TEMP[4].y, TEMP[1].xxxx\n"
+   " 69:   MUL TEMP[5].xy, IMM[2].zzzz, TEMP[4].xyyy\n"
+   " 70:   ROUND TEMP[1].xy, TEMP[5].xyyy\n"
+   " 71:   ABS TEMP[4].xy, TEMP[3].xyyy\n"
+   " 72:   MAD TEMP[3].xy, IMM[2].wwww, TEMP[1].xyyy, TEMP[4].xyyy\n"
+   " 73:   MUL TEMP[5].xyz, TEMP[3].xyyy, IMM[0].zzzz\n"
+   " 74:   MOV TEMP[5].w, IMM[0].xxxx\n"
+   " 75:   TXL TEMP[0].xy, TEMP[5], SAMP[0], 2D\n"
+   " 76: ENDIF\n"
+   " 77: SNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx\n"
+   " 78: IF TEMP[1].xxxx :151\n"
+   " 79:   MOV TEMP[1].xy, IN[0].xyxx\n"
+   " 80:   MOV TEMP[3].x, IMM[1].xxxx\n"
+   " 81:   BGNLOOP :98\n"
+   " 82:     MUL TEMP[4].x, IMM[1].yyyy, IMM[3].xxxx\n"
+   " 83:     SLE TEMP[5].x, TEMP[3].xxxx, TEMP[4].xxxx\n"
+   " 84:     IF TEMP[5].xxxx :86\n"
+   " 85:       BRK\n"
+   " 86:     ENDIF\n"
+   " 87:     MOV TEMP[3].y, IMM[0].xxxx\n"
+   " 88:     MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n"
+   " 89:     MOV TEMP[5].w, IMM[0].xxxx\n"
+   " 90:     TXL TEMP[4], TEMP[5], SAMP[2], 2D\n"
+   " 91:     MOV TEMP[2].x, TEMP[4].xxxx\n"
+   " 92:     SLT TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz\n"
+   " 93:     IF TEMP[5].xxxx :95\n"
+   " 94:       BRK\n"
+   " 95:     ENDIF\n"
+   " 96:     ADD TEMP[4].x, TEMP[3].xxxx, IMM[1].yyyy\n"
+   " 97:     MOV TEMP[3].x, TEMP[4].xxxx\n"
+   " 98:   ENDLOOP :81\n"
+   " 99:   ADD TEMP[1].x, TEMP[3].xxxx, IMM[1].wwww\n"
+   "100:   MAD TEMP[6].x, -IMM[2].xxxx, TEMP[2].xxxx, TEMP[1].xxxx\n"
+   "101:   MUL TEMP[1].x, IMM[1].yyyy, IMM[3].xxxx\n"
+   "102:   MAX TEMP[3].x, TEMP[6].xxxx, TEMP[1].xxxx\n"
+   "103:   MOV TEMP[1].x, TEMP[3].xxxx\n"
+   "104:   MOV TEMP[2].xy, IN[0].xyxx\n"
+   "105:   MOV TEMP[4].x, IMM[1].wwww\n"
+   "106:   BGNLOOP :123\n"
+   "107:     MUL TEMP[5].x, IMM[2].xxxx, IMM[3].xxxx\n"
+   "108:     SGE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx\n"
+   "109:     IF TEMP[6].xxxx :111\n"
+   "110:       BRK\n"
+   "111:     ENDIF\n"
+   "112:     MOV TEMP[4].y, IMM[0].xxxx\n"
+   "113:     MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n"
+   "114:     MOV TEMP[5].w, IMM[0].xxxx\n"
+   "115:     TXL TEMP[6], TEMP[5], SAMP[2], 2D\n"
+   "116:     MOV TEMP[3].x, TEMP[6].xxxx\n"
+   "117:     SLT TEMP[5].x, TEMP[6].xxxx, IMM[1].zzzz\n"
+   "118:     IF TEMP[5].xxxx :120\n"
+   "119:       BRK\n"
+   "120:     ENDIF\n"
+   "121:     ADD TEMP[6].x, TEMP[4].xxxx, IMM[2].xxxx\n"
+   "122:     MOV TEMP[4].x, TEMP[6].xxxx\n"
+   "123:   ENDLOOP :106\n"
+   "124:   ADD TEMP[2].x, TEMP[4].xxxx, IMM[1].xxxx\n"
+   "125:   MAD TEMP[4].x, IMM[2].xxxx, TEMP[3].xxxx, TEMP[2].xxxx\n"
+   "126:   MUL TEMP[2].x, IMM[2].xxxx, IMM[3].xxxx\n"
+   "127:   MIN TEMP[3].x, TEMP[4].xxxx, TEMP[2].xxxx\n"
+   "128:   MOV TEMP[2].x, TEMP[1].xxxx\n"
+   "129:   MOV TEMP[2].y, TEMP[3].xxxx\n"
+   "130:   MOV TEMP[4].xz, IMM[0].yyyy\n"
+   "131:   MOV TEMP[4].y, TEMP[1].xxxx\n"
+   "132:   ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].yyyy\n"
+   "133:   MOV TEMP[4].w, TEMP[1].xxxx\n"
+   "134:   MAD TEMP[1], TEMP[4], CONST[0].xyxy, IN[0].xyxy\n"
+   "135:   MOV TEMP[3], TEMP[1].xyyy\n"
+   "136:   MOV TEMP[3].w, IMM[0].xxxx\n"
+   "137:   TXL TEMP[4].y, TEMP[3], SAMP[2], 2D\n"
+   "138:   MOV TEMP[3].x, TEMP[4].yyyy\n"
+   "139:   MOV TEMP[4], TEMP[1].zwww\n"
+   "140:   MOV TEMP[4].w, IMM[0].xxxx\n"
+   "141:   TXL TEMP[1].y, TEMP[4], SAMP[2], 2D\n"
+   "142:   MOV TEMP[3].y, TEMP[1].yyyy\n"
+   "143:   MUL TEMP[4].xy, IMM[2].zzzz, TEMP[3].xyyy\n"
+   "144:   ROUND TEMP[1].xy, TEMP[4].xyyy\n"
+   "145:   ABS TEMP[3].xy, TEMP[2].xyyy\n"
+   "146:   MAD TEMP[2].xy, IMM[2].wwww, TEMP[1].xyyy, TEMP[3].xyyy\n"
+   "147:   MUL TEMP[3].xyz, TEMP[2].xyyy, IMM[0].zzzz\n"
+   "148:   MOV TEMP[3].w, IMM[0].xxxx\n"
+   "149:   TXL TEMP[1].xy, TEMP[3], SAMP[0], 2D\n"
+   "150:   MOV TEMP[0].zw, TEMP[1].yyxy\n"
+   "151: ENDIF\n"
+   "152: MOV OUT[0], TEMP[0]\n"
+   "153: END\n";
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h b/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h
new file mode 100644
index 0000000..1446ff2
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h
@@ -0,0 +1,2821 @@
+/**
+ * Copyright (C) 2010 Jorge Jimenez (jorge@iryoku.com)
+ * Copyright (C) 2010 Belen Masia (bmasia@unizar.es)
+ * Copyright (C) 2010 Jose I. Echevarria (joseignacioechevarria@gmail.com)
+ * Copyright (C) 2010 Fernando Navarro (fernandn@microsoft.com)
+ * Copyright (C) 2010 Diego Gutierrez (diegog@unizar.es)
+ * Copyright (C) 2011 Lauri Kasanen (cand@gmx.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the following statement:
+ *
+ *       "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia,
+ *        Jose I. Echevarria, Fernando Navarro and Diego Gutierrez."
+ *
+ *       Only for use in the Mesa project, this point 2 is filled by naming the
+ *       technique Jimenez's MLAA in the Mesa config options.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of the copyright holders.
+ */
+
+#ifndef PP_MLAA_AREAMAP_H
+#define PP_MLAA_AREAMAP_H
+
+static const unsigned char areamap[] = {
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0,
+   31, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51,
+   0, 21, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, 0, 21, 0, 4, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 3, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0,
+   63, 0, 36, 0, 15, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72,
+   0, 47, 0, 28, 0, 12, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0,
+   79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85,
+   0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0,
+   9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17,
+   0, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 0,
+   89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92,
+   0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 1, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0,
+   23, 0, 15, 0, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30,
+   0, 21, 0, 13, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0,
+   95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98,
+   0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0,
+   35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40,
+   0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 0,
+   100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0,
+   5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102,
+   0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10,
+   0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0,
+   44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48,
+   0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 1, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0,
+   103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0,
+   14, 0, 9, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105,
+   0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18,
+   0, 13, 0, 8, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0,
+   52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55,
+   0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0,
+   22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107,
+   0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26,
+   0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0,
+   58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0,
+   3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61,
+   0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7,
+   0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0,
+   29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109,
+   0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32,
+   0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0,
+   63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0,
+   10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66,
+   0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13,
+   0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0,
+   35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110,
+   0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38,
+   0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0,
+   68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0,
+   16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70,
+   0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19,
+   0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0,
+   41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0,
+   2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112,
+   0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43,
+   0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5,
+   0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0,
+   72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0,
+   22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74,
+   0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24,
+   0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0,
+   46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0,
+   8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113,
+   0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48,
+   0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10,
+   0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0,
+   75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0,
+   27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77,
+   0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29,
+   0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0,
+   50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0,
+   13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114,
+   0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52,
+   0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15,
+   0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0,
+   78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0,
+   31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0,
+   2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80,
+   0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34,
+   0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4,
+   0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0,
+   54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0,
+   17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115,
+   0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55,
+   0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20,
+   0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0,
+   81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0,
+   36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0,
+   6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82,
+   0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37,
+   0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8,
+   0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0,
+   57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0,
+   22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115,
+   0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59,
+   0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24,
+   0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0,
+   83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0,
+   39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0,
+   10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85,
+   0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41,
+   0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12,
+   0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0,
+   60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0,
+   25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0,
+   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 63, 0, 85, 0, 95, 0, 102,
+   0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0,
+      119,
+   0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0,
+      122,
+   0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 62, 0,
+      63,
+   0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0,
+      116,
+   0, 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0,
+      121,
+   0, 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0,
+      123,
+   0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 31, 31, 0, 63, 0, 85, 0, 95, 0, 102, 0, 106,
+   0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0,
+      119,
+   0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0,
+      122,
+   0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 31, 31, 0, 63, 0,
+      85,
+   0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0,
+      117,
+   0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0,
+      121,
+   0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0,
+      123,
+   0, 0, 0, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92,
+   0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0,
+      109,
+   0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0,
+      115,
+   0, 115, 0, 115, 0, 116, 0, 63, 0, 20, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79,
+   0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106,
+   0, 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0,
+      113,
+   0, 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0,
+   10, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95,
+   0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0,
+      110,
+   0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0,
+      115,
+   0, 115, 0, 116, 63, 0, 10, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85,
+   0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107,
+   0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0,
+      114,
+   0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 6, 0, 21, 0, 36,
+   0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89,
+   0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103,
+   0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 85, 0,
+      31,
+   0, 12, 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81,
+   0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100,
+   0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0,
+      108,
+   0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 6, 6, 0, 21, 0, 36, 0, 47,
+   0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92,
+   0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104,
+   0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 85, 0, 31, 0, 6, 6,
+   0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85,
+   0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102,
+   0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0,
+      109,
+   0, 0, 0, 0, 0, 0, 0, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58,
+   0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88,
+   0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100,
+   0, 101, 0, 102, 0, 102, 0, 95, 0, 51, 0, 21, 0, 8, 0, 15, 0, 28, 0, 38,
+   0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82,
+   0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97,
+   0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0,
+   51, 0, 21, 0, 4, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63,
+   0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90,
+   0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101,
+   0, 102, 0, 102, 95, 0, 51, 0, 21, 0, 4, 4, 0, 15, 0, 28, 0, 38, 0, 46,
+   0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85,
+   0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98,
+   0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
+   0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67,
+   0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87,
+   0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 102, 0, 63,
+   0, 36, 0, 15, 0, 6, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55,
+   0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83,
+   0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95,
+   0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 3, 3, 0, 12,
+   0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70,
+   0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89,
+   0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 102, 0, 63, 0, 36, 0,
+   15, 0, 3, 3, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60,
+   0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85,
+   0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 0, 19, 0, 27, 0, 34,
+   0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71,
+   0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87,
+   0, 88, 0, 89, 0, 90, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 4, 0, 10,
+   0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63,
+   0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83,
+   0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0,
+   72, 0, 47, 0, 28, 0, 12, 0, 2, 2, 0, 10, 0, 19, 0, 27, 0, 34, 0, 39,
+   0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73,
+   0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88,
+   0, 89, 0, 90, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 2, 0, 10, 0, 19,
+   0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66,
+   0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85,
+   0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48,
+   0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74,
+   0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 109, 0, 79,
+   0, 56, 0, 38, 0, 23, 0, 10, 0, 4, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35,
+   0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68,
+   0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83,
+   0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0,
+   2, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52,
+   0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75,
+   0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 109, 0, 79, 0, 56, 0,
+   38, 0, 23, 0, 10, 0, 2, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40,
+   0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70,
+   0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 15,
+   0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56,
+   0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75,
+   0, 77, 0, 78, 0, 79, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9,
+   0, 4, 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47,
+   0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71,
+   0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0,
+   85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 2, 0, 7, 0, 15, 0, 21,
+   0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59,
+   0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77,
+   0, 78, 0, 79, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 2,
+   0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51,
+   0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72,
+   0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33,
+   0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61,
+   0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 113, 0, 89,
+   0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 2, 0, 7, 0, 13, 0, 19,
+   0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55,
+   0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73,
+   0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0,
+   17, 0, 7, 0, 1, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37,
+   0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63,
+   0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 113, 0, 89, 0, 69, 0,
+   53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 1, 0, 7, 0, 13, 0, 19, 0, 24,
+   0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57,
+   0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+   0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43,
+   0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65,
+   0, 66, 0, 68, 0, 69, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23,
+   0, 15, 0, 7, 0, 2, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34,
+   0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60,
+   0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0,
+   92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 1, 1, 0, 6,
+   0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46,
+   0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66,
+   0, 68, 0, 69, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0,
+   7, 0, 1, 1, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37,
+   0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62,
+   0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 0, 11, 0, 15, 0, 20,
+   0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51,
+   0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 115, 0, 95,
+   0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 2, 0, 5,
+   0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43,
+   0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63,
+   0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0,
+   30, 0, 21, 0, 13, 0, 6, 0, 1, 1, 0, 5, 0, 11, 0, 15, 0, 20, 0, 24,
+   0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53,
+   0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 115, 0, 95, 0, 78, 0,
+   63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 1, 0, 5, 0, 11,
+   0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46,
+   0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32,
+   0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55,
+   0, 57, 0, 59, 0, 60, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35,
+   0, 26, 0, 19, 0, 12, 0, 5, 0, 2, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22,
+   0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50,
+   0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0,
+   98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0,
+   1, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35,
+   0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57,
+   0, 59, 0, 60, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0,
+   19, 0, 12, 0, 5, 0, 1, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26,
+   0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52,
+   0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 9,
+   0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41,
+   0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 117, 0, 100,
+   0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5,
+   0, 2, 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33,
+   0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55,
+   0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0,
+   40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 1, 0, 4, 0, 9, 0, 13,
+   0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43,
+   0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 117, 0, 100, 0, 85, 0,
+   71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 1,
+   0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36,
+   0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23,
+   0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47,
+   0, 49, 0, 51, 0, 52, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44,
+   0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 2, 0, 4, 0, 8, 0, 12,
+   0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41,
+   0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0,
+   102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0,
+   10, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26,
+   0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49,
+   0, 51, 0, 52, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0,
+   28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 12, 0, 16,
+   0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43,
+   0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+   0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32,
+   0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 119, 0, 103,
+   0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14,
+   0, 9, 0, 4, 0, 2, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24,
+   0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47,
+   0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0,
+   48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 1, 1, 0, 4,
+   0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35,
+   0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 119, 0, 103, 0, 89, 0,
+   77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0,
+   4, 0, 1, 1, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27,
+   0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 7, 0, 11, 0, 14,
+   0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39,
+   0, 41, 0, 43, 0, 45, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52,
+   0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 2, 0, 3,
+   0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33,
+   0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0,
+   105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0,
+   18, 0, 13, 0, 8, 0, 4, 0, 1, 1, 0, 3, 0, 7, 0, 11, 0, 14, 0, 17,
+   0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41,
+   0, 43, 0, 45, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0,
+   37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 1, 0, 3, 0, 7,
+   0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35,
+   0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24,
+   0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 120, 0, 106,
+   0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22,
+   0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16,
+   0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39,
+   0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0,
+   55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0,
+   0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27,
+   0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 120, 0, 106, 0, 93, 0,
+   82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0,
+   12, 0, 8, 0, 3, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19,
+   0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6,
+   0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32,
+   0, 34, 0, 36, 0, 38, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58,
+   0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3,
+   0, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26,
+   0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0,
+   26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 10,
+   0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34,
+   0, 36, 0, 38, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0,
+   44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0,
+   0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28,
+   0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17,
+   0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 120, 0, 108,
+   0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29,
+   0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9,
+   0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33,
+   0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0,
+   61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0,
+   7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20,
+   0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 120, 0, 108, 0, 97, 0,
+   86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0,
+   19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12,
+   0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26,
+   0, 28, 0, 30, 0, 31, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63,
+   0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10,
+   0, 6, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19,
+   0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0,
+   32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3,
+   0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28,
+   0, 30, 0, 31, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0,
+   50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0,
+   3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21,
+   0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11,
+   0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 121, 0, 110,
+   0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35,
+   0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3,
+   0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26,
+   0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0,
+   66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0,
+   13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13,
+   0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 121, 0, 110, 0, 99, 0,
+   90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0,
+   26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 5,
+   0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20,
+   0, 22, 0, 24, 0, 25, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68,
+   0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16,
+   0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13,
+   0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0,
+   38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0,
+   0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22,
+   0, 24, 0, 25, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0,
+   55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0,
+   9, 0, 6, 0, 3, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15,
+   0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5,
+   0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 121, 0, 111,
+   0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41,
+   0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2,
+   0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21,
+   0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0,
+   70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0,
+   19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7,
+   0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 121, 0, 111, 0, 102, 0,
+   93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0,
+   31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0,
+   0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14,
+   0, 16, 0, 18, 0, 20, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72,
+   0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22,
+   0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7,
+   0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0,
+   43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0,
+   5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16,
+   0, 18, 0, 20, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0,
+   59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0,
+   15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10,
+   0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 122, 0, 112,
+   0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46,
+   0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8,
+   0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15,
+   0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0,
+   74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0,
+   24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2,
+   0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 122, 0, 112, 0, 103, 0,
+   95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0,
+   36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0,
+   2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9,
+   0, 11, 0, 13, 0, 15, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75,
+   0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27,
+   0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2,
+   0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0,
+   48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0,
+   10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11,
+   0, 13, 0, 15, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0,
+   63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0,
+   20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4,
+   0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 122, 0, 113,
+   0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50,
+   0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13,
+   0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10,
+   0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0,
+   77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0,
+   29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0,
+   0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 122, 0, 113, 0, 105, 0,
+   97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0,
+   41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0,
+   7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4,
+   0, 6, 0, 8, 0, 10, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78,
+   0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31,
+   0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2,
+   0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0,
+   52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0,
+   15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6,
+   0, 8, 0, 10, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0,
+   67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0,
+   24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0,
+   0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 123, 0, 114,
+   0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54,
+   0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17,
+   0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6,
+   0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0,
+   80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0,
+   34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0,
+   4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 123, 0, 114, 0, 106, 0,
+   99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0,
+   45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0,
+   12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 2, 0, 4, 0, 6, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81,
+   0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36,
+   0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6,
+   0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0,
+   55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0,
+   20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2,
+   0, 4, 0, 6, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0,
+   70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0,
+   29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0,
+   2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 123, 0, 115,
+   0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57,
+   0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22,
+   0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2,
+   0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0,
+   82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0,
+   37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0,
+   8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 123, 0, 115, 0, 108, 0,
+   101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0,
+   49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0,
+   16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 1, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83,
+   0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39,
+   0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10,
+   0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0,
+   59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0,
+   24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0,
+   0, 0, 0, 1, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0,
+   73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0,
+   33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0,
+   6, 0, 4, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116,
+   0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60,
+   0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25,
+   0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0,
+   85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0,
+   41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0,
+   12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 123, 0, 116, 0, 109, 0,
+   102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0,
+   52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0,
+   20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 63, 0, 85, 0, 95, 0, 102, 0,
+   106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0,
+      119, 0,
+   119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0,
+      122, 0,
+   122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 31, 31, 63,
+      0,
+   85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116,
+      0,
+   117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0,
+      121, 0,
+   121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0,
+      123, 0,
+   123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0,
+   109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0,
+      119, 0,
+   120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0,
+      122, 0,
+   122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 31, 31, 63, 0, 85,
+      0,
+   95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117,
+      0,
+   118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0,
+      121, 0,
+   122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0,
+      123, 0,
+   0, 0, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0,
+   95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109,
+      0,
+   110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0,
+      115, 0,
+   115, 0, 115, 0, 116, 0, 0, 63, 10, 10, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0,
+   85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0,
+   107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0,
+      113, 0,
+   114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0,
+   20, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0,
+   98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 110,
+      0,
+   110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0,
+      115, 0,
+   115, 0, 116, 0, 0, 63, 10, 10, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0,
+   89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0,
+   108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0,
+      114, 0,
+   114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 6, 0, 21, 0, 36, 0,
+   47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0,
+   92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0,
+   104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 85, 0,
+      31,
+   6, 6, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0,
+   85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0,
+   102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0,
+      108, 0,
+   109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 12, 0, 21, 0, 36, 0, 47, 0,
+   56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0,
+   93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0,
+   105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 85, 0, 31, 6, 6,
+   21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0,
+   87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0,
+   102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0,
+      109, 0,
+   0, 0, 0, 0, 0, 0, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0,
+   63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0,
+   90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0,
+   101, 0, 102, 0, 102, 0, 0, 95, 0, 51, 0, 21, 4, 4, 15, 0, 28, 0, 38, 0,
+   46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0,
+   85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0,
+   98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0,
+   51, 0, 21, 0, 8, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0,
+   68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0,
+   91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0,
+   102, 0, 102, 0, 0, 95, 0, 51, 0, 21, 4, 4, 15, 0, 28, 0, 38, 0, 46, 0,
+   53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0,
+   86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0,
+   99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
+   12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0,
+   70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0,
+   89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 102, 0, 63,
+   0, 36, 0, 15, 3, 3, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0,
+   60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0,
+   85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0,
+   96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 6, 0, 12, 0,
+   23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0,
+   72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0,
+   90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 102, 0, 63, 0, 36,
+   0, 15, 3, 3, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0,
+   63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0,
+   86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 0, 19, 0, 27, 0, 34, 0,
+   39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0,
+   73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0,
+   88, 0, 89, 0, 90, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 2, 2, 10, 0,
+   19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0,
+   66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0,
+   85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0,
+   72, 0, 47, 0, 28, 0, 12, 0, 4, 0, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0,
+   44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0,
+   75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0,
+   89, 0, 90, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 2, 2, 10, 0, 19, 0,
+   27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0,
+   69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0,
+   86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0,
+   52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0,
+   75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 109, 0, 79,
+   0, 56, 0, 38, 0, 23, 0, 10, 2, 2, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0,
+   40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0,
+   70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0,
+   85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0,
+   4, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0,
+   55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0,
+   77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 109, 0, 79, 0, 56,
+   0, 38, 0, 23, 0, 10, 2, 2, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0,
+   44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0,
+   72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 15, 0,
+   21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0,
+   59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0,
+   77, 0, 78, 0, 79, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9,
+   2, 2, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0,
+   51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0,
+   72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0,
+   85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 4, 0, 7, 0, 15, 0, 21, 0,
+   26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0,
+   61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0,
+   78, 0, 79, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 2, 2,
+   7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0,
+   53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0,
+   74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0,
+   37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0,
+   63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 113, 0, 89,
+   0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 1, 1, 7, 0, 13, 0, 19, 0,
+   24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0,
+   57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0,
+   74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0,
+   17, 0, 7, 0, 2, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0,
+   40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0,
+   65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 113, 0, 89, 0, 69,
+   0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 1, 1, 7, 0, 13, 0, 19, 0, 24, 0,
+   28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0,
+   59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+   6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0,
+   46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0,
+   66, 0, 68, 0, 69, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23,
+   0, 15, 0, 7, 1, 1, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0,
+   37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0,
+   62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0,
+   92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 2, 0, 6, 0,
+   12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0,
+   49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0,
+   68, 0, 69, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15,
+   0, 7, 1, 1, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0,
+   40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0,
+   63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 0, 11, 0, 15, 0, 20, 0,
+   24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0,
+   53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 115, 0, 95,
+   0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 1, 1, 5, 0,
+   11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0,
+   46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0,
+   65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0,
+   30, 0, 21, 0, 13, 0, 6, 0, 2, 0, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0,
+   28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0,
+   55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 115, 0, 95, 0, 78,
+   0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 1, 1, 5, 0, 11, 0,
+   15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0,
+   48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0,
+   35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0,
+   57, 0, 59, 0, 60, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35,
+   0, 26, 0, 19, 0, 12, 0, 5, 1, 1, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0,
+   26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0,
+   52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0,
+   98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0,
+   2, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0,
+   38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0,
+   59, 0, 60, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26,
+   0, 19, 0, 12, 0, 5, 1, 1, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0,
+   29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0,
+   54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 9, 0,
+   13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0,
+   43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 117, 0, 100,
+   0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5,
+   1, 1, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0,
+   36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0,
+   56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0,
+   40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 2, 0, 4, 0, 9, 0, 13, 0,
+   17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0,
+   45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 117, 0, 100, 0, 85,
+   0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 1, 1,
+   4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0,
+   38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0,
+   26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0,
+   49, 0, 51, 0, 52, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44,
+   0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 1, 1, 4, 0, 8, 0, 12, 0,
+   16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0,
+   43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0,
+   102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0,
+   10, 0, 4, 0, 2, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0,
+   29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0,
+   51, 0, 52, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36,
+   0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 1, 1, 4, 0, 8, 0, 12, 0, 16, 0,
+   19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0,
+   45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+   4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0,
+   35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 119, 0, 103,
+   0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14,
+   0, 9, 0, 4, 1, 1, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0,
+   27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0,
+   48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0,
+   48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 2, 0, 4, 0,
+   8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0,
+   37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 119, 0, 103, 0, 89,
+   0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9,
+   0, 4, 1, 1, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0,
+   30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 7, 0, 11, 0, 14, 0,
+   17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0,
+   41, 0, 43, 0, 45, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52,
+   0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 1, 1, 3, 0,
+   7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0,
+   35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0,
+   105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0,
+   18, 0, 13, 0, 8, 0, 4, 0, 2, 0, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0,
+   20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0,
+   43, 0, 45, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44,
+   0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 1, 1, 3, 0, 7, 0,
+   11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0,
+   37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0,
+   27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 120, 0, 106,
+   0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22,
+   0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0,
+   19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0,
+   41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0,
+   55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0,
+   0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0,
+   29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 120, 0, 106, 0, 93,
+   0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17,
+   0, 12, 0, 8, 0, 3, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0,
+   22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0,
+   10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0,
+   34, 0, 36, 0, 38, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58,
+   0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3,
+   0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0,
+   28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0,
+   26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 10, 0,
+   13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0,
+   36, 0, 38, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51,
+   0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0,
+   3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0,
+   30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0,
+   20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 120, 0, 108,
+   0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29,
+   0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0,
+   12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0,
+   35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0,
+   61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0,
+   7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0,
+   22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 120, 0, 108, 0, 97,
+   0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24,
+   0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0,
+   15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0,
+   28, 0, 30, 0, 31, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63,
+   0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10,
+   0, 6, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0,
+   21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0,
+   32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, 0,
+   6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0,
+   30, 0, 31, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56,
+   0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6,
+   0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0,
+   23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0,
+   13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 121, 0, 110,
+   0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35,
+   0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0,
+   5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0,
+   28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0,
+   66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0,
+   13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0,
+   16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 121, 0, 110, 0, 99,
+   0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30,
+   0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 5, 0,
+   8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0,
+   22, 0, 24, 0, 25, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68,
+   0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16,
+   0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0,
+   15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0,
+   38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0,
+   0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0,
+   24, 0, 25, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61,
+   0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13,
+   0, 9, 0, 6, 0, 3, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0,
+   17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0,
+   7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 121, 0, 111,
+   0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41,
+   0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2,
+   0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0,
+   23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0,
+   70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0,
+   19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0,
+   10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 121, 0, 111, 0, 102,
+   0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36,
+   0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0,
+   2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0,
+   16, 0, 18, 0, 20, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72,
+   0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22,
+   0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0,
+   10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0,
+   43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0,
+   5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0,
+   18, 0, 20, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65,
+   0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18,
+   0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0,
+   12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 122, 0, 112,
+   0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46,
+   0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8,
+   0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0,
+   17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0,
+   74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0,
+   24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 2, 0,
+   5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 122, 0, 112, 0, 103,
+   0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41,
+   0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5,
+   0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0,
+   11, 0, 13, 0, 15, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75,
+   0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27,
+   0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0,
+   4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0,
+   48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0,
+   10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0,
+   13, 0, 15, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69,
+   0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23,
+   0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0,
+   7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 122, 0, 113,
+   0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50,
+   0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13,
+   0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0,
+   12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0,
+   77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0,
+   29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0,
+   0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 122, 0, 113, 0, 105,
+   0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45,
+   0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10,
+   0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0,
+   6, 0, 8, 0, 10, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78,
+   0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31,
+   0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2,
+   0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0,
+   52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0,
+   15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0,
+   8, 0, 10, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72,
+   0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28,
+   0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0,
+   2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 123, 0, 114,
+   0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54,
+   0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17,
+   0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0,
+   8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0,
+   80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0,
+   34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0,
+   4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 123, 0, 114, 0, 106,
+   0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49,
+   0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15,
+   0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   2, 0, 4, 0, 6, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81,
+   0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36,
+   0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6,
+   0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0,
+   55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0,
+   20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, 0,
+   4, 0, 6, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75,
+   0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32,
+   0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4,
+   0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 0, 123, 0, 115,
+   0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57,
+   0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22,
+   0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0,
+   4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0,
+   82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0,
+   37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0,
+   8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 0, 123, 0, 115, 0, 108,
+   0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53,
+   0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19,
+   0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 1, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83,
+   0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39,
+   0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10,
+   0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0,
+   59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0,
+   24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0,
+   0, 0, 1, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78,
+   0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36,
+   0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8,
+   0, 6, 0, 4, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116,
+   0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60,
+   0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25,
+   0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0,
+   85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0,
+   41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0,
+   12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 123, 0, 116, 0, 109,
+   0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56,
+   0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23,
+   0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 31, 31, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0,
+   111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0, 119, 0,
+      120, 0,
+   120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0, 122, 0,
+      122, 0,
+   122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 31,
+   0, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0,
+      115,
+   0, 116, 0, 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0,
+      121,
+   0, 121, 0, 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0,
+      123,
+   0, 123, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 10, 10,
+   31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0,
+   100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 110, 0,
+      110, 0,
+   111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0, 115, 0,
+      115, 0,
+   116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, 10, 0, 31, 0, 51, 0, 63, 0, 72,
+   0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105,
+   0, 106, 0, 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0,
+      113,
+   0, 113, 0, 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 6, 6, 21, 0, 36, 0, 47, 0, 56, 0,
+   63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0,
+   95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0,
+   106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0,
+   31, 0, 6, 6, 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78,
+   0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99,
+   0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0,
+      108,
+   0, 108, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51,
+   0, 21, 4, 4, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0,
+   71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0,
+   93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0,
+   102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, 0, 21, 0, 4, 4, 0, 15, 0, 28,
+   0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80,
+   0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96,
+   0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 3, 3, 12, 0, 23, 0,
+   31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0,
+   75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0,
+   91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0,
+   63, 0, 36, 0, 15, 0, 3, 3, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51,
+   0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81,
+   0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94,
+   0, 95, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72,
+   0, 47, 0, 28, 0, 12, 2, 2, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0,
+   49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0,
+   77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0,
+   90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 2,
+   0, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60,
+   0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82,
+   0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 2, 2,
+   9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0,
+   58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0,
+   78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0,
+   79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, 2, 0, 9, 0, 17, 0, 23, 0, 30,
+   0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66,
+   0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82,
+   0, 83, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85,
+   0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 2, 2, 7, 0, 15, 0, 21, 0, 26, 0,
+   31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0,
+   63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0,
+   79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0,
+   9, 0, 2, 2, 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44,
+   0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69,
+   0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17,
+   0, 7, 1, 1, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0,
+   44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0,
+   67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 0,
+   89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 1, 0, 7, 0, 13,
+   0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52,
+   0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71,
+   0, 73, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92,
+   0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 1, 1, 6, 0, 12, 0,
+   17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0,
+   51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0,
+   69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0,
+   23, 0, 15, 0, 7, 0, 1, 1, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30,
+   0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58,
+   0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30,
+   0, 21, 0, 13, 0, 6, 1, 1, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0,
+   31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0,
+   57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0,
+   95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 1,
+   0, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41,
+   0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62,
+   0, 63, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98,
+   0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 1, 1,
+   5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0,
+   41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0,
+   60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0,
+   35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, 1, 0, 5, 0, 10, 0, 14, 0, 18,
+   0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48,
+   0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40,
+   0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 1, 1, 4, 0, 9, 0, 13, 0, 17, 0,
+   21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0,
+   47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 0,
+   100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0,
+   5, 0, 1, 1, 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30,
+   0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53,
+   0, 55, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102,
+   0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10,
+   0, 4, 1, 1, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0,
+   31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0,
+   52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0,
+   44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 1, 0, 4, 0, 8,
+   0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39,
+   0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48,
+   0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 1, 1, 4, 0, 8, 0,
+   11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0,
+   39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0,
+   103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0,
+   14, 0, 9, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21,
+   0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45,
+   0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105,
+   0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18,
+   0, 13, 0, 8, 0, 4, 1, 1, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0,
+   23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0,
+   45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0,
+   52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 1,
+   0, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31,
+   0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55,
+   0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0,
+   3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0,
+   31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0,
+   22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13,
+   0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37,
+   0, 39, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107,
+   0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26,
+   0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0,
+   15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0,
+   38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0,
+   58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0,
+   3, 0, 0, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23,
+   0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61,
+   0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7,
+   0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0,
+   24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0,
+   29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6,
+   0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31,
+   0, 33, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109,
+   0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32,
+   0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 6, 0,
+   9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0,
+   31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0,
+   63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0,
+   10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17,
+   0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66,
+   0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13,
+   0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0,
+   18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0,
+   35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0,
+   0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25,
+   0, 26, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110,
+   0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38,
+   0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0,
+   2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0,
+   25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0,
+   68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0,
+   16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10,
+   0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70,
+   0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19,
+   0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0,
+   12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0,
+   41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0,
+   2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19,
+   0, 21, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112,
+   0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43,
+   0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5,
+   0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0,
+   20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0,
+   72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0,
+   22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5,
+   0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74,
+   0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24,
+   0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0,
+   7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0,
+   46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0,
+   8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13,
+   0, 15, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113,
+   0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48,
+   0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10,
+   0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0,
+   15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0,
+   75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0,
+   27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0,
+   0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77,
+   0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29,
+   0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0,
+   2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0,
+   50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0,
+   13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8,
+   0, 10, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114,
+   0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52,
+   0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15,
+   0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0,
+   10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0,
+   78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0,
+   31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0,
+   2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80,
+   0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34,
+   0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4,
+   0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0,
+   54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0,
+   17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4,
+   0, 6, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115,
+   0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55,
+   0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20,
+   0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0,
+   6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0,
+   81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0,
+   36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0,
+   6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82,
+   0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37,
+   0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8,
+   0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0,
+   57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0,
+   22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0,
+   0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115,
+   0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59,
+   0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24,
+   0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0,
+   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0,
+   83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0,
+   39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0,
+   10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85,
+   0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41,
+   0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12,
+   0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0,
+   60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0,
+   25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0,
+   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/pp_program.c b/src/gallium/auxiliary/postprocess/pp_program.c
new file mode 100644
index 0000000..b92ac80
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_program.c
@@ -0,0 +1,139 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Jakob Bornecrantz
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "postprocess/postprocess.h"
+#include "cso_cache/cso_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_inlines.h"
+#include "util/u_simple_shaders.h"
+
+/** Initialize the internal details */
+struct program *
+pp_init_prog(struct pp_queue_t *ppq, struct pipe_screen *pscreen)
+{
+
+   struct program *p = calloc(1, sizeof(struct program));
+
+   pp_debug("Initializing program\n");
+   if (!pscreen)
+      return NULL;
+
+   if (!p)
+      return NULL;
+
+   p->screen = pscreen;
+   p->pipe = pscreen->context_create(pscreen, NULL);
+   p->cso = cso_create_context(p->pipe);
+
+   {
+      static const float verts[4][2][4] = {
+         {
+          {1.0f, 1.0f, 0.0f, 1.0f},
+          {1.0f, 1.0f, 0.0f, 1.0f}
+          },
+         {
+          {-1.0f, 1.0f, 0.0f, 1.0f},
+          {0.0f, 1.0f, 0.0f, 1.0f}
+          },
+         {
+          {-1.0f, -1.0f, 0.0f, 1.0f},
+          {0.0f, 0.0f, 0.0f, 1.0f}
+          },
+         {
+          {1.0f, -1.0f, 0.0f, 1.0f},
+          {1.0f, 0.0f, 0.0f, 1.0f}
+          }
+      };
+
+      p->vbuf = pipe_buffer_create(pscreen, PIPE_BIND_VERTEX_BUFFER,
+                                   PIPE_USAGE_STATIC, sizeof(verts));
+      pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(verts), verts);
+   }
+
+   p->blend.rt[0].colormask = PIPE_MASK_RGBA;
+   p->blend.rt[0].rgb_src_factor = p->blend.rt[0].alpha_src_factor =
+      PIPE_BLENDFACTOR_SRC_ALPHA;
+   p->blend.rt[0].rgb_dst_factor = p->blend.rt[0].alpha_dst_factor =
+      PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+
+   p->rasterizer.cull_face = PIPE_FACE_NONE;
+   p->rasterizer.gl_rasterization_rules = 1;
+
+   p->sampler.wrap_s = p->sampler.wrap_t = p->sampler.wrap_r =
+      PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+
+   p->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   p->sampler.min_img_filter = p->sampler.mag_img_filter =
+      PIPE_TEX_FILTER_LINEAR;
+   p->sampler.normalized_coords = 1;
+
+   p->sampler_point.wrap_s = p->sampler_point.wrap_t =
+      p->sampler_point.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   p->sampler_point.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   p->sampler_point.min_img_filter = p->sampler_point.mag_img_filter =
+      PIPE_TEX_FILTER_NEAREST;
+   p->sampler_point.normalized_coords = 1;
+
+   p->velem[0].src_offset = 0;
+   p->velem[0].instance_divisor = 0;
+   p->velem[0].vertex_buffer_index = 0;
+   p->velem[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   p->velem[1].src_offset = 1 * 4 * sizeof(float);
+   p->velem[1].instance_divisor = 0;
+   p->velem[1].vertex_buffer_index = 0;
+   p->velem[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+
+   if (!p->screen->is_format_supported(p->screen,
+                                       PIPE_FORMAT_R32G32B32A32_FLOAT,
+                                       PIPE_BUFFER, 1,
+                                       PIPE_BIND_VERTEX_BUFFER))
+      pp_debug("Vertex buf format fail\n");
+
+
+   {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+         TGSI_SEMANTIC_GENERIC
+      };
+      const uint semantic_indexes[] = { 0, 0 };
+      p->passvs = util_make_vertex_passthrough_shader(p->pipe, 2,
+                                                      semantic_names,
+                                                      semantic_indexes);
+   }
+
+   p->framebuffer.nr_cbufs = 1;
+
+   p->surf.usage = PIPE_BIND_RENDER_TARGET;
+   p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+
+   p->pipe->set_sample_mask(p->pipe, ~0);
+
+   return p;
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_program.h b/src/gallium/auxiliary/postprocess/pp_program.h
new file mode 100644
index 0000000..2749b35
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_program.h
@@ -0,0 +1,64 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Jakob Bornecrantz
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PP_PROGRAM_H
+#define PP_PROGRAM_H
+
+#include "pipe/p_state.h"
+
+/**
+*	Internal control details.
+*/
+struct program
+{
+   struct pipe_screen *screen;
+   struct pipe_context *pipe;
+   struct cso_context *cso;
+
+   struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state depthstencil;
+   struct pipe_rasterizer_state rasterizer;
+   struct pipe_sampler_state sampler;   /* bilinear */
+   struct pipe_sampler_state sampler_point;     /* point */
+   struct pipe_viewport_state viewport;
+   struct pipe_framebuffer_state framebuffer;
+   struct pipe_vertex_element velem[2];
+
+   float clear_color[4];
+
+   void *passvs;
+
+   struct pipe_resource *vbuf;
+   struct pipe_surface surf;
+   struct pipe_sampler_view *view;
+
+   struct blit_state *blitctx;
+};
+
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c
new file mode 100644
index 0000000..ce671ae
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -0,0 +1,188 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "postprocess.h"
+
+#include "postprocess/pp_filters.h"
+#include "util/u_blit.h"
+#include "util/u_inlines.h"
+#include "util/u_sampler.h"
+
+/**
+*	Main run function of the PP queue. Called on swapbuffers/flush.
+*
+*	Runs all requested filters in order and handles shuffling the temp
+*	buffers in between.
+*/
+void
+pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
+       struct pipe_resource *out, struct pipe_resource *indepth)
+{
+
+   unsigned int i;
+
+   if (in->width0 != ppq->p->framebuffer.width ||
+       in->height0 != ppq->p->framebuffer.height) {
+      pp_debug("Resizing the temp pp buffers\n");
+      pp_free_fbos(ppq);
+      pp_init_fbos(ppq, in->width0, in->height0, indepth);
+   }
+
+   if (in == out && ppq->n_filters == 1) {
+      /* Make a copy of in to tmp[0] in this case. */
+      unsigned int w = ppq->p->framebuffer.width;
+      unsigned int h = ppq->p->framebuffer.height;
+
+      util_blit_pixels(ppq->p->blitctx, in, 0, 0, 0,
+                       w, h, 0, ppq->tmps[0],
+                       0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST);
+
+      in = ppq->tmp[0];
+   }
+
+   switch (ppq->n_filters) {
+   case 1:                     /* No temp buf */
+      ppq->pp_queue[0] (ppq, in, out, 0);
+      break;
+   case 2:                     /* One temp buf */
+
+      ppq->pp_queue[0] (ppq, in, ppq->tmp[0], 0);
+      ppq->pp_queue[1] (ppq, ppq->tmp[0], out, 1);
+
+      break;
+   default:                    /* Two temp bufs */
+      ppq->pp_queue[0] (ppq, in, ppq->tmp[0], 0);
+
+      for (i = 1; i < (ppq->n_filters - 1); i++) {
+         if (i % 2 == 0)
+            ppq->pp_queue[i] (ppq, ppq->tmp[1], ppq->tmp[0], i);
+
+         else
+            ppq->pp_queue[i] (ppq, ppq->tmp[0], ppq->tmp[1], i);
+      }
+
+      if (i % 2 == 0)
+         ppq->pp_queue[i] (ppq, ppq->tmp[1], out, i);
+
+      else
+         ppq->pp_queue[i] (ppq, ppq->tmp[0], out, i);
+
+      break;
+   }
+}
+
+
+/* Utility functions for the filters. You're not forced to use these if */
+/* your filter is more complicated. */
+
+/** Setup this resource as the filter input. */
+void
+pp_filter_setup_in(struct program *p, struct pipe_resource *in)
+{
+   struct pipe_sampler_view v_tmp;
+   u_sampler_view_default_template(&v_tmp, in, in->format);
+   p->view = p->pipe->create_sampler_view(p->pipe, in, &v_tmp);
+}
+
+/** Setup this resource as the filter output. */
+void
+pp_filter_setup_out(struct program *p, struct pipe_resource *out)
+{
+   p->surf.format = out->format;
+   p->surf.usage = PIPE_BIND_RENDER_TARGET;
+
+   p->framebuffer.cbufs[0] = p->pipe->create_surface(p->pipe, out, &p->surf);
+}
+
+/** Clean up the input and output set with the above. */
+void
+pp_filter_end_pass(struct program *p)
+{
+   pipe_surface_reference(&p->framebuffer.cbufs[0], NULL);
+   pipe_sampler_view_reference(&p->view, NULL);
+}
+
+/**
+*	Convert the TGSI assembly to a runnable shader.
+*
+* We need not care about geometry shaders. All we have is screen quads.
+*/
+void *
+pp_tgsi_to_state(struct pipe_context *pipe, const char *text, bool isvs,
+                 const char *name)
+{
+   struct pipe_shader_state state;
+   struct tgsi_token tokens[PP_MAX_TOKENS];
+
+   if (tgsi_text_translate(text, tokens, Elements(tokens)) == FALSE) {
+      pp_debug("Failed to translate %s\n", name);
+      return NULL;
+   }
+
+   state.tokens = tokens;
+
+   if (isvs)
+      return pipe->create_vs_state(pipe, &state);
+   else
+      return pipe->create_fs_state(pipe, &state);
+}
+
+/** Setup misc state for the filter. */
+void
+pp_filter_misc_state(struct program *p)
+{
+   cso_set_blend(p->cso, &p->blend);
+   cso_set_depth_stencil_alpha(p->cso, &p->depthstencil);
+   cso_set_rasterizer(p->cso, &p->rasterizer);
+   cso_set_viewport(p->cso, &p->viewport);
+
+   cso_set_vertex_elements(p->cso, 2, p->velem);
+}
+
+/** Draw with the filter to the set output. */
+void
+pp_filter_draw(struct program *p)
+{
+   util_draw_vertex_buffer(p->pipe, p->cso, p->vbuf, 0,
+                           PIPE_PRIM_QUADS, 4, 2);
+   p->pipe->flush(p->pipe, NULL);
+}
+
+/** Set the framebuffer as active. */
+void
+pp_filter_set_fb(struct program *p)
+{
+   cso_set_framebuffer(p->cso, &p->framebuffer);
+}
+
+/** Set the framebuffer as active and clear it. */
+void
+pp_filter_set_clear_fb(struct program *p)
+{
+   cso_set_framebuffer(p->cso, &p->framebuffer);
+   p->pipe->clear(p->pipe, PIPE_CLEAR_COLOR, p->clear_color, 0, 0);
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 712e8ac..38dc1ef 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1594,6 +1594,9 @@
 #define FETCH(VAL,INDEX,CHAN)\
     fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
 
+#define IFETCH(VAL,INDEX,CHAN)\
+    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
+
 
 /**
  * Execute ARB-style KIL which is predicated by a src register.
@@ -1921,6 +1924,86 @@
 }
 
 
+static void
+exec_txf(struct tgsi_exec_machine *mach,
+	 const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_sampler *sampler;
+   const uint unit = inst->Src[1].Register.Index;
+   union tgsi_exec_channel r[4];
+   uint chan;
+   float rgba[NUM_CHANNELS][QUAD_SIZE];
+   int j;
+
+   IFETCH(&r[3], 0, CHAN_W);
+
+   switch(inst->Texture.Texture) {
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_2D_ARRAY:
+      IFETCH(&r[2], 0, CHAN_Z);
+      /* fallthrough */
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_1D_ARRAY:
+      IFETCH(&r[1], 0, CHAN_Y);
+      /* fallthrough */
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
+      IFETCH(&r[0], 0, CHAN_X);
+      break;
+   default:
+      assert(0);
+      break;
+   }      
+
+   sampler = mach->Samplers[unit];
+   sampler->get_texel(sampler, r[0].i, r[1].i, r[2].i, r[3].i, rgba);
+
+   for (j = 0; j < QUAD_SIZE; j++) {
+      r[0].f[j] = rgba[0][j];
+      r[1].f[j] = rgba[1][j];
+      r[2].f[j] = rgba[2][j];
+      r[3].f[j] = rgba[3][j];
+   }
+
+   for (chan = 0; chan < NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
+static void
+exec_txq(struct tgsi_exec_machine *mach,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_sampler *sampler;
+   const uint unit = inst->Src[1].Register.Index;
+   int result[4];
+   union tgsi_exec_channel r[4], src;
+   uint chan;
+   int i,j;
+
+   fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_INT);
+   sampler = mach->Samplers[unit];
+
+   sampler->get_dims(sampler, src.i[0], result);
+
+   for (i = 0; i < QUAD_SIZE; i++) {
+      for (j = 0; j < 4; j++) {
+	 r[j].i[i] = result[j];
+      }
+   }
+
+   for (chan = 0; chan < NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+	 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
+		    TGSI_EXEC_DATA_INT);
+      }
+   }
+}
 
 static void
 exec_sample(struct tgsi_exec_machine *mach,
@@ -2989,6 +3072,17 @@
 }
 
 static void
+micro_mod(union tgsi_exec_channel *dst,
+          const union tgsi_exec_channel *src0,
+          const union tgsi_exec_channel *src1)
+{
+   dst->i[0] = src0->i[0] % src1->i[0];
+   dst->i[1] = src0->i[1] % src1->i[1];
+   dst->i[2] = src0->i[2] % src1->i[2];
+   dst->i[3] = src0->i[3] % src1->i[3];
+}
+
+static void
 micro_f2i(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
 {
@@ -3691,7 +3785,7 @@
       break;
 
    case TGSI_OPCODE_MOD:
-      assert (0);
+      exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
       break;
 
    case TGSI_OPCODE_XOR:
@@ -3703,11 +3797,11 @@
       break;
 
    case TGSI_OPCODE_TXF:
-      assert (0);
+      exec_txf(mach, inst);
       break;
 
    case TGSI_OPCODE_TXQ:
-      assert (0);
+      exec_txq(mach, inst);
       break;
 
    case TGSI_OPCODE_EMIT:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 33f33aa..3f6964c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -90,6 +90,11 @@
                        const float c0[QUAD_SIZE],
                        enum tgsi_sampler_control control,
                        float rgba[NUM_CHANNELS][QUAD_SIZE]);
+   void (*get_dims)(struct tgsi_sampler *sampler, int level,
+		    int dims[4]);
+   void (*get_texel)(struct tgsi_sampler *sampler, const int i[QUAD_SIZE],
+		     const int j[QUAD_SIZE], const int k[QUAD_SIZE],
+		     const int lod[QUAD_SIZE], float rgba[NUM_CHANNELS][QUAD_SIZE]);
 };
 
 #define TGSI_EXEC_NUM_TEMPS       128
@@ -400,6 +405,8 @@
       return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 1;
+   case PIPE_SHADER_CAP_INTEGERS:
+      return 1;
    default:
       return 0;
    }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 83c6ac7..f165f82 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -200,19 +200,20 @@
             info->file_max[file] = MAX2(info->file_max[file], (int)reg);
          }
          break;
+
       case TGSI_TOKEN_TYPE_PROPERTY:
-      {
-         const struct tgsi_full_property *fullprop
-            = &parse.FullToken.FullProperty;
+         {
+            const struct tgsi_full_property *fullprop
+               = &parse.FullToken.FullProperty;
 
-         info->properties[info->num_properties].name =
-            fullprop->Property.PropertyName;
-         memcpy(info->properties[info->num_properties].data,
-                fullprop->u, 8 * sizeof(unsigned));;
+            info->properties[info->num_properties].name =
+               fullprop->Property.PropertyName;
+            memcpy(info->properties[info->num_properties].data,
+                   fullprop->u, 8 * sizeof(unsigned));;
 
-         ++info->num_properties;
-      }
-      break;
+            ++info->num_properties;
+         }
+         break;
 
       default:
          assert( 0 );
@@ -222,6 +223,23 @@
    info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
                       info->opcode_count[TGSI_OPCODE_KILP]);
 
+   /* extract simple properties */
+   for (i = 0; i < info->num_properties; ++i) {
+      switch (info->properties[i].name) {
+      case TGSI_PROPERTY_FS_COORD_ORIGIN:
+         info->origin_lower_left = info->properties[i].data[0];
+         break;
+      case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
+         info->pixel_center_integer = info->properties[i].data[0];
+         break;
+      case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
+         info->color0_writes_all_cbufs = info->properties[i].data[0];
+         break;
+      default:
+         ;
+      }
+   }
+
    tgsi_parse_free (&parse);
 }
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 53ab3d5..d6e593b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -68,6 +68,9 @@
    boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
    boolean uses_kill;  /**< KIL or KILP instruction used? */
    boolean uses_instanceid;
+   boolean origin_lower_left;
+   boolean pixel_center_integer;
+   boolean color0_writes_all_cbufs;
 
    /**
     * Bitmask indicating which register files are accessed with
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 528f344..d8e46f0 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -26,8 +26,8 @@
 
 /**
  * @file
- * Blitter utility to facilitate acceleration of the clear, clear_render_target, clear_depth_stencil
- * resource_copy_region functions.
+ * Blitter utility to facilitate acceleration of the clear, clear_render_target,
+ * clear_depth_stencil, and resource_copy_region functions.
  *
  * @author Marek Olšák
  */
@@ -197,8 +197,6 @@
    memset(&velem[0], 0, sizeof(velem[0]) * 2);
    for (i = 0; i < 2; i++) {
       velem[i].src_offset = i * 4 * sizeof(float);
-      velem[i].instance_divisor = 0;
-      velem[i].vertex_buffer_index = 0;
       velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
    }
    ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
@@ -288,26 +286,33 @@
    unsigned i;
 
    /* restore the state objects which are always required to be saved */
-   pipe->bind_blend_state(pipe, ctx->base.saved_blend_state);
-   pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state);
    pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state);
-   pipe->bind_fs_state(pipe, ctx->base.saved_fs);
    pipe->bind_vs_state(pipe, ctx->base.saved_vs);
    pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state);
 
-   ctx->base.saved_blend_state = INVALID_PTR;
-   ctx->base.saved_dsa_state = INVALID_PTR;
    ctx->base.saved_rs_state = INVALID_PTR;
-   ctx->base.saved_fs = INVALID_PTR;
    ctx->base.saved_vs = INVALID_PTR;
    ctx->base.saved_velem_state = INVALID_PTR;
 
+   /* restore the state objects which are required to be saved for clear/copy
+    */
+   if (ctx->base.saved_blend_state != INVALID_PTR) {
+      pipe->bind_blend_state(pipe, ctx->base.saved_blend_state);
+      ctx->base.saved_blend_state = INVALID_PTR;
+   }
+   if (ctx->base.saved_dsa_state != INVALID_PTR) {
+      pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state);
+      ctx->base.saved_dsa_state = INVALID_PTR;
+   }
+   if (ctx->base.saved_fs != INVALID_PTR) {
+      pipe->bind_fs_state(pipe, ctx->base.saved_fs);
+      ctx->base.saved_fs = INVALID_PTR;
+   }
+
    pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref);
    pipe->set_viewport_state(pipe, &ctx->base.saved_viewport);
    pipe->set_clip_state(pipe, &ctx->base.saved_clip);
 
-   /* restore the state objects which are required to be saved before copy/fill
-    */
    if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
       pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
       util_unreference_framebuffer_state(&ctx->base.saved_fb_state);
@@ -724,14 +729,14 @@
    return sx1 < dx2 && sx2 > dx1 && sy1 < dy2 && sy2 > dy1;
 }
 
-void util_blitter_copy_region(struct blitter_context *blitter,
-                              struct pipe_resource *dst,
-                              unsigned dstlevel,
-                              unsigned dstx, unsigned dsty, unsigned dstz,
-                              struct pipe_resource *src,
-                              unsigned srclevel,
-                              const struct pipe_box *srcbox,
-                              boolean ignore_stencil)
+void util_blitter_copy_texture(struct blitter_context *blitter,
+                               struct pipe_resource *dst,
+                               unsigned dstlevel,
+                               unsigned dstx, unsigned dsty, unsigned dstz,
+                               struct pipe_resource *src,
+                               unsigned srclevel,
+                               const struct pipe_box *srcbox,
+                               boolean ignore_stencil)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 41470d9..df6f023 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -126,12 +126,15 @@
 }
 
 /*
- * These CSOs must be saved before any of the following functions is called:
+ * These states must be saved before any of the following functions is called:
  * - blend state
  * - depth stencil alpha state
  * - rasterizer state
  * - vertex shader
+ * - any other shader??? (XXX)
  * - fragment shader
+ * - vertex buffers
+ * - vertex elements
  */
 
 /**
@@ -169,14 +172,14 @@
  * - fragment sampler states
  * - fragment sampler textures
  */
-void util_blitter_copy_region(struct blitter_context *blitter,
-                              struct pipe_resource *dst,
-                              unsigned dstlevel,
-                              unsigned dstx, unsigned dsty, unsigned dstz,
-                              struct pipe_resource *src,
-                              unsigned srclevel,
-                              const struct pipe_box *srcbox,
-                              boolean ignore_stencil);
+void util_blitter_copy_texture(struct blitter_context *blitter,
+                               struct pipe_resource *dst,
+                               unsigned dstlevel,
+                               unsigned dstx, unsigned dsty, unsigned dstz,
+                               struct pipe_resource *src,
+                               unsigned srclevel,
+                               const struct pipe_box *srcbox,
+                               boolean ignore_stencil);
 
 /**
  * Clear a region of a (color) surface to a constant value.
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 004df43..2d61930 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -730,7 +730,7 @@
          pixel.rgbRed   = float_to_ubyte(ptr[x*4 + 0]);
          pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]);
          pixel.rgbBlue  = float_to_ubyte(ptr[x*4 + 2]);
-         pixel.rgbAlpha = 255;
+         pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]);
          os_stream_write(stream, &pixel, 4);
       }
    }
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index 9cbdd0a..34922ab 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -390,3 +390,53 @@
       FREE(tmp_row);
    }
 }
+
+void util_format_compose_swizzles(const unsigned char swz1[4],
+                                  const unsigned char swz2[4],
+                                  unsigned char dst[4])
+{
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ?
+               swz1[swz2[i]] : swz2[i];
+   }
+}
+
+void util_format_swizzle_4f(float *dst, const float *src,
+                            const unsigned char swz[4])
+{
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      if (swz[i] <= UTIL_FORMAT_SWIZZLE_W)
+         dst[i] = src[swz[i]];
+      else if (swz[i] == UTIL_FORMAT_SWIZZLE_0)
+         dst[i] = 0;
+      else if (swz[i] == UTIL_FORMAT_SWIZZLE_1)
+         dst[i] = 1;
+   }
+}
+
+void util_format_unswizzle_4f(float *dst, const float *src,
+                              const unsigned char swz[4])
+{
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      switch (swz[i]) {
+      case UTIL_FORMAT_SWIZZLE_X:
+         dst[0] = src[i];
+         break;
+      case UTIL_FORMAT_SWIZZLE_Y:
+         dst[1] = src[i];
+         break;
+      case UTIL_FORMAT_SWIZZLE_Z:
+         dst[2] = src[i];
+         break;
+      case UTIL_FORMAT_SWIZZLE_W:
+         dst[3] = src[i];
+         break;
+      }
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 347e2be..a3d2aae 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -260,10 +260,10 @@
 # A.k.a. D3DDECLTYPE_DEC3N
 PIPE_FORMAT_R10G10B10X2_SNORM     , plain, 1, 1, sn10, sn10, sn10 , x2 , xyz1, rgb
 
-PIPE_FORMAT_YV12                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_YV16                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_IYUV                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_NV12                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_NV21                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_IA44                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_AI44                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_YV12                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_YV16                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_IYUV                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_NV12                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_NV21                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_IA44                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_AI44                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index bb3ed72..566fa79 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -815,6 +815,25 @@
                       unsigned src_x, unsigned src_y,
                       unsigned width, unsigned height);
 
+/*
+ * Swizzle operations.
+ */
+
+/* Compose two sets of swizzles.
+ * If V is a 4D vector and the function parameters represent functions that
+ * swizzle vector components, this holds:
+ *     swz2(swz1(V)) = dst(V)
+ */
+void util_format_compose_swizzles(const unsigned char swz1[4],
+                                  const unsigned char swz2[4],
+                                  unsigned char dst[4]);
+
+void util_format_swizzle_4f(float *dst, const float *src,
+                            const unsigned char swz[4]);
+
+void util_format_unswizzle_4f(float *dst, const float *src,
+                              const unsigned char swz[4]);
+
 #ifdef __cplusplus
 } // extern "C" {
 #endif
diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c
index bb989c2..d8a7c0d 100644
--- a/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -119,8 +119,15 @@
 
    library = util_dl_open(DXTN_LIBNAME);
    if (!library) {
-      debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
-         "compression/decompression unavailable\n");
+      if (getenv("force_s3tc_enable") &&
+          !strcmp(getenv("force_s3tc_enable"), "true")) {
+         debug_printf("couldn't open " DXTN_LIBNAME ", enabling DXTn due to "
+            "force_s3tc_enable=true environment variable\n");
+         util_format_s3tc_enabled = TRUE;
+      } else {
+         debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
+            "compression/decompression unavailable\n");
+      }
       return;
    }
 
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 0b52844..46d9322 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -199,6 +199,16 @@
 #endif /* _MSC_VER */
 
 
+#ifdef PIPE_OS_ANDROID
+
+static INLINE
+double log2(double d)
+{
+   return log(d) * (1.0 / M_LN2);
+}
+
+#endif
+
 
 
 
@@ -409,7 +419,7 @@
 
    return i;
 }
-#elif defined(__MINGW32__)
+#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID)
 #define ffs __builtin_ffs
 #endif
 
diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c
index f79a693..ac0df8c 100644
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -52,6 +52,7 @@
 
 #include "tgsi/tgsi_transform.h"
 #include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
 
 /** Approx number of new tokens for instructions in pstip_transform_inst() */
 #define NUM_NEW_TOKENS 50
@@ -175,6 +176,7 @@
  */
 struct pstip_transform_context {
    struct tgsi_transform_context base;
+   struct tgsi_shader_info info;
    uint tempsUsed;  /**< bitmask */
    int wincoordInput;
    int maxInput;
@@ -183,12 +185,13 @@
    int texTemp;  /**< temp registers */
    int numImmed;
    boolean firstInstruction;
+   uint coordOrigin;
 };
 
 
 /**
  * TGSI declaration transform callback.
- * Look for a free sampler, a free input attrib, and two free temp regs.
+ * Track samplers used, temps used, inputs used.
  */
 static void
 pstip_transform_decl(struct tgsi_transform_context *ctx,
@@ -197,10 +200,11 @@
    struct pstip_transform_context *pctx =
       (struct pstip_transform_context *) ctx;
 
+   /* XXX we can use tgsi_shader_info instead of some of this */
+
    if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
       uint i;
-      for (i = decl->Range.First;
-           i <= decl->Range.Last; i++) {
+      for (i = decl->Range.First; i <= decl->Range.Last; i++) {
          pctx->samplersUsed |= 1 << i;
       }
    }
@@ -211,8 +215,7 @@
    }
    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
       uint i;
-      for (i = decl->Range.First;
-           i <= decl->Range.Last; i++) {
+      for (i = decl->Range.First; i <= decl->Range.Last; i++) {
          pctx->tempsUsed |= (1 << i);
       }
    }
@@ -243,8 +246,16 @@
 
 /**
  * TGSI instruction transform callback.
- * Replace writes to result.color w/ a temp reg.
- * Upon END instruction, insert texture sampling code for antialiasing.
+ * Before the first instruction, insert our new code to sample the
+ * stipple texture (using the fragment coord register) then kill the
+ * fragment if the stipple texture bit is off.
+ *
+ * Insert:
+ *   declare new registers
+ *   MUL texTemp, INPUT[wincoord], 1/32;
+ *   TEX texTemp, texTemp, sampler;
+ *   KIL -texTemp;   # if -texTemp < 0, KILL fragment
+ *   [...original code...]
  */
 static void
 pstip_transform_inst(struct tgsi_transform_context *ctx,
@@ -261,7 +272,7 @@
       uint i;
       int wincoordInput;
 
-      /* find free sampler */
+      /* find free texture sampler */
       pctx->freeSampler = free_bit(pctx->samplersUsed);
       if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
          pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
@@ -271,7 +282,7 @@
       else
          wincoordInput = pctx->wincoordInput;
 
-      /* find one free temp reg */
+      /* find one free temp register */
       for (i = 0; i < 32; i++) {
          if ((pctx->tempsUsed & (1 << i)) == 0) {
             /* found a free temp */
@@ -397,6 +408,7 @@
    struct pipe_shader_state *new_fs;
    struct pstip_transform_context transform;
    const uint newLen = tgsi_num_tokens(fs->tokens) + NUM_NEW_TOKENS;
+   unsigned i;
 
    new_fs = MALLOC(sizeof(*new_fs));
    if (!new_fs)
@@ -408,22 +420,33 @@
       return NULL;
    }
 
+   /* Setup shader transformation info/context.
+    */
    memset(&transform, 0, sizeof(transform));
    transform.wincoordInput = -1;
    transform.maxInput = -1;
    transform.texTemp = -1;
    transform.firstInstruction = TRUE;
+   transform.coordOrigin = TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
    transform.base.transform_instruction = pstip_transform_inst;
    transform.base.transform_declaration = pstip_transform_decl;
    transform.base.transform_immediate = pstip_transform_immed;
 
+   tgsi_scan_shader(fs->tokens, &transform.info);
+
+   /* find fragment coordinate origin property */
+   for (i = 0; i < transform.info.num_properties; i++) {
+      if (transform.info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN)
+         transform.coordOrigin = transform.info.properties[i].data[0];
+   }
+
    tgsi_transform_shader(fs->tokens,
                          (struct tgsi_token *) new_fs->tokens,
                          newLen, &transform.base);
 
 #if 0 /* DEBUG */
    tgsi_dump(fs->tokens, 0);
-   tgsi_dump(pstip_fs.tokens, 0);
+   tgsi_dump(new_fs->tokens, 0);
 #endif
 
    assert(transform.freeSampler < PIPE_MAX_SAMPLERS);
diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c
index 374fc33..d9b39e5 100644
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.c
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@@ -34,21 +34,6 @@
 #include "translate/translate.h"
 #include "translate/translate_cache.h"
 
-/* Hardware vertex fetcher limitations can be described by this structure. */
-struct u_vbuf_caps {
-   /* Vertex format CAPs. */
-   /* TRUE if hardware supports it. */
-   unsigned format_fixed32:1;    /* PIPE_FORMAT_*32*_FIXED */
-   unsigned format_float16:1;    /* PIPE_FORMAT_*16*_FLOAT */
-   unsigned format_float64:1;    /* PIPE_FORMAT_*64*_FLOAT */
-   unsigned format_norm32:1;     /* PIPE_FORMAT_*32*NORM */
-   unsigned format_scaled32:1;   /* PIPE_FORMAT_*32*SCALED */
-
-   /* Whether vertex fetches don't have to be dword-aligned. */
-   /* TRUE if hardware supports it. */
-   unsigned fetch_dword_unaligned:1;
-};
-
 struct u_vbuf_mgr_elements {
    unsigned count;
    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
@@ -69,7 +54,6 @@
 
 struct u_vbuf_mgr_priv {
    struct u_vbuf_mgr b;
-   struct u_vbuf_caps caps;
    struct pipe_context *pipe;
 
    struct translate_cache *translate_cache;
@@ -79,6 +63,8 @@
    void *saved_ve, *fallback_ve;
    boolean ve_binding_lock;
 
+   unsigned saved_buffer_offset[PIPE_MAX_ATTRIBS];
+
    boolean any_user_vbs;
    boolean incompatible_vb_layout;
 };
@@ -87,25 +73,25 @@
 {
    struct pipe_screen *screen = mgr->pipe->screen;
 
-   mgr->caps.format_fixed32 =
+   mgr->b.caps.format_fixed32 =
       screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER);
 
-   mgr->caps.format_float16 =
+   mgr->b.caps.format_float16 =
       screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER);
 
-   mgr->caps.format_float64 =
+   mgr->b.caps.format_float64 =
       screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER);
 
-   mgr->caps.format_norm32 =
+   mgr->b.caps.format_norm32 =
       screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER) &&
       screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER);
 
-   mgr->caps.format_scaled32 =
+   mgr->b.caps.format_scaled32 =
       screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER) &&
       screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER,
@@ -128,7 +114,7 @@
                                      upload_buffer_alignment,
                                      upload_buffer_bind);
 
-   mgr->caps.fetch_dword_unaligned =
+   mgr->b.caps.fetch_dword_unaligned =
          fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED;
 
    u_vbuf_mgr_init_format_caps(mgr);
@@ -182,7 +168,7 @@
 
       /* Check for support. */
       if (mgr->ve->ve[i].src_format == mgr->ve->native_format[i] &&
-          (mgr->caps.fetch_dword_unaligned ||
+          (mgr->b.caps.fetch_dword_unaligned ||
            (vb->buffer_offset % 4 == 0 &&
             vb->stride % 4 == 0 &&
             mgr->ve->ve[i].src_offset % 4 == 0))) {
@@ -363,7 +349,7 @@
       /* Choose a native format.
        * For now we don't care about the alignment, that's going to
        * be sorted out later. */
-      if (!mgr->caps.format_fixed32) {
+      if (!mgr->b.caps.format_fixed32) {
          switch (format) {
             FORMAT_REPLACE(R32_FIXED,           R32_FLOAT);
             FORMAT_REPLACE(R32G32_FIXED,        R32G32_FLOAT);
@@ -372,7 +358,7 @@
             default:;
          }
       }
-      if (!mgr->caps.format_float16) {
+      if (!mgr->b.caps.format_float16) {
          switch (format) {
             FORMAT_REPLACE(R16_FLOAT,           R32_FLOAT);
             FORMAT_REPLACE(R16G16_FLOAT,        R32G32_FLOAT);
@@ -381,7 +367,7 @@
             default:;
          }
       }
-      if (!mgr->caps.format_float64) {
+      if (!mgr->b.caps.format_float64) {
          switch (format) {
             FORMAT_REPLACE(R64_FLOAT,           R32_FLOAT);
             FORMAT_REPLACE(R64G64_FLOAT,        R32G32_FLOAT);
@@ -390,7 +376,7 @@
             default:;
          }
       }
-      if (!mgr->caps.format_norm32) {
+      if (!mgr->b.caps.format_norm32) {
          switch (format) {
             FORMAT_REPLACE(R32_UNORM,           R32_FLOAT);
             FORMAT_REPLACE(R32G32_UNORM,        R32G32_FLOAT);
@@ -403,7 +389,7 @@
             default:;
          }
       }
-      if (!mgr->caps.format_scaled32) {
+      if (!mgr->b.caps.format_scaled32) {
          switch (format) {
             FORMAT_REPLACE(R32_USCALED,         R32_FLOAT);
             FORMAT_REPLACE(R32G32_USCALED,      R32G32_FLOAT);
@@ -425,11 +411,11 @@
       ve->incompatible_layout =
             ve->incompatible_layout ||
             ve->ve[i].src_format != ve->native_format[i] ||
-            (!mgr->caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0);
+            (!mgr->b.caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0);
    }
 
    /* Align the formats to the size of DWORD if needed. */
-   if (!mgr->caps.fetch_dword_unaligned) {
+   if (!mgr->b.caps.fetch_dword_unaligned) {
       for (i = 0; i < count; i++) {
          ve->native_format_size[i] = align(ve->native_format_size[i], 4);
       }
@@ -470,7 +456,7 @@
    mgr->any_user_vbs = FALSE;
    mgr->incompatible_vb_layout = FALSE;
 
-   if (!mgr->caps.fetch_dword_unaligned) {
+   if (!mgr->b.caps.fetch_dword_unaligned) {
       /* Check if the strides and offsets are aligned to the size of DWORD. */
       for (i = 0; i < count; i++) {
          if (bufs[i].buffer) {
@@ -488,6 +474,7 @@
 
       pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer);
       pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL);
+      mgr->saved_buffer_offset[i] = vb->buffer_offset;
 
       if (!vb->buffer) {
          continue;
@@ -647,6 +634,13 @@
 void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb)
 {
    struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+   unsigned i;
+
+   /* buffer offsets were modified in u_vbuf_upload_buffers */
+   if (mgr->any_user_vbs) {
+      for (i = 0; i < mgr->b.nr_vertex_buffers; i++)
+         mgr->b.vertex_buffer[i].buffer_offset = mgr->saved_buffer_offset[i];
+   }
 
    if (mgr->fallback_ve) {
       u_vbuf_translate_end(mgr);
diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h
index 4e63724..c653ca4 100644
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.h
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h
@@ -37,6 +37,21 @@
 #include "pipe/p_state.h"
 #include "util/u_transfer.h"
 
+/* Hardware vertex fetcher limitations can be described by this structure. */
+struct u_vbuf_caps {
+   /* Vertex format CAPs. */
+   /* TRUE if hardware supports it. */
+   unsigned format_fixed32:1;    /* PIPE_FORMAT_*32*_FIXED */
+   unsigned format_float16:1;    /* PIPE_FORMAT_*16*_FLOAT */
+   unsigned format_float64:1;    /* PIPE_FORMAT_*64*_FLOAT */
+   unsigned format_norm32:1;     /* PIPE_FORMAT_*32*NORM */
+   unsigned format_scaled32:1;   /* PIPE_FORMAT_*32*SCALED */
+
+   /* Whether vertex fetches don't have to be dword-aligned. */
+   /* TRUE if hardware supports it. */
+   unsigned fetch_dword_unaligned:1;
+};
+
 /* The manager.
  * This structure should also be used to access vertex buffers
  * from a driver. */
@@ -63,6 +78,8 @@
     * - u_upload_buffer
     * - u_upload_flush */
    struct u_upload_mgr *uploader;
+
+   struct u_vbuf_caps caps;
 };
 
 struct u_vbuf_resource {
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 3bd4af2..c73f976 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -231,6 +231,8 @@
    struct pipe_rasterizer_state rast;
    struct pipe_sampler_state sampler;
    struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state dsa;
+   unsigned i;
 
    assert(c);
 
@@ -289,6 +291,24 @@
 
    c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast);
 
+   memset(&dsa, 0, sizeof dsa);
+   dsa.depth.enabled = 0;
+   dsa.depth.writemask = 0;
+   dsa.depth.func = PIPE_FUNC_ALWAYS;
+   for (i = 0; i < 2; ++i) {
+      dsa.stencil[i].enabled = 0;
+      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].valuemask = 0;
+      dsa.stencil[i].writemask = 0;
+   }
+   dsa.alpha.enabled = 0;
+   dsa.alpha.func = PIPE_FUNC_ALWAYS;
+   dsa.alpha.ref_value = 0;
+   c->dsa = c->pipe->create_depth_stencil_alpha_state(c->pipe, &dsa);
+   c->pipe->bind_depth_stencil_alpha_state(c->pipe, c->dsa);
    return true;
 }
 
@@ -296,6 +316,11 @@
 {
    assert(c);
 
+   /* Asserted in softpipe_delete_fs_state() for some reason */
+   c->pipe->bind_vs_state(c->pipe, NULL);
+   c->pipe->bind_fs_state(c->pipe, NULL);
+
+   c->pipe->delete_depth_stencil_alpha_state(c->pipe, c->dsa);
    c->pipe->delete_sampler_state(c->pipe, c->sampler_linear);
    c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest);
    c->pipe->delete_blend_state(c->pipe, c->blend);
@@ -648,7 +673,6 @@
 
 void
 vl_compositor_render(struct vl_compositor *c,
-                     enum pipe_mpeg12_picture_type picture_type,
                      struct pipe_surface           *dst_surface,
                      struct pipe_video_rect        *dst_area,
                      struct pipe_video_rect        *dst_clip)
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 87ad39b..2075100 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -68,6 +68,7 @@
    void *sampler_nearest;
    void *blend;
    void *rast;
+   void *dsa;
    void *vertex_elems_state;
 
    void *vs;
@@ -155,7 +156,6 @@
  */
 void
 vl_compositor_render(struct vl_compositor          *compositor,
-                     enum pipe_mpeg12_picture_type picture_type,
                      struct pipe_surface           *dst_surface,
                      struct pipe_video_rect        *dst_area,
                      struct pipe_video_rect        *dst_clip);
diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c
index fac0335..b23827d 100644
--- a/src/gallium/auxiliary/vl/vl_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_decoder.c
@@ -44,6 +44,19 @@
    }
 }
 
+unsigned
+vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile)
+{
+   assert(screen);
+   switch (u_reduce_video_profile(profile)) {
+      case PIPE_VIDEO_CODEC_MPEG12:
+         return 4;
+
+      default:
+         return 1;
+   }
+}
+
 struct pipe_video_decoder *
 vl_create_decoder(struct pipe_context *pipe,
                   enum pipe_video_profile profile,
diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h
index 0e9280d..fed529c 100644
--- a/src/gallium/auxiliary/vl/vl_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_decoder.h
@@ -38,6 +38,12 @@
 vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile);
 
 /**
+ * the desired number of buffers for optimal operation
+ */
+unsigned
+vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile);
+
+/**
  * standard implementation of pipe->create_video_decoder
  */
 struct pipe_video_decoder *
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 645d06a..ad78614 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -143,7 +143,7 @@
 create_mismatch_vert_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
-   struct ureg_src vrect, vpos;
+   struct ureg_src vpos;
    struct ureg_src scale;
    struct ureg_dst t_tex;
    struct ureg_dst o_vpos, o_addr[2];
@@ -152,7 +152,6 @@
    if (!shader)
       return NULL;
 
-   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
 
    t_tex = ureg_DECL_temporary(shader);
diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index bd05205..0b3723c 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -103,16 +103,15 @@
 {
    struct ureg_program *shader;
    struct ureg_src mv_scale;
-   struct ureg_src vrect, vmv[2];
+   struct ureg_src vmv[2];
    struct ureg_dst t_vpos;
-   struct ureg_dst o_vpos, o_vmv[2];
+   struct ureg_dst o_vmv[2];
    unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
       return NULL;
 
-   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
    vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
 
@@ -121,7 +120,6 @@
       (float)MACROBLOCK_HEIGHT / r->buffer_height)
    );
 
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
    o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 9dd032e..db05b15 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1,6 +1,7 @@
 /**************************************************************************
  *
- * Copyright 2011 Christian König.
+ * Copyright 2011 Maarten Lankhorst
+ * Copyright 2011 Christian König
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,1813 +26,967 @@
  *
  **************************************************************************/
 
-/**
- * This file is based uppon slice_xvmc.c and vlc.h from the xine project,
- * which in turn is based on mpeg2dec. The following is the original copyright:
- *
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include <stdint.h>
-
-#include <pipe/p_compiler.h>
-#include <pipe/p_video_state.h>
+#include <pipe/p_video_decoder.h>
+#include <util/u_memory.h>
 
 #include "vl_vlc.h"
 #include "vl_mpeg12_bitstream.h"
 
-/* take num bits from the high part of bit_buf and zero extend them */
-#define UBITS(buf,num) (((uint32_t)(buf)) >> (32 - (num)))
+enum {
+   dct_End_of_Block = 0xFF,
+   dct_Escape = 0xFE,
+   dct_DC = 0xFD,
+   dct_AC = 0xFC
+};
 
-/* take num bits from the high part of bit_buf and sign extend them */
-#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num)))
-
-/* macroblock modes */
-#define MACROBLOCK_INTRA 1
-#define MACROBLOCK_PATTERN 2
-#define MACROBLOCK_MOTION_BACKWARD 4
-#define MACROBLOCK_MOTION_FORWARD 8
-#define MACROBLOCK_QUANT 16
-
-/* motion_type */
-#define MOTION_TYPE_MASK (3*64)
-#define MOTION_TYPE_BASE 64
-#define MC_FIELD (1*64)
-#define MC_FRAME (2*64)
-#define MC_16X8 (2*64)
-#define MC_DMV (3*64)
-
-/* picture structure */
-#define TOP_FIELD     1
-#define BOTTOM_FIELD  2
-#define FRAME_PICTURE 3
-
-/* picture coding type (mpeg2 header) */
-#define I_TYPE 1
-#define P_TYPE 2
-#define B_TYPE 3
-#define D_TYPE 4
-
-typedef struct {
-   uint8_t modes;
-   uint8_t len;
-} MBtab;
-
-typedef struct {
-   uint8_t delta;
-   uint8_t len;
-} MVtab;
-
-typedef struct {
-   int8_t dmv;
-   uint8_t len;
-} DMVtab;
-
-typedef struct {
-   uint8_t cbp;
-   uint8_t len;
-} CBPtab;
-
-typedef struct {
-   uint8_t size;
-   uint8_t len;
-} DCtab;
-
-typedef struct {
+struct dct_coeff
+{
+   uint8_t length;
    uint8_t run;
-   uint8_t level;
-   uint8_t len;
-} DCTtab;
-
-typedef struct {
-   uint8_t mba;
-   uint8_t len;
-} MBAtab;
-
-#define INTRA MACROBLOCK_INTRA
-#define QUANT MACROBLOCK_QUANT
-#define MC MACROBLOCK_MOTION_FORWARD
-#define CODED MACROBLOCK_PATTERN
-#define FWD MACROBLOCK_MOTION_FORWARD
-#define BWD MACROBLOCK_MOTION_BACKWARD
-#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD
-
-static const MBtab MB_I [] = {
-   {INTRA|QUANT, 2}, {INTRA, 1}
+   int16_t level;
 };
 
-static const MBtab MB_P [] = {
-   {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA,    5},
-   {MC,          3}, {MC,          3}, {MC,             3}, {MC,       3},
-   {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
-   {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
-   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1}
-};
-
-static const MBtab MB_B [] = {
-   {0,                 0}, {INTRA|QUANT,       6},
-   {BWD|CODED|QUANT,   6}, {FWD|CODED|QUANT,   6},
-   {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
-                                     {INTRA,       5}, {INTRA,       5},
-   {FWD,         4}, {FWD,         4}, {FWD,         4}, {FWD,         4},
-   {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4},
-   {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
-   {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
-   {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
-   {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
-   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
-};
-
-#undef INTRA
-#undef QUANT
-#undef MC
-#undef CODED
-#undef FWD
-#undef BWD
-#undef INTER
-
-static const MVtab MV_4 [] = {
-   { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
-};
-
-static const MVtab MV_10 [] = {
-   { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
-   { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
-   {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
-   { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
-   { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
-   { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
-};
-
-static const DMVtab DMV_2 [] = {
-   { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
-};
-
-static const CBPtab CBP_7 [] = {
-   {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
-   {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
-   {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
-   {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6},
-   {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5},
-   {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5},
-   {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5},
-   {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5},
-   {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5},
-   {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5},
-   {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5},
-   {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5},
-   {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5},
-   {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5},
-   {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5},
-   {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
-   {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
-   {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
-   {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
-   {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
-   {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
-   {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
-   {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
-   {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
-   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}
-};
-
-static const CBPtab CBP_9 [] = {
-   {0,    0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
-   {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
-   {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
-   {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8},
-   {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8},
-   {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8},
-   {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8},
-   {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8},
-   {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8},
-   {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8},
-   {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8},
-   {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8},
-   {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8},
-   {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8},
-   {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8},
-   {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}
-};
-
-static const DCtab DC_lum_5 [] = {
-   {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-   {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-   {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
-   {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
-};
-
-static const DCtab DC_chrom_5 [] = {
-   {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
-   {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-   {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-   {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
-};
-
-static const DCtab DC_long [] = {
-   {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
-   {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
-   {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
-   {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
-};
-
-static const DCTtab DCT_16 [] = {
-   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-   {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
-   {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
-   { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
-   { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
-};
-
-static const DCTtab DCT_15 [] = {
-   {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
-   {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
-   {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
-   {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
-   {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
-   {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
-   {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
-   {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
-   {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
-   {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
-   {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
-   {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
-};
-
-static const DCTtab DCT_13 [] = {
-   { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
-   {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
-   {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
-   { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
-   {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
-   {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
-   {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
-   { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
-   {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
-   { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
-   {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
-   {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
-};
-
-static const DCTtab DCT_B14_10 [] = {
-   { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
-   {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
-};
-
-static const DCTtab DCT_B14_8 [] = {
-   { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
-   {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
-   {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
-   {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
-   {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
-   {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
-   {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
-   { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
-   {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
-};
-
-static const DCTtab DCT_B14AC_5 [] = {
-                {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
-   {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
-   {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
-};
-
-static const DCTtab DCT_B14DC_5 [] = {
-                {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
-   {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
-};
-
-static const DCTtab DCT_B15_10 [] = {
-   {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
-   {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
-};
-
-static const DCTtab DCT_B15_8 [] = {
-   { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
-   {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
-   {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
-   {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
-   {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
-   {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
-   {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
-   {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
-   { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
-   {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
-   {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
-   {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
-   {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
-   {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
-   {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
-   {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
-   {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
-   {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
-   { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
-   { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
-   {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
-   {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
-};
-
-static const MBAtab MBA_5 [] = {
-                   {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
-   {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
-   {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
-   {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
-};
-
-static const MBAtab MBA_11 [] = {
-   {32, 11}, {31, 11}, {30, 11}, {29, 11},
-   {28, 11}, {27, 11}, {26, 11}, {25, 11},
-   {24, 11}, {23, 11}, {22, 11}, {21, 11},
-   {20, 10}, {20, 10}, {19, 10}, {19, 10},
-   {18, 10}, {18, 10}, {17, 10}, {17, 10},
-   {16, 10}, {16, 10}, {15, 10}, {15, 10},
-   {14,  8}, {14,  8}, {14,  8}, {14,  8},
-   {14,  8}, {14,  8}, {14,  8}, {14,  8},
-   {13,  8}, {13,  8}, {13,  8}, {13,  8},
-   {13,  8}, {13,  8}, {13,  8}, {13,  8},
-   {12,  8}, {12,  8}, {12,  8}, {12,  8},
-   {12,  8}, {12,  8}, {12,  8}, {12,  8},
-   {11,  8}, {11,  8}, {11,  8}, {11,  8},
-   {11,  8}, {11,  8}, {11,  8}, {11,  8},
-   {10,  8}, {10,  8}, {10,  8}, {10,  8},
-   {10,  8}, {10,  8}, {10,  8}, {10,  8},
-   { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
-   { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
-   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
-};
-
-static const int non_linear_quantizer_scale[] = {
-   0,  1,  2,  3,  4,  5,   6,   7,
-   8, 10, 12, 14, 16, 18,  20,  22,
-   24, 28, 32, 36, 40, 44,  48,  52,
-   56, 64, 72, 80, 88, 96, 104, 112
-};
-
-static INLINE int
-get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
+struct dct_coeff_compressed
 {
-   int macroblock_modes;
-   const MBtab * tab;
+   uint32_t bitcode;
+   struct dct_coeff coeff;
+};
 
-   switch (picture->picture_coding_type) {
-   case I_TYPE:
+/* coding table as found in the spec annex B.5 table B-1 */
+static const struct vl_vlc_compressed macroblock_address_increment[] = {
+   { 0x8000, { 1, 1 } },
+   { 0x6000, { 3, 2 } },
+   { 0x4000, { 3, 3 } },
+   { 0x3000, { 4, 4 } },
+   { 0x2000, { 4, 5 } },
+   { 0x1800, { 5, 6 } },
+   { 0x1000, { 5, 7 } },
+   { 0x0e00, { 7, 8 } },
+   { 0x0c00, { 7, 9 } },
+   { 0x0b00, { 8, 10 } },
+   { 0x0a00, { 8, 11 } },
+   { 0x0900, { 8, 12 } },
+   { 0x0800, { 8, 13 } },
+   { 0x0700, { 8, 14 } },
+   { 0x0600, { 8, 15 } },
+   { 0x05c0, { 10, 16 } },
+   { 0x0580, { 10, 17 } },
+   { 0x0540, { 10, 18 } },
+   { 0x0500, { 10, 19 } },
+   { 0x04c0, { 10, 20 } },
+   { 0x0480, { 10, 21 } },
+   { 0x0460, { 11, 22 } },
+   { 0x0440, { 11, 23 } },
+   { 0x0420, { 11, 24 } },
+   { 0x0400, { 11, 25 } },
+   { 0x03e0, { 11, 26 } },
+   { 0x03c0, { 11, 27 } },
+   { 0x03a0, { 11, 28 } },
+   { 0x0380, { 11, 29 } },
+   { 0x0360, { 11, 30 } },
+   { 0x0340, { 11, 31 } },
+   { 0x0320, { 11, 32 } },
+   { 0x0300, { 11, 33 } }
+};
 
-      tab = MB_I + vl_vlc_ubits(&bs->vlc, 1);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      macroblock_modes = tab->modes;
+#define Q PIPE_MPEG12_MB_TYPE_QUANT
+#define F PIPE_MPEG12_MB_TYPE_MOTION_FORWARD
+#define B PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD
+#define P PIPE_MPEG12_MB_TYPE_PATTERN
+#define I PIPE_MPEG12_MB_TYPE_INTRA
 
-      return macroblock_modes;
+/* coding table as found in the spec annex B.5 table B-2 */
+static const struct vl_vlc_compressed macroblock_type_i[] = {
+   { 0x8000, { 1, I } },
+   { 0x4000, { 2, Q|I } }
+};
 
-   case P_TYPE:
+/* coding table as found in the spec annex B.5 table B-3 */
+static const struct vl_vlc_compressed macroblock_type_p[] = {
+   { 0x8000, { 1, F|P } },
+   { 0x4000, { 2, P } },
+   { 0x2000, { 3, F } },
+   { 0x1800, { 5, I } },
+   { 0x1000, { 5, Q|F|P } },
+   { 0x0800, { 5, Q|P } },
+   { 0x0400, { 6, Q|I } }
+};
 
-      tab = MB_P + vl_vlc_ubits(&bs->vlc, 5);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      macroblock_modes = tab->modes;
+/* coding table as found in the spec annex B.5 table B-4 */
+static const struct vl_vlc_compressed macroblock_type_b[] = {
+   { 0x8000, { 2, F|B } },
+   { 0xC000, { 2, F|B|P } },
+   { 0x4000, { 3, B } },
+   { 0x6000, { 3, B|P } },
+   { 0x2000, { 4, F } },
+   { 0x3000, { 4, F|P } },
+   { 0x1800, { 5, I } },
+   { 0x1000, { 5, Q|F|B|P } },
+   { 0x0C00, { 6, Q|F|P } },
+   { 0x0800, { 6, Q|B|P } },
+   { 0x0400, { 6, Q|I } }
+};
 
-      if (picture->picture_structure != FRAME_PICTURE) {
-         if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
-            vl_vlc_dumpbits(&bs->vlc, 2);
-          }
-          return macroblock_modes;
-      } else if (picture->frame_pred_frame_dct) {
-          if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
-            macroblock_modes |= MC_FRAME;
-          return macroblock_modes;
-      } else {
-          if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
-            vl_vlc_dumpbits(&bs->vlc, 2);
-          }
-          return macroblock_modes;
-      }
+#undef Q
+#undef F
+#undef B
+#undef P
+#undef I
 
-   case B_TYPE:
+/* coding table as found in the spec annex B.5 table B-9 */
+static const struct vl_vlc_compressed coded_block_pattern[] = {
+   { 0xE000, { 3, 60 } },
+   { 0xD000, { 4, 4 } },
+   { 0xC000, { 4, 8 } },
+   { 0xB000, { 4, 16 } },
+   { 0xA000, { 4, 32 } },
+   { 0x9800, { 5, 12 } },
+   { 0x9000, { 5, 48 } },
+   { 0x8800, { 5, 20 } },
+   { 0x8000, { 5, 40 } },
+   { 0x7800, { 5, 28 } },
+   { 0x7000, { 5, 44 } },
+   { 0x6800, { 5, 52 } },
+   { 0x6000, { 5, 56 } },
+   { 0x5800, { 5, 1 } },
+   { 0x5000, { 5, 61 } },
+   { 0x4800, { 5, 2 } },
+   { 0x4000, { 5, 62 } },
+   { 0x3C00, { 6, 24 } },
+   { 0x3800, { 6, 36 } },
+   { 0x3400, { 6, 3 } },
+   { 0x3000, { 6, 63 } },
+   { 0x2E00, { 7, 5 } },
+   { 0x2C00, { 7, 9 } },
+   { 0x2A00, { 7, 17 } },
+   { 0x2800, { 7, 33 } },
+   { 0x2600, { 7, 6 } },
+   { 0x2400, { 7, 10 } },
+   { 0x2200, { 7, 18 } },
+   { 0x2000, { 7, 34 } },
+   { 0x1F00, { 8, 7 } },
+   { 0x1E00, { 8, 11 } },
+   { 0x1D00, { 8, 19 } },
+   { 0x1C00, { 8, 35 } },
+   { 0x1B00, { 8, 13 } },
+   { 0x1A00, { 8, 49 } },
+   { 0x1900, { 8, 21 } },
+   { 0x1800, { 8, 41 } },
+   { 0x1700, { 8, 14 } },
+   { 0x1600, { 8, 50 } },
+   { 0x1500, { 8, 22 } },
+   { 0x1400, { 8, 42 } },
+   { 0x1300, { 8, 15 } },
+   { 0x1200, { 8, 51 } },
+   { 0x1100, { 8, 23 } },
+   { 0x1000, { 8, 43 } },
+   { 0x0F00, { 8, 25 } },
+   { 0x0E00, { 8, 37 } },
+   { 0x0D00, { 8, 26 } },
+   { 0x0C00, { 8, 38 } },
+   { 0x0B00, { 8, 29 } },
+   { 0x0A00, { 8, 45 } },
+   { 0x0900, { 8, 53 } },
+   { 0x0800, { 8, 57 } },
+   { 0x0700, { 8, 30 } },
+   { 0x0600, { 8, 46 } },
+   { 0x0500, { 8, 54 } },
+   { 0x0400, { 8, 58 } },
+   { 0x0380, { 9, 31 } },
+   { 0x0300, { 9, 47 } },
+   { 0x0280, { 9, 55 } },
+   { 0x0200, { 9, 59 } },
+   { 0x0180, { 9, 27 } },
+   { 0x0100, { 9, 39 } },
+   { 0x0080, { 9, 0 } }
+};
 
-      tab = MB_B + vl_vlc_ubits(&bs->vlc, 6);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      macroblock_modes = tab->modes;
+/* coding table as found in the spec annex B.5 table B-10 */
+static const struct vl_vlc_compressed motion_code[] = {
+   { 0x0320, { 11, -16 } },
+   { 0x0360, { 11, -15 } },
+   { 0x03a0, { 11, -14 } },
+   { 0x03e0, { 11, -13 } },
+   { 0x0420, { 11, -12 } },
+   { 0x0460, { 11, -11 } },
+   { 0x04c0, { 10, -10 } },
+   { 0x0540, { 10, -9 } },
+   { 0x05c0, { 10, -8 } },
+   { 0x0700, { 8, -7 } },
+   { 0x0900, { 8, -6 } },
+   { 0x0b00, { 8, -5 } },
+   { 0x0e00, { 7, -4 } },
+   { 0x1800, { 5, -3 } },
+   { 0x3000, { 4, -2 } },
+   { 0x6000, { 3, -1 } },
+   { 0x8000, { 1, 0 } },
+   { 0x4000, { 3, 1 } },
+   { 0x2000, { 4, 2 } },
+   { 0x1000, { 5, 3 } },
+   { 0x0c00, { 7, 4 } },
+   { 0x0a00, { 8, 5 } },
+   { 0x0800, { 8, 6 } },
+   { 0x0600, { 8, 7 } },
+   { 0x0580, { 10, 8 } },
+   { 0x0500, { 10, 9 } },
+   { 0x0480, { 10, 10 } },
+   { 0x0440, { 11, 11 } },
+   { 0x0400, { 11, 12 } },
+   { 0x03c0, { 11, 13 } },
+   { 0x0380, { 11, 14 } },
+   { 0x0340, { 11, 15 } },
+   { 0x0300, { 11, 16 } }
+};
 
-      if (picture->picture_structure != FRAME_PICTURE) {
-          if (! (macroblock_modes & MACROBLOCK_INTRA)) {
-            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
-            vl_vlc_dumpbits(&bs->vlc, 2);
-          }
-      } else if (picture->frame_pred_frame_dct) {
-          macroblock_modes |= MC_FRAME;
-      } else if (!(macroblock_modes & MACROBLOCK_INTRA)) {
-          macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
-          vl_vlc_dumpbits(&bs->vlc, 2);
-      }
-      return macroblock_modes;
+/* coding table as found in the spec annex B.5 table B-11 */
+static const struct vl_vlc_compressed dmvector[] = {
+   { 0x0000, { 1, 0 } },
+   { 0x8000, { 2, 1 } },
+   { 0xc000, { 2, -1 } }
+};
 
-   case D_TYPE:
+/* coding table as found in the spec annex B.5 table B-12 */
+static const struct vl_vlc_compressed dct_dc_size_luminance[] = {
+   { 0x8000, { 3, 0 } },
+   { 0x0000, { 2, 1 } },
+   { 0x4000, { 2, 2 } },
+   { 0xA000, { 3, 3 } },
+   { 0xC000, { 3, 4 } },
+   { 0xE000, { 4, 5 } },
+   { 0xF000, { 5, 6 } },
+   { 0xF800, { 6, 7 } },
+   { 0xFC00, { 7, 8 } },
+   { 0xFE00, { 8, 9 } },
+   { 0xFF00, { 9, 10 } },
+   { 0xFF80, { 9, 11 } }
+};
 
-      vl_vlc_dumpbits(&bs->vlc, 1);
-      return MACROBLOCK_INTRA;
+/* coding table as found in the spec annex B.5 table B-13 */
+static const struct vl_vlc_compressed dct_dc_size_chrominance[] = {
+   { 0x0000, { 2, 0 } },
+   { 0x4000, { 2, 1 } },
+   { 0x8000, { 2, 2 } },
+   { 0xC000, { 3, 3 } },
+   { 0xE000, { 4, 4 } },
+   { 0xF000, { 5, 5 } },
+   { 0xF800, { 6, 6 } },
+   { 0xFC00, { 7, 7 } },
+   { 0xFE00, { 8, 8 } },
+   { 0xFF00, { 9, 9 } },
+   { 0xFF80, { 10, 10 } },
+   { 0xFFC0, { 10, 11 } }
+};
 
-   default:
-      return 0;
-   }
-}
+/* coding table as found in the spec annex B.5 table B-14 */
+static const struct dct_coeff_compressed dct_coeff_tbl_zero[] = {
+   { 0x8000, { 2, dct_End_of_Block, 0 } },
+   { 0x8000, { 1, dct_DC, 1 } },
+   { 0xC000, { 2, dct_AC, 1 } },
+   { 0x6000, { 3, 1, 1 } },
+   { 0x4000, { 4, 0, 2 } },
+   { 0x5000, { 4, 2, 1 } },
+   { 0x2800, { 5, 0, 3 } },
+   { 0x3800, { 5, 3, 1 } },
+   { 0x3000, { 5, 4, 1 } },
+   { 0x1800, { 6, 1, 2 } },
+   { 0x1C00, { 6, 5, 1 } },
+   { 0x1400, { 6, 6, 1 } },
+   { 0x1000, { 6, 7, 1 } },
+   { 0x0C00, { 7, 0, 4 } },
+   { 0x0800, { 7, 2, 2 } },
+   { 0x0E00, { 7, 8, 1 } },
+   { 0x0A00, { 7, 9, 1 } },
+   { 0x0400, { 6, dct_Escape, 0 } },
+   { 0x2600, { 8, 0, 5 } },
+   { 0x2100, { 8, 0, 6 } },
+   { 0x2500, { 8, 1, 3 } },
+   { 0x2400, { 8, 3, 2 } },
+   { 0x2700, { 8, 10, 1 } },
+   { 0x2300, { 8, 11, 1 } },
+   { 0x2200, { 8, 12, 1 } },
+   { 0x2000, { 8, 13, 1 } },
+   { 0x0280, { 10, 0, 7 } },
+   { 0x0300, { 10, 1, 4 } },
+   { 0x02C0, { 10, 2, 3 } },
+   { 0x03C0, { 10, 4, 2 } },
+   { 0x0240, { 10, 5, 2 } },
+   { 0x0380, { 10, 14, 1 } },
+   { 0x0340, { 10, 15, 1 } },
+   { 0x0200, { 10, 16, 1 } },
+   { 0x01D0, { 12, 0, 8 } },
+   { 0x0180, { 12, 0, 9 } },
+   { 0x0130, { 12, 0, 10 } },
+   { 0x0100, { 12, 0, 11 } },
+   { 0x01B0, { 12, 1, 5 } },
+   { 0x0140, { 12, 2, 4 } },
+   { 0x01C0, { 12, 3, 3 } },
+   { 0x0120, { 12, 4, 3 } },
+   { 0x01E0, { 12, 6, 2 } },
+   { 0x0150, { 12, 7, 2 } },
+   { 0x0110, { 12, 8, 2 } },
+   { 0x01F0, { 12, 17, 1 } },
+   { 0x01A0, { 12, 18, 1 } },
+   { 0x0190, { 12, 19, 1 } },
+   { 0x0170, { 12, 20, 1 } },
+   { 0x0160, { 12, 21, 1 } },
+   { 0x00D0, { 13, 0, 12 } },
+   { 0x00C8, { 13, 0, 13 } },
+   { 0x00C0, { 13, 0, 14 } },
+   { 0x00B8, { 13, 0, 15 } },
+   { 0x00B0, { 13, 1, 6 } },
+   { 0x00A8, { 13, 1, 7 } },
+   { 0x00A0, { 13, 2, 5 } },
+   { 0x0098, { 13, 3, 4 } },
+   { 0x0090, { 13, 5, 3 } },
+   { 0x0088, { 13, 9, 2 } },
+   { 0x0080, { 13, 10, 2 } },
+   { 0x00F8, { 13, 22, 1 } },
+   { 0x00F0, { 13, 23, 1 } },
+   { 0x00E8, { 13, 24, 1 } },
+   { 0x00E0, { 13, 25, 1 } },
+   { 0x00D8, { 13, 26, 1 } },
+   { 0x007C, { 14, 0, 16 } },
+   { 0x0078, { 14, 0, 17 } },
+   { 0x0074, { 14, 0, 18 } },
+   { 0x0070, { 14, 0, 19 } },
+   { 0x006C, { 14, 0, 20 } },
+   { 0x0068, { 14, 0, 21 } },
+   { 0x0064, { 14, 0, 22 } },
+   { 0x0060, { 14, 0, 23 } },
+   { 0x005C, { 14, 0, 24 } },
+   { 0x0058, { 14, 0, 25 } },
+   { 0x0054, { 14, 0, 26 } },
+   { 0x0050, { 14, 0, 27 } },
+   { 0x004C, { 14, 0, 28 } },
+   { 0x0048, { 14, 0, 29 } },
+   { 0x0044, { 14, 0, 30 } },
+   { 0x0040, { 14, 0, 31 } },
+   { 0x0030, { 15, 0, 32 } },
+   { 0x002E, { 15, 0, 33 } },
+   { 0x002C, { 15, 0, 34 } },
+   { 0x002A, { 15, 0, 35 } },
+   { 0x0028, { 15, 0, 36 } },
+   { 0x0026, { 15, 0, 37 } },
+   { 0x0024, { 15, 0, 38 } },
+   { 0x0022, { 15, 0, 39 } },
+   { 0x0020, { 15, 0, 40 } },
+   { 0x003E, { 15, 1, 8 } },
+   { 0x003C, { 15, 1, 9 } },
+   { 0x003A, { 15, 1, 10 } },
+   { 0x0038, { 15, 1, 11 } },
+   { 0x0036, { 15, 1, 12 } },
+   { 0x0034, { 15, 1, 13 } },
+   { 0x0032, { 15, 1, 14 } },
+   { 0x0013, { 16, 1, 15 } },
+   { 0x0012, { 16, 1, 16 } },
+   { 0x0011, { 16, 1, 17 } },
+   { 0x0010, { 16, 1, 18 } },
+   { 0x0014, { 16, 6, 3 } },
+   { 0x001A, { 16, 11, 2 } },
+   { 0x0019, { 16, 12, 2 } },
+   { 0x0018, { 16, 13, 2 } },
+   { 0x0017, { 16, 14, 2 } },
+   { 0x0016, { 16, 15, 2 } },
+   { 0x0015, { 16, 16, 2 } },
+   { 0x001F, { 16, 27, 1 } },
+   { 0x001E, { 16, 28, 1 } },
+   { 0x001D, { 16, 29, 1 } },
+   { 0x001C, { 16, 30, 1 } },
+   { 0x001B, { 16, 31, 1 } }
+};
 
-static INLINE enum pipe_mpeg12_dct_type
-get_dct_type(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int macroblock_modes)
-{
-   enum pipe_mpeg12_dct_type dct_type = PIPE_MPEG12_DCT_TYPE_FRAME;
+/* coding table as found in the spec annex B.5 table B-15 */
+static const struct dct_coeff_compressed dct_coeff_tbl_one[] = {
+   { 0x6000, { 4, dct_End_of_Block, 0 } },
+   { 0x8000, { 2, 0, 1 } },
+   { 0x4000, { 3, 1, 1 } },
+   { 0xC000, { 3, 0, 2 } },
+   { 0x2800, { 5, 2, 1 } },
+   { 0x7000, { 4, 0, 3 } },
+   { 0x3800, { 5, 3, 1 } },
+   { 0x1800, { 6, 4, 1 } },
+   { 0x3000, { 5, 1, 2 } },
+   { 0x1C00, { 6, 5, 1 } },
+   { 0x0C00, { 7, 6, 1 } },
+   { 0x0800, { 7, 7, 1 } },
+   { 0xE000, { 5, 0, 4 } },
+   { 0x0E00, { 7, 2, 2 } },
+   { 0x0A00, { 7, 8, 1 } },
+   { 0xF000, { 7, 9, 1 } },
+   { 0x0400, { 6, dct_Escape, 0 } },
+   { 0xE800, { 5, 0, 5 } },
+   { 0x1400, { 6, 0, 6 } },
+   { 0xF200, { 7, 1, 3 } },
+   { 0x2600, { 8, 3, 2 } },
+   { 0xF400, { 7, 10, 1 } },
+   { 0x2100, { 8, 11, 1 } },
+   { 0x2500, { 8, 12, 1 } },
+   { 0x2400, { 8, 13, 1 } },
+   { 0x1000, { 6, 0, 7 } },
+   { 0x2700, { 8, 1, 4 } },
+   { 0xFC00, { 8, 2, 3 } },
+   { 0xFD00, { 8, 4, 2 } },
+   { 0x0200, { 9, 5, 2 } },
+   { 0x0280, { 9, 14, 1 } },
+   { 0x0380, { 9, 15, 1 } },
+   { 0x0340, { 10, 16, 1 } },
+   { 0xF600, { 7, 0, 8 } },
+   { 0xF800, { 7, 0, 9 } },
+   { 0x2300, { 8, 0, 10 } },
+   { 0x2200, { 8, 0, 11 } },
+   { 0x2000, { 8, 1, 5 } },
+   { 0x0300, { 10, 2, 4 } },
+   { 0x01C0, { 12, 3, 3 } },
+   { 0x0120, { 12, 4, 3 } },
+   { 0x01E0, { 12, 6, 2 } },
+   { 0x0150, { 12, 7, 2 } },
+   { 0x0110, { 12, 8, 2 } },
+   { 0x01F0, { 12, 17, 1 } },
+   { 0x01A0, { 12, 18, 1 } },
+   { 0x0190, { 12, 19, 1 } },
+   { 0x0170, { 12, 20, 1 } },
+   { 0x0160, { 12, 21, 1 } },
+   { 0xFA00, { 8, 0, 12 } },
+   { 0xFB00, { 8, 0, 13 } },
+   { 0xFE00, { 8, 0, 14 } },
+   { 0xFF00, { 8, 0, 15 } },
+   { 0x00B0, { 13, 1, 6 } },
+   { 0x00A8, { 13, 1, 7 } },
+   { 0x00A0, { 13, 2, 5 } },
+   { 0x0098, { 13, 3, 4 } },
+   { 0x0090, { 13, 5, 3 } },
+   { 0x0088, { 13, 9, 2 } },
+   { 0x0080, { 13, 10, 2 } },
+   { 0x00F8, { 13, 22, 1 } },
+   { 0x00F0, { 13, 23, 1 } },
+   { 0x00E8, { 13, 24, 1 } },
+   { 0x00E0, { 13, 25, 1 } },
+   { 0x00D8, { 13, 26, 1 } },
+   { 0x007C, { 14, 0, 16 } },
+   { 0x0078, { 14, 0, 17 } },
+   { 0x0074, { 14, 0, 18 } },
+   { 0x0070, { 14, 0, 19 } },
+   { 0x006C, { 14, 0, 20 } },
+   { 0x0068, { 14, 0, 21 } },
+   { 0x0064, { 14, 0, 22 } },
+   { 0x0060, { 14, 0, 23 } },
+   { 0x005C, { 14, 0, 24 } },
+   { 0x0058, { 14, 0, 25 } },
+   { 0x0054, { 14, 0, 26 } },
+   { 0x0050, { 14, 0, 27 } },
+   { 0x004C, { 14, 0, 28 } },
+   { 0x0048, { 14, 0, 29 } },
+   { 0x0044, { 14, 0, 30 } },
+   { 0x0040, { 14, 0, 31 } },
+   { 0x0030, { 15, 0, 32 } },
+   { 0x002E, { 15, 0, 33 } },
+   { 0x002C, { 15, 0, 34 } },
+   { 0x002A, { 15, 0, 35 } },
+   { 0x0028, { 15, 0, 36 } },
+   { 0x0026, { 15, 0, 37 } },
+   { 0x0024, { 15, 0, 38 } },
+   { 0x0022, { 15, 0, 39 } },
+   { 0x0020, { 15, 0, 40 } },
+   { 0x003E, { 15, 1, 8 } },
+   { 0x003C, { 15, 1, 9 } },
+   { 0x003A, { 15, 1, 10 } },
+   { 0x0038, { 15, 1, 11 } },
+   { 0x0036, { 15, 1, 12 } },
+   { 0x0034, { 15, 1, 13 } },
+   { 0x0032, { 15, 1, 14 } },
+   { 0x0013, { 16, 1, 15 } },
+   { 0x0012, { 16, 1, 16 } },
+   { 0x0011, { 16, 1, 17 } },
+   { 0x0010, { 16, 1, 18 } },
+   { 0x0014, { 16, 6, 3 } },
+   { 0x001A, { 16, 11, 2 } },
+   { 0x0019, { 16, 12, 2 } },
+   { 0x0018, { 16, 13, 2 } },
+   { 0x0017, { 16, 14, 2 } },
+   { 0x0016, { 16, 15, 2 } },
+   { 0x0015, { 16, 16, 2 } },
+   { 0x001F, { 16, 27, 1 } },
+   { 0x001E, { 16, 28, 1 } },
+   { 0x001D, { 16, 29, 1 } },
+   { 0x001C, { 16, 30, 1 } },
+   { 0x001B, { 16, 31, 1 } }
+};
 
-   if ((picture->picture_structure == FRAME_PICTURE) &&
-       (!picture->frame_pred_frame_dct) &&
-       (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))) {
+/* q_scale_type */
+static const unsigned quant_scale[2][32] = {
+  { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+    32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 },
+  { 0, 1, 2, 3, 4,  5,  6,  7,  8, 10, 12, 14, 16, 18, 20, 22, 24,
+    28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 96, 104, 112 }
+};
 
-      dct_type = vl_vlc_ubits(&bs->vlc, 1) ? PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
-      vl_vlc_dumpbits(&bs->vlc, 1);
-   }
-   return dct_type;
-}
-
-static INLINE int
-get_quantizer_scale(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
-{
-   int quantizer_scale_code;
-
-   quantizer_scale_code = vl_vlc_ubits(&bs->vlc, 5);
-   vl_vlc_dumpbits(&bs->vlc, 5);
-
-   if (picture->q_scale_type)
-      return non_linear_quantizer_scale[quantizer_scale_code];
-   else
-      return quantizer_scale_code << 1;
-}
-
-static INLINE int
-get_motion_delta(struct vl_mpg12_bs *bs, unsigned f_code)
-{
-   int delta;
-   int sign;
-   const MVtab * tab;
-
-   if (bs->vlc.buf & 0x80000000) {
-      vl_vlc_dumpbits(&bs->vlc, 1);
-      return 0;
-   } else if (bs->vlc.buf >= 0x0c000000) {
-
-      tab = MV_4 + vl_vlc_ubits(&bs->vlc, 4);
-      delta = (tab->delta << f_code) + 1;
-      bs->vlc.bits += tab->len + f_code + 1;
-      bs->vlc.buf <<= tab->len;
-
-      sign = vl_vlc_sbits(&bs->vlc, 1);
-      bs->vlc.buf <<= 1;
-
-      if (f_code)
-         delta += vl_vlc_ubits(&bs->vlc, f_code);
-      bs->vlc.buf <<= f_code;
-
-      return (delta ^ sign) - sign;
-
-   } else {
-
-      tab = MV_10 + vl_vlc_ubits(&bs->vlc, 10);
-      delta = (tab->delta << f_code) + 1;
-      bs->vlc.bits += tab->len + 1;
-      bs->vlc.buf <<= tab->len;
-
-      sign = vl_vlc_sbits(&bs->vlc, 1);
-      bs->vlc.buf <<= 1;
-
-      if (f_code) {
-         vl_vlc_needbits(&bs->vlc);
-         delta += vl_vlc_ubits(&bs->vlc, f_code);
-         vl_vlc_dumpbits(&bs->vlc, f_code);
-      }
-
-      return (delta ^ sign) - sign;
-   }
-}
-
-static INLINE int
-bound_motion_vector(int vec, unsigned f_code)
-{
-#if 1
-   unsigned int limit;
-   int sign;
-
-   limit = 16 << f_code;
-
-   if ((unsigned int)(vec + limit) < 2 * limit)
-      return vec;
-   else {
-      sign = ((int32_t)vec) >> 31;
-      return vec - ((2 * limit) ^ sign) + sign;
-   }
-#else
-   return ((int32_t)vec << (28 - f_code)) >> (28 - f_code);
-#endif
-}
-
-static INLINE int
-get_dmv(struct vl_mpg12_bs *bs)
-{
-   const DMVtab * tab;
-
-   tab = DMV_2 + vl_vlc_ubits(&bs->vlc, 2);
-   vl_vlc_dumpbits(&bs->vlc, tab->len);
-   return tab->dmv;
-}
-
-static INLINE int
-get_coded_block_pattern(struct vl_mpg12_bs *bs)
-{
-   const CBPtab * tab;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   if (bs->vlc.buf >= 0x20000000) {
-
-      tab = CBP_7 + (vl_vlc_ubits(&bs->vlc, 7) - 16);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      return tab->cbp;
-
-   } else {
-
-      tab = CBP_9 + vl_vlc_ubits(&bs->vlc, 9);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      return tab->cbp;
-   }
-}
-
-static INLINE int
-get_luma_dc_dct_diff(struct vl_mpg12_bs *bs)
-{
-   const DCtab * tab;
-   int size;
-   int dc_diff;
-
-   if (bs->vlc.buf < 0xf8000000) {
-      tab = DC_lum_5 + vl_vlc_ubits(&bs->vlc, 5);
-      size = tab->size;
-      if (size) {
-         bs->vlc.bits += tab->len + size;
-         bs->vlc.buf <<= tab->len;
-         dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
-         bs->vlc.buf <<= size;
-         return dc_diff;
-      } else {
-         vl_vlc_dumpbits(&bs->vlc, 3);
-         return 0;
-      }
-   } else {
-      tab = DC_long + (vl_vlc_ubits(&bs->vlc, 9) - 0x1e0);
-      size = tab->size;
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      vl_vlc_needbits(&bs->vlc);
-      dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
-      vl_vlc_dumpbits(&bs->vlc, size);
-      return dc_diff;
-   }
-}
-
-static INLINE int
-get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
-{
-   const DCtab * tab;
-   int size;
-   int dc_diff;
-
-   if (bs->vlc.buf < 0xf8000000) {
-      tab = DC_chrom_5 + vl_vlc_ubits(&bs->vlc, 5);
-      size = tab->size;
-      if (size) {
-         bs->vlc.bits += tab->len + size;
-         bs->vlc.buf <<= tab->len;
-         dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
-         bs->vlc.buf <<= size;
-         return dc_diff;
-      } else {
-         vl_vlc_dumpbits(&bs->vlc, 2);
-         return 0;
-      }
-   } else {
-      tab = DC_long + (vl_vlc_ubits(&bs->vlc, 10) - 0x3e0);
-      size = tab->size;
-      vl_vlc_dumpbits(&bs->vlc, tab->len + 1);
-      vl_vlc_needbits(&bs->vlc);
-      dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
-      vl_vlc_dumpbits(&bs->vlc, size);
-      return dc_diff;
-   }
-}
+static struct vl_vlc_entry tbl_B1[1 << 11];
+static struct vl_vlc_entry tbl_B2[1 << 2];
+static struct vl_vlc_entry tbl_B3[1 << 6];
+static struct vl_vlc_entry tbl_B4[1 << 6];
+static struct vl_vlc_entry tbl_B9[1 << 9];
+static struct vl_vlc_entry tbl_B10[1 << 11];
+static struct vl_vlc_entry tbl_B11[1 << 2];
+static struct vl_vlc_entry tbl_B12[1 << 10];
+static struct vl_vlc_entry tbl_B13[1 << 10];
+static struct dct_coeff tbl_B14_DC[1 << 17];
+static struct dct_coeff tbl_B14_AC[1 << 17];
+static struct dct_coeff tbl_B15[1 << 17];
 
 static INLINE void
-get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
+init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *src,
+                     unsigned size, bool is_DC)
 {
-   int i, val;
-   const DCTtab *tab;
+   unsigned i;
 
-   i = 0;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   while (1) {
-      if (bs->vlc.buf >= 0x28000000) {
-
-         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-
-         i += tab->run;
-         if (i >= 64)
-            break;	/* end of block */
-
-      normal_code:
-         bs->vlc.buf <<= tab->len;
-         bs->vlc.bits += tab->len + 1;
-         val = tab->level * quantizer_scale;
-
-         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-         dest[i] = val;
-
-         bs->vlc.buf <<= 1;
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-
-         /* escape code */
-
-         i += UBITS(bs->vlc.buf << 6, 6) - 64;
-         if (i >= 64)
-            break;	/* illegal, check needed to avoid buffer overflow */
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-         val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
-
-         dest[i] = val;
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
+   for (i=0;i<(1<<17);++i) {
+      dst[i].length = 0;
+      dst[i].level = 0;
+      dst[i].run = dct_End_of_Block;
    }
 
-   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
-}
+   for(; size > 0; --size, ++src) {
+      struct dct_coeff coeff = src->coeff;
+      bool has_sign = true;
 
-static INLINE void
-get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
-{
-   int i, val;
-   const DCTtab * tab;
-
-   i = 0;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   while (1) {
-      if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B15_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64) {
-
-         normal_code:
-            bs->vlc.buf <<= tab->len;
-            bs->vlc.bits += tab->len + 1;
-            val = tab->level * quantizer_scale;
-
-            val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-            dest[i] = val;
-
-            bs->vlc.buf <<= 1;
-            vl_vlc_needbits(&bs->vlc);
-
+      switch (coeff.run) {
+      case dct_End_of_Block:
+         if (is_DC)
             continue;
 
-         } else {
+         has_sign = false;
+         break;
 
-            /* end of block. I commented out this code because if we */
-            /* dont exit here we will still exit at the later test :) */
+      case dct_Escape:
+         has_sign = false;
+         break;
 
-            /* if (i >= 128) break;	*/	/* end of block */
-
-            /* escape code */
-
-            i += UBITS(bs->vlc.buf << 6, 6) - 64;
-            if (i >= 64)
-                break;	/* illegal, check against buffer overflow */
-
-            vl_vlc_dumpbits(&bs->vlc, 12);
-            vl_vlc_needbits(&bs->vlc);
-            val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
-
-            dest[i] = val;
-
-            vl_vlc_dumpbits(&bs->vlc, 12);
-            vl_vlc_needbits(&bs->vlc);
-
+      case dct_DC:
+         if (!is_DC)
             continue;
 
-          }
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B15_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
-   }
+         coeff.length += 1;
+         coeff.run = 1;
+         break;
 
-   vl_vlc_dumpbits(&bs->vlc, 4);	/* dump end of block code */
-}
+      case dct_AC:
+         if (is_DC)
+            continue;
 
-static INLINE void
-get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
-{
-   int i, val;
-   const DCTtab *tab;
-
-   i = -1;
-
-   vl_vlc_needbits(&bs->vlc);
-   if (bs->vlc.buf >= 0x28000000) {
-      tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-      goto entry_1;
-   } else
-      goto entry_2;
-
-   while (1) {
-      if (bs->vlc.buf >= 0x28000000) {
-
-         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-
-      entry_1:
-         i += tab->run;
-         if (i >= 64)
-            break;	/* end of block */
-
-      normal_code:
-         bs->vlc.buf <<= tab->len;
-         bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale) >> 1;
-
-         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-         dest[i] = val;
-
-         bs->vlc.buf <<= 1;
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      }
-
-   entry_2:
-      if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-
-         /* escape code */
-
-         i += UBITS(bs->vlc.buf << 6, 6) - 64;
-         if (i >= 64)
-            break;	/* illegal, check needed to avoid buffer overflow */
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-         val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
-         val = (val * quantizer_scale) / 2;
-
-         dest[i] = val;
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
-   }
-   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
-}
-
-static INLINE void
-get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
-{
-   int i, val;
-   const DCTtab * tab;
-
-   i = 0;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   while (1) {
-      if (bs->vlc.buf >= 0x28000000) {
-
-         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-
-         i += tab->run;
-         if (i >= 64)
-            break;	/* end of block */
-
-      normal_code:
-         bs->vlc.buf <<= tab->len;
-         bs->vlc.bits += tab->len + 1;
-         val = tab->level * quantizer_scale;
-
-         /* oddification */
-         val = (val - 1) | 1;
-
-         /* if (bitstream_get (1)) val = -val; */
-         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-         dest[i] = val;
-
-         bs->vlc.buf <<= 1;
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-
-         /* escape code */
-
-         i += UBITS(bs->vlc.buf << 6, 6) - 64;
-         if (i >= 64)
-            break;	/* illegal, check needed to avoid buffer overflow */
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-         val = vl_vlc_sbits(&bs->vlc, 8);
-         if (! (val & 0x7f)) {
-            vl_vlc_dumpbits(&bs->vlc, 8);
-            val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
-         }
-         val = val * quantizer_scale;
-
-         /* oddification */
-         val = (val + ~SBITS (val, 1)) | 1;
-
-         dest[i] = val;
-
-         vl_vlc_dumpbits(&bs->vlc, 8);
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
-   }
-   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
-}
-
-static INLINE void
-get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
-{
-   int i, val;
-   const DCTtab * tab;
-
-   i = -1;
-
-   vl_vlc_needbits(&bs->vlc);
-   if (bs->vlc.buf >= 0x28000000) {
-      tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-      goto entry_1;
-   } else
-      goto entry_2;
-
-   while (1) {
-      if (bs->vlc.buf >= 0x28000000) {
-
-         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-
-      entry_1:
-         i += tab->run;
-         if (i >= 64)
-            break;	/* end of block */
-
-      normal_code:
-         bs->vlc.buf <<= tab->len;
-         bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale) >> 1;
-
-         /* oddification */
-         val = (val - 1) | 1;
-
-         /* if (bitstream_get (1)) val = -val; */
-         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-         dest[i] = val;
-
-         bs->vlc.buf <<= 1;
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      }
-
-   entry_2:
-      if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-
-         /* escape code */
-
-         i += UBITS(bs->vlc.buf << 6, 6) - 64;
-         if (i >= 64)
-            break;	/* illegal, check needed to avoid buffer overflow */
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-         val = vl_vlc_sbits(&bs->vlc, 8);
-         if (! (val & 0x7f)) {
-            vl_vlc_dumpbits(&bs->vlc, 8);
-            val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
-         }
-         val = 2 * (val + SBITS (val, 1)) + 1;
-         val = (val * quantizer_scale) / 2;
-
-         /* oddification */
-         val = (val + ~SBITS (val, 1)) | 1;
-
-         dest[i] = val;
-
-         vl_vlc_dumpbits(&bs->vlc, 8);
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
-   }
-   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
-}
-
-static INLINE void
-slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
-                 unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
-{
-   short dest[64];
-
-   bs->ycbcr_stream[cc]->x = x;
-   bs->ycbcr_stream[cc]->y = y;
-   bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_INTRA;
-   bs->ycbcr_stream[cc]->coding = coding;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   /* Get the intra DC coefficient and inverse quantize it */
-   if (cc == 0)
-      dc_dct_pred[0] += get_luma_dc_dct_diff(bs);
-   else
-      dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);
-
-   memset(dest, 0, sizeof(int16_t) * 64);
-   dest[0] = dc_dct_pred[cc];
-   if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
-      if (picture->picture_coding_type != D_TYPE)
-          get_mpeg1_intra_block(bs, quantizer_scale, dest);
-   } else if (picture->intra_vlc_format)
-      get_intra_block_B15(bs, quantizer_scale, dest);
-   else
-      get_intra_block_B14(bs, quantizer_scale, dest);
-
-   memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
-
-   bs->num_ycbcr_blocks[cc]++;
-   bs->ycbcr_stream[cc]++;
-   bs->ycbcr_buffer[cc] += 64;
-}
-
-static INLINE void
-slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
-                    unsigned x, unsigned y,  enum pipe_mpeg12_dct_type coding, int quantizer_scale)
-{
-   short dest[64];
-
-   bs->ycbcr_stream[cc]->x = x;
-   bs->ycbcr_stream[cc]->y = y;
-   bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_DELTA;
-   bs->ycbcr_stream[cc]->coding = coding;
-
-   memset(dest, 0, sizeof(int16_t) * 64);
-   if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
-      get_mpeg1_non_intra_block(bs, quantizer_scale, dest);
-   else
-      get_non_intra_block(bs, quantizer_scale, dest);
-
-   memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
-
-   bs->num_ycbcr_blocks[cc]++;
-   bs->ycbcr_stream[cc]++;
-   bs->ycbcr_buffer[cc] += 64;
-}
-
-static INLINE void
-motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_x = (mv->top.x + (get_motion_delta(bs, f_code[0]) << f_code[1]));
-   motion_x = bound_motion_vector (motion_x, f_code[0] + f_code[1]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = (mv->top.y + (get_motion_delta(bs, f_code[0]) << f_code[1]));
-   motion_y = bound_motion_vector (motion_y, f_code[0] + f_code[1]);
-   mv->top.y = mv->bottom.y = motion_y;
-}
-
-static INLINE void
-motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector(motion_x, f_code[0]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->top.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector(motion_y, f_code[1]);
-   mv->top.y = mv->bottom.y = motion_y;
-}
-
-static INLINE void
-motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   vl_vlc_needbits(&bs->vlc);
-   mv->top.field_select = vl_vlc_ubits(&bs->vlc, 1) ?
-      PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->top.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]);
-   /* motion_y = bound_motion_vector (motion_y, f_code[1]); */
-   mv->top.y = motion_y << 1;
-
-   vl_vlc_needbits(&bs->vlc);
-   mv->bottom.field_select = vl_vlc_ubits(&bs->vlc, 1) ?
-      PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = (mv->bottom.y >> 1) + get_motion_delta(bs, f_code[1]);
-   /* motion_y = bound_motion_vector (motion_y, f_code[1]); */
-   mv->bottom.y = motion_y << 1;
-}
-
-static INLINE void
-motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   // TODO Implement dmv
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector(motion_x, f_code[0]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]);
-   /* motion_y = bound_motion_vector (motion_y, f_code[1]); */
-   mv->top.y = mv->bottom.y = motion_y << 1;
-}
-
-/* like motion_frame, but parsing without actual motion compensation */
-static INLINE void
-motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int tmp;
-
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   tmp = (mv->top.x + get_motion_delta(bs, f_code[0]));
-   tmp = bound_motion_vector (tmp, f_code[0]);
-   mv->top.x = mv->bottom.x = tmp;
-
-   vl_vlc_needbits(&bs->vlc);
-   tmp = (mv->top.y + get_motion_delta(bs, f_code[1]));
-   tmp = bound_motion_vector (tmp, f_code[1]);
-   mv->top.y = mv->bottom.y = tmp;
-
-   vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */
-}
-
-static INLINE void
-motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   // ref_field
-   //vl_vlc_ubits(&bs->vlc, 1);
-
-   // TODO field select may need to do something here for bob (weave ok)
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->top.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector (motion_y, f_code[1]);
-   mv->top.y = mv->bottom.y = motion_y;
-}
-
-static INLINE void
-motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   // ref_field
-   //vl_vlc_ubits(&bs->vlc, 1);
-
-   // TODO field select may need to do something here bob  (weave ok)
-   mv->top.field_select = PIPE_VIDEO_FRAME;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->top.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->top.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector (motion_y, f_code[1]);
-   mv->top.y = motion_y;
-
-   vl_vlc_needbits(&bs->vlc);
-   // ref_field
-   //vl_vlc_ubits(&bs->vlc, 1);
-
-   // TODO field select may need to do something here for bob (weave ok)
-   mv->bottom.field_select = PIPE_VIDEO_FRAME;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->bottom.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector (motion_y, f_code[1]);
-   mv->bottom.y = motion_y;
-}
-
-static INLINE void
-motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   // TODO field select may need to do something here for bob  (weave ok)
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->top.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector (motion_y, f_code[1]);
-   mv->top.y = mv->bottom.y = motion_y;
-}
-
-
-static INLINE void
-motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
-{
-   int tmp;
-
-   vl_vlc_needbits(&bs->vlc);
-   vl_vlc_dumpbits(&bs->vlc, 1); /* remove field_select */
-
-   tmp = (mv->top.x + get_motion_delta(bs, f_code[0]));
-   tmp = bound_motion_vector(tmp, f_code[0]);
-   mv->top.x = mv->bottom.x = tmp;
-
-   vl_vlc_needbits(&bs->vlc);
-   tmp = (mv->top.y + get_motion_delta(bs, f_code[1]));
-   tmp = bound_motion_vector(tmp, f_code[1]);
-   mv->top.y = mv->bottom.y = tmp;
-
-   vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */
-}
-
-#define MOTION_CALL(routine, macroblock_modes)		\
-do {							\
-   if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD)  \
-      routine(bs, picture->f_code[0], &mv_fwd);         \
-   if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD)	\
-      routine(bs, picture->f_code[1], &mv_bwd);         \
-} while (0)
-
-static INLINE void
-store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos,
-                    struct pipe_motionvector *mv_fwd,
-                    struct pipe_motionvector *mv_bwd)
-{
-   bs->mv_stream[0][*mv_pos].top = mv_fwd->top;
-   bs->mv_stream[0][*mv_pos].bottom =
-      mv_fwd->top.field_select == PIPE_VIDEO_FRAME ?
-      mv_fwd->top : mv_fwd->bottom;
-
-   bs->mv_stream[1][*mv_pos].top = mv_bwd->top;
-   bs->mv_stream[1][*mv_pos].bottom =
-      mv_bwd->top.field_select == PIPE_VIDEO_FRAME ?
-      mv_bwd->top : mv_bwd->bottom;
-
-   (*mv_pos)++;
-}
-
-static INLINE bool
-slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-           int *quantizer_scale, unsigned *x, unsigned *y, unsigned *mv_pos)
-{
-   const MBAtab * mba;
-
-   vl_vlc_need32bits(&bs->vlc);
-   while(bs->vlc.buf < 0x101 || bs->vlc.buf > 0x1AF) {
-      if(!vl_vlc_getbyte(&bs->vlc))
-         return false;
-   }
-   *y = (bs->vlc.buf & 0xFF) - 1;
-   vl_vlc_restart(&bs->vlc);
-
-   *quantizer_scale = get_quantizer_scale(bs, picture);
-
-   /* ignore intra_slice and all the extra data */
-   while (bs->vlc.buf & 0x80000000) {
-      vl_vlc_dumpbits(&bs->vlc, 9);
-      vl_vlc_needbits(&bs->vlc);
-   }
-
-   /* decode initial macroblock address increment */
-   *x = 0;
-   while (1) {
-      if (bs->vlc.buf >= 0x08000000) {
-          mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 6) - 2);
-          break;
-      } else if (bs->vlc.buf >= 0x01800000) {
-          mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 12) - 24);
-          break;
-      } else switch (vl_vlc_ubits(&bs->vlc, 12)) {
-      case 8:		/* macroblock_escape */
-          *x += 33;
-          vl_vlc_dumpbits(&bs->vlc, 11);
-          vl_vlc_needbits(&bs->vlc);
-          continue;
-      case 15:	/* macroblock_stuffing (MPEG1 only) */
-          bs->vlc.buf &= 0xfffff;
-          vl_vlc_dumpbits(&bs->vlc, 11);
-          vl_vlc_needbits(&bs->vlc);
-          continue;
-      default:	/* error */
-          return false;
-      }
-   }
-   vl_vlc_dumpbits(&bs->vlc, mba->len + 1);
-   *x += mba->mba;
-
-   while (*x >= bs->width) {
-      *x -= bs->width;
-      (*y)++;
-   }
-   if (*y > bs->height)
-      return false;
-
-   *mv_pos = *x + *y * bs->width;
-
-   return true;
-}
-
-static INLINE bool
-decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
-{
-   enum pipe_video_field_select default_field_select;
-   struct pipe_motionvector mv_fwd, mv_bwd;
-   enum pipe_mpeg12_dct_type dct_type;
-
-   /* predictor for DC coefficients in intra blocks */
-   int dc_dct_pred[3] = { 0, 0, 0 };
-   int quantizer_scale;
-
-   unsigned x, y, mv_pos;
-
-   switch(picture->picture_structure) {
-   case TOP_FIELD:
-      default_field_select = PIPE_VIDEO_TOP_FIELD;
-      break;
-
-   case BOTTOM_FIELD:
-      default_field_select = PIPE_VIDEO_BOTTOM_FIELD;
-      break;
-
-   default:
-      default_field_select = PIPE_VIDEO_FRAME;
-      break;
-   }
-
-   if (!slice_init(bs, picture, &quantizer_scale, &x, &y, &mv_pos))
-      return false;
-
-   mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-   mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
-
-   mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-   mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
-
-   while (1) {
-      int macroblock_modes;
-      int mba_inc;
-      const MBAtab * mba;
-
-      vl_vlc_needbits(&bs->vlc);
-
-      macroblock_modes = get_macroblock_modes(bs, picture);
-      dct_type = get_dct_type(bs, picture, macroblock_modes);
-
-      switch(macroblock_modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)) {
-      case (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD):
-         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF;
-         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF;
+         coeff.length += 1;
+         coeff.run = 1;
          break;
 
       default:
-         mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
-         mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
-
-         /* fall through */
-      case MACROBLOCK_MOTION_FORWARD:
-         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
-         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-         break;
-
-      case MACROBLOCK_MOTION_BACKWARD:
-         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         coeff.length += 1;
+         coeff.run += 1;
          break;
       }
 
-      /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
-      if (macroblock_modes & MACROBLOCK_QUANT)
-         quantizer_scale = get_quantizer_scale(bs, picture);
+      for(i=0; i<(1 << (17 - coeff.length)); ++i)
+         dst[src->bitcode << 1 | i] = coeff;
 
-      if (macroblock_modes & MACROBLOCK_INTRA) {
-
-         if (picture->concealment_motion_vectors) {
-            if (picture->picture_structure == FRAME_PICTURE)
-               motion_fr_conceal(bs, picture->f_code[0], &mv_fwd);
-            else
-               motion_fi_conceal(bs, picture->f_code[0], &mv_fwd);
-
-         } else {
-            mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-            mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-         }
-         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-
-         // unravaled loop of 6 block(i) calls in macroblock()
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
-
-         if (picture->picture_coding_type == D_TYPE) {
-            vl_vlc_needbits(&bs->vlc);
-            vl_vlc_dumpbits(&bs->vlc, 1);
-         }
-
-      } else {
-         if (picture->picture_structure == FRAME_PICTURE)
-            switch (macroblock_modes & MOTION_TYPE_MASK) {
-            case MC_FRAME:
-               if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
-                  MOTION_CALL(motion_mp1, macroblock_modes);
-               } else {
-                  MOTION_CALL(motion_fr_frame, macroblock_modes);
-               }
-               break;
-
-            case MC_FIELD:
-               MOTION_CALL (motion_fr_field, macroblock_modes);
-               break;
-
-            case MC_DMV:
-               MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD);
-               break;
-
-            case 0:
-               /* non-intra mb without forward mv in a P picture */
-               mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-               mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-               break;
-            }
-         else
-            switch (macroblock_modes & MOTION_TYPE_MASK) {
-            case MC_FIELD:
-               MOTION_CALL (motion_fi_field, macroblock_modes);
-               break;
-
-            case MC_16X8:
-               MOTION_CALL (motion_fi_16x8, macroblock_modes);
-               break;
-
-            case MC_DMV:
-               MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD);
-               break;
-
-            case 0:
-               /* non-intra mb without forward mv in a P picture */
-               mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-               mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-               break;
-            }
-
-         if (macroblock_modes & MACROBLOCK_PATTERN) {
-            int coded_block_pattern = get_coded_block_pattern(bs);
-
-            // TODO  optimize not fully used for idct accel only mc.
-            if (coded_block_pattern & 0x20)
-               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0  luma 0
-            if (coded_block_pattern & 0x10)
-               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
-            if (coded_block_pattern & 0x08)
-               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
-            if (coded_block_pattern & 0x04)
-               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
-            if (coded_block_pattern & 0x2)
-               slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
-            if (coded_block_pattern & 0x1)
-               slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
-         }
-
-         dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
-      }
-
-      store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
-      if (++x >= bs->width) {
-         ++y;
-         if (y >= bs->height)
-            return false;
-         x -= bs->width;
-      }
-
-      vl_vlc_needbits(&bs->vlc);
-      mba_inc = 0;
-      while (1) {
-         if (bs->vlc.buf >= 0x10000000) {
-            mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 5) - 2);
-            break;
-         } else if (bs->vlc.buf >= 0x03000000) {
-            mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 11) - 24);
-            break;
-         } else switch (vl_vlc_ubits(&bs->vlc, 11)) {
-         case 8:		/* macroblock_escape */
-            mba_inc += 33;
-            /* pass through */
-         case 15:	/* macroblock_stuffing (MPEG1 only) */
-            vl_vlc_dumpbits(&bs->vlc, 11);
-            vl_vlc_needbits(&bs->vlc);
-            continue;
-         default:	/* end of slice, or error */
-            return true;
-         }
-      }
-      vl_vlc_dumpbits(&bs->vlc, mba->len);
-      mba_inc += mba->mba;
-      if (mba_inc) {
-         //TODO  conversion to signed format signed format
-         dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
-
-         mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
-         mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
-
-         if (picture->picture_coding_type == P_TYPE) {
-            mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-            mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
-         }
-
-         x += mba_inc;
-         do {
-            store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
-         } while (--mba_inc);
-      }
-      while (x >= bs->width) {
-         ++y;
-         if (y >= bs->height)
-            return false;
-         x -= bs->width;
+      if (has_sign) {
+	 coeff.level = -coeff.level;
+         for(; i<(1 << (18 - coeff.length)); ++i)
+            dst[src->bitcode << 1 | i] = coeff;
       }
    }
 }
 
-void
-vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height)
+static INLINE void
+init_tables()
 {
+   vl_vlc_init_table(tbl_B1, Elements(tbl_B1), macroblock_address_increment, Elements(macroblock_address_increment));
+   vl_vlc_init_table(tbl_B2, Elements(tbl_B2), macroblock_type_i, Elements(macroblock_type_i));
+   vl_vlc_init_table(tbl_B3, Elements(tbl_B3), macroblock_type_p, Elements(macroblock_type_p));
+   vl_vlc_init_table(tbl_B4, Elements(tbl_B4), macroblock_type_b, Elements(macroblock_type_b));
+   vl_vlc_init_table(tbl_B9, Elements(tbl_B9), coded_block_pattern, Elements(coded_block_pattern));
+   vl_vlc_init_table(tbl_B10, Elements(tbl_B10), motion_code, Elements(motion_code));
+   vl_vlc_init_table(tbl_B11, Elements(tbl_B11), dmvector, Elements(dmvector));
+   vl_vlc_init_table(tbl_B12, Elements(tbl_B12), dct_dc_size_luminance, Elements(dct_dc_size_luminance));
+   vl_vlc_init_table(tbl_B13, Elements(tbl_B13), dct_dc_size_chrominance, Elements(dct_dc_size_chrominance));
+   init_dct_coeff_table(tbl_B14_DC, dct_coeff_tbl_zero, Elements(dct_coeff_tbl_zero), true);
+   init_dct_coeff_table(tbl_B14_AC, dct_coeff_tbl_zero, Elements(dct_coeff_tbl_zero), false);
+   init_dct_coeff_table(tbl_B15, dct_coeff_tbl_one, Elements(dct_coeff_tbl_one), false);
+}
+
+static INLINE int
+DIV2DOWN(int todiv)
+{
+   return (todiv&~1)/2;
+}
+
+static INLINE int
+DIV2UP(int todiv)
+{
+   return (todiv+1)/2;
+}
+
+static INLINE void
+motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], short dmvector[2])
+{
+   int t;
+   for (t = 0; t < 2; ++t) {
+      int motion_code;
+      int r_size = bs->desc.f_code[s][t];
+
+      vl_vlc_fillbits(&bs->vlc);
+      motion_code = vl_vlc_get_vlclbf(&bs->vlc, tbl_B10, 11);
+
+      assert(r_size >= 0);
+      if (r_size && motion_code) {
+         int residual = vl_vlc_get_uimsbf(&bs->vlc, r_size) + 1;
+         delta[t] = ((abs(motion_code) - 1) << r_size) + residual;
+         if (motion_code < 0)
+            delta[t] = -delta[t];
+      } else
+         delta[t] = motion_code;
+      if (dmv)
+         dmvector[t] = vl_vlc_get_vlclbf(&bs->vlc, tbl_B11, 2);
+   }
+}
+
+static INLINE int
+wrap(short f, int shift)
+{
+   if (f < (-16 << shift))
+      return f + (32 << shift);
+   else if (f >= 16 << shift)
+      return f - (32 << shift);
+   else
+      return f;
+}
+
+static INLINE void
+motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
+{
+   int dmv = mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
+   short dmvector[2], delta[2];
+
+   if (mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_FIELD) {
+      mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s;
+      motion_vector(bs, 0, s, dmv, delta, dmvector);
+      mb->PMV[0][s][0] = wrap(mb->PMV[0][s][0] + delta[0], bs->desc.f_code[s][0]);
+      mb->PMV[0][s][1] = wrap(DIV2DOWN(mb->PMV[0][s][1]) + delta[1], bs->desc.f_code[s][1]) * 2;
+
+      mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << (s + 2);
+      motion_vector(bs, 1, s, dmv, delta, dmvector);
+      mb->PMV[1][s][0] = wrap(mb->PMV[1][s][0] + delta[0], bs->desc.f_code[s][0]);
+      mb->PMV[1][s][1] = wrap(DIV2DOWN(mb->PMV[1][s][1]) + delta[1], bs->desc.f_code[s][1]) * 2;
+
+   } else {
+      motion_vector(bs, 0, s, dmv, delta, dmvector);
+      mb->PMV[0][s][0] = wrap(mb->PMV[0][s][0] + delta[0], bs->desc.f_code[s][0]);
+      mb->PMV[0][s][1] = wrap(mb->PMV[0][s][1] + delta[1], bs->desc.f_code[s][1]);
+   }
+}
+
+static INLINE void
+motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
+{
+   int dmv = mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
+   short dmvector[2], delta[2];
+
+   if (mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_16x8) {
+      mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s;
+      motion_vector(bs, 0, s, dmv, delta, dmvector);
+
+      mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << (s + 2);
+      motion_vector(bs, 1, s, dmv, delta, dmvector);
+   } else {
+      if (!dmv)
+         mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s;
+      motion_vector(bs, 0, s, dmv, delta, dmvector);
+   }
+}
+
+static INLINE void
+reset_predictor(struct vl_mpg12_bs *bs) {
+   bs->pred_dc[0] = bs->pred_dc[1] = bs->pred_dc[2] = 0;
+}
+
+static INLINE void
+decode_dct(struct vl_mpg12_bs *bs, struct pipe_mpeg12_macroblock *mb, int scale)
+{
+   static const unsigned blk2cc[] = { 0, 0, 0, 0, 1, 2 };
+   static const struct vl_vlc_entry *blk2dcsize[] = {
+      tbl_B12, tbl_B12, tbl_B12, tbl_B12, tbl_B13, tbl_B13
+   };
+
+   bool intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA;
+   const struct dct_coeff *table = intra ? bs->intra_dct_tbl : tbl_B14_AC;
+   const struct dct_coeff *entry;
+   int i, cbp, blk = 0;
+   short *dst = mb->blocks;
+
+   vl_vlc_fillbits(&bs->vlc);
+   mb->coded_block_pattern = cbp = intra ? 0x3F : vl_vlc_get_vlclbf(&bs->vlc, tbl_B9, 9);
+
+   goto entry;
+
+   while(1) {
+      vl_vlc_eatbits(&bs->vlc, entry->length);
+      if (entry->run == dct_End_of_Block) {
+
+         dst += 64;
+         cbp <<= 1;
+         cbp &= 0x3F;
+         blk++;
+
+entry:
+         if (!cbp)
+            break;
+
+         while(!(cbp & 0x20)) {
+            cbp <<= 1;
+            blk++;
+         }
+
+         vl_vlc_fillbits(&bs->vlc);
+
+         if (intra) {
+            unsigned cc = blk2cc[blk];
+            unsigned size = vl_vlc_get_vlclbf(&bs->vlc, blk2dcsize[blk], 10);
+
+            if (size) {
+               int dct_diff = vl_vlc_get_uimsbf(&bs->vlc, size);
+               int half_range = 1 << (size - 1);
+               if (dct_diff < half_range)
+                  dct_diff = (dct_diff + 1) - (2 * half_range);
+               bs->pred_dc[cc] += dct_diff;
+            }
+
+            dst[0] = bs->pred_dc[cc];
+            i = 0;
+
+         } else {
+            entry = tbl_B14_DC + vl_vlc_peekbits(&bs->vlc, 17);
+            i = -1;
+            continue;
+         }
+
+      } else if (entry->run == dct_Escape) {
+         i += vl_vlc_get_uimsbf(&bs->vlc, 6) + 1;
+         if (i > 64)
+            break;
+
+         dst[i] = vl_vlc_get_simsbf(&bs->vlc, 12) * scale;
+
+      } else {
+         i += entry->run;
+         if (i > 64)
+            break;
+
+         dst[i] = entry->level * scale;
+      }
+
+      vl_vlc_fillbits(&bs->vlc);
+      entry = table + vl_vlc_peekbits(&bs->vlc, 17);
+   }
+}
+
+static INLINE bool
+decode_slice(struct vl_mpg12_bs *bs)
+{
+   struct pipe_mpeg12_macroblock mb;
+   short dct_blocks[64*6];
+   unsigned dct_scale;
+   signed x = -1;
+
+   memset(&mb, 0, sizeof(mb));
+   mb.base.codec = PIPE_VIDEO_CODEC_MPEG12;
+   mb.y = vl_vlc_get_uimsbf(&bs->vlc, 8) - 1;
+   mb.blocks = dct_blocks;
+
+   reset_predictor(bs);
+   dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)];
+
+   if (vl_vlc_get_uimsbf(&bs->vlc, 1))
+      while (vl_vlc_get_uimsbf(&bs->vlc, 9) & 1)
+         vl_vlc_fillbits(&bs->vlc);
+
+   do {
+      int inc = 0;
+
+      vl_vlc_fillbits(&bs->vlc);
+
+      while (vl_vlc_peekbits(&bs->vlc, 11) == 15) {
+         vl_vlc_eatbits(&bs->vlc, 11);
+         vl_vlc_fillbits(&bs->vlc);
+      }
+
+      while (vl_vlc_peekbits(&bs->vlc, 11) == 8) {
+         vl_vlc_eatbits(&bs->vlc, 11);
+         vl_vlc_fillbits(&bs->vlc);
+         inc += 33;
+      }
+      inc += vl_vlc_get_vlclbf(&bs->vlc, tbl_B1, 11);
+      if (x != -1) {
+         mb.num_skipped_macroblocks = inc - 1;
+         bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1);
+      }
+      mb.x = x += inc;
+
+      switch (bs->desc.picture_coding_type) {
+      case PIPE_MPEG12_PICTURE_CODING_TYPE_I:
+         mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B2, 2);
+         break;
+
+      case PIPE_MPEG12_PICTURE_CODING_TYPE_P:
+         mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B3, 6);
+         break;
+
+      case PIPE_MPEG12_PICTURE_CODING_TYPE_B:
+         mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B4, 6);
+         break;
+
+      default:
+         mb.macroblock_type = 0;
+         /* dumb gcc */
+         assert(0);
+      }
+
+      mb.macroblock_modes.value = 0;
+      if (mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) {
+         if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) {
+            if (bs->desc.frame_pred_frame_dct == 0)
+               mb.macroblock_modes.bits.frame_motion_type = vl_vlc_get_uimsbf(&bs->vlc, 2);
+            else
+               mb.macroblock_modes.bits.frame_motion_type = 2;
+         } else
+            mb.macroblock_modes.bits.field_motion_type = vl_vlc_get_uimsbf(&bs->vlc, 2);
+
+      } else if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA) && bs->desc.concealment_motion_vectors) {
+         if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME)
+            mb.macroblock_modes.bits.frame_motion_type = 2;
+         else
+            mb.macroblock_modes.bits.field_motion_type = 1;
+      }
+
+      if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME &&
+          bs->desc.frame_pred_frame_dct == 0 &&
+          mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_INTRA | PIPE_MPEG12_MB_TYPE_PATTERN))
+         mb.macroblock_modes.bits.dct_type = vl_vlc_get_uimsbf(&bs->vlc, 1);
+
+      if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_QUANT)
+         dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)];
+
+      if (inc > 1 && bs->desc.picture_coding_type == PIPE_MPEG12_PICTURE_CODING_TYPE_P)
+         memset(mb.PMV, 0, sizeof(mb.PMV));
+
+      mb.motion_vertical_field_select = 0;
+      if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_FORWARD) ||
+          (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA && bs->desc.concealment_motion_vectors)) {
+         if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME)
+            motion_vector_frame(bs, 0, &mb);
+         else
+            motion_vector_field(bs, 0, &mb);
+      }
+
+      if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD) {
+         if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME)
+            motion_vector_frame(bs, 1, &mb);
+         else
+            motion_vector_field(bs, 1, &mb);
+      }
+
+      if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA && bs->desc.concealment_motion_vectors) {
+         unsigned extra = vl_vlc_get_uimsbf(&bs->vlc, 1);
+         mb.PMV[1][0][0] = mb.PMV[0][0][0];
+         mb.PMV[1][0][1] = mb.PMV[0][0][1];
+         assert(extra);
+      } else if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ||
+                !(mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD |
+                                        PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD))) {
+         memset(mb.PMV, 0, sizeof(mb.PMV));
+      }
+
+      if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_FORWARD &&
+           mb.macroblock_modes.bits.frame_motion_type == 2) ||
+          (mb.macroblock_modes.bits.frame_motion_type == 3)) {
+            mb.PMV[1][0][0] = mb.PMV[0][0][0];
+            mb.PMV[1][0][1] = mb.PMV[0][0][1];
+      }
+
+      if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD &&
+          mb.macroblock_modes.bits.frame_motion_type == 2) {
+            mb.PMV[1][1][0] = mb.PMV[0][1][0];
+            mb.PMV[1][1][1] = mb.PMV[0][1][1];
+      }
+
+      if (inc > 1 || !(mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA))
+         reset_predictor(bs);
+
+      if (mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_INTRA | PIPE_MPEG12_MB_TYPE_PATTERN)) {
+         memset(dct_blocks, 0, sizeof(dct_blocks));
+         decode_dct(bs, &mb, dct_scale);
+      } else
+         mb.coded_block_pattern = 0;
+
+   } while (vl_vlc_bytes_left(&bs->vlc) && vl_vlc_peekbits(&bs->vlc, 23));
+
+   mb.num_skipped_macroblocks = 0;
+   bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1);
+   return true;
+}
+
+void
+vl_mpg12_bs_init(struct vl_mpg12_bs *bs, struct pipe_video_decoder *decoder)
+{
+   static bool tables_initialized = false;
+
    assert(bs);
 
    memset(bs, 0, sizeof(struct vl_mpg12_bs));
 
-   bs->width = width;
-   bs->height = height;
-}
+   bs->decoder = decoder;
 
-void
-vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
-                        short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES])
-{
-   unsigned i;
-
-   assert(bs);
-   assert(ycbcr_stream && ycbcr_buffer);
-   assert(mv_stream);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      bs->ycbcr_stream[i] = ycbcr_stream[i];
-      bs->ycbcr_buffer[i] = ycbcr_buffer[i];
-   }
-   for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
-      bs->mv_stream[i] = mv_stream[i];
-
-   // TODO
-   for (i = 0; i < bs->width*bs->height; ++i) {
-      bs->mv_stream[0][i].top.x = bs->mv_stream[0][i].top.y = 0;
-      bs->mv_stream[0][i].top.field_select = PIPE_VIDEO_FRAME;
-      bs->mv_stream[0][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
-      bs->mv_stream[0][i].bottom.x = bs->mv_stream[0][i].bottom.y = 0;
-      bs->mv_stream[0][i].bottom.field_select = PIPE_VIDEO_FRAME;
-      bs->mv_stream[0][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
-
-      bs->mv_stream[1][i].top.x = bs->mv_stream[1][i].top.y = 0;
-      bs->mv_stream[1][i].top.field_select = PIPE_VIDEO_FRAME;
-      bs->mv_stream[1][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-      bs->mv_stream[1][i].bottom.x = bs->mv_stream[1][i].bottom.y = 0;
-      bs->mv_stream[1][i].bottom.field_select = PIPE_VIDEO_FRAME;
-      bs->mv_stream[1][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+   if (!tables_initialized) {
+      init_tables();
+      tables_initialized = true;
    }
 }
 
 void
-vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
-                   struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
+vl_mpg12_bs_set_picture_desc(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
+{
+   bs->desc = *picture;
+   bs->intra_dct_tbl = picture->intra_vlc_format ? tbl_B15 : tbl_B14_AC;
+}
+
+void
+vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer)
 {
    assert(bs);
-   assert(num_ycbcr_blocks);
    assert(buffer && num_bytes);
 
-   bs->num_ycbcr_blocks = num_ycbcr_blocks;
+   while(num_bytes > 2) {
+      if (buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0x01 &&
+	buffer[3] >= 0x01 && buffer[3] < 0xAF) {
+         unsigned consumed;
 
-   vl_vlc_init(&bs->vlc, buffer, num_bytes);
+         buffer += 3;
+         num_bytes -= 3;
 
-   while(decode_slice(bs, picture));
+         vl_vlc_init(&bs->vlc, buffer, num_bytes);
+
+         if (!decode_slice(bs))
+            return;
+
+         /* it's possible for the vlc to consume up to eight extra bytes */
+         consumed = num_bytes - vl_vlc_bytes_left(&bs->vlc);
+         consumed = consumed > 8 ? consumed - 8 : 0;
+
+         /* crap, this is a bug we have consumed more bytes than left in the buffer */
+         assert(consumed <= num_bytes);
+
+         num_bytes -= consumed;
+         buffer += consumed;
+
+      } else {
+         ++buffer;
+         --num_bytes;
+      }
+   }
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
index 4e48a9f..c3f14a1 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
@@ -33,27 +33,22 @@
 
 struct vl_mpg12_bs
 {
-   unsigned width, height;
+   struct pipe_video_decoder *decoder;
+
+   struct pipe_mpeg12_picture_desc desc;
+   struct dct_coeff *intra_dct_tbl;
 
    struct vl_vlc vlc;
-
-   unsigned *num_ycbcr_blocks;
-
-   struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
-   short *ycbcr_buffer[VL_MAX_PLANES];
-
-   struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
+   short pred_dc[3];
 };
 
 void
-vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height);
+vl_mpg12_bs_init(struct vl_mpg12_bs *bs, struct pipe_video_decoder *decoder);
 
 void
-vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
-                        short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]);
+vl_mpg12_bs_set_picture_desc(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture);
 
 void
-vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
-                   struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]);
+vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer);
 
 #endif /* vl_mpeg12_bitstream_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 61d947c..7d53168 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -30,6 +30,7 @@
 
 #include <util/u_memory.h>
 #include <util/u_rect.h>
+#include <util/u_sampler.h>
 #include <util/u_video.h>
 
 #include "vl_mpeg12_decoder.h"
@@ -75,36 +76,44 @@
 static const unsigned num_mc_format_configs =
    sizeof(mc_format_config) / sizeof(struct format_config);
 
+static const unsigned const_empty_block_mask_420[3][2][2] = {
+   { { 0x20, 0x10 },  { 0x08, 0x04 } },
+   { { 0x02, 0x02 },  { 0x02, 0x02 } },
+   { { 0x01, 0x01 },  { 0x01, 0x01 } }
+};
+
 static bool
-init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
+init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
 {
-   enum pipe_format formats[3];
-
-   struct pipe_sampler_view **source;
+   struct pipe_resource *res, res_tmpl;
+   struct pipe_sampler_view sv_tmpl;
    struct pipe_surface **destination;
 
-   struct vl_mpeg12_decoder *dec;
-
    unsigned i;
 
-   assert(buffer);
+   assert(dec && buffer);
 
-   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+   res_tmpl.target = PIPE_TEXTURE_2D;
+   res_tmpl.format = dec->zscan_source_format;
+   res_tmpl.width0 = dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;
+   res_tmpl.height0 = align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line;
+   res_tmpl.depth0 = 1;
+   res_tmpl.array_size = 1;
+   res_tmpl.usage = PIPE_USAGE_STREAM;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
 
-   formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
-   buffer->zscan_source = vl_video_buffer_create_ex
-   (
-      dec->base.context,
-      dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
-      align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line,
-      1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC
-   );
-
-   if (!buffer->zscan_source)
+   res = dec->base.context->screen->resource_create(dec->base.context->screen, &res_tmpl);
+   if (!res)
       goto error_source;
 
-   source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source);
-   if (!source)
+
+   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+   u_sampler_view_default_template(&sv_tmpl, res, res->format);
+   sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = PIPE_SWIZZLE_RED;
+   buffer->zscan_source = dec->base.context->create_sampler_view(dec->base.context, res, &sv_tmpl);
+   pipe_resource_reference(&res, NULL);
+   if (!buffer->zscan_source)
       goto error_sampler;
 
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
@@ -117,7 +126,7 @@
 
    for (i = 0; i < VL_MAX_PLANES; ++i)
       if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c,
-                                &buffer->zscan[i], source[i], destination[i]))
+                                &buffer->zscan[i], buffer->zscan_source, destination[i]))
          goto error_plane;
 
    return true;
@@ -128,7 +137,7 @@
 
 error_surface:
 error_sampler:
-   buffer->zscan_source->destroy(buffer->zscan_source);
+   pipe_sampler_view_reference(&buffer->zscan_source, NULL);
 
 error_source:
    return false;
@@ -143,21 +152,18 @@
 
    for (i = 0; i < VL_MAX_PLANES; ++i)
       vl_zscan_cleanup_buffer(&buffer->zscan[i]);
-   buffer->zscan_source->destroy(buffer->zscan_source);
+
+   pipe_sampler_view_reference(&buffer->zscan_source, NULL);
 }
 
 static bool
-init_idct_buffer(struct vl_mpeg12_buffer *buffer)
+init_idct_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
 {
    struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
 
-   struct vl_mpeg12_decoder *dec;
-
    unsigned i;
 
-   assert(buffer);
-
-   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+   assert(dec && buffer);
 
    idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source);
    if (!idct_source_sv)
@@ -187,27 +193,18 @@
 static void
 cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
 {
-   struct vl_mpeg12_decoder *dec;
    unsigned i;
    
    assert(buf);
 
-   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
-   assert(dec);
-
    for (i = 0; i < 3; ++i)
       vl_idct_cleanup_buffer(&buf->idct[0]);
 }
 
 static bool
-init_mc_buffer(struct vl_mpeg12_buffer *buf)
+init_mc_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buf)
 {
-   struct vl_mpeg12_decoder *dec;
-
-   assert(buf);
-
-   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
-   assert(dec);
+   assert(dec && buf);
 
    if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
       goto error_mc_y;
@@ -241,183 +238,148 @@
       vl_mc_cleanup_buffer(&buf->mc[i]);
 }
 
-static void
-vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
+static INLINE void
+MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned weights[2])
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_decoder *dec;
+   assert(mb);
 
-   assert(buf);
+   switch (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) {
+   case PIPE_MPEG12_MB_TYPE_MOTION_FORWARD:
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      break;
 
-   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
-   assert(dec);
+   case (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD):
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF;
+      break;
 
-   cleanup_zscan_buffer(buf);
+   case PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD:
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX;
+      break;
 
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      cleanup_idct_buffer(buf);
-
-   cleanup_mc_buffer(buf);
-
-   vl_vb_cleanup(&buf->vertex_stream);
-
-   FREE(buf);
+   default:
+      if (mb->macroblock_type & PIPE_MPEG12_MB_TYPE_PATTERN) {
+         /* patern without a motion vector, just copy the old frame content */
+         weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
+         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      } else {
+         weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
+         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      }
+      break;
+   }
 }
 
-static void
-vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
+static INLINE struct vl_motionvector
+MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
+                   unsigned field_select_mask, unsigned weight)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_decoder *dec;
+   struct vl_motionvector mv;
 
-   struct pipe_sampler_view **sampler_views;
-   unsigned i;
+   assert(mb);
 
-   assert(buf);
+   if (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) {
+      switch (mb->macroblock_modes.bits.frame_motion_type) {
+      case PIPE_MPEG12_MO_TYPE_FRAME:
+         mv.top.x = mb->PMV[0][vector][0];
+         mv.top.y = mb->PMV[0][vector][1];
+         mv.top.field_select = PIPE_VIDEO_FRAME;
+         mv.top.weight = weight;
 
-   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
-   assert(dec);
+         mv.bottom.x = mb->PMV[0][vector][0];
+         mv.bottom.y = mb->PMV[0][vector][1];
+         mv.bottom.weight = weight;
+         mv.bottom.field_select = PIPE_VIDEO_FRAME;
+         break;
 
-   vl_vb_map(&buf->vertex_stream, dec->base.context);
+      case PIPE_MPEG12_MO_TYPE_FIELD:
+         mv.top.x = mb->PMV[0][vector][0];
+         mv.top.y = mb->PMV[0][vector][1];
+         mv.top.field_select = (mb->motion_vertical_field_select & field_select_mask) ?
+            PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
+         mv.top.weight = weight;
 
-   sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source);
+         mv.bottom.x = mb->PMV[1][vector][0];
+         mv.bottom.y = mb->PMV[1][vector][1];
+         mv.bottom.field_select = (mb->motion_vertical_field_select & (field_select_mask << 2)) ?
+            PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
+         mv.bottom.weight = weight;
+         break;
 
-   assert(sampler_views);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      struct pipe_resource *tex = sampler_views[i]->texture;
-      struct pipe_box rect =
-      {
-         0, 0, 0,
-         tex->width0,
-         tex->height0,
-         1
-      };
-
-      buf->tex_transfer[i] = dec->base.context->get_transfer
-      (
-         dec->base.context, tex,
-         0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-         &rect
-      );
-
-      buf->texels[i] = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer[i]);
-   }
-
-   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
-      struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
-      struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
-
-      for (i = 0; i < VL_MAX_PLANES; ++i)
-         ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
-
-      for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
-         mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
-
-      vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
+      default: // TODO: Support DUALPRIME and 16x8
+         break;
+      }
    } else {
+      mv.top.x = mv.top.y = 0;
+      mv.top.field_select = PIPE_VIDEO_FRAME;
+      mv.top.weight = weight;
 
-      for (i = 0; i < VL_MAX_PLANES; ++i)
-         vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
+      mv.bottom.x = mv.bottom.y = 0;
+      mv.bottom.field_select = PIPE_VIDEO_FRAME;
+      mv.bottom.weight = weight;
    }
+   return mv;
 }
 
-static void
-vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer,
-                                  const uint8_t intra_matrix[64],
-                                  const uint8_t non_intra_matrix[64])
+static INLINE void
+UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
+                  struct vl_mpeg12_buffer *buf,
+                  const struct pipe_mpeg12_macroblock *mb)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   unsigned i;
+   unsigned intra;
+   unsigned tb, x, y, num_blocks = 0;
 
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      vl_zscan_upload_quant(&buf->zscan[i], intra_matrix, true);
-      vl_zscan_upload_quant(&buf->zscan[i], non_intra_matrix, false);
+   assert(dec && buf);
+   assert(mb);
+
+   if (!mb->coded_block_pattern)
+      return;
+
+   intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ? 1 : 0;
+
+   for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x, ++tb) {
+         if (mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) {
+
+            struct vl_ycbcr_block *stream = buf->ycbcr_stream[0];
+            stream->x = mb->x * 2 + x;
+            stream->y = mb->y * 2 + y;
+            stream->intra = intra;
+            stream->coding = mb->macroblock_modes.bits.dct_type;
+            stream->block_num = buf->block_num++;
+
+            buf->num_ycbcr_blocks[0]++;
+            buf->ycbcr_stream[0]++;
+
+            num_blocks++;
+         }
+      }
    }
-}
 
-static struct pipe_ycbcr_block *
-vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   /* TODO: Implement 422, 444 */
+   //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
-   assert(buf);
+   for (tb = 1; tb < 3; ++tb) {
+      if (mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) {
 
-   return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component);
-}
+         struct vl_ycbcr_block *stream = buf->ycbcr_stream[tb];
+         stream->x = mb->x;
+         stream->y = mb->y;
+         stream->intra = intra;
+         stream->coding = 0;
+         stream->block_num = buf->block_num++;
 
-static short *
-vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+         buf->num_ycbcr_blocks[tb]++;
+         buf->ycbcr_stream[tb]++;
 
-   assert(buf);
-   assert(component < VL_MAX_PLANES);
-
-   return buf->texels[component];
-}
-
-static unsigned
-vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-
-   assert(buf);
-
-   return vl_vb_get_mv_stream_stride(&buf->vertex_stream);
-}
-
-static struct pipe_motionvector *
-vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-
-   assert(buf);
-
-   return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
-}
-
-static void
-vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
-                                  unsigned num_bytes, const void *data,
-                                  struct pipe_picture_desc *picture,
-                                  unsigned num_ycbcr_blocks[3])
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture;
-   
-   struct vl_mpeg12_decoder *dec;
-   unsigned i;
-
-   assert(buf);
-
-   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
-   assert(dec);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      vl_zscan_set_layout(&buf->zscan[i], pic->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
-
-   vl_mpg12_bs_decode(&buf->bs, num_bytes, data, pic, num_ycbcr_blocks);
-}
-
-static void
-vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_decoder *dec;
-   unsigned i;
-
-   assert(buf);
-
-   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
-   assert(dec);
-
-   vl_vb_unmap(&buf->vertex_stream, dec->base.context);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer[i]);
-      dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer[i]);
+         num_blocks++;
+      }
    }
+
+   memcpy(buf->texels, mb->blocks, 64 * sizeof(short) * num_blocks);
+   buf->texels += 64 * num_blocks;
 }
 
 static void
@@ -452,7 +414,6 @@
 
    pipe_resource_reference(&dec->quads.buffer, NULL);
    pipe_resource_reference(&dec->pos.buffer, NULL);
-   pipe_resource_reference(&dec->block_num.buffer, NULL);
 
    pipe_sampler_view_reference(&dec->zscan_linear, NULL);
    pipe_sampler_view_reference(&dec->zscan_normal, NULL);
@@ -461,7 +422,7 @@
    FREE(dec);
 }
 
-static struct pipe_video_decode_buffer *
+static void *
 vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
 {
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
@@ -473,38 +434,25 @@
    if (buffer == NULL)
       return NULL;
 
-   buffer->base.decoder = decoder;
-   buffer->base.destroy = vl_mpeg12_buffer_destroy;
-   buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame;
-   buffer->base.set_quant_matrix = vl_mpeg12_buffer_set_quant_matrix;
-   buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
-   buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
-   buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
-   buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
-   buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
-   buffer->base.end_frame = vl_mpeg12_buffer_end_frame;
-
    if (!vl_vb_init(&buffer->vertex_stream, dec->base.context,
                    dec->base.width / MACROBLOCK_WIDTH,
                    dec->base.height / MACROBLOCK_HEIGHT))
       goto error_vertex_buffer;
 
-   if (!init_mc_buffer(buffer))
+   if (!init_mc_buffer(dec, buffer))
       goto error_mc;
 
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      if (!init_idct_buffer(buffer))
+      if (!init_idct_buffer(dec, buffer))
          goto error_idct;
 
-   if (!init_zscan_buffer(buffer))
+   if (!init_zscan_buffer(dec, buffer))
       goto error_zscan;
 
    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
-      vl_mpg12_bs_init(&buffer->bs,
-                       dec->base.width / MACROBLOCK_WIDTH,
-                       dec->base.height / MACROBLOCK_HEIGHT);
+      vl_mpg12_bs_init(&buffer->bs, decoder);
 
-   return &buffer->base;
+   return buffer;
 
 error_zscan:
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
@@ -522,76 +470,307 @@
 }
 
 static void
-vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
-                               unsigned num_ycbcr_blocks[3],
-                               struct pipe_video_buffer *refs[2],
-                               struct pipe_video_buffer *dst)
+vl_mpeg12_destroy_buffer(struct pipe_video_decoder *decoder, void *buffer)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
-   struct vl_mpeg12_decoder *dec;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
+   struct vl_mpeg12_buffer *buf = buffer;
 
-   struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv;
+   assert(dec && buf);
+
+   cleanup_zscan_buffer(buf);
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      cleanup_idct_buffer(buf);
+
+   cleanup_mc_buffer(buf);
+
+   vl_vb_cleanup(&buf->vertex_stream);
+
+   FREE(buf);
+}
+
+static void
+vl_mpeg12_set_decode_buffer(struct pipe_video_decoder *decoder, void *buffer)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+
+   assert(dec && buffer);
+
+   dec->current_buffer = buffer;
+}
+
+static void
+vl_mpeg12_set_picture_parameters(struct pipe_video_decoder *decoder,
+                                 struct pipe_picture_desc *picture)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture;
+
+   assert(dec && pic);
+
+   dec->picture_desc = *pic;
+}
+
+static void
+vl_mpeg12_set_quant_matrix(struct pipe_video_decoder *decoder,
+                           const struct pipe_quant_matrix *matrix)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   const struct pipe_mpeg12_quant_matrix *m = (const struct pipe_mpeg12_quant_matrix *)matrix;
+
+   assert(dec);
+   assert(matrix->codec == PIPE_VIDEO_CODEC_MPEG12);
+
+   memcpy(dec->intra_matrix, m->intra_matrix, 64);
+   memcpy(dec->non_intra_matrix, m->non_intra_matrix, 64);
+}
+
+static void
+vl_mpeg12_set_decode_target(struct pipe_video_decoder *decoder,
+                            struct pipe_video_buffer *target)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
    struct pipe_surface **surfaces;
+   unsigned i;
 
+   assert(dec);
+
+   surfaces = target->get_surfaces(target);
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      pipe_surface_reference(&dec->target_surfaces[i], surfaces[i]);
+}
+
+static void
+vl_mpeg12_set_reference_frames(struct pipe_video_decoder *decoder,
+                               struct pipe_video_buffer **ref_frames,
+                               unsigned num_ref_frames)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct pipe_sampler_view **sv;
+   unsigned i,j;
+
+   assert(dec);
+   assert(num_ref_frames <= VL_MAX_REF_FRAMES);
+
+   for (i = 0; i < num_ref_frames; ++i) {
+      sv = ref_frames[i]->get_sampler_view_planes(ref_frames[i]);
+      for (j = 0; j < VL_MAX_PLANES; ++j)
+         pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]);
+   }
+
+   for (; i < VL_MAX_REF_FRAMES; ++i)
+      for (j = 0; j < VL_MAX_PLANES; ++j)
+         pipe_sampler_view_reference(&dec->ref_frames[i][j], NULL);
+}
+
+static void
+vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct vl_mpeg12_buffer *buf;
+
+   struct pipe_resource *tex;
+   struct pipe_box rect = { 0, 0, 0, 1, 1, 1 };
+
+   unsigned i;
+
+   assert(dec);
+
+   buf = dec->current_buffer;
+   assert(buf);
+
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
+      dec->intra_matrix[0] = 1 << (7 - dec->picture_desc.intra_dc_precision);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      vl_zscan_upload_quant(&buf->zscan[i], dec->intra_matrix, true);
+      vl_zscan_upload_quant(&buf->zscan[i], dec->non_intra_matrix, false);
+   }
+
+   vl_vb_map(&buf->vertex_stream, dec->base.context);
+
+   tex = buf->zscan_source->texture;
+   rect.width = tex->width0;
+   rect.height = tex->height0;
+
+   buf->tex_transfer = dec->base.context->get_transfer
+   (
+      dec->base.context, tex,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+
+   buf->block_num = 0;
+   buf->texels = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
+      buf->num_ycbcr_blocks[i] = 0;
+   }
+
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
+      buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
+
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+      vl_mpg12_bs_set_picture_desc(&buf->bs, &dec->picture_desc);
+
+   } else {
+
+      for (i = 0; i < VL_MAX_PLANES; ++i)
+         vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
+   }
+}
+
+static void
+vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder,
+                            const struct pipe_macroblock *macroblocks,
+                            unsigned num_macroblocks)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   const struct pipe_mpeg12_macroblock *mb = (const struct pipe_mpeg12_macroblock *)macroblocks;
+   struct vl_mpeg12_buffer *buf;
+
+   unsigned i, j, mv_weights[2];
+
+   assert(dec && dec->current_buffer);
+   assert(macroblocks && macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
+
+   buf = dec->current_buffer;
+   assert(buf);
+
+   for (; num_macroblocks > 0; --num_macroblocks) {
+      unsigned mb_addr = mb->y * dec->width_in_macroblocks + mb->x;
+
+      if (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_PATTERN | PIPE_MPEG12_MB_TYPE_INTRA))
+         UploadYcbcrBlocks(dec, buf, mb);
+
+      MacroBlockTypeToPipeWeights(mb, mv_weights);
+
+      for (i = 0; i < 2; ++i) {
+          if (!dec->ref_frames[i][0]) continue;
+
+         buf->mv_stream[i][mb_addr] = MotionVectorToPipe
+         (
+            mb, i,
+            i ? PIPE_MPEG12_FS_FIRST_BACKWARD : PIPE_MPEG12_FS_FIRST_FORWARD,
+            mv_weights[i]
+         );
+      }
+
+      /* see section 7.6.6 of the spec */
+      if (mb->num_skipped_macroblocks > 0) {
+         struct vl_motionvector skipped_mv[2];
+
+         if (dec->ref_frames[0][0] && !dec->ref_frames[1][0]) {
+            skipped_mv[0].top.x = skipped_mv[0].top.y = 0;
+            skipped_mv[0].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         } else {
+           skipped_mv[0] = buf->mv_stream[0][mb_addr];
+           skipped_mv[1] = buf->mv_stream[1][mb_addr];
+         }
+         skipped_mv[0].top.field_select = PIPE_VIDEO_FRAME;
+         skipped_mv[1].top.field_select = PIPE_VIDEO_FRAME;
+
+         skipped_mv[0].bottom = skipped_mv[0].top;
+         skipped_mv[1].bottom = skipped_mv[1].top;
+
+         ++mb_addr;
+         for (i = 0; i < mb->num_skipped_macroblocks; ++i, ++mb_addr) {
+            for (j = 0; j < 2; ++j) {
+               if (!dec->ref_frames[j][0]) continue;
+               buf->mv_stream[j][mb_addr] = skipped_mv[j];
+
+            }
+         }
+      }
+
+      ++mb;
+   }
+}
+
+static void
+vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder,
+                           unsigned num_bytes, const void *data)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct vl_mpeg12_buffer *buf;
+   
+   unsigned i;
+
+   assert(dec && dec->current_buffer);
+
+   buf = dec->current_buffer;
+   assert(buf);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      vl_zscan_set_layout(&buf->zscan[i], dec->picture_desc.alternate_scan ?
+                          dec->zscan_alternate : dec->zscan_normal);
+
+   vl_mpg12_bs_decode(&buf->bs, num_bytes, data);
+}
+
+static void
+vl_mpeg12_end_frame(struct pipe_video_decoder *decoder)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct pipe_sampler_view **mc_source_sv;
    struct pipe_vertex_buffer vb[3];
+   struct vl_mpeg12_buffer *buf;
 
    unsigned i, j, component;
    unsigned nr_components;
 
-   assert(buf);
+   assert(dec && dec->current_buffer);
 
-   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
-   assert(dec);
+   buf = dec->current_buffer;
 
-   for (i = 0; i < 2; ++i)
-      sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL;
+   vl_vb_unmap(&buf->vertex_stream, dec->base.context);
+
+   dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer);
+   dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer);
 
    vb[0] = dec->quads;
    vb[1] = dec->pos;
 
-   surfaces = dst->get_surfaces(dst);
-
    dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_mv);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      if (!surfaces[i]) continue;
+      if (!dec->target_surfaces[i]) continue;
 
-      vl_mc_set_surface(&buf->mc[i], surfaces[i]);
+      vl_mc_set_surface(&buf->mc[i], dec->target_surfaces[i]);
 
       for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
-         if (!sv[j]) continue;
+         if (!dec->ref_frames[j][i]) continue;
 
          vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
          dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
 
-         vl_mc_render_ref(&buf->mc[i], sv[j][i]);
+         vl_mc_render_ref(&buf->mc[i], dec->ref_frames[j][i]);
       }
    }
 
-   vb[2] = dec->block_num;
-
    dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      if (!num_ycbcr_blocks[i]) continue;
+      if (!buf->num_ycbcr_blocks[i]) continue;
 
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
-      dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
+      dec->base.context->set_vertex_buffers(dec->base.context, 2, vb);
 
-      vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
+      vl_zscan_render(&buf->zscan[i] , buf->num_ycbcr_blocks[i]);
 
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-         vl_idct_flush(&buf->idct[i], num_ycbcr_blocks[i]);
+         vl_idct_flush(&buf->idct[i], buf->num_ycbcr_blocks[i]);
    }
 
    mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
    for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
-      if (!surfaces[i]) continue;
+      if (!dec->target_surfaces[i]) continue;
 
-      nr_components = util_format_get_nr_components(surfaces[i]->texture->format);
+      nr_components = util_format_get_nr_components(dec->target_surfaces[i]->texture->format);
       for (j = 0; j < nr_components; ++j, ++component) {
-         if (!num_ycbcr_blocks[i]) continue;
+         if (!buf->num_ycbcr_blocks[i]) continue;
 
          vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
-         dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
+         dec->base.context->set_vertex_buffers(dec->base.context, 2, vb);
 
          if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
             vl_idct_prepare_stage2(&buf->idct[component]);
@@ -599,11 +778,19 @@
             dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[component]);
             dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr);
          }
-         vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]);
+         vl_mc_render_ycbcr(&buf->mc[i], j, buf->num_ycbcr_blocks[component]);
       }
    }
 }
 
+static void
+vl_mpeg12_flush(struct pipe_video_decoder *decoder)
+{
+   assert(decoder);
+
+   //Noop, for shaders it is much faster to flush everything in end_frame
+}
+
 static bool
 init_pipe_state(struct vl_mpeg12_decoder *dec)
 {
@@ -870,21 +1057,21 @@
 
    dec->base.destroy = vl_mpeg12_destroy;
    dec->base.create_buffer = vl_mpeg12_create_buffer;
-   dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
+   dec->base.destroy_buffer = vl_mpeg12_destroy_buffer;
+   dec->base.set_decode_buffer = vl_mpeg12_set_decode_buffer;
+   dec->base.set_picture_parameters = vl_mpeg12_set_picture_parameters;
+   dec->base.set_quant_matrix = vl_mpeg12_set_quant_matrix;
+   dec->base.set_decode_target = vl_mpeg12_set_decode_target;
+   dec->base.set_reference_frames = vl_mpeg12_set_reference_frames;
+   dec->base.begin_frame = vl_mpeg12_begin_frame;
+   dec->base.decode_macroblock = vl_mpeg12_decode_macroblock;
+   dec->base.decode_bitstream = vl_mpeg12_decode_bitstream;
+   dec->base.end_frame = vl_mpeg12_end_frame;
+   dec->base.flush = vl_mpeg12_flush;
 
    dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
    dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
-
-   dec->quads = vl_vb_upload_quads(dec->base.context);
-   dec->pos = vl_vb_upload_pos(
-      dec->base.context,
-      dec->base.width / MACROBLOCK_WIDTH,
-      dec->base.height / MACROBLOCK_HEIGHT
-   );
-   dec->block_num = vl_vb_upload_block_num(dec->base.context, dec->num_blocks);
-
-   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context);
-   dec->ves_mv = vl_vb_get_ves_mv(dec->base.context);
+   dec->width_in_macroblocks = align(dec->base.width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
 
    /* TODO: Implement 422, 444 */
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
@@ -892,14 +1079,27 @@
    if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
       dec->chroma_width = dec->base.width / 2;
       dec->chroma_height = dec->base.height / 2;
+      dec->num_blocks = dec->num_blocks * 2;
    } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
       dec->chroma_width = dec->base.width;
       dec->chroma_height = dec->base.height / 2;
+      dec->num_blocks = dec->num_blocks * 2 + dec->num_blocks;
    } else {
       dec->chroma_width = dec->base.width;
       dec->chroma_height = dec->base.height;
+      dec->num_blocks = dec->num_blocks * 3;
    }
 
+   dec->quads = vl_vb_upload_quads(dec->base.context);
+   dec->pos = vl_vb_upload_pos(
+      dec->base.context,
+      dec->base.width / MACROBLOCK_WIDTH,
+      dec->base.height / MACROBLOCK_HEIGHT
+   );
+
+   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context);
+   dec->ves_mv = vl_vb_get_ves_mv(dec->base.context);
+
    switch (entrypoint) {
    case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
       format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs);
@@ -946,6 +1146,9 @@
    if (!init_pipe_state(dec))
       goto error_pipe_state;
 
+   memset(dec->intra_matrix, 0x10, 64);
+   memset(dec->non_intra_matrix, 0x10, 64);
+
    return &dec->base;
 
 error_pipe_state:
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 01265e3..4a8d653 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -49,12 +49,12 @@
 
    unsigned blocks_per_line;
    unsigned num_blocks;
+   unsigned width_in_macroblocks;
 
    enum pipe_format zscan_source_format;
 
    struct pipe_vertex_buffer quads;
    struct pipe_vertex_buffer pos;
-   struct pipe_vertex_buffer block_num;
 
    void *ves_ycbcr;
    void *ves_mv;
@@ -73,23 +73,34 @@
    struct vl_mc mc_y, mc_c;
 
    void *dsa;
+
+   struct vl_mpeg12_buffer *current_buffer;
+   struct pipe_mpeg12_picture_desc picture_desc;
+   uint8_t intra_matrix[64];
+   uint8_t non_intra_matrix[64];
+   struct pipe_sampler_view *ref_frames[VL_MAX_REF_FRAMES][VL_MAX_PLANES];
+   struct pipe_surface *target_surfaces[VL_MAX_PLANES];
 };
 
 struct vl_mpeg12_buffer
 {
-   struct pipe_video_decode_buffer base;
-
    struct vl_vertex_buffer vertex_stream;
 
-   struct pipe_video_buffer *zscan_source;
+   unsigned block_num;
+   unsigned num_ycbcr_blocks[3];
+
+   struct pipe_sampler_view *zscan_source;
 
    struct vl_mpg12_bs bs;
    struct vl_zscan_buffer zscan[VL_MAX_PLANES];
    struct vl_idct_buffer idct[VL_MAX_PLANES];
    struct vl_mc_buffer mc[VL_MAX_PLANES];
 
-   struct pipe_transfer *tex_transfer[VL_MAX_PLANES];
-   short *texels[VL_MAX_PLANES];
+   struct pipe_transfer *tex_transfer;
+   short *texels;
+
+   struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
+   struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES];
 };
 
 /**
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index c0f1449..281db80 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -125,49 +125,6 @@
    return pos;
 }
 
-struct pipe_vertex_buffer
-vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks)
-{
-   struct pipe_vertex_buffer buf;
-   struct pipe_transfer *buf_transfer;
-   struct vertex2s *v;
-   unsigned i;
-
-   assert(pipe);
-
-   /* create buffer */
-   buf.stride = sizeof(struct vertex2s);
-   buf.buffer_offset = 0;
-   buf.buffer = pipe_buffer_create
-   (
-      pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      PIPE_USAGE_STATIC,
-      sizeof(struct vertex2s) * num_blocks
-   );
-
-   if(!buf.buffer)
-      return buf;
-
-   /* and fill it */
-   v = pipe_buffer_map
-   (
-      pipe,
-      buf.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
-   );
-
-   for ( i = 0; i < num_blocks; ++i, ++v) {
-      v->x = i;
-      v->y = i;
-   }
-
-   pipe_buffer_unmap(pipe, buf_transfer);
-
-   return buf;
-}
-
 static struct pipe_vertex_element
 vl_vb_get_quad_vertex_element(void)
 {
@@ -211,12 +168,10 @@
    /* Position element */
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
-   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
-
    /* block num element */
-   vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R16G16_SSCALED;
+   vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   vl_vb_element_helper(&vertex_elems[VS_I_BLOCK_NUM], 1, 2);
+   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 2, 1);
 
    return pipe->create_vertex_elements_state(pipe, 3, vertex_elems);
 }
@@ -266,7 +221,7 @@
          pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
          PIPE_USAGE_STREAM,
-         sizeof(struct pipe_ycbcr_block) * size * 4
+         sizeof(struct vl_ycbcr_block) * size * 4
       );
       if (!buffer->ycbcr[i].resource)
          goto error_ycbcr;
@@ -278,7 +233,7 @@
          pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
          PIPE_USAGE_STREAM,
-         sizeof(struct pipe_motionvector) * size
+         sizeof(struct vl_motionvector) * size
       );
       if (!buffer->mv[i].resource)
          goto error_mv;
@@ -310,7 +265,7 @@
 
    assert(buffer);
 
-   buf.stride = sizeof(struct pipe_ycbcr_block);
+   buf.stride = sizeof(struct vl_ycbcr_block);
    buf.buffer_offset = 0;
    buf.buffer = buffer->ycbcr[component].resource;
 
@@ -324,7 +279,7 @@
 
    assert(buffer);
 
-   buf.stride = sizeof(struct pipe_motionvector);
+   buf.stride = sizeof(struct vl_motionvector);
    buf.buffer_offset = 0;
    buf.buffer = buffer->mv[motionvector].resource;
 
@@ -360,7 +315,7 @@
 
 }
 
-struct pipe_ycbcr_block *
+struct vl_ycbcr_block *
 vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component)
 {
    assert(buffer);
@@ -377,7 +332,7 @@
    return buffer->width;
 }
 
-struct pipe_motionvector *
+struct vl_motionvector *
 vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame)
 {
    assert(buffer);
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 74845a4..874ecce 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -52,20 +52,56 @@
    NUM_VS_INPUTS = 4
 };
 
+enum vl_mv_weight
+{
+   PIPE_VIDEO_MV_WEIGHT_MIN = 0,
+   PIPE_VIDEO_MV_WEIGHT_HALF = 128,
+   PIPE_VIDEO_MV_WEIGHT_MAX = 256
+};
+
+enum vl_field_select
+{
+   PIPE_VIDEO_FRAME = 0,
+   PIPE_VIDEO_TOP_FIELD = 1,
+   PIPE_VIDEO_BOTTOM_FIELD = 3,
+
+   /* TODO
+   PIPE_VIDEO_DUALPRIME
+   PIPE_VIDEO_16x8
+   */
+};
+
+struct vl_motionvector
+{
+   struct {
+      int16_t x, y;
+      int16_t field_select; /**< enum pipe_video_field_select */
+      int16_t weight;  /**< enum pipe_video_mv_weight  */
+   } top, bottom;
+};
+
+struct vl_ycbcr_block
+{
+   uint8_t x, y;
+   uint8_t intra;
+   uint8_t coding;
+   float block_num;
+};
+
 struct vl_vertex_buffer
 {
    unsigned width, height;
 
    struct {
-      struct pipe_resource    *resource;
-      struct pipe_transfer    *transfer;
-      struct pipe_ycbcr_block *vertex_stream;
+      struct pipe_resource  *resource;
+      struct pipe_transfer  *transfer;
+      struct vl_ycbcr_block *vertex_stream;
    } ycbcr[VL_MAX_PLANES];
 
    struct {
-      struct pipe_resource     *resource;
-      struct pipe_transfer     *transfer;
-      struct pipe_motionvector *vertex_stream;
+      struct pipe_resource   *resource;
+      struct pipe_transfer   *transfer;
+      struct vl_motionvector *vertex_stream;
    } mv[VL_MAX_REF_FRAMES];
 };
 
@@ -73,8 +109,6 @@
 
 struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height);
 
-struct pipe_vertex_buffer vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks);
-
 void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe);
 
 void *vl_vb_get_ves_mv(struct pipe_context *pipe);
@@ -89,13 +123,13 @@
 
 struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
 
-struct pipe_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component);
+struct vl_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component);
 
 struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame);
 
 unsigned vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer);
 
-struct pipe_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame);
+struct vl_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame);
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
index e81b1e9..4db1334 100644
--- a/src/gallium/auxiliary/vl/vl_vlc.h
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -25,116 +25,148 @@
  *
  **************************************************************************/
 
-/**
- * This file is based uppon slice_xvmc.c and vlc.h from the xine project,
- * which in turn is based on mpeg2dec. The following is the original copyright:
- *
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
 #ifndef vl_vlc_h
 #define vl_vlc_h
 
-#include "pipe/p_compiler.h"
+#include <assert.h>
+
+#include <pipe/p_compiler.h>
+
+#include <util/u_math.h>
+#include "util/u_pointer.h"
 
 struct vl_vlc
 {
-   uint32_t buf; /* current 32 bit working set of buffer */
-   int bits;     /* used bits in working set */
-   const uint8_t *ptr; /* buffer with stream data */
-   const uint8_t *max; /* ptr+len of buffer */
+   uint64_t buffer;
+   unsigned valid_bits;
+   uint32_t *data;
+   uint32_t *end;
+};
+
+struct vl_vlc_entry
+{
+   int8_t length;
+   int8_t value;
+};
+
+struct vl_vlc_compressed
+{
+   uint16_t bitcode;
+   struct vl_vlc_entry entry;
 };
 
 static INLINE void
-vl_vlc_restart(struct vl_vlc *vlc)
+vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size)
 {
-   vlc->buf = (vlc->ptr[0] << 24) | (vlc->ptr[1] << 16) | (vlc->ptr[2] << 8) | vlc->ptr[3];
-   vlc->bits = -16;
-   vlc->ptr += 4;
+   unsigned i, bits = util_logbase2(dst_size);
+
+   for (i=0;i<dst_size;++i) {
+      dst[i].length = 0;
+      dst[i].value = 0;
+   }
+
+   for(; src_size > 0; --src_size, ++src) {
+      for(i=0; i<(1 << (bits - src->entry.length)); ++i)
+         dst[src->bitcode >> (16 - bits) | i] = src->entry;
+   }
+}
+
+static INLINE void
+vl_vlc_fillbits(struct vl_vlc *vlc)
+{
+   if (vlc->valid_bits < 32) {
+      uint32_t value = *vlc->data;
+
+      //assert(vlc->data <= vlc->end);
+
+#ifndef PIPE_ARCH_BIG_ENDIAN
+      value = util_bswap32(value);
+#endif
+
+      vlc->buffer |= (uint64_t)value << (32 - vlc->valid_bits);
+      ++vlc->data;
+      vlc->valid_bits += 32;
+   }
 }
 
 static INLINE void
 vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len)
 {
-   vlc->ptr = data;
-   vlc->max = data + len;
-   vl_vlc_restart(vlc);
+   assert(vlc);
+   assert(data && len);
+
+   vlc->buffer = 0;
+   vlc->valid_bits = 0;
+
+   /* align the data pointer */
+   while (pointer_to_uintptr(data) & 3) {
+      vlc->buffer |= (uint64_t)*data << (56 - vlc->valid_bits);
+      ++data;
+      --len;
+      vlc->valid_bits += 8;
+   }
+   vlc->data = (uint32_t*)data;
+   vlc->end = (uint32_t*)(data + len);
+
+   vl_vlc_fillbits(vlc);
+   vl_vlc_fillbits(vlc);
 }
 
-static INLINE bool
-vl_vlc_getbyte(struct vl_vlc *vlc)
+static INLINE unsigned
+vl_vlc_bytes_left(struct vl_vlc *vlc)
 {
-   vlc->buf <<= 8;
-   vlc->buf |= vlc->ptr[0];
-   vlc->ptr++;
-   return vlc->ptr < vlc->max;
+   return ((uint8_t*)vlc->end)-((uint8_t*)vlc->data);
 }
 
-#define vl_vlc_getword(vlc, shift)                                      \
-do {                                                                    \
-   (vlc)->buf |= (((vlc)->ptr[0] << 8) | (vlc)->ptr[1]) << (shift);     \
-   (vlc)->ptr += 2;                                                     \
-} while (0)
+static INLINE unsigned
+vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
+{
+   //assert(vlc->valid_bits >= num_bits);
 
-/* make sure that there are at least 16 valid bits in bit_buf */
-#define vl_vlc_needbits(vlc)                    \
-do {                                            \
-    if ((vlc)->bits >= 0) {                      \
-	vl_vlc_getword(vlc, (vlc)->bits);       \
-	(vlc)->bits -= 16;                      \
-    }                                           \
-} while (0)
+   return vlc->buffer >> (64 - num_bits);
+}
 
-/* make sure that the full 32 bit of the buffer are valid */
 static INLINE void
-vl_vlc_need32bits(struct vl_vlc *vlc)
+vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
 {
-   vl_vlc_needbits(vlc);
-   if (vlc->bits > -8) {
-      unsigned n = -vlc->bits;
-      vlc->buf <<= n;
-      vlc->buf |= *vlc->ptr << 8;
-      vlc->bits = -8;
-      vlc->ptr++;
-   }
-   if (vlc->bits > -16) {
-      unsigned n = -vlc->bits - 8;
-      vlc->buf <<= n;
-      vlc->buf |= *vlc->ptr;
-      vlc->bits = -16;
-      vlc->ptr++;
-   }
+   //assert(vlc->valid_bits > num_bits);
+
+   vlc->buffer <<= num_bits;
+   vlc->valid_bits -= num_bits;
 }
 
-/* remove num valid bits from bit_buf */
-#define vl_vlc_dumpbits(vlc, num)       \
-do {					\
-    (vlc)->buf <<= (num);		\
-    (vlc)->bits += (num);		\
-} while (0)
+static INLINE unsigned
+vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
+{
+   unsigned value;
 
-/* take num bits from the high part of bit_buf and zero extend them */
-#define vl_vlc_ubits(vlc, num) (((uint32_t)((vlc)->buf)) >> (32 - (num)))
+   //assert(vlc->valid_bits >= num_bits);
 
-/* take num bits from the high part of bit_buf and sign extend them */
-#define vl_vlc_sbits(vlc, num) (((int32_t)((vlc)->buf)) >> (32 - (num)))
+   value = vlc->buffer >> (64 - num_bits);
+   vl_vlc_eatbits(vlc, num_bits);
+
+   return value;
+}
+
+static INLINE signed
+vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
+{
+   signed value;
+
+   //assert(vlc->valid_bits >= num_bits);
+
+   value = ((int64_t)vlc->buffer) >> (64 - num_bits);
+   vl_vlc_eatbits(vlc, num_bits);
+
+   return value;
+}
+
+static INLINE int8_t
+vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits)
+{
+   tbl += vl_vlc_peekbits(vlc, num_bits);
+   vl_vlc_eatbits(vlc, tbl->length);
+   return tbl->value;
+}
 
 #endif /* vl_vlc_h */
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 25a3245..3faf801 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -329,8 +329,15 @@
 but overlapping blits are not permitted.
 
 ``resource_resolve`` resolves a multisampled resource into a non-multisampled
-one. Formats and dimensions must match. This function must be present if a driver
+one. Their formats must match. This function must be present if a driver
 supports multisampling.
+The region that is to be resolved is described by ``pipe_resolve_info``, which
+provides a source and a destination rectangle.
+The source rectangle may be vertically flipped, but otherwise the dimensions
+of the rectangles must match, unless PIPE_CAP_SCALED_RESOLVE is supported,
+in which case scaling and horizontal flipping are allowed as well.
+The result of resolving depth/stencil values may be any function of the values at
+the sample points, but returning the value of the centermost sample is preferred.
 
 The interfaces to these calls are likely to change to make it easier
 for a driver to batch multiple blits with the same source and
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 4debcc6..039cb1c 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1031,9 +1031,21 @@
   TBD
 
 
-.. opcode:: TXQ - Texture Size Query
+.. opcode:: TXQ - Texture Size Query (as per NV_gpu_program4)
+                  retrieve the dimensions of the texture
+                  depending on the target. For 1D (width), 2D/RECT/CUBE
+		  (width, height), 3D (width, height, depth),
+		  1D array (width, layers), 2D array (width, height, layers)
 
-  TBD
+.. math::
+
+  lod = src0
+
+  dst.x = texture_width(unit, lod)
+
+  dst.y = texture_height(unit, lod)
+
+  dst.z = texture_depth(unit, lod)
 
 
 .. opcode:: CONT - Continue
diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h
index a1f8bca..56d331f 100644
--- a/src/gallium/drivers/i915/i915_batch.h
+++ b/src/gallium/drivers/i915/i915_batch.h
@@ -64,11 +64,16 @@
                                             int num_vertex)
 {
    struct i915_winsys *iws = i915->iws;
-   i915->vertices_since_last_flush += num_vertex;
-   if ( i915->vertices_since_last_flush > 4096
-      || ( i915->vertices_since_last_flush > 256 &&
-           !iws->buffer_is_busy(iws, i915->current.cbuf_bo)) )
+
+   i915->queued_vertices += num_vertex;
+
+   /* fire if we have more than 1/20th of the last frame's vertices */
+   if (i915->queued_vertices > i915->last_fired_vertices / 20) {
       FLUSH_BATCH(NULL);
+      i915->fired_vertices += i915->queued_vertices;
+      i915->queued_vertices = 0;
+      return;
+   }
 }
 
 
diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c
index e1d6a74..4f9aa2c 100644
--- a/src/gallium/drivers/i915/i915_clear.c
+++ b/src/gallium/drivers/i915/i915_clear.c
@@ -125,6 +125,9 @@
     * This is not required, just a heuristic
     */
    FLUSH_BATCH(NULL);
+
+   i915->last_fired_vertices = i915->fired_vertices;
+   i915->fired_vertices = 0;
 }
 
 /**
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 8486235..fca8688 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -264,7 +264,10 @@
    struct util_slab_mempool transfer_pool;
    struct util_slab_mempool texture_transfer_pool;
 
-   int vertices_since_last_flush;
+   /* state for tracking flushes */
+   int last_fired_vertices;
+   int fired_vertices;
+   int queued_vertices;
 
    /** blitter/hw-clear */
    struct blitter_context* blitter;
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index 6d76afa..5d8e3c8 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -77,5 +77,6 @@
    i915->static_dirty = ~0;
    /* kernel emits flushes in between batchbuffers */
    i915->flush_dirty = 0;
-   i915->vertices_since_last_flush = 0;
+   i915->fired_vertices += i915->queued_vertices;
+   i915->queued_vertices = 0;
 }
diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h
index b760bc4..b2683c8 100644
--- a/src/gallium/drivers/i915/i915_fpc.h
+++ b/src/gallium/drivers/i915/i915_fpc.h
@@ -39,9 +39,9 @@
 
 #define I915_PROGRAM_SIZE 192
 
-/* Use those indices for pos/face routing, must be >= I915_TEX_UNITS */
-#define I915_SEMANTIC_POS  10
-#define I915_SEMANTIC_FACE 11
+/* Use those indices for pos/face routing, must be >= num of inputs */
+#define I915_SEMANTIC_POS  100
+#define I915_SEMANTIC_FACE 101
 
 
 /**
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index e743f60..c108c70 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -221,6 +221,8 @@
          return 1;
       case PIPE_SHADER_CAP_SUBROUTINES:
          return 0;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return 0;
       default:
          debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
          return 0;
diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c
index 204cee6..1a21433 100644
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -268,8 +268,8 @@
 {
    unsigned x1 = i915->scissor.minx;
    unsigned y1 = i915->scissor.miny;
-   unsigned x2 = i915->scissor.maxx;
-   unsigned y2 = i915->scissor.maxy;
+   unsigned x2 = i915->scissor.maxx - 1;
+   unsigned y2 = i915->scissor.maxy - 1;
    unsigned sc[3];
 
    sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD;
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index ac6e945..41146be 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -80,7 +80,7 @@
                                             i915->saved_nr_sampler_views,
                                             i915->saved_sampler_views);
 
-   util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz,
+   util_blitter_copy_texture(i915->blitter, dst, dst_level, dstx, dsty, dstz,
                             src, src_level, src_box, TRUE);
 }
 
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 9178dfa..39e9e2f 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -243,6 +243,8 @@
           return 1;
       case PIPE_SHADER_CAP_SUBROUTINES:
           return 1;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return 0;
       default:
          assert(0);
          return 0;
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ba9705b..f930135 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -51,6 +51,7 @@
 CPP_SOURCES = \
 
 PROGS := lp_test_format	\
+	 lp_test_arit	\
 	 lp_test_blend	\
 	 lp_test_conv	\
 	 lp_test_printf \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index d6b20ce..129de29 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -88,11 +88,12 @@
         'format',
         'blend',
         'conv',
-	'printf',
-	'sincos',
+        'printf',
+        'sincos',
     ]
 
     if not env['msvc']:
+        tests.append('arit')
         tests.append('round')
 
     for test in tests:
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 268f0fa..ce92a80 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -68,10 +68,17 @@
       elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = 
          LLVMArrayType(LLVMFloatTypeInContext(lc), 4);
 
+#if HAVE_LLVM >= 0x0300
+   texture_type = LLVMStructCreateNamed(gallivm->context, "texture");
+   LLVMStructSetBody(texture_type, elem_types,
+                     Elements(elem_types), 0);
+#else
       texture_type = LLVMStructTypeInContext(lc, elem_types,
                                              Elements(elem_types), 0);
+      LLVMAddTypeName(gallivm->module, "texture", texture_type);
 
       LLVMInvalidateStructLayout(gallivm->target, texture_type);
+#endif
 
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
                              gallivm->target, texture_type,
@@ -112,8 +119,6 @@
 
       LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
                            gallivm->target, texture_type);
-
-      LLVMAddTypeName(gallivm->module, "texture", texture_type);
    }
 
    /* struct lp_jit_context */
@@ -129,11 +134,19 @@
       elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
                                                       PIPE_MAX_SAMPLERS);
 
+#if HAVE_LLVM >= 0x0300
+   context_type = LLVMStructCreateNamed(gallivm->context, "context");
+   LLVMStructSetBody(context_type, elem_types,
+                     Elements(elem_types), 0);
+#else
       context_type = LLVMStructTypeInContext(lc, elem_types,
                                              Elements(elem_types), 0);
 
       LLVMInvalidateStructLayout(gallivm->target, context_type);
 
+      LLVMAddTypeName(gallivm->module, "context", context_type);
+#endif
+
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
                              gallivm->target, context_type,
                              LP_JIT_CTX_CONSTANTS);
@@ -155,8 +168,6 @@
       LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
                            gallivm->target, context_type);
 
-      LLVMAddTypeName(gallivm->module, "context", context_type);
-
       lp->jit_context_ptr_type = LLVMPointerType(context_type, 0);
    }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 4b2ae14..9e2a45c 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -30,6 +30,7 @@
 #include "util/u_math.h"
 #include "util/u_cpu_detect.h"
 #include "util/u_format.h"
+#include "util/u_string.h"
 #include "util/u_format_s3tc.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
@@ -93,7 +94,9 @@
 static const char *
 llvmpipe_get_name(struct pipe_screen *screen)
 {
-   return "llvmpipe";
+   static char buf[100];
+   util_snprintf(buf, sizeof(buf), "llvmpipe (LLVM 0x%x)", HAVE_LLVM);
+   return buf;
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c
new file mode 100644
index 0000000..ea2a659
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c
@@ -0,0 +1,295 @@
+/**************************************************************************
+ *
+ * Copyright 2011 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util/u_pointer.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_arit.h"
+
+#include "lp_test.h"
+
+
+void
+write_tsv_header(FILE *fp)
+{
+   fprintf(fp,
+           "result\t"
+           "format\n");
+
+   fflush(fp);
+}
+
+
+typedef float (*unary_func_t)(float);
+
+
+/**
+ * Describe a test case of one unary function.
+ */
+struct unary_test_t
+{
+   /*
+    * Test name -- name of the mathematical function under test.
+    */
+
+   const char *name;
+
+   LLVMValueRef
+   (*builder)(struct lp_build_context *bld, LLVMValueRef a);
+
+   /*
+    * Reference (pure-C) function.
+    */
+   float
+   (*ref)(float a);
+
+   /*
+    * Test values.
+    */
+   const float *values;
+   unsigned num_values;
+};
+
+
+const float exp2_values[] = {
+   -60,
+   -4,
+   -2,
+   -1,
+   -1e-007,
+   0,
+   1e-007,
+   1, 
+   2, 
+   4, 
+   60
+};
+
+
+const float log2_values[] = {
+#if 0
+   /* 
+    * Smallest denormalized number; meant just for experimentation, but not
+    * validation.
+    */
+   1.4012984643248171e-45,
+#endif
+   1e-007,
+   0.5,
+   1,
+   2,
+   4,
+   100000,
+   1e+018
+};
+
+
+static float rsqrtf(float x)
+{
+   return 1.0/sqrt(x);
+}
+
+
+const float rsqrt_values[] = {
+   -1, -1e-007,
+   1e-007, 1,
+   -4, -1,
+   1, 4,
+   -1e+035, -100000,
+   100000, 1e+035,
+};
+
+
+const float sincos_values[] = {
+   -5*M_PI/4,
+   -4*M_PI/4,
+   -4*M_PI/4,
+   -3*M_PI/4,
+   -2*M_PI/4,
+   -1*M_PI/4,
+    1*M_PI/4,
+    2*M_PI/4,
+    3*M_PI/4,
+    4*M_PI/4,
+    5*M_PI/4,
+};
+
+
+/*
+ * Unary test cases.
+ */
+
+static const struct unary_test_t unary_tests[] = {
+   {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values)},
+   {"log2", &lp_build_log2, &log2f, log2_values, Elements(log2_values)},
+   {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values)},
+   {"log", &lp_build_log, &logf, log2_values, Elements(log2_values)},
+   {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values)},
+   {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values)},
+   {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values)},
+};
+
+
+/*
+ * Build LLVM function that exercises the unary operator builder.
+ */
+static LLVMValueRef
+build_unary_test_func(struct gallivm_state *gallivm,
+                      LLVMModuleRef module,
+                      LLVMContextRef context,
+                      const struct unary_test_t *test)
+{
+   LLVMTypeRef i32t = LLVMInt32TypeInContext(context);
+   LLVMTypeRef f32t = LLVMFloatTypeInContext(context);
+   LLVMTypeRef v4f32t = LLVMVectorType(f32t, 4);
+   LLVMTypeRef args[1] = { f32t };
+   LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0));
+   LLVMValueRef arg1 = LLVMGetParam(func, 0);
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry");
+   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+   LLVMValueRef ret;
+
+   struct lp_build_context bld;
+
+   lp_build_context_init(&bld, gallivm, lp_float32_vec4_type());
+
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+
+   LLVMPositionBuilderAtEnd(builder, block);
+   
+   /* scalar to vector */
+   arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(v4f32t), arg1, index0, "");
+
+   ret = test->builder(&bld, arg1);
+   
+   /* vector to scalar */
+   ret = LLVMBuildExtractElement(builder, ret, index0, "");
+
+   LLVMBuildRet(builder, ret);
+   return func;
+}
+
+
+/*
+ * Test one LLVM unary arithmetic builder function.
+ */
+static boolean
+test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test)
+{
+   LLVMModuleRef module = gallivm->module;
+   LLVMValueRef test_func;
+   LLVMExecutionEngineRef engine = gallivm->engine;
+   LLVMContextRef context = gallivm->context;
+   char *error = NULL;
+   unary_func_t test_func_jit;
+   boolean success = TRUE;
+   int i;
+
+   test_func = build_unary_test_func(gallivm, module, context, test);
+
+   if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+      printf("LLVMVerifyModule: %s\n", error);
+      LLVMDumpModule(module);
+      abort();
+   }
+   LLVMDisposeMessage(error);
+
+   test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func));
+
+   for (i = 0; i < test->num_values; ++i) {
+      float value = test->values[i];
+      float ref = test->ref(value);
+      float src = test_func_jit(value);
+
+      double error = fabs(src - ref);
+      double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG;
+
+      bool pass = precision >= 20.0;
+
+      if (isnan(ref)) {
+         continue;
+      }
+
+      if (!pass || verbose) {
+         printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n",
+               test->name, value, ref, src, precision,
+               pass ? "PASS" : "FAIL");
+      }
+
+      if (!pass) {
+         success = FALSE;
+      }
+   }
+
+   LLVMFreeMachineCodeForFunction(engine, test_func);
+
+   return success;
+}
+
+
+boolean
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+{
+   boolean success = TRUE;
+   int i;
+
+   for (i = 0; i < Elements(unary_tests); ++i) {
+      if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) {
+         success = FALSE;
+      }
+   }
+
+   return success;
+}
+
+
+boolean
+test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+          unsigned long n)
+{
+   /*
+    * Not randomly generated test cases, so test all.
+    */
+
+   return test_all(gallivm, verbose, fp);
+}
+
+
+boolean
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+{
+   return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index f4324e6..a2795b6 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -75,7 +75,7 @@
     src_native_type = native_type(format)
 
     print 'static void'
-    print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type)
+    print 'lp_tile_%s_swizzle_%s(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type)
     print '{'
     print '   unsigned x, y;'
     print '   const uint8_t *src_row = src + y0*src_stride;'
@@ -273,7 +273,7 @@
     name = format.short_name()
 
     print 'static void'
-    print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type)
+    print 'lp_tile_%s_unswizzle_%s(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type)
     print '{'
     if format.layout == PLAIN \
         and format.colorspace == 'rgb' \
@@ -501,7 +501,7 @@
     print 'void'
     print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type)
     print '{'
-    print '   void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type
+    print '   void (*func)(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type
     print '#ifdef DEBUG'
     print '   lp_tile_swizzle_count += 1;'
     print '#endif'
@@ -539,7 +539,7 @@
     print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type)
     
     print '{'
-    print '   void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type
+    print '   void (*func)(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type
     print '#ifdef DEBUG'
     print '   lp_tile_unswizzle_count += 1;'
     print '#endif'
diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c
index 18308b7..ffc444e 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -324,131 +324,34 @@
 
 static int noop_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 {
-	switch (param) {
-	/* Supported features (boolean caps). */
-	case PIPE_CAP_NPOT_TEXTURES:
-	case PIPE_CAP_TWO_SIDED_STENCIL:
-	case PIPE_CAP_GLSL:
-	case PIPE_CAP_OCCLUSION_QUERY:
-	case PIPE_CAP_POINT_SPRITE:
-	case PIPE_CAP_ANISOTROPIC_FILTER:
-	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
-	case PIPE_CAP_TEXTURE_SHADOW_MAP:
-	case PIPE_CAP_TEXTURE_SWIZZLE:
-	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
-	case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+	struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen;
 
-	  return 1;
-	case PIPE_CAP_DUAL_SOURCE_BLEND:
-
-	case PIPE_CAP_SM3:
-	case PIPE_CAP_INDEP_BLEND_ENABLE:
-	case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
-	case PIPE_CAP_DEPTH_CLAMP:
-	case PIPE_CAP_SHADER_STENCIL_EXPORT:
-	case PIPE_CAP_TIMER_QUERY:
-	case PIPE_CAP_STREAM_OUTPUT:
-	case PIPE_CAP_PRIMITIVE_RESTART:
-	case PIPE_CAP_INDEP_BLEND_FUNC:
-		return 0;
-
-	/* Texturing. */
-	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
-	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
-		return 14;
-	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-		return 16;
-	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
-	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
-		return 16;
-
-	/* Render targets. */
-	case PIPE_CAP_MAX_RENDER_TARGETS:
-		return 8;
-
-	/* Fragment coordinate conventions. */
-	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
-	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-		return 1;
-	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
-	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
-		return 0;
-
-	default:
-		return 0;
-	}
+	return screen->get_param(screen, param);
 }
 
 static float noop_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
 {
-	switch (param) {
-	case PIPE_CAP_MAX_LINE_WIDTH:
-	case PIPE_CAP_MAX_LINE_WIDTH_AA:
-	case PIPE_CAP_MAX_POINT_WIDTH:
-	case PIPE_CAP_MAX_POINT_WIDTH_AA:
-		return 8192.0f;
-	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
-		return 16.0f;
-	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
-		return 16.0f;
-	default:
-		return 0.0f;
-	}
+	struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen;
+
+	return screen->get_paramf(screen, param);
 }
 
 static int noop_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
 {
-	switch(shader)
-	{
-	case PIPE_SHADER_FRAGMENT:
-	case PIPE_SHADER_VERTEX:
-	case PIPE_SHADER_GEOMETRY:
-		break;
-	default:
-		return 0;
-	}
+	struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen;
 
-	switch (param) {
-	case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
-	case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
-	case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
-	case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
-		return 16384;
-	case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
-		return 8;
-	case PIPE_SHADER_CAP_MAX_INPUTS:
-		return 16;
-	case PIPE_SHADER_CAP_MAX_TEMPS:
-		return 256;
-	case PIPE_SHADER_CAP_MAX_ADDRS:
-		return 1;
-	case PIPE_SHADER_CAP_MAX_CONSTS:
-		return 256;
-	case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-		return 1;
-	case PIPE_SHADER_CAP_MAX_PREDS:
-		return 0;
-	case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
-		return 1;
-	case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
-	case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
-	case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
-	case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-		return 1;
-	default:
-		return 0;
-	}
+	return screen->get_shader_param(screen, shader, param);
 }
 
-static boolean noop_is_format_supported(struct pipe_screen* screen,
+static boolean noop_is_format_supported(struct pipe_screen* pscreen,
 					enum pipe_format format,
 					enum pipe_texture_target target,
 					unsigned sample_count,
                                         unsigned usage)
 {
-	return true;
+	struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen;
+
+	return screen->is_format_supported(screen, format, target, sample_count, usage);
 }
 
 static void noop_destroy_screen(struct pipe_screen *screen)
diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile
index 3210d1f..aae6d98 100644
--- a/src/gallium/drivers/nouveau/Makefile
+++ b/src/gallium/drivers/nouveau/Makefile
@@ -10,6 +10,7 @@
 C_SOURCES = nouveau_screen.c \
             nouveau_fence.c \
             nouveau_mm.c \
-            nouveau_buffer.c
+            nouveau_buffer.c \
+            nouveau_video.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index 696e0d3..19bf7c8 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -23,4 +23,7 @@
    return (struct nouveau_context *)pipe;
 }
 
+void
+nouveau_context_init_vdec(struct nouveau_context *);
+
 #endif
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index d910809..cf291c6 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -76,6 +76,7 @@
 int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *);
 void nouveau_screen_fini(struct nouveau_screen *);
 
+void nouveau_screen_init_vdec(struct nouveau_screen *);
 
 
 #ifndef NOUVEAU_NVC0
diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c
new file mode 100644
index 0000000..620c030
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -0,0 +1,41 @@
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+
+#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_context.h"
+
+static int
+nouveau_screen_get_video_param(struct pipe_screen *pscreen,
+                               enum pipe_video_profile profile,
+                               enum pipe_video_cap param)
+{
+   switch (param) {
+   case PIPE_VIDEO_CAP_SUPPORTED:
+      return vl_profile_supported(pscreen, profile);
+   case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+      return 1;
+   case PIPE_VIDEO_CAP_MAX_WIDTH:
+   case PIPE_VIDEO_CAP_MAX_HEIGHT:
+      return vl_video_buffer_max_size(pscreen);
+   case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+      return vl_num_buffers_desired(pscreen, profile);
+   default:
+      debug_printf("unknown video param: %d\n", param);
+      return 0;
+   }
+}
+
+void
+nouveau_screen_init_vdec(struct nouveau_screen *screen)
+{
+   screen->base.get_video_param = nouveau_screen_get_video_param;
+   screen->base.is_video_format_supported = vl_video_buffer_is_format_supported;
+}
+
+void
+nouveau_context_init_vdec(struct nouveau_context *nv)
+{
+   nv->pipe.create_video_decoder = vl_create_decoder;
+   nv->pipe.create_video_buffer = vl_video_buffer_create;
+}
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index ac3e361..0d46406 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -149,6 +149,8 @@
    assert(nv50->draw);
    draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50));
 
+   nouveau_context_init_vdec(&nv50->base);
+
    return pipe;
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index c1226d5..284db69 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -171,7 +171,8 @@
 extern void nv50_init_state_functions(struct nv50_context *);
 
 /* nv50_state_validate.c */
-extern boolean nv50_state_validate(struct nv50_context *);
+/* @words: check for space before emitting relocs */
+extern boolean nv50_state_validate(struct nv50_context *, unsigned words);
 
 /* nv50_surface.c */
 extern void nv50_clear(struct pipe_context *, unsigned buffers,
diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c
index be43147..34502d0 100644
--- a/src/gallium/drivers/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nv50/nv50_formats.c
@@ -116,7 +116,7 @@
     SAMPLER_VIEW | DEPTH_STENCIL },
 
    [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_Z24_S8_UNORM,
-    B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0),
+    B_(C0, C1, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0),
     SAMPLER_VIEW | DEPTH_STENCIL },
 
    [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_Z24_X8_UNORM,
@@ -124,7 +124,7 @@
     SAMPLER_VIEW | DEPTH_STENCIL },
 
    [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8_Z24_UNORM,
-    B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0),
+    B_(C1, C0, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0),
     SAMPLER_VIEW | DEPTH_STENCIL },
 
    [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT,
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index d72b23c..4271731 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -462,6 +462,10 @@
          mi->saturate = 1;
          mi->def[0] = nvi->def[0];
          mi->def[0]->insn = mi;
+         if (nvi->flags_def) {
+            mi->flags_def = nvi->flags_def;
+            mi->flags_def->insn = mi;
+         }
          nv_nvi_delete(nvi);
       }
    }
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index a697ff5..581aad1 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -91,6 +91,7 @@
    case PIPE_CAP_TEXTURE_SHADOW_MAP:
    case PIPE_CAP_NPOT_TEXTURES:
    case PIPE_CAP_ANISOTROPIC_FILTER:
+   case PIPE_CAP_SCALED_RESOLVE:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return nv50_screen(pscreen)->tesla->grclass >= NVA0_3D;
@@ -182,6 +183,8 @@
       return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 0; /* please inline, or provide function declarations */
+   case PIPE_SHADER_CAP_INTEGERS:
+      return 0;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
       return 0;
@@ -315,6 +318,8 @@
 
    nv50_screen_init_resource_functions(pscreen);
 
+   nouveau_screen_init_vdec(&screen->base);
+
    ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
                         &screen->fence.bo);
    if (ret)
@@ -602,6 +607,9 @@
 
    screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0);
 
+   if (!nv50_blitctx_create(screen))
+      goto fail;
+
    nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
 
    return pscreen;
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 64ad209..315ca80 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -21,6 +21,8 @@
 
 #define NV50_SCREEN_RESIDENT_BO_COUNT 5
 
+struct nv50_blitctx;
+
 struct nv50_screen {
    struct nouveau_screen base;
    struct nouveau_winsys *nvws;
@@ -39,6 +41,8 @@
    struct nouveau_resource *gp_code_heap;
    struct nouveau_resource *fp_code_heap;
 
+   struct nv50_blitctx *blitctx;
+
    struct {
       void **entries;
       int next;
@@ -71,6 +75,8 @@
    return (struct nv50_screen *)screen;
 }
 
+boolean nv50_blitctx_create(struct nv50_screen *);
+
 void nv50_screen_make_buffers_resident(struct nv50_screen *);
 
 int nv50_screen_tic_alloc(struct nv50_screen *, void *);
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
index e5b10c3..d73f7c7 100644
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -130,13 +130,14 @@
    int ret;
    unsigned size;
 
-   if (prog->translated)
+   if (!prog->translated) {
+      prog->translated = nv50_program_translate(prog);
+      if (!prog->translated)
+         return FALSE;
+   } else
+   if (prog->res)
       return TRUE;
 
-   prog->translated = nv50_program_translate(prog);
-   if (!prog->translated)
-      return FALSE;
-
    if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap;
    else
    if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 8b0b08f..44f2d25 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -350,7 +350,7 @@
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
 
 boolean
-nv50_state_validate(struct nv50_context *nv50)
+nv50_state_validate(struct nv50_context *nv50, unsigned words)
 {
    unsigned i;
 
@@ -367,6 +367,8 @@
       nv50->dirty = 0;
    }
 
+   MARK_RING(nv50->screen->base.channel, words, 0);
+
    nv50_bufctx_emit_relocs(nv50);
 
    return TRUE;
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index eefbaad..8bca900 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -198,6 +198,7 @@
 {
    struct nv50_screen *screen = nv50_context(pipe)->screen;
    int ret;
+   boolean m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
    /* Fallback for buffers. */
@@ -207,9 +208,15 @@
       return;
    }
 
+   assert(src->nr_samples == dst->nr_samples);
+
+   m2mf = (src->format == dst->format) ||
+      (util_format_get_blocksizebits(src->format) ==
+       util_format_get_blocksizebits(dst->format));
+
    nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
 
-   if (src->format == dst->format && src->nr_samples == dst->nr_samples) {
+   if (m2mf) {
       struct nv50_m2mf_rect drect, srect;
       unsigned i;
       unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
@@ -368,7 +375,7 @@
 
    /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
    nv50->dirty &= NV50_NEW_FRAMEBUFFER;
-   if (!nv50_state_validate(nv50))
+   if (!nv50_state_validate(nv50, 9 + (fb->nr_cbufs * 2)))
       return;
 
    if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
@@ -405,12 +412,546 @@
    nv50->dirty = dirty & ~NV50_NEW_FRAMEBUFFER;
 }
 
+
+struct nv50_blitctx
+{
+   struct nv50_screen *screen;
+   struct {
+      struct pipe_framebuffer_state fb;
+      struct nv50_program *vp;
+      struct nv50_program *gp;
+      struct nv50_program *fp;
+      unsigned num_textures[3];
+      unsigned num_samplers[3];
+      struct pipe_sampler_view *texture;
+      struct nv50_tsc_entry *sampler;
+      unsigned dirty;
+      unsigned clip_nr;
+   } saved;
+   struct nv50_program vp;
+   struct nv50_program fp;
+   struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
+   uint32_t fp_offset;
+   uint16_t color_mask;
+   uint8_t filter;
+};
+
+static void
+nv50_blitctx_make_vp(struct nv50_blitctx *blit)
+{
+   static const uint32_t code[] =
+   {
+      0x10000001, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */
+      0x0423c788,
+      0x10000205, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */
+      0x0423c788,
+      0x10000409, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */
+      0x0423c788,
+      0x1000060d, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */
+      0x0423c788,
+      0x10000811, /* exit mov b32 o[0x10] s[0x10] */ /* TEXC.z */
+      0x0423c789,
+   };
+
+   blit->vp.type = PIPE_SHADER_VERTEX;
+   blit->vp.translated = TRUE;
+   blit->vp.code = (uint32_t *)code; /* const_cast */
+   blit->vp.code_size = sizeof(code);
+   blit->vp.max_gpr = 4;
+   blit->vp.max_out = 5;
+   blit->vp.out_nr = 2;
+   blit->vp.out[0].mask = 0x3;
+   blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION;
+   blit->vp.out[1].hw = 2;
+   blit->vp.out[1].mask = 0x7;
+   blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC;
+   blit->vp.vp.attrs[0] = 0x73;
+   blit->vp.vp.psiz = 0x40;
+   blit->vp.vp.edgeflag = 0x40;
+}
+
+static void
+nv50_blitctx_make_fp(struct nv50_blitctx *blit)
+{
+   static const uint32_t code[] =
+   {
+      /* 3 coords RGBA in, RGBA out, also for Z32_FLOAT(_S8X24_USCALED) */
+      0x80000000, /* interp $r0 v[0x0] */
+      0x80010004, /* interp $r1 v[0x4] */
+      0x80020009, /* interp $r2 flat v[0x8] */
+      0x00040780,
+      0xf6800001, /* texauto live { $r0,1,2,3 } $t0 $s0 { $r0,1,2 } */
+      0x0000c785, /* exit */
+
+      /* 3 coords ZS in, S encoded in R, Z encoded in GBA (8_UNORM) */
+      0x80000000, /* interp $r0 v[0x00] */
+      0x80010004, /* interp $r1 v[0x04] */
+      0x80020009, /* interp $r2 flat v[0x8] */
+      0x00040780,
+      0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */
+      0x00000784,
+      0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */
+      0x04b7ffff,
+      0xa0000201, /* cvt f32 $r0 s32 $r1 */
+      0x44014780,
+      0xa0000409, /* cvt rni s32 $r2 f32 $r2 */
+      0x8c004780,
+      0xc0010001, /* mul f32 $r0 $r0 1/0xff */
+      0x03b8080b,
+      0xd03f0405, /* and b32 $r1 $r2 0x0000ff */
+      0x0000000f,
+      0xd000040d, /* and b32 $r3 $r2 0xff0000 */
+      0x000ff003,
+      0xd0000409, /* and b32 $r2 $r2 0x00ff00 */
+      0x00000ff3,
+      0xa0000205, /* cvt f32 $r1 s32 $r1 */
+      0x44014780,
+      0xa000060d, /* cvt f32 $r3 s32 $r3 */
+      0x44014780,
+      0xa0000409, /* cvt f32 $r2 s32 $r2 */
+      0x44014780,
+      0xc0010205, /* mul f32 $r1 $r1 1/0x0000ff */
+      0x03b8080b,
+      0xc001060d, /* mul f32 $r3 $r3 1/0x00ff00 */
+      0x0338080b,
+      0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */
+      0x0378080b,
+      0xf0000001, /* exit never nop */
+      0xe0000001,
+
+      /* 3 coords ZS in, Z encoded in RGB, S encoded in A (U8_UNORM) */
+      0x80000000, /* interp $r0 v[0x00] */
+      0x80010004, /* interp $r1 v[0x04] */
+      0x80020009, /* interp $r2 flat v[0x8] */
+      0x00040780,
+      0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */
+      0x00000784,
+      0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */
+      0x04b7ffff,
+      0xa0000281, /* cvt f32 $r3 s32 $r1 */
+      0x44014780,
+      0xa0000409, /* cvt rni s32 $r2 f32 $r2 */
+      0x8c004780,
+      0xc001060d, /* mul f32 $r3 $r3 1/0xff */
+      0x03b8080b,
+      0xd03f0401, /* and b32 $r0 $r2 0x0000ff */
+      0x0000000f,
+      0xd0000405, /* and b32 $r1 $r2 0x00ff00 */
+      0x00000ff3,
+      0xd0000409, /* and b32 $r2 $r2 0xff0000 */
+      0x000ff003,
+      0xa0000001, /* cvt f32 $r0 s32 $r0 */
+      0x44014780,
+      0xa0000205, /* cvt f32 $r1 s32 $r1 */
+      0x44014780,
+      0xa0000409, /* cvt f32 $r2 s32 $r2 */
+      0x44014780,
+      0xc0010001, /* mul f32 $r0 $r0 1/0x0000ff */
+      0x03b8080b,
+      0xc0010205, /* mul f32 $r1 $r1 1/0x00ff00 */
+      0x0378080b,
+      0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */
+      0x0338080b,
+      0xf0000001, /* exit never nop */
+      0xe0000001
+   };
+
+   blit->fp.type = PIPE_SHADER_FRAGMENT;
+   blit->fp.translated = TRUE;
+   blit->fp.code = (uint32_t *)code; /* const_cast */
+   blit->fp.code_size = sizeof(code);
+   blit->fp.max_gpr = 4;
+   blit->fp.max_out = 4;
+   blit->fp.in_nr = 1;
+   blit->fp.in[0].mask = 0x7; /* last component flat */
+   blit->fp.in[0].linear = 1;
+   blit->fp.in[0].sn = TGSI_SEMANTIC_GENERIC;
+   blit->fp.out_nr = 1;
+   blit->fp.out[0].mask = 0xf;
+   blit->fp.out[0].sn = TGSI_SEMANTIC_COLOR;
+   blit->fp.fp.interp = 0x00020403;
+   blit->fp.gp.primid = 0x80;
+}
+
+static void
+nv50_blitctx_make_sampler(struct nv50_blitctx *blit)
+{
+   /* clamp to edge, min/max lod = 0, nearest filtering */
+
+   blit->sampler[0].id = -1;
+
+   blit->sampler[0].tsc[0] = 0x00000092;
+   blit->sampler[0].tsc[1] = 0x00000051;
+
+   /* clamp to edge, min/max lod = 0, bilinear filtering */
+
+   blit->sampler[1].id = -1;
+
+   blit->sampler[1].tsc[0] = 0x00000092;
+   blit->sampler[1].tsc[1] = 0x00000062;
+}
+
+/* Since shaders cannot export stencil, we cannot copy stencil values when
+ * rendering to ZETA, so we attach the ZS surface to a colour render target.
+ */
+static INLINE enum pipe_format
+nv50_blit_zeta_to_colour_format(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:               return PIPE_FORMAT_R16_UNORM;
+   case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+   case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:             return PIPE_FORMAT_R8G8B8A8_UNORM;
+   case PIPE_FORMAT_Z32_FLOAT:               return PIPE_FORMAT_R32_FLOAT;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: return PIPE_FORMAT_R32G32_FLOAT;
+   default:
+      assert(0);
+      return PIPE_FORMAT_NONE;
+   }
+}
+
+static void
+nv50_blitctx_get_color_mask_and_fp(struct nv50_blitctx *blit,
+                                   enum pipe_format format, uint8_t mask)
+{
+   blit->color_mask = 0;
+
+   switch (format) {
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+      blit->fp_offset = 160;
+      if (mask & PIPE_MASK_Z)
+         blit->color_mask |= 0x0111;
+      if (mask & PIPE_MASK_S)
+         blit->color_mask |= 0x1000;
+      break;
+   case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+      blit->fp_offset = 24;
+      if (mask & PIPE_MASK_Z)
+         blit->color_mask |= 0x1110;
+      if (mask & PIPE_MASK_S)
+         blit->color_mask |= 0x0001;
+      break;
+   default:
+      blit->fp_offset = 0;
+      if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) blit->color_mask |= 0x0001;
+      if (mask & (PIPE_MASK_G | PIPE_MASK_S)) blit->color_mask |= 0x0010;
+      if (mask & PIPE_MASK_B) blit->color_mask |= 0x0100;
+      if (mask & PIPE_MASK_A) blit->color_mask |= 0x1000;
+      break;
+   }
+}
+
+static void
+nv50_blit_set_dst(struct nv50_context *nv50,
+                  struct pipe_resource *res, unsigned level, unsigned layer)
+{
+   struct pipe_context *pipe = &nv50->base.pipe;
+   struct pipe_surface templ;
+
+   if (util_format_is_depth_or_stencil(res->format))
+      templ.format = nv50_blit_zeta_to_colour_format(res->format);
+   else
+      templ.format = res->format;
+
+   templ.usage = PIPE_USAGE_STREAM;
+   templ.u.tex.level = level;
+   templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+
+   nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ);
+   nv50->framebuffer.nr_cbufs = 1;
+   nv50->framebuffer.zsbuf = NULL;
+   nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width;
+   nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height;
+}
+
+static INLINE void
+nv50_blit_fixup_tic_entry(struct pipe_sampler_view *view)
+{
+   struct nv50_tic_entry *ent = nv50_tic_entry(view);
+
+   ent->tic[2] &= ~(1 << 31); /* scaled coordinates, ok with 3d textures ? */
+
+   /* magic: */
+
+   ent->tic[3] = 0x20000000; /* affects quality of near vertical edges in MS8 */
+}
+
+static void
+nv50_blit_set_src(struct nv50_context *nv50,
+                  struct pipe_resource *res, unsigned level, unsigned layer)
+{
+   struct pipe_context *pipe = &nv50->base.pipe;
+   struct pipe_sampler_view templ;
+
+   templ.format = res->format;
+   templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+   templ.u.tex.first_level = templ.u.tex.last_level = level;
+   templ.swizzle_r = PIPE_SWIZZLE_RED;
+   templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+   templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+   templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+   nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ);
+
+   nv50_blit_fixup_tic_entry(nv50->textures[2][0]);
+
+   nv50->num_textures[0] = nv50->num_textures[1] = 0;
+   nv50->num_textures[2] = 1;
+}
+
+static void
+nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
+{
+   struct nouveau_channel *chan = blit->screen->base.channel;
+
+   /* blend state */
+   BEGIN_RING(chan, RING_3D(COLOR_MASK(0)), 1);
+   OUT_RING  (chan, blit->color_mask);
+   BEGIN_RING(chan, RING_3D(BLEND_ENABLE(0)), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(LOGIC_OP_ENABLE), 1);
+   OUT_RING  (chan, 0);
+
+   /* rasterizer state */
+#ifndef NV50_SCISSORS_CLIPPING
+   BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 1);
+   OUT_RING  (chan, 1);
+#endif
+   BEGIN_RING(chan, RING_3D(VERTEX_TWO_SIDE_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(MSAA_MASK(0)), 4);
+   OUT_RING  (chan, 0xffff);
+   OUT_RING  (chan, 0xffff);
+   OUT_RING  (chan, 0xffff);
+   OUT_RING  (chan, 0xffff);
+   BEGIN_RING(chan, RING_3D(POLYGON_MODE_FRONT), 3);
+   OUT_RING  (chan, NV50_3D_POLYGON_MODE_FRONT_FILL);
+   OUT_RING  (chan, NV50_3D_POLYGON_MODE_BACK_FILL);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(CULL_FACE_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
+   OUT_RING  (chan, 0);
+
+   /* zsa state */
+   BEGIN_RING(chan, RING_3D(DEPTH_TEST_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(STENCIL_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 1);
+   OUT_RING  (chan, 0);
+}
+
+static void
+nv50_blitctx_pre_blit(struct nv50_blitctx *blit, struct nv50_context *nv50)
+{
+   int s;
+
+   blit->saved.fb.width = nv50->framebuffer.width;
+   blit->saved.fb.height = nv50->framebuffer.height;
+   blit->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs;
+   blit->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0];
+   blit->saved.fb.zsbuf = nv50->framebuffer.zsbuf;
+
+   blit->saved.vp = nv50->vertprog;
+   blit->saved.gp = nv50->gmtyprog;
+   blit->saved.fp = nv50->fragprog;
+
+   nv50->vertprog = &blit->vp;
+   nv50->gmtyprog = NULL;
+   nv50->fragprog = &blit->fp;
+
+   blit->saved.clip_nr = nv50->clip.nr;
+
+   nv50->clip.nr = 0;
+
+   for (s = 0; s < 3; ++s) {
+      blit->saved.num_textures[s] = nv50->num_textures[s];
+      blit->saved.num_samplers[s] = nv50->num_samplers[s];
+   }
+   blit->saved.texture = nv50->textures[2][0];
+   blit->saved.sampler = nv50->samplers[2][0];
+
+   nv50->samplers[2][0] = &blit->sampler[blit->filter];
+
+   nv50->num_samplers[0] = nv50->num_samplers[1] = 0;
+   nv50->num_samplers[2] = 1;
+
+   blit->saved.dirty = nv50->dirty;
+
+   nv50->dirty =
+      NV50_NEW_FRAMEBUFFER |
+      NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG |
+      NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS;
+}
+
+static void
+nv50_blitctx_post_blit(struct nv50_context *nv50, struct nv50_blitctx *blit)
+{
+   int s;
+
+   pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL);
+
+   nv50->framebuffer.width = blit->saved.fb.width;
+   nv50->framebuffer.height = blit->saved.fb.height;
+   nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
+   nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
+   nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf;
+
+   nv50->vertprog = blit->saved.vp;
+   nv50->gmtyprog = blit->saved.gp;
+   nv50->fragprog = blit->saved.fp;
+
+   nv50->clip.nr = blit->saved.clip_nr;
+
+   pipe_sampler_view_reference(&nv50->textures[2][0], NULL);
+
+   for (s = 0; s < 3; ++s) {
+      nv50->num_textures[s] = blit->saved.num_textures[s];
+      nv50->num_samplers[s] = blit->saved.num_samplers[s];
+   }
+   nv50->textures[2][0] = blit->saved.texture;
+   nv50->samplers[2][0] = blit->saved.sampler;
+
+   nv50->dirty = blit->saved.dirty |
+      (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK |
+       NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND |
+       NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS |
+       NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG);
+}
+
+static void
+nv50_resource_resolve(struct pipe_context *pipe,
+                      const struct pipe_resolve_info *info)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_screen *screen = nv50->screen;
+   struct nv50_blitctx *blit = screen->blitctx;
+   struct nouveau_channel *chan = screen->base.channel;
+   struct pipe_resource *src = info->src.res;
+   struct pipe_resource *dst = info->dst.res;
+   float x0, x1, y0, y1, z;
+   float x_range, y_range;
+
+   nv50_blitctx_get_color_mask_and_fp(blit, dst->format, info->mask);
+
+   blit->filter = util_format_is_depth_or_stencil(dst->format) ? 0 : 1;
+
+   nv50_blitctx_pre_blit(blit, nv50);
+
+   nv50_blit_set_dst(nv50, dst, info->dst.level, info->dst.layer);
+   nv50_blit_set_src(nv50, src, 0,               info->src.layer);
+
+   nv50_blitctx_prepare_state(blit);
+
+   nv50_state_validate(nv50, 36);
+
+   x_range =
+      (float)(info->src.x1 - info->src.x0) /
+      (float)(info->dst.x1 - info->dst.x0);
+   y_range =
+      (float)(info->src.y1 - info->src.y0) /
+      (float)(info->dst.y1 - info->dst.y0);
+
+   x0 = (float)info->src.x0 - x_range * (float)info->dst.x0;
+   y0 = (float)info->src.y0 - y_range * (float)info->dst.y0;
+
+   x1 = x0 + 16384.0f * x_range;
+   y1 = y0 + 16384.0f * y_range;
+
+   x0 *= (float)(1 << nv50_miptree(src)->ms_x);
+   x1 *= (float)(1 << nv50_miptree(src)->ms_x);
+   y0 *= (float)(1 << nv50_miptree(src)->ms_y);
+   y1 *= (float)(1 << nv50_miptree(src)->ms_y);
+
+   z = (float)info->src.layer;
+
+   BEGIN_RING(chan, RING_3D(FP_START_ID), 1);
+   OUT_RING  (chan,
+              blit->fp.code_base + blit->fp_offset);
+
+   BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
+   OUT_RING  (chan, 0);
+
+   /* Draw a large triangle in screen coordinates covering the whole
+    * render target, with scissors defining the destination region.
+    * The vertex is supplied with non-normalized texture coordinates
+    * arranged in a way to yield the desired offset and scale.
+    */
+
+   BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+   OUT_RING  (chan, (info->dst.x1 << 16) | info->dst.x0);
+   OUT_RING  (chan, (info->dst.y1 << 16) | info->dst.y0);
+
+   BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+   OUT_RING  (chan, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3);
+   OUT_RINGf (chan, x0);
+   OUT_RINGf (chan, y0);
+   OUT_RINGf (chan, z);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2);
+   OUT_RINGf (chan, 0.0f);
+   OUT_RINGf (chan, 0.0f);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3);
+   OUT_RINGf (chan, x1);
+   OUT_RINGf (chan, y0);
+   OUT_RINGf (chan, z);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2);
+   OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_x);
+   OUT_RINGf (chan, 0.0f);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3);
+   OUT_RINGf (chan, x0);
+   OUT_RINGf (chan, y1);
+   OUT_RINGf (chan, z);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2);
+   OUT_RINGf (chan, 0.0f);
+   OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_y);
+   BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1);
+   OUT_RING  (chan, 0);
+
+   /* re-enable normally constant state */
+
+   BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
+   OUT_RING  (chan, 1);
+
+   nv50_blitctx_post_blit(nv50, blit);
+}
+
+boolean
+nv50_blitctx_create(struct nv50_screen *screen)
+{
+   screen->blitctx = CALLOC_STRUCT(nv50_blitctx);
+   if (!screen->blitctx) {
+      NOUVEAU_ERR("failed to allocate blit context\n");
+      return FALSE;
+   }
+
+   screen->blitctx->screen = screen;
+
+   nv50_blitctx_make_vp(screen->blitctx);
+   nv50_blitctx_make_fp(screen->blitctx);
+
+   nv50_blitctx_make_sampler(screen->blitctx);
+
+   screen->blitctx->color_mask = 0x1111;
+
+   return TRUE;
+}
+
 void
 nv50_init_surface_functions(struct nv50_context *nv50)
 {
    struct pipe_context *pipe = &nv50->base.pipe;
 
    pipe->resource_copy_region = nv50_resource_copy_region;
+   pipe->resource_resolve = nv50_resource_resolve;
    pipe->clear_render_target = nv50_clear_render_target;
    pipe->clear_depth_stencil = nv50_clear_depth_stencil;
 }
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f23008a..1c8347a 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -647,7 +647,7 @@
    if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS)))
       nv50_update_user_vbufs(nv50);
 
-   nv50_state_validate(nv50);
+   nv50_state_validate(nv50, 8); /* 8 as minimum, we use flush_notify here */
 
    chan->flush_notify = nv50_draw_vbo_flush_notify;
 
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 983db23..360afbb 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -150,6 +150,8 @@
    assert(nvc0->draw);
    draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
 
+   nouveau_context_init_vdec(&nvc0->base);
+
    return pipe;
 }
 
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 605a0b0..c79256a 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -24,6 +24,9 @@
 #include "util/u_format_s3tc.h"
 #include "pipe/p_screen.h"
 
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+
 #include "nvc0_context.h"
 #include "nvc0_screen.h"
 
@@ -167,6 +170,8 @@
       return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 0; /* please inline, or provide function declarations */
+   case PIPE_SHADER_CAP_INTEGERS:
+      return 0;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
       return 0;
@@ -373,6 +378,8 @@
 
    nvc0_screen_init_resource_functions(pscreen);
 
+   nouveau_screen_init_vdec(&screen->base);
+
    ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
                         &screen->fence.bo);
    if (ret)
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index 67bba3c..a4fd17e 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -205,6 +205,7 @@
 {
    struct nvc0_screen *screen = nvc0_context(pipe)->screen;
    int ret;
+   boolean m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
    /* Fallback for buffers. */
@@ -214,9 +215,15 @@
       return;
    }
 
+   assert(src->nr_samples == dst->nr_samples);
+
+   m2mf = (src->format == dst->format) ||
+      (util_format_get_blocksizebits(src->format) ==
+       util_format_get_blocksizebits(dst->format));
+
    nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
 
-   if (src->format == dst->format && src->nr_samples == dst->nr_samples) {
+   if (m2mf) {
       struct nv50_m2mf_rect drect, srect;
       unsigned i;
       unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 8cb6cd0..3b77c96 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -179,6 +179,8 @@
 			return 1;
 		case PIPE_SHADER_CAP_SUBROUTINES:
 			return 1;
+		case PIPE_SHADER_CAP_INTEGERS:
+			return 0;
 		default:
 			break;
 		}
@@ -224,6 +226,8 @@
 	case PIPE_VIDEO_CAP_MAX_WIDTH:
 	case PIPE_VIDEO_CAP_MAX_HEIGHT:
 		return vl_video_buffer_max_size(screen);
+	case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+		return vl_num_buffers_desired(screen, profile);
 	default:
 		return 0;
 	}
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index 339906e..04b0304 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -288,7 +288,7 @@
 		 * TODO: perhaps support reinterpreting the formats
 		 */
 		struct blitter_context* blitter = nvfx_get_blitter(pipe, 1);
-		util_blitter_copy_region(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE);
+		util_blitter_copy_texture(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE);
 		nvfx_put_blitter(pipe, blitter);
 	}
 	else
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 4088216..4f02127 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -26,19 +26,51 @@
 	r300_texture.c \
 	r300_texture_desc.c \
 	r300_tgsi_to_rc.c \
-	r300_transfer.c
+	r300_transfer.c \
+	\
+	compiler/radeon_code.c \
+	compiler/radeon_compiler.c \
+	compiler/radeon_compiler_util.c \
+	compiler/radeon_emulate_branches.c \
+	compiler/radeon_emulate_loops.c \
+	compiler/radeon_program.c \
+	compiler/radeon_program_print.c \
+	compiler/radeon_opcodes.c \
+	compiler/radeon_program_alu.c \
+	compiler/radeon_program_pair.c \
+	compiler/radeon_program_tex.c \
+	compiler/radeon_pair_translate.c \
+	compiler/radeon_pair_schedule.c \
+	compiler/radeon_pair_regalloc.c \
+	compiler/radeon_pair_dead_sources.c \
+	compiler/radeon_dataflow.c \
+	compiler/radeon_dataflow_deadcode.c \
+	compiler/radeon_dataflow_swizzles.c \
+	compiler/radeon_list.c \
+	compiler/radeon_optimize.c \
+	compiler/radeon_remove_constants.c \
+	compiler/radeon_rename_regs.c \
+	compiler/radeon_variable.c \
+	compiler/r3xx_fragprog.c \
+	compiler/r300_fragprog.c \
+	compiler/r300_fragprog_swizzle.c \
+	compiler/r300_fragprog_emit.c \
+	compiler/r500_fragprog.c \
+	compiler/r500_fragprog_emit.c \
+	compiler/r3xx_vertprog.c \
+	compiler/r3xx_vertprog_dump.c \
+	compiler/memory_pool.c \
+	\
+	$(TOP)/src/glsl/ralloc.c \
+	$(TOP)/src/mesa/program/register_allocate.c
+
 
 LIBRARY_INCLUDES = \
-	-I$(TOP)/src/mesa/drivers/dri/r300/compiler \
-	-I$(TOP)/include
-
-COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
-
-EXTRA_OBJECTS = \
-	$(COMPILER_ARCHIVE)
+	-I$(TOP)/include \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/glsl
 
 include ../../Makefile.template
 
-.PHONY: $(COMPILER_ARCHIVE)
-$(COMPILER_ARCHIVE):
-	$(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler
+test: default
+	@$(MAKE) -s -C compiler/tests/
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 3af157a..7ffd1c2 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -1,13 +1,11 @@
 Import('*')
 
-r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
-
 env = env.Clone()
-# add the paths for r300compiler
 env.Append(CPPPATH = [
-    '#/src/mesa/drivers/dri/r300/compiler', 
     '#/include', 
     '#/src/mesa',
+    '#/src/glsl',
+    '#/src/mapi',
 ])
 
 r300 = env.ConvenienceLibrary(
@@ -36,7 +34,41 @@
         'r300_texture_desc.c',
         'r300_tgsi_to_rc.c',
         'r300_transfer.c',
-    ] + r300compiler) + r300compiler
+        'compiler/radeon_code.c',
+        'compiler/radeon_compiler.c',
+        'compiler/radeon_compiler_util.c',
+        'compiler/radeon_program.c',
+        'compiler/radeon_program_print.c',
+        'compiler/radeon_opcodes.c',
+        'compiler/radeon_program_alu.c',
+        'compiler/radeon_program_pair.c',
+        'compiler/radeon_program_tex.c',
+        'compiler/radeon_pair_translate.c',
+        'compiler/radeon_pair_schedule.c',
+        'compiler/radeon_pair_regalloc.c',
+        'compiler/radeon_pair_dead_sources.c',
+        'compiler/radeon_optimize.c',
+        'compiler/radeon_remove_constants.c',
+        'compiler/radeon_rename_regs.c',
+        'compiler/radeon_emulate_branches.c',
+        'compiler/radeon_emulate_loops.c',
+        'compiler/radeon_dataflow.c',
+        'compiler/radeon_dataflow_deadcode.c',
+        'compiler/radeon_dataflow_swizzles.c',
+        'compiler/radeon_variable.c',
+        'compiler/radeon_list.c',
+        'compiler/r3xx_fragprog.c',
+        'compiler/r300_fragprog.c',
+        'compiler/r300_fragprog_swizzle.c',
+        'compiler/r300_fragprog_emit.c',
+        'compiler/r500_fragprog.c',
+        'compiler/r500_fragprog_emit.c',
+        'compiler/r3xx_vertprog.c',
+        'compiler/r3xx_vertprog_dump.c',
+        'compiler/memory_pool.c',
+        '#/src/glsl/ralloc.c',
+        '#/src/mesa/program/register_allocate.c'
+    ])
 
 env.Alias('r300', r300)
 
diff --git a/src/gallium/drivers/r300/compiler/memory_pool.c b/src/gallium/drivers/r300/compiler/memory_pool.c
new file mode 100644
index 0000000..ddcdddf
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/memory_pool.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define POOL_LARGE_ALLOC 4096
+#define POOL_ALIGN 8
+
+
+struct memory_block {
+	struct memory_block * next;
+};
+
+void memory_pool_init(struct memory_pool * pool)
+{
+	memset(pool, 0, sizeof(struct memory_pool));
+}
+
+
+void memory_pool_destroy(struct memory_pool * pool)
+{
+	while(pool->blocks) {
+		struct memory_block * block = pool->blocks;
+		pool->blocks = block->next;
+		free(block);
+	}
+}
+
+static void refill_pool(struct memory_pool * pool)
+{
+	unsigned int blocksize = pool->total_allocated;
+	struct memory_block * newblock;
+
+	if (!blocksize)
+		blocksize = 2*POOL_LARGE_ALLOC;
+
+	newblock = (struct memory_block*)malloc(blocksize);
+	newblock->next = pool->blocks;
+	pool->blocks = newblock;
+
+	pool->head = (unsigned char*)(newblock + 1);
+	pool->end = ((unsigned char*)newblock) + blocksize;
+	pool->total_allocated += blocksize;
+}
+
+
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+	if (bytes < POOL_LARGE_ALLOC) {
+		void * ptr;
+
+		if (pool->head + bytes > pool->end)
+			refill_pool(pool);
+
+		assert(pool->head + bytes <= pool->end);
+
+		ptr = pool->head;
+
+		pool->head += bytes;
+		pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
+
+		return ptr;
+	} else {
+		struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block));
+
+		block->next = pool->blocks;
+		pool->blocks = block;
+
+		return (block + 1);
+	}
+}
+
+
diff --git a/src/gallium/drivers/r300/compiler/memory_pool.h b/src/gallium/drivers/r300/compiler/memory_pool.h
new file mode 100644
index 0000000..42344d0
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/memory_pool.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+	unsigned char * head;
+	unsigned char * end;
+	unsigned int total_allocated;
+	struct memory_block * blocks;
+};
+
+
+void memory_pool_init(struct memory_pool * pool);
+void memory_pool_destroy(struct memory_pool * pool);
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+
+/**
+ * Generic helper for growing an array that has separate size/count
+ * and reserved counters to accomodate up to num new element.
+ *
+ *  type * Array;
+ *  unsigned int Size;
+ *  unsigned int Reserved;
+ *
+ * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
+ * assert(Size + k < Reserved);
+ *
+ * \note Size is not changed by this macro.
+ *
+ * \warning Array, Size, Reserved have to be lvalues and may be evaluated
+ * several times.
+ */
+#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
+	unsigned int _num = (num); \
+	if ((size) + _num > (reserved)) { \
+		unsigned int newreserve = (reserved) * 2; \
+		type * newarray; \
+		if (newreserve < _num) \
+			newreserve = 4 * _num; /* arbitrary heuristic */ \
+		newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
+		memcpy(newarray, (array), (size) * sizeof(type)); \
+		(array) = newarray; \
+		(reserved) = newreserve; \
+	} \
+} while(0)
+
+#endif /* MEMORY_POOL_H */
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.c b/src/gallium/drivers/r300/compiler/r300_fragprog.c
new file mode 100644
index 0000000..deba9ca
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+static void presub_string(char out[10], unsigned int inst)
+{
+	switch(inst & 0x600000){
+	case R300_ALU_SRCP_1_MINUS_2_SRC0:
+		sprintf(out, "bias");
+		break;
+	case R300_ALU_SRCP_SRC1_MINUS_SRC0:
+		sprintf(out, "sub");
+		break;
+	case R300_ALU_SRCP_SRC1_PLUS_SRC0:
+		sprintf(out, "add");
+		break;
+	case R300_ALU_SRCP_1_MINUS_SRC0:
+		sprintf(out, "inv ");
+		break;
+	}
+}
+
+static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
+{
+	return (r400_ext_addr & bit) ? 1 << 5 : 0;
+}
+
+/* just some random things... */
+void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
+{
+	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+	struct r300_fragment_program_code *code = &compiler->code->code.r300;
+	int n, i, j;
+	static int pc = 0;
+
+	fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+	fprintf(stderr, "Hardware program\n");
+	fprintf(stderr, "----------------\n");
+	if (c->is_r400) {
+		fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
+	}
+
+	for (n = 0; n <= (code->config & 3); n++) {
+		uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
+		unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
+				(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
+		unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
+				(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
+		int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+		int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
+		fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
+			"alu_end: %u, tex_end: %d  (code_addr: %08x)\n", n,
+			alu_offset, tex_offset, alu_end, tex_end, code_addr);
+
+		if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
+			fprintf(stderr, "  TEX:\n");
+			for (i = tex_offset;
+			     i <= tex_offset + tex_end;
+			     ++i) {
+				const char *instr;
+
+				switch ((code->tex.
+					 inst[i] >> R300_TEX_INST_SHIFT) &
+					15) {
+				case R300_TEX_OP_LD:
+					instr = "TEX";
+					break;
+				case R300_TEX_OP_KIL:
+					instr = "KIL";
+					break;
+				case R300_TEX_OP_TXP:
+					instr = "TXP";
+					break;
+				case R300_TEX_OP_TXB:
+					instr = "TXB";
+					break;
+				default:
+					instr = "UNKNOWN";
+				}
+
+				fprintf(stderr,
+					"    %s t%i, %c%i, texture[%i]   (%08x)\n",
+					instr,
+					(code->tex.
+					 inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+					't',
+					(code->tex.
+					 inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+					(code->tex.
+					 inst[i] & R300_TEX_ID_MASK) >>
+					R300_TEX_ID_SHIFT,
+					code->tex.inst[i]);
+			}
+		}
+
+		for (i = alu_offset;
+		     i <= alu_offset + alu_end; ++i) {
+			char srcc[4][10], dstc[20];
+			char srca[4][10], dsta[20];
+			char argc[3][20];
+			char arga[3][20];
+			char flags[5], tmp[10];
+
+			for (j = 0; j < 3; ++j) {
+				int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+				int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+				int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
+					code->alu.inst[i].r400_ext_addr);
+				int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
+					code->alu.inst[i].r400_ext_addr);
+
+				sprintf(srcc[j], "%c%i",
+					(regc & 32) ? 'c' : 't', (regc & 31) | msbc);
+				sprintf(srca[j], "%c%i",
+					(rega & 32) ? 'c' : 't', (rega & 31) | msba);
+			}
+
+			dstc[0] = 0;
+			sprintf(flags, "%s%s%s",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				unsigned int msb = get_msb(
+					R400_ADDRD_EXT_RGB_MSB_BIT,
+					code->alu.inst[i].r400_ext_addr);
+
+				sprintf(dstc, "t%i.%s ",
+					((code->alu.inst[i].
+					 rgb_addr >> R300_ALU_DSTC_SHIFT)
+					 & 31) | msb,
+					flags);
+			}
+			sprintf(flags, "%s%s%s",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				sprintf(tmp, "o%i.%s",
+					(code->alu.inst[i].
+					 rgb_addr >> 29) & 3,
+					flags);
+				strcat(dstc, tmp);
+			}
+			/* Presub */
+			presub_string(srcc[3], code->alu.inst[i].rgb_inst);
+			presub_string(srca[3], code->alu.inst[i].alpha_inst);
+
+			dsta[0] = 0;
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+				unsigned int msb = get_msb(
+					R400_ADDRD_EXT_A_MSB_BIT,
+					code->alu.inst[i].r400_ext_addr);
+				sprintf(dsta, "t%i.w ",
+					((code->alu.inst[i].
+					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
+					 | msb);
+			}
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
+				sprintf(tmp, "o%i.w ",
+					(code->alu.inst[i].
+					 alpha_addr >> 25) & 3);
+				strcat(dsta, tmp);
+			}
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
+				strcat(dsta, "Z");
+			}
+
+			fprintf(stderr,
+				"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
+				"       w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
+				srcc[0], srcc[1], srcc[2], srcc[3], dstc,
+				code->alu.inst[i].rgb_addr, srca[0], srca[1],
+				srca[2], srca[3], dsta,
+				code->alu.inst[i].alpha_addr);
+
+			for (j = 0; j < 3; ++j) {
+				int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+				int rega = code->alu.inst[i].alpha_inst >> (j * 7);
+				int d;
+				char buf[20];
+
+				d = regc & 31;
+				if (d < 12) {
+					switch (d % 4) {
+					case R300_ALU_ARGC_SRC0C_XYZ:
+						sprintf(buf, "%s.xyz",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_XXX:
+						sprintf(buf, "%s.xxx",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_YYY:
+						sprintf(buf, "%s.yyy",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_ZZZ:
+						sprintf(buf, "%s.zzz",
+							srcc[d / 4]);
+						break;
+					}
+				} else if (d < 15) {
+					sprintf(buf, "%s.www", srca[d - 12]);
+				} else if (d < 20 ) {
+					switch(d) {
+					case R300_ALU_ARGC_SRCP_XYZ:
+						sprintf(buf, "srcp.xyz");
+						break;
+					case R300_ALU_ARGC_SRCP_XXX:
+						sprintf(buf, "srcp.xxx");
+						break;
+					case R300_ALU_ARGC_SRCP_YYY:
+						sprintf(buf, "srcp.yyy");
+						break;
+					case R300_ALU_ARGC_SRCP_ZZZ:
+						sprintf(buf, "srcp.zzz");
+						break;
+					case R300_ALU_ARGC_SRCP_WWW:
+						sprintf(buf, "srcp.www");
+						break;
+					}
+				} else if (d == 20) {
+					sprintf(buf, "0.0");
+				} else if (d == 21) {
+					sprintf(buf, "1.0");
+				} else if (d == 22) {
+					sprintf(buf, "0.5");
+				} else if (d >= 23 && d < 32) {
+					d -= 23;
+					switch (d / 3) {
+					case 0:
+						sprintf(buf, "%s.yzx",
+							srcc[d % 3]);
+						break;
+					case 1:
+						sprintf(buf, "%s.zxy",
+							srcc[d % 3]);
+						break;
+					case 2:
+						sprintf(buf, "%s.Wzy",
+							srcc[d % 3]);
+						break;
+					}
+				} else {
+					sprintf(buf, "%i", d);
+				}
+
+				sprintf(argc[j], "%s%s%s%s",
+					(regc & 32) ? "-" : "",
+					(regc & 64) ? "|" : "",
+					buf, (regc & 64) ? "|" : "");
+
+				d = rega & 31;
+				if (d < 9) {
+					sprintf(buf, "%s.%c", srcc[d / 3],
+						'x' + (char)(d % 3));
+				} else if (d < 12) {
+					sprintf(buf, "%s.w", srca[d - 9]);
+				} else if (d < 16) {
+					switch(d) {
+					case R300_ALU_ARGA_SRCP_X:
+						sprintf(buf, "srcp.x");
+						break;
+					case R300_ALU_ARGA_SRCP_Y:
+						sprintf(buf, "srcp.y");
+						break;
+					case R300_ALU_ARGA_SRCP_Z:
+						sprintf(buf, "srcp.z");
+						break;
+					case R300_ALU_ARGA_SRCP_W:
+						sprintf(buf, "srcp.w");
+						break;
+					}
+				} else if (d == 16) {
+					sprintf(buf, "0.0");
+				} else if (d == 17) {
+					sprintf(buf, "1.0");
+				} else if (d == 18) {
+					sprintf(buf, "0.5");
+				} else {
+					sprintf(buf, "%i", d);
+				}
+
+				sprintf(arga[j], "%s%s%s%s",
+					(rega & 32) ? "-" : "",
+					(rega & 64) ? "|" : "",
+					buf, (rega & 64) ? "|" : "");
+			}
+
+			fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x %s\n"
+				"       w: %8s %8s %8s    op: %08x\n",
+				argc[0], argc[1], argc[2],
+				code->alu.inst[i].rgb_inst,
+				code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
+				"NOP" : "",
+				arga[0], arga[1],arga[2],
+				code->alu.inst[i].alpha_inst);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.h b/src/gallium/drivers/r300/compiler/r300_fragprog.h
new file mode 100644
index 0000000..0c88bab
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+
+extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
+
+extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
new file mode 100644
index 0000000..e6fd1fd
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emit the r300_fragment_program_code that can be understood by the hardware.
+ * Input is a pre-transformed radeon_program.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ */
+
+#include "r300_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+
+
+struct r300_emit_state {
+	struct r300_fragment_program_compiler * compiler;
+
+	unsigned current_node : 2;
+	unsigned node_first_tex : 8;
+	unsigned node_first_alu : 8;
+	uint32_t node_flags;
+};
+
+#define PROG_CODE \
+	struct r300_fragment_program_compiler *c = emit->compiler; \
+	struct r300_fragment_program_code *code = &c->code->code.r300
+
+#define error(fmt, args...) do {			\
+		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
+			__FILE__, __FUNCTION__, ##args);	\
+	} while(0)
+
+static unsigned int get_msbs_alu(unsigned int bits)
+{
+	return (bits >> 6) & 0x7;
+}
+
+/**
+ * @param lsbs The number of least significant bits
+ */
+static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
+{
+	return (bits >> lsbs) & 0x15;
+}
+
+#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask)
+
+/**
+ * Mark a temporary register as used.
+ */
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
+{
+	if (index > code->pixsize)
+		code->pixsize = index;
+}
+
+static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
+{
+	if (!src.Used)
+		return 0;
+
+	if (src.File == RC_FILE_CONSTANT) {
+		return src.Index | (1 << 5);
+	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
+		use_temporary(code, src.Index);
+		return src.Index & 0x1f;
+	}
+
+	return 0;
+}
+
+
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+	case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
+	case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
+	case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
+	case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
+	default:
+		error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
+	case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
+	case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
+	case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+	}
+}
+
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
+	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
+	default:
+		error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+	}
+}
+
+/**
+ * Emit one paired ALU instruction.
+ */
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
+{
+	int ip;
+	int j;
+	PROG_CODE;
+
+	if (code->alu.length >= c->Base.max_alu_insts) {
+		error("Too many ALU instructions");
+		return 0;
+	}
+
+	ip = code->alu.length++;
+
+	code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+	code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
+
+	for(j = 0; j < 3; ++j) {
+		/* Set the RGB address */
+		unsigned int src = use_source(code, inst->RGB.Src[j]);
+		unsigned int arg;
+		if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
+
+		code->alu.inst[ip].rgb_addr |= src << (6*j);
+
+		/* Set the Alpha address */
+		src = use_source(code, inst->Alpha.Src[j]);
+		if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
+
+		code->alu.inst[ip].alpha_addr |= src << (6*j);
+
+		arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+		arg |= inst->RGB.Arg[j].Abs << 6;
+		arg |= inst->RGB.Arg[j].Negate << 5;
+		code->alu.inst[ip].rgb_inst |= arg << (7*j);
+
+		arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
+		arg |= inst->Alpha.Arg[j].Abs << 6;
+		arg |= inst->Alpha.Arg[j].Negate << 5;
+		code->alu.inst[ip].alpha_inst |= arg << (7*j);
+	}
+
+	/* Presubtract */
+	if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+		case RC_PRESUB_BIAS:
+			code->alu.inst[ip].rgb_inst |=
+						R300_ALU_SRCP_1_MINUS_2_SRC0;
+			break;
+		case RC_PRESUB_ADD:
+			code->alu.inst[ip].rgb_inst |=
+						R300_ALU_SRCP_SRC1_PLUS_SRC0;
+			break;
+		case RC_PRESUB_SUB:
+			code->alu.inst[ip].rgb_inst |=
+						R300_ALU_SRCP_SRC1_MINUS_SRC0;
+			break;
+		case RC_PRESUB_INV:
+			code->alu.inst[ip].rgb_inst |=
+						R300_ALU_SRCP_1_MINUS_SRC0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+		switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+		case RC_PRESUB_BIAS:
+			code->alu.inst[ip].alpha_inst |=
+						R300_ALU_SRCP_1_MINUS_2_SRC0;
+			break;
+		case RC_PRESUB_ADD:
+			code->alu.inst[ip].alpha_inst |=
+						R300_ALU_SRCP_SRC1_PLUS_SRC0;
+			break;
+		case RC_PRESUB_SUB:
+			code->alu.inst[ip].alpha_inst |=
+						R300_ALU_SRCP_SRC1_MINUS_SRC0;
+			break;
+		case RC_PRESUB_INV:
+			code->alu.inst[ip].alpha_inst |=
+						R300_ALU_SRCP_1_MINUS_SRC0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (inst->RGB.Saturate)
+		code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
+	if (inst->Alpha.Saturate)
+		code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
+
+	if (inst->RGB.WriteMask) {
+		use_temporary(code, inst->RGB.DestIndex);
+		if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
+		code->alu.inst[ip].rgb_addr |=
+			((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
+			(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
+	}
+	if (inst->RGB.OutputWriteMask) {
+		code->alu.inst[ip].rgb_addr |=
+            (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
+            R300_RGB_TARGET(inst->RGB.Target);
+		emit->node_flags |= R300_RGBA_OUT;
+	}
+
+	if (inst->Alpha.WriteMask) {
+		use_temporary(code, inst->Alpha.DestIndex);
+		if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
+		code->alu.inst[ip].alpha_addr |=
+			((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
+			R300_ALU_DSTA_REG;
+	}
+	if (inst->Alpha.OutputWriteMask) {
+		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
+            R300_ALPHA_TARGET(inst->Alpha.Target);
+		emit->node_flags |= R300_RGBA_OUT;
+	}
+	if (inst->Alpha.DepthWriteMask) {
+		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+		emit->node_flags |= R300_W_OUT;
+		c->code->writes_depth = 1;
+	}
+	if (inst->Nop)
+		code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
+
+	return 1;
+}
+
+
+/**
+ * Finish the current node without advancing to the next one.
+ */
+static int finish_node(struct r300_emit_state * emit)
+{
+	struct r300_fragment_program_compiler * c = emit->compiler;
+	struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
+	unsigned alu_offset;
+	unsigned alu_end;
+	unsigned tex_offset;
+	unsigned tex_end;
+
+	unsigned int alu_offset_msbs, alu_end_msbs;
+
+	if (code->alu.length == emit->node_first_alu) {
+		/* Generate a single NOP for this node */
+		struct rc_pair_instruction inst;
+		memset(&inst, 0, sizeof(inst));
+		if (!emit_alu(emit, &inst))
+			return 0;
+	}
+
+	alu_offset = emit->node_first_alu;
+	alu_end = code->alu.length - alu_offset - 1;
+	tex_offset = emit->node_first_tex;
+	tex_end = code->tex.length - tex_offset - 1;
+
+	if (code->tex.length == emit->node_first_tex) {
+		if (emit->current_node > 0) {
+			error("Node %i has no TEX instructions", emit->current_node);
+			return 0;
+		}
+
+		tex_end = 0;
+	} else {
+		if (emit->current_node == 0)
+			code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
+	}
+
+	/* Write the config register.
+	 * Note: The order in which the words for each node are written
+	 * is not correct here and needs to be fixed up once we're entirely
+	 * done
+	 *
+	 * Also note that the register specification from AMD is slightly
+	 * incorrect in its description of this register. */
+	code->code_addr[emit->current_node]  =
+			((alu_offset << R300_ALU_START_SHIFT)
+				& R300_ALU_START_MASK)
+			| ((alu_end << R300_ALU_SIZE_SHIFT)
+				& R300_ALU_SIZE_MASK)
+			| ((tex_offset << R300_TEX_START_SHIFT)
+				& R300_TEX_START_MASK)
+			| ((tex_end << R300_TEX_SIZE_SHIFT)
+				& R300_TEX_SIZE_MASK)
+			| emit->node_flags
+			| (get_msbs_tex(tex_offset, 5)
+				<< R400_TEX_START_MSB_SHIFT)
+			| (get_msbs_tex(tex_end, 5)
+				<< R400_TEX_SIZE_MSB_SHIFT)
+			;
+
+	/* Write r400 extended instruction fields.  These will be ignored on
+	 * r300 cards.  */
+	alu_offset_msbs = get_msbs_alu(alu_offset);
+	alu_end_msbs = get_msbs_alu(alu_end);
+	switch(emit->current_node) {
+	case 0:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
+		break;
+	case 1:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
+		break;
+	case 2:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
+		break;
+	case 3:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
+		break;
+	}
+	return 1;
+}
+
+
+/**
+ * Begin a block of texture instructions.
+ * Create the necessary indirection.
+ */
+static int begin_tex(struct r300_emit_state * emit)
+{
+	PROG_CODE;
+
+	if (code->alu.length == emit->node_first_alu &&
+	    code->tex.length == emit->node_first_tex) {
+		return 1;
+	}
+
+	if (emit->current_node == 3) {
+		error("Too many texture indirections");
+		return 0;
+	}
+
+	if (!finish_node(emit))
+		return 0;
+
+	emit->current_node++;
+	emit->node_first_tex = code->tex.length;
+	emit->node_first_alu = code->alu.length;
+	emit->node_flags = 0;
+	return 1;
+}
+
+
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
+{
+	unsigned int unit;
+	unsigned int dest;
+	unsigned int opcode;
+	PROG_CODE;
+
+	if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
+		error("Too many TEX instructions");
+		return 0;
+	}
+
+	unit = inst->U.I.TexSrcUnit;
+	dest = inst->U.I.DstReg.Index;
+
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+	case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+	case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+	case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+	default:
+		error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
+		return 0;
+	}
+
+	if (inst->U.I.Opcode == RC_OPCODE_KIL) {
+		unit = 0;
+		dest = 0;
+	} else {
+		use_temporary(code, dest);
+	}
+
+	use_temporary(code, inst->U.I.SrcReg[0].Index);
+
+	code->tex.inst[code->tex.length++] =
+		((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
+			& R300_SRC_ADDR_MASK)
+		| ((dest << R300_DST_ADDR_SHIFT)
+			& R300_DST_ADDR_MASK)
+		| (unit << R300_TEX_ID_SHIFT)
+		| (opcode << R300_TEX_INST_SHIFT)
+		| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
+			R400_SRC_ADDR_EXT_BIT : 0)
+		| (dest >= R300_PFS_NUM_TEMP_REGS ?
+			R400_DST_ADDR_EXT_BIT : 0)
+		;
+	return 1;
+}
+
+
+/**
+ * Final compilation step: Turn the intermediate radeon_program into
+ * machine-readable instructions.
+ */
+void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
+{
+	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+	struct r300_emit_state emit;
+	struct r300_fragment_program_code *code = &compiler->code->code.r300;
+	unsigned int tex_end;
+
+	memset(&emit, 0, sizeof(emit));
+	emit.compiler = compiler;
+
+	memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+	    inst = inst->Next) {
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+				begin_tex(&emit);
+				continue;
+			}
+
+			emit_tex(&emit, inst);
+		} else {
+			emit_alu(&emit, &inst->U.P);
+		}
+	}
+
+	if (code->pixsize >= compiler->Base.max_temp_regs)
+		rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
+
+	if (compiler->Base.Error)
+		return;
+
+	/* Finish the program */
+	finish_node(&emit);
+
+	code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+
+	/* Set r400 extended instruction fields.  These values will be ignored
+	 * on r300 cards. */
+	code->r400_code_offset_ext |=
+		(get_msbs_alu(0)
+				<< R400_ALU_OFFSET_MSB_SHIFT)
+		| (get_msbs_alu(code->alu.length - 1)
+				<< R400_ALU_SIZE_MSB_SHIFT);
+
+	tex_end = code->tex.length ? code->tex.length - 1 : 0;
+	code->code_offset =
+		((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+			& R300_PFS_CNTL_ALU_OFFSET_MASK)
+		| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
+			& R300_PFS_CNTL_ALU_END_MASK)
+		| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+			& R300_PFS_CNTL_TEX_OFFSET_MASK)
+		| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
+			& R300_PFS_CNTL_TEX_END_MASK)
+		| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
+		| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
+		;
+
+	if (emit.current_node < 3) {
+		int shift = 3 - emit.current_node;
+		int i;
+		for(i = emit.current_node; i >= 0; --i)
+			code->code_addr[shift + i] = code->code_addr[i];
+		for(i = 0; i < shift; ++i)
+			code->code_addr[i] = 0;
+	}
+
+	if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
+	    || code->alu.length > R300_PFS_MAX_ALU_INST
+	    || code->tex.length > R300_PFS_MAX_TEX_INST) {
+
+		code->r390_mode = 1;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
new file mode 100644
index 0000000..b7bca8c
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * Utilities to deal with the somewhat odd restriction on R300 fragment
+ * program swizzles.
+ */
+
+#include "r300_fragprog_swizzle.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+#include "radeon_compiler.h"
+
+#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
+
+struct swizzle_data {
+	unsigned int hash; /**< swizzle value this matches */
+	unsigned int base; /**< base value for hw swizzle */
+	unsigned int stride; /**< difference in base between arg0/1/2 */
+	unsigned int srcp_stride; /**< difference in base between arg0/scrp */
+};
+
+static const struct swizzle_data native_swizzles[] = {
+	{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
+	{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
+	{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
+	{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
+	{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
+	{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
+	{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
+	{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
+	{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
+	{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
+	{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
+};
+
+static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+
+/**
+ * Find a native RGB swizzle that matches the given swizzle.
+ * Returns 0 if none found.
+ */
+static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
+{
+	int i, comp;
+
+	for(i = 0; i < num_native_swizzles; ++i) {
+		const struct swizzle_data* sd = &native_swizzles[i];
+		for(comp = 0; comp < 3; ++comp) {
+			unsigned int swz = GET_SWZ(swizzle, comp);
+			if (swz == RC_SWIZZLE_UNUSED)
+				continue;
+			if (swz != GET_SWZ(sd->hash, comp))
+				break;
+		}
+		if (comp == 3)
+			return sd;
+	}
+
+	return 0;
+}
+
+/**
+ * Determines if the given swizzle is valid for r300/r400.  In most situations
+ * it is better to use r300_swizzle_is_native() which can be accesed via
+ * struct radeon_compiler *c; c->SwizzleCaps->IsNative().
+ */
+int r300_swizzle_is_native_basic(unsigned int swizzle)
+{
+	if(lookup_native_swizzle(swizzle))
+		return 1;
+	else
+		return 0;
+}
+
+/**
+ * Check whether the given instruction supports the swizzle and negate
+ * combinations in the given source register.
+ */
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	const struct swizzle_data* sd;
+	unsigned int relevant;
+	int j;
+
+	if (opcode == RC_OPCODE_KIL ||
+	    opcode == RC_OPCODE_TEX ||
+	    opcode == RC_OPCODE_TXB ||
+	    opcode == RC_OPCODE_TXP) {
+		if (reg.Abs || reg.Negate)
+			return 0;
+
+		for(j = 0; j < 4; ++j) {
+			unsigned int swz = GET_SWZ(reg.Swizzle, j);
+			if (swz == RC_SWIZZLE_UNUSED)
+				continue;
+			if (swz != j)
+				return 0;
+		}
+
+		return 1;
+	}
+
+	relevant = 0;
+
+	for(j = 0; j < 3; ++j)
+		if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
+			relevant |= 1 << j;
+
+	if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+		return 0;
+
+	sd = lookup_native_swizzle(reg.Swizzle);
+	if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0))
+		return 0;
+
+	return 1;
+}
+
+
+static void r300_swizzle_split(
+		struct rc_src_register src, unsigned int mask,
+		struct rc_swizzle_split * split)
+{
+	split->NumPhases = 0;
+
+	while(mask) {
+		unsigned int best_matchcount = 0;
+		unsigned int best_matchmask = 0;
+		int i, comp;
+
+		for(i = 0; i < num_native_swizzles; ++i) {
+			const struct swizzle_data *sd = &native_swizzles[i];
+			unsigned int matchcount = 0;
+			unsigned int matchmask = 0;
+			for(comp = 0; comp < 3; ++comp) {
+				unsigned int swz;
+				if (!GET_BIT(mask, comp))
+					continue;
+				swz = GET_SWZ(src.Swizzle, comp);
+				if (swz == RC_SWIZZLE_UNUSED)
+					continue;
+				if (swz == GET_SWZ(sd->hash, comp)) {
+					/* check if the negate bit of current component
+					 * is the same for already matched components */
+					if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
+						continue;
+
+					matchcount++;
+					matchmask |= 1 << comp;
+				}
+			}
+			if (matchcount > best_matchcount) {
+				best_matchcount = matchcount;
+				best_matchmask = matchmask;
+				if (matchmask == (mask & RC_MASK_XYZ))
+					break;
+			}
+		}
+
+		if (mask & RC_MASK_W)
+			best_matchmask |= RC_MASK_W;
+
+		split->Phase[split->NumPhases++] = best_matchmask;
+		mask &= ~best_matchmask;
+	}
+}
+
+struct rc_swizzle_caps r300_swizzle_caps = {
+	.IsNative = r300_swizzle_is_native,
+	.Split = r300_swizzle_split
+};
+
+
+/**
+ * Translate an RGB (XYZ) swizzle into the hardware code for the given
+ * instruction source.
+ */
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
+{
+	const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
+
+	if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) {
+		fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
+		return 0;
+	}
+
+	if (src == RC_PAIR_PRESUB_SRC) {
+		return sd->base + sd->srcp_stride;
+	} else {
+		return sd->base + src*sd->stride;
+	}
+}
+
+
+/**
+ * Translate an Alpha (W) swizzle into the hardware code for the given
+ * instruction source.
+ */
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
+{
+	unsigned int swz = GET_SWZ(swizzle, 0);
+	if (src == RC_PAIR_PRESUB_SRC) {
+		return R300_ALU_ARGA_SRCP_X + swz;
+	}
+	if (swz < 3)
+		return swz + 3*src;
+
+	switch(swz) {
+	case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
+	case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
+	case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+	case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF;
+	default: return R300_ALU_ARGA_ONE;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h
new file mode 100644
index 0000000..f2635be
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_SWIZZLE_H_
+#define __R300_FRAGPROG_SWIZZLE_H_
+
+#include "radeon_swizzle.h"
+
+extern struct rc_swizzle_caps r300_swizzle_caps;
+
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
+int r300_swizzle_is_native_basic(unsigned int swizzle);
+
+#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
new file mode 100644
index 0000000..bb6c010
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_program_alu.h"
+#include "radeon_program_tex.h"
+#include "radeon_rename_regs.h"
+#include "radeon_remove_constants.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+		void (*callback)(void *, unsigned int, unsigned int))
+{
+	struct r300_fragment_program_compiler * c = userdata;
+	callback(data, c->OutputColor[0], RC_MASK_XYZW);
+	callback(data, c->OutputColor[1], RC_MASK_XYZW);
+	callback(data, c->OutputColor[2], RC_MASK_XYZW);
+	callback(data, c->OutputColor[3], RC_MASK_XYZW);
+	callback(data, c->OutputDepth, RC_MASK_W);
+}
+
+static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
+{
+	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+	struct rc_instruction *rci;
+
+	for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
+		struct rc_sub_instruction * inst = &rci->U.I;
+		unsigned i;
+		const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode);
+
+		if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
+			continue;
+
+		if (inst->DstReg.WriteMask & RC_MASK_Z) {
+			inst->DstReg.WriteMask = RC_MASK_W;
+		} else {
+			inst->DstReg.WriteMask = 0;
+			continue;
+		}
+
+		if (!info->IsComponentwise) {
+			continue;
+		}
+
+		for (i = 0; i < info->NumSrcRegs; i++) {
+			inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]);
+		}
+	}
+}
+
+static int radeon_saturate_output(
+		struct radeon_compiler * c,
+		struct rc_instruction * inst,
+		void* data)
+{
+	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+
+	if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT)
+		return 0;
+
+	inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+	return 1;
+}
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+	int is_r500 = c->Base.is_r500;
+	int opt = !c->Base.disable_optimizations;
+	int sat_out = c->state.frag_clamp;
+
+	/* Lists of instruction transformations. */
+	struct radeon_program_transformation saturate_output[] = {
+		{ &radeon_saturate_output, c },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation rewrite_tex[] = {
+		{ &radeonTransformTEX, c },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation rewrite_if[] = {
+		{ &r500_transform_IF, 0 },
+		{0, 0}
+	};
+
+	struct radeon_program_transformation native_rewrite_r500[] = {
+		{ &radeonTransformALU, 0 },
+		{ &radeonTransformDeriv, 0 },
+		{ &radeonTransformTrigScale, 0 },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation native_rewrite_r300[] = {
+		{ &radeonTransformALU, 0 },
+		{ &r300_transform_trig_simple, 0 },
+		{ 0, 0 }
+	};
+
+	/* List of compiler passes. */
+	struct radeon_compiler_pass fs_list[] = {
+		/* NAME				DUMP PREDICATE	FUNCTION			PARAM */
+		{"rewrite depth out",		1, 1,		rc_rewrite_depth_out,		NULL},
+		/* This transformation needs to be done before any of the IF
+		 * instructions are modified. */
+		{"transform KILP",		1, 1,		rc_transform_KILP,		NULL},
+		{"unroll loops",		1, is_r500,	rc_unroll_loops,		NULL},
+		{"transform loops",		1, !is_r500,	rc_transform_loops,		NULL},
+		{"emulate branches",		1, !is_r500,	rc_emulate_branches,		NULL},
+		{"saturate output writes",	1, sat_out,	rc_local_transform,		saturate_output},
+		{"transform TEX",		1, 1,		rc_local_transform,		rewrite_tex},
+		{"transform IF",		1, is_r500,	rc_local_transform,		rewrite_if},
+		{"native rewrite",		1, is_r500,	rc_local_transform,		native_rewrite_r500},
+		{"native rewrite",		1, !is_r500,	rc_local_transform,		native_rewrite_r300},
+		{"deadcode",			1, opt,		rc_dataflow_deadcode,		dataflow_outputs_mark_use},
+		{"emulate loops",		1, !is_r500,	rc_emulate_loops,		NULL},
+		{"dataflow optimize",		1, opt,		rc_optimize,			NULL},
+		{"dataflow swizzles",		1, 1,		rc_dataflow_swizzles,		NULL},
+		{"dead constants",		1, 1,		rc_remove_unused_constants,	&c->code->constants_remap_table},
+		/* This pass makes it easier for the scheduler to group TEX
+		 * instructions and reduces the chances of creating too
+		 * many texture indirections.*/
+		{"register rename",		1, !is_r500,	rc_rename_regs,			NULL},
+		{"pair translate",		1, 1,		rc_pair_translate,		NULL},
+		{"pair scheduling",		1, 1,		rc_pair_schedule,		NULL},
+		{"dead sources",		1, 1,		rc_pair_remove_dead_sources, NULL},
+		{"register allocation",		1, 1,		rc_pair_regalloc,		&opt},
+		{"final code validation",	0, 1,		rc_validate_final_shader,	NULL},
+		{"machine code generation",	0, is_r500,	r500BuildFragmentProgramHwCode,	NULL},
+		{"machine code generation",	0, !is_r500,	r300BuildFragmentProgramHwCode,	NULL},
+		{"dump machine code",		0, is_r500  && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL},
+		{"dump machine code",		0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL},
+		{NULL, 0, 0, NULL, NULL}
+	};
+
+	c->Base.type = RC_FRAGMENT_PROGRAM;
+	c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
+
+	rc_run_compiler(&c->Base, fs_list);
+
+	rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
new file mode 100644
index 0000000..654f9a0
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -0,0 +1,1045 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program_alu.h"
+#include "radeon_swizzle.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_remove_constants.h"
+
+struct loop {
+	int BgnLoop;
+
+};
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y)	\
+	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_src_class(vpi->SrcReg[x].File), \
+			   RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+
+
+static unsigned long t_dst_mask(unsigned int mask)
+{
+	/* RC_MASK_* is equivalent to VSF_FLAG_* */
+	return mask & RC_MASK_XYZW;
+}
+
+static unsigned long t_dst_class(rc_register_file file)
+{
+	switch (file) {
+	default:
+		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+		/* fall-through */
+	case RC_FILE_TEMPORARY:
+		return PVS_DST_REG_TEMPORARY;
+	case RC_FILE_OUTPUT:
+		return PVS_DST_REG_OUT;
+	case RC_FILE_ADDRESS:
+		return PVS_DST_REG_A0;
+	}
+}
+
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+				 struct rc_dst_register *dst)
+{
+	if (dst->File == RC_FILE_OUTPUT)
+		return vp->outputs[dst->Index];
+
+	return dst->Index;
+}
+
+static unsigned long t_src_class(rc_register_file file)
+{
+	switch (file) {
+	default:
+		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+		/* fall-through */
+	case RC_FILE_NONE:
+	case RC_FILE_TEMPORARY:
+		return PVS_SRC_REG_TEMPORARY;
+	case RC_FILE_INPUT:
+		return PVS_SRC_REG_INPUT;
+	case RC_FILE_CONSTANT:
+		return PVS_SRC_REG_CONSTANT;
+	}
+}
+
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
+{
+	unsigned long aclass = t_src_class(a.File);
+	unsigned long bclass = t_src_class(b.File);
+
+	if (aclass != bclass)
+		return 0;
+	if (aclass == PVS_SRC_REG_TEMPORARY)
+		return 0;
+
+	if (a.RelAddr || b.RelAddr)
+		return 1;
+	if (a.Index != b.Index)
+		return 1;
+
+	return 0;
+}
+
+static inline unsigned long t_swizzle(unsigned int swizzle)
+{
+	/* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+	return swizzle;
+}
+
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+				 struct rc_src_register *src)
+{
+	if (src->File == RC_FILE_INPUT) {
+		assert(vp->inputs[src->Index] != -1);
+		return vp->inputs[src->Index];
+	} else {
+		if (src->Index < 0) {
+			fprintf(stderr,
+				"negative offsets for indirect addressing do not work.\n");
+			return 0;
+		}
+		return src->Index;
+	}
+}
+
+/* these two functions should probably be merged... */
+
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+			   struct rc_src_register *src)
+{
+	/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+	 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+	 */
+	return PVS_SRC_OPERAND(t_src_index(vp, src),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 1)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 2)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 3)),
+			       t_src_class(src->File),
+			       src->Negate) |
+	       (src->RelAddr << 4) | (src->Abs << 3);
+}
+
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+				  struct rc_src_register *src)
+{
+	/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+	 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+	 */
+	return PVS_SRC_OPERAND(t_src_index(vp, src),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_src_class(src->File),
+			       src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	       (src->RelAddr << 4) | (src->Abs << 3);
+}
+
+static int valid_dst(struct r300_vertex_program_code *vp,
+			   struct rc_dst_register *dst)
+{
+	if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
+		return 0;
+	} else if (dst->File == RC_FILE_ADDRESS) {
+		assert(dst->Index == 0);
+	}
+
+	return 1;
+}
+
+static void ei_vector1(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     0,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+static void ei_vector2(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     0,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = t_src(vp, &vpi->SrcReg[1]);
+	inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
+}
+
+static void ei_math1(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+static void ei_lit(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+	inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	/* NOTE: Users swizzling might not work. */
+	inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+	inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+	inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+}
+
+static void ei_mad(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	unsigned int i;
+	/* Remarks about hardware limitations of MAD
+	 * (please preserve this comment, as this information is _NOT_
+	 * in the documentation provided by AMD).
+	 *
+	 * As described in the documentation, MAD with three unique temporary
+	 * source registers requires the use of the macro version.
+	 *
+	 * However (and this is not mentioned in the documentation), apparently
+	 * the macro version is _NOT_ a full superset of the normal version.
+	 * In particular, the macro version does not always work when relative
+	 * addressing is used in the source operands.
+	 *
+	 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+	 * assembly shader path when using medium quality animations
+	 * (i.e. animations with matrix blending instead of quaternion blending).
+	 *
+	 * Unfortunately, I (nha) have been unable to extract a Piglit regression
+	 * test for this issue - for some reason, it is possible to have vertex
+	 * programs whose prefix is *exactly* the same as the prefix of the
+	 * offending program in Sauerbraten up to the offending instruction
+	 * without causing any trouble.
+	 *
+	 * Bottom line: Only use the macro version only when really necessary;
+	 * according to AMD docs, this should improve performance by one clock
+	 * as a nice side bonus.
+	 */
+	if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+	    vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+	    vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+		inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+				0,
+				1,
+				t_dst_index(vp, &vpi->DstReg),
+				t_dst_mask(vpi->DstReg.WriteMask),
+				t_dst_class(vpi->DstReg.File));
+	} else {
+		inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+				0,
+				0,
+				t_dst_index(vp, &vpi->DstReg),
+				t_dst_mask(vpi->DstReg.WriteMask),
+				t_dst_class(vpi->DstReg.File));
+
+		/* Arguments with constant swizzles still count as a unique
+		 * temporary, so we should make sure these arguments share a
+		 * register index with one of the other arguments. */
+		for (i = 0; i < 3; i++) {
+			unsigned int j;
+			if (vpi->SrcReg[i].File != RC_FILE_NONE)
+				continue;
+
+			for (j = 0; j < 3; j++) {
+				if (i != j) {
+					vpi->SrcReg[i].Index =
+						vpi->SrcReg[j].Index;
+					break;
+				}
+			}
+		}
+	}
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = t_src(vp, &vpi->SrcReg[1]);
+	inst[3] = t_src(vp, &vpi->SrcReg[2]);
+}
+
+static void ei_pow(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
+}
+
+static void mark_write(void * userdata,	struct rc_instruction * inst,
+		rc_register_file file,	unsigned int index, unsigned int mask)
+{
+	unsigned int * writemasks = userdata;
+
+	if (file != RC_FILE_TEMPORARY)
+		return;
+
+	if (index >= R300_VS_MAX_TEMPS)
+		return;
+
+	writemasks[index] |= mask;
+}
+
+static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
+{
+	return PVS_SRC_OPERAND(compiler->PredicateIndex,
+		t_swizzle(RC_SWIZZLE_ZERO),
+		t_swizzle(RC_SWIZZLE_ZERO),
+		t_swizzle(RC_SWIZZLE_ZERO),
+		t_swizzle(RC_SWIZZLE_W),
+		t_src_class(RC_FILE_TEMPORARY),
+		0);
+}
+
+static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
+					unsigned int hw_opcode, int is_math)
+{
+	return PVS_OP_DST_OPERAND(hw_opcode,
+	     is_math,
+	     0,
+	     compiler->PredicateIndex,
+	     RC_MASK_W,
+	     t_dst_class(RC_FILE_TEMPORARY));
+
+}
+
+static void ei_if(struct r300_vertex_program_compiler * compiler,
+					struct rc_instruction *rci,
+					unsigned int * inst,
+					unsigned int branch_depth)
+{
+	unsigned int predicate_opcode;
+	int is_math = 0;
+
+	if (!compiler->Base.is_r500) {
+		rc_error(&compiler->Base,"Opcode IF not supported\n");
+		return;
+	}
+
+	/* Reserve a temporary to use as our predicate stack counter, if we
+	 * don't already have one. */
+	if (!compiler->PredicateMask) {
+		unsigned int writemasks[RC_REGISTER_MAX_INDEX];
+		struct rc_instruction * inst;
+		unsigned int i;
+		memset(writemasks, 0, sizeof(writemasks));
+		for(inst = compiler->Base.Program.Instructions.Next;
+				inst != &compiler->Base.Program.Instructions;
+							inst = inst->Next) {
+			rc_for_all_writes_mask(inst, mark_write, writemasks);
+		}
+		for(i = 0; i < compiler->Base.max_temp_regs; i++) {
+			unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
+			/* Only the W component can be used fo the predicate
+			 * stack counter. */
+			if (mask & RC_MASK_W) {
+				compiler->PredicateMask = RC_MASK_W;
+				compiler->PredicateIndex = i;
+				break;
+			}
+		}
+		if (i == compiler->Base.max_temp_regs) {
+			rc_error(&compiler->Base, "No free temporary to use for"
+					" predicate stack counter.\n");
+			return;
+		}
+	}
+	predicate_opcode =
+			branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
+
+	rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
+	if (branch_depth == 0) {
+		is_math = 1;
+		predicate_opcode = ME_PRED_SET_NEQ;
+		inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+		inst[2] = 0;
+	} else {
+		predicate_opcode = VE_PRED_SET_NEQ_PUSH;
+		inst[1] = t_pred_src(compiler);
+		inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+	}
+
+	inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
+	inst[3] = 0;
+
+}
+
+static void ei_else(struct r300_vertex_program_compiler * compiler,
+							unsigned int * inst)
+{
+	if (!compiler->Base.is_r500) {
+		rc_error(&compiler->Base,"Opcode ELSE not supported\n");
+		return;
+	}
+	inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
+	inst[1] = t_pred_src(compiler);
+	inst[2] = 0;
+	inst[3] = 0;
+}
+
+static void ei_endif(struct r300_vertex_program_compiler *compiler,
+							unsigned int * inst)
+{
+	if (!compiler->Base.is_r500) {
+		rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
+		return;
+	}
+	inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+	inst[1] = t_pred_src(compiler);
+	inst[2] = 0;
+	inst[3] = 0;
+}
+
+static void translate_vertex_program(struct radeon_compiler *c, void *user)
+{
+	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
+	struct rc_instruction *rci;
+
+	struct loop * loops = NULL;
+	int current_loop_depth = 0;
+	int loops_reserved = 0;
+
+	unsigned int branch_depth = 0;
+
+	compiler->code->pos_end = 0;	/* Not supported yet */
+	compiler->code->length = 0;
+	compiler->code->num_temporaries = 0;
+
+	compiler->SetHwInputOutput(compiler);
+
+	for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+		struct rc_sub_instruction *vpi = &rci->U.I;
+		unsigned int *inst = compiler->code->body.d + compiler->code->length;
+		const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);
+
+		/* Skip instructions writing to non-existing destination */
+		if (!valid_dst(compiler->code, &vpi->DstReg))
+			continue;
+
+		if (info->HasDstReg) {
+			/* Neither is Saturate. */
+			if (vpi->SaturateMode != RC_SATURATE_NONE) {
+				rc_error(&compiler->Base, "Vertex program does not support the Saturate "
+					 "modifier (yet).\n");
+			}
+		}
+
+		if (compiler->code->length >= c->max_alu_insts * 4) {
+			rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+			return;
+		}
+
+		assert(compiler->Base.is_r500 ||
+		       (vpi->Opcode != RC_OPCODE_SEQ &&
+			vpi->Opcode != RC_OPCODE_SNE));
+
+		switch (vpi->Opcode) {
+		case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
+		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+		case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
+		case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
+		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+		case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+		case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+		case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
+		case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+		case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+		case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+		case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+		case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+		case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+		case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+		case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+		case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+		case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+		case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+		case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
+		case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+		case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
+		case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+		case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+		case RC_OPCODE_BGNLOOP:
+		{
+			struct loop * l;
+
+			if ((!compiler->Base.is_r500
+				&& loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+				|| loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+				rc_error(&compiler->Base,
+						"Loops are nested too deep.");
+				return;
+			}
+			memory_pool_array_reserve(&compiler->Base.Pool,
+					struct loop, loops, current_loop_depth,
+					loops_reserved, 1);
+			l = &loops[current_loop_depth++];
+			memset(l , 0, sizeof(struct loop));
+			l->BgnLoop = (compiler->code->length / 4);
+			continue;
+		}
+		case RC_OPCODE_ENDLOOP:
+		{
+			struct loop * l;
+			unsigned int act_addr;
+			unsigned int last_addr;
+			unsigned int ret_addr;
+
+			assert(loops);
+			l = &loops[current_loop_depth - 1];
+			act_addr = l->BgnLoop - 1;
+			last_addr = (compiler->code->length / 4) - 1;
+			ret_addr = l->BgnLoop;
+
+			if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+				rc_error(&compiler->Base,
+					"Too many flow control instructions.");
+				return;
+			}
+			if (compiler->Base.is_r500) {
+				compiler->code->fc_op_addrs.r500
+					[compiler->code->num_fc_ops].lw =
+					R500_PVS_FC_ACT_ADRS(act_addr)
+					| R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+					;
+				compiler->code->fc_op_addrs.r500
+					[compiler->code->num_fc_ops].uw =
+					R500_PVS_FC_LAST_INST(last_addr)
+					| R500_PVS_FC_RTN_INST(ret_addr)
+					;
+			} else {
+				compiler->code->fc_op_addrs.r300
+					[compiler->code->num_fc_ops] =
+					R300_PVS_FC_ACT_ADRS(act_addr)
+					| R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+					| R300_PVS_FC_LAST_INST(last_addr)
+					| R300_PVS_FC_RTN_INST(ret_addr)
+					;
+			}
+			compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+				R300_PVS_FC_LOOP_INIT_VAL(0x0)
+				| R300_PVS_FC_LOOP_STEP_VAL(0x1)
+				;
+			compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+						compiler->code->num_fc_ops);
+			compiler->code->num_fc_ops++;
+			current_loop_depth--;
+			continue;
+		}
+
+		default:
+			rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
+			return;
+		}
+
+		/* Non-flow control instructions that are inside an if statement
+		 * need to pay attention to the predicate bit. */
+		if (branch_depth
+			&& vpi->Opcode != RC_OPCODE_IF
+			&& vpi->Opcode != RC_OPCODE_ELSE
+			&& vpi->Opcode != RC_OPCODE_ENDIF) {
+
+			inst[0] |= (PVS_DST_PRED_ENABLE_MASK
+						<< PVS_DST_PRED_ENABLE_SHIFT);
+			inst[0] |= (PVS_DST_PRED_SENSE_MASK
+						<< PVS_DST_PRED_SENSE_SHIFT);
+		}
+
+		/* Update the number of temporaries. */
+		if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
+		    vpi->DstReg.Index >= compiler->code->num_temporaries)
+			compiler->code->num_temporaries = vpi->DstReg.Index + 1;
+
+		for (unsigned i = 0; i < info->NumSrcRegs; i++)
+			if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
+			    vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
+				compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
+
+		if (compiler->PredicateMask)
+			if (compiler->PredicateIndex >= compiler->code->num_temporaries)
+				compiler->code->num_temporaries = compiler->PredicateIndex + 1;
+
+		if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
+			rc_error(&compiler->Base, "Too many temporaries.\n");
+			return;
+		}
+
+		compiler->code->length += 4;
+
+		if (compiler->Base.Error)
+			return;
+	}
+}
+
+struct temporary_allocation {
+	unsigned int Allocated:1;
+	unsigned int HwTemp:15;
+	struct rc_instruction * LastRead;
+};
+
+static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
+{
+	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
+	struct rc_instruction *inst;
+	struct rc_instruction *end_loop = NULL;
+	unsigned int num_orig_temps = 0;
+	char hwtemps[RC_REGISTER_MAX_INDEX];
+	struct temporary_allocation * ta;
+	unsigned int i, j;
+
+	memset(hwtemps, 0, sizeof(hwtemps));
+
+	rc_recompute_ips(c);
+
+	/* Pass 1: Count original temporaries. */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+				if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+					num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
+			}
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+				if (inst->U.I.DstReg.Index >= num_orig_temps)
+					num_orig_temps = inst->U.I.DstReg.Index + 1;
+			}
+		}
+	}
+
+	ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+			sizeof(struct temporary_allocation) * num_orig_temps);
+	memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+	/* Pass 2: Determine original temporary lifetimes */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		/* Instructions inside of loops need to use the ENDLOOP
+		 * instruction as their LastRead. */
+		if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+			int endloops = 1;
+			struct rc_instruction * ptr;
+			for(ptr = inst->Next;
+				ptr != &compiler->Base.Program.Instructions;
+							ptr = ptr->Next){
+				if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+					endloops++;
+				} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+					endloops--;
+					if (endloops <= 0) {
+						end_loop = ptr;
+						break;
+					}
+				}
+			}
+		}
+
+		if (inst == end_loop) {
+			end_loop = NULL;
+			continue;
+		}
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+				ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst;
+			}
+		}
+	}
+
+	/* Pass 3: Register allocation */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+				unsigned int orig = inst->U.I.SrcReg[i].Index;
+				inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+
+				if (ta[orig].Allocated && inst == ta[orig].LastRead)
+					hwtemps[ta[orig].HwTemp] = 0;
+			}
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+				unsigned int orig = inst->U.I.DstReg.Index;
+
+				if (!ta[orig].Allocated) {
+					for(j = 0; j < c->max_temp_regs; ++j) {
+						if (!hwtemps[j])
+							break;
+					}
+					ta[orig].Allocated = 1;
+					ta[orig].HwTemp = j;
+					hwtemps[ta[orig].HwTemp] = 1;
+				}
+
+				inst->U.I.DstReg.Index = ta[orig].HwTemp;
+			}
+		}
+	}
+}
+
+/**
+ * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
+ * and the Saturate opcode modifier. Only Absolute is currently transformed.
+ */
+static int transform_nonnative_modifiers(
+	struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void* unused)
+{
+	const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned i;
+
+	/* Transform ABS(a) to MAX(a, -a). */
+	for (i = 0; i < opcode->NumSrcRegs; i++) {
+		if (inst->U.I.SrcReg[i].Abs) {
+			struct rc_instruction *new_inst;
+			unsigned temp;
+
+			inst->U.I.SrcReg[i].Abs = 0;
+
+			temp = rc_find_free_temporary(c);
+
+			new_inst = rc_insert_new_instruction(c, inst->Prev);
+			new_inst->U.I.Opcode = RC_OPCODE_MAX;
+			new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			new_inst->U.I.DstReg.Index = temp;
+			new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
+			new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
+			new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+
+			memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
+			inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+			inst->U.I.SrcReg[i].Index = temp;
+			inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
+		}
+	}
+	return 1;
+}
+
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ */
+static int transform_source_conflicts(
+	struct radeon_compiler *c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+	if (opcode->NumSrcRegs == 3) {
+		if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+		    || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
+			int tmpreg = rc_find_free_temporary(c);
+			struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = tmpreg;
+			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+
+			reset_srcreg(&inst->U.I.SrcReg[2]);
+			inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+			inst->U.I.SrcReg[2].Index = tmpreg;
+		}
+	}
+
+	if (opcode->NumSrcRegs >= 2) {
+		if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
+			int tmpreg = rc_find_free_temporary(c);
+			struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = tmpreg;
+			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+
+			reset_srcreg(&inst->U.I.SrcReg[1]);
+			inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+			inst->U.I.SrcReg[1].Index = tmpreg;
+		}
+	}
+
+	return 1;
+}
+
+static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user)
+{
+	struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c;
+	int i;
+
+	for(i = 0; i < 32; ++i) {
+		if ((compiler->RequiredOutputs & (1 << i)) &&
+		    !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+			struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+
+			inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+			inst->U.I.DstReg.Index = i;
+			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+			inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+			inst->U.I.SrcReg[0].Index = 0;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+			compiler->Base.Program.OutputsWritten |= 1 << i;
+		}
+	}
+}
+
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+		void (*callback)(void *, unsigned int, unsigned int))
+{
+	struct r300_vertex_program_compiler * c = userdata;
+	int i;
+
+	for(i = 0; i < 32; ++i) {
+		if (c->RequiredOutputs & (1 << i))
+			callback(data, i, RC_MASK_XYZW);
+	}
+}
+
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	(void) opcode;
+	(void) reg;
+
+	return 1;
+}
+
+static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
+					  struct rc_instruction *arl,
+					  struct rc_instruction *end,
+					  int min_offset)
+{
+	struct rc_instruction *inst, *add;
+	unsigned const_swizzle;
+
+	/* Transform ARL */
+	add = rc_insert_new_instruction(&c->Base, arl->Prev);
+	add->U.I.Opcode = RC_OPCODE_ADD;
+	add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
+	add->U.I.DstReg.WriteMask = RC_MASK_X;
+	add->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
+	add->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+	add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
+								     min_offset, &const_swizzle);
+	add->U.I.SrcReg[1].Swizzle = const_swizzle;
+
+	arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index;
+	arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;
+
+	/* Rewrite offsets up to and excluding inst. */
+	for (inst = arl->Next; inst != end; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (unsigned i = 0; i < opcode->NumSrcRegs; i++)
+			if (inst->U.I.SrcReg[i].RelAddr)
+				inst->U.I.SrcReg[i].Index -= min_offset;
+	}
+}
+
+static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user)
+{
+	struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler;
+	struct rc_instruction *inst, *lastARL = NULL;
+	int min_offset = 0;
+
+	for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (inst->U.I.Opcode == RC_OPCODE_ARL) {
+			if (lastARL != NULL && min_offset < 0)
+				transform_negative_addressing(c, lastARL, inst, min_offset);
+
+			lastARL = inst;
+			min_offset = 0;
+			continue;
+		}
+
+		for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+			if (inst->U.I.SrcReg[i].RelAddr &&
+			    inst->U.I.SrcReg[i].Index < 0) {
+				/* ARL must precede any indirect addressing. */
+				if (lastARL == NULL) {
+					rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
+					return;
+				}
+
+				if (inst->U.I.SrcReg[i].Index < min_offset)
+					min_offset = inst->U.I.SrcReg[i].Index;
+			}
+		}
+	}
+
+	if (lastARL != NULL && min_offset < 0)
+		transform_negative_addressing(c, lastARL, inst, min_offset);
+}
+
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+	.IsNative = &swizzle_is_native,
+	.Split = 0 /* should never be called */
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
+{
+	int is_r500 = c->Base.is_r500;
+	int opt = !c->Base.disable_optimizations;
+
+	/* Lists of instruction transformations. */
+	struct radeon_program_transformation alu_rewrite_r500[] = {
+		{ &r300_transform_vertex_alu, 0 },
+		{ &r300_transform_trig_scale_vertex, 0 },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation alu_rewrite_r300[] = {
+		{ &r300_transform_vertex_alu, 0 },
+		{ &r300_transform_trig_simple, 0 },
+		{ 0, 0 }
+	};
+
+	/* Note: These passes have to be done seperately from ALU rewrite,
+	 * otherwise non-native ALU instructions with source conflits
+	 * or non-native modifiers will not be treated properly.
+	 */
+	struct radeon_program_transformation emulate_modifiers[] = {
+		{ &transform_nonnative_modifiers, 0 },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation resolve_src_conflicts[] = {
+		{ &transform_source_conflicts, 0 },
+		{ 0, 0 }
+	};
+
+	/* List of compiler passes. */
+	struct radeon_compiler_pass vs_list[] = {
+		/* NAME				DUMP PREDICATE	FUNCTION			PARAM */
+		{"add artificial outputs",	0, 1,		rc_vs_add_artificial_outputs,	NULL},
+		{"transform loops",		1, 1,		rc_transform_loops,		NULL},
+		{"emulate branches",		1, !is_r500,	rc_emulate_branches,		NULL},
+		{"emulate negative addressing", 1, 1,		rc_emulate_negative_addressing,	NULL},
+		{"native rewrite",		1, is_r500,	rc_local_transform,		alu_rewrite_r500},
+		{"native rewrite",		1, !is_r500,	rc_local_transform,		alu_rewrite_r300},
+		{"emulate modifiers",		1, !is_r500,	rc_local_transform,		emulate_modifiers},
+		{"deadcode",			1, opt,		rc_dataflow_deadcode,		dataflow_outputs_mark_used},
+		{"dataflow optimize",		1, opt,		rc_optimize,			NULL},
+		/* This pass must be done after optimizations. */
+		{"source conflict resolve",	1, 1,		rc_local_transform,		resolve_src_conflicts},
+		{"register allocation",		1, opt,		allocate_temporary_registers,	NULL},
+		{"dead constants",		1, 1,		rc_remove_unused_constants,	&c->code->constants_remap_table},
+		{"final code validation",	0, 1,		rc_validate_final_shader,	NULL},
+		{"machine code generation",	0, 1,		translate_vertex_program,	NULL},
+		{"dump machine code",		0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump,	NULL},
+		{NULL, 0, 0, NULL, NULL}
+	};
+
+	c->Base.type = RC_VERTEX_PROGRAM;
+	c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
+	rc_run_compiler(&c->Base, vs_list);
+
+	c->code->InputsRead = c->Base.Program.InputsRead;
+	c->code->OutputsWritten = c->Base.Program.OutputsWritten;
+	rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644
index 0000000..2bc0a87
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+#include "radeon_code.h"
+#include "../r300_reg.h"
+
+#include <stdio.h>
+
+static char* r300_vs_ve_ops[] = {
+	/* R300 vector ops */
+	"                 VE_NO_OP",
+	"           VE_DOT_PRODUCT",
+	"              VE_MULTIPLY",
+	"                   VE_ADD",
+	"          VE_MULTIPLY_ADD",
+	"       VE_DISTANCE_FACTOR",
+	"              VE_FRACTION",
+	"               VE_MAXIMUM",
+	"               VE_MINIMUM",
+	"VE_SET_GREATER_THAN_EQUAL",
+	"         VE_SET_LESS_THAN",
+	"        VE_MULTIPLYX2_ADD",
+	"        VE_MULTIPLY_CLAMP",
+	"            VE_FLT2FIX_DX",
+	"        VE_FLT2FIX_DX_RND",
+	/* R500 vector ops */
+	"      VE_PRED_SET_EQ_PUSH",
+	"      VE_PRED_SET_GT_PUSH",
+	"     VE_PRED_SET_GTE_PUSH",
+	"     VE_PRED_SET_NEQ_PUSH",
+	"         VE_COND_WRITE_EQ",
+	"         VE_COND_WRITE_GT",
+	"        VE_COND_WRITE_GTE",
+	"        VE_COND_WRITE_NEQ",
+	"           VE_COND_MUX_EQ",
+	"           VE_COND_MUX_GT",
+	"          VE_COND_MUX_GTE",
+	"      VE_SET_GREATER_THAN",
+	"             VE_SET_EQUAL",
+	"         VE_SET_NOT_EQUAL",
+	"               (reserved)",
+	"               (reserved)",
+	"               (reserved)",
+};
+
+static char* r300_vs_me_ops[] = {
+	/* R300 math ops */
+	"                 ME_NO_OP",
+	"          ME_EXP_BASE2_DX",
+	"          ME_LOG_BASE2_DX",
+	"          ME_EXP_BASEE_FF",
+	"        ME_LIGHT_COEFF_DX",
+	"         ME_POWER_FUNC_FF",
+	"              ME_RECIP_DX",
+	"              ME_RECIP_FF",
+	"         ME_RECIP_SQRT_DX",
+	"         ME_RECIP_SQRT_FF",
+	"              ME_MULTIPLY",
+	"     ME_EXP_BASE2_FULL_DX",
+	"     ME_LOG_BASE2_FULL_DX",
+	" ME_POWER_FUNC_FF_CLAMP_B",
+	"ME_POWER_FUNC_FF_CLAMP_B1",
+	"ME_POWER_FUNC_FF_CLAMP_01",
+	"                   ME_SIN",
+	"                   ME_COS",
+	/* R500 math ops */
+	"        ME_LOG_BASE2_IEEE",
+	"            ME_RECIP_IEEE",
+	"       ME_RECIP_SQRT_IEEE",
+	"           ME_PRED_SET_EQ",
+	"           ME_PRED_SET_GT",
+	"          ME_PRED_SET_GTE",
+	"          ME_PRED_SET_NEQ",
+	"          ME_PRED_SET_CLR",
+	"          ME_PRED_SET_INV",
+	"          ME_PRED_SET_POP",
+	"      ME_PRED_SET_RESTORE",
+	"               (reserved)",
+	"               (reserved)",
+	"               (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static char* r300_vs_src_debug[] = {
+	"t",
+	"i",
+	"c",
+	"a",
+};
+
+static char* r300_vs_dst_debug[] = {
+	"t",
+	"a0",
+	"o",
+	"ox",
+	"a",
+	"i",
+	"u",
+	"u",
+};
+
+static char* r300_vs_swiz_debug[] = {
+	"X",
+	"Y",
+	"Z",
+	"W",
+	"0",
+	"1",
+	"U",
+	"U",
+};
+
+
+static void r300_vs_op_dump(uint32_t op)
+{
+	fprintf(stderr, " dst: %d%s op: ",
+			(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+	if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
+		fprintf(stderr, "PRED %u",
+				(op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
+	}
+	if (op & 0x80) {
+		if (op & 0x1) {
+			fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
+		} else {
+			fprintf(stderr, "   PVS_MACRO_OP_2CLK_MADD\n");
+		}
+	} else if (op & 0x40) {
+		fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]);
+	} else {
+		fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]);
+	}
+}
+
+static void r300_vs_src_dump(uint32_t src)
+{
+	fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+			(src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
+			src & (1 << 25) ? "-" : " ",
+			r300_vs_swiz_debug[(src >> 13) & 0x7],
+			src & (1 << 26) ? "-" : " ",
+			r300_vs_swiz_debug[(src >> 16) & 0x7],
+			src & (1 << 27) ? "-" : " ",
+			r300_vs_swiz_debug[(src >> 19) & 0x7],
+			src & (1 << 28) ? "-" : " ",
+			r300_vs_swiz_debug[(src >> 22) & 0x7]);
+}
+
+void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
+{
+	struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler;
+	struct r300_vertex_program_code * vs = c->code;
+	unsigned instrcount = vs->length / 4;
+	unsigned i;
+
+	fprintf(stderr, "Final vertex program code:\n");
+
+	for(i = 0; i < instrcount; i++) {
+		unsigned offset = i*4;
+		unsigned src;
+
+		fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]);
+		r300_vs_op_dump(vs->body.d[offset]);
+
+		for(src = 0; src < 3; ++src) {
+			fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]);
+			r300_vs_src_dump(vs->body.d[offset+1+src]);
+		}
+	}
+
+	fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
+	for(i = 0; i < vs->num_fc_ops; i++) {
+		switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
+		case 0: fprintf(stderr, "NOP"); break;
+		case 1: fprintf(stderr, "JUMP"); break;
+		case 2: fprintf(stderr, "LOOP"); break;
+		case 3: fprintf(stderr, "JSR"); break;
+		}
+		if (c->Base.is_r500) {
+			fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+				vs->fc_op_addrs.r500[i].uw,
+				vs->fc_op_addrs.r500[i].lw);
+		} else {
+			fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c
new file mode 100644
index 0000000..cf99f5e
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c
@@ -0,0 +1,539 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
+#include "../r300_reg.h"
+
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ */
+int r500_transform_IF(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_if,
+	void *data)
+{
+	struct rc_variable * writer;
+	struct rc_list * writer_list, * list_ptr;
+	struct rc_list * var_list = rc_get_variables(c);
+	unsigned int generic_if = 0;
+	unsigned int alu_chan;
+
+	if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
+		return 0;
+	}
+
+	writer_list = rc_variable_list_get_writers(
+			var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
+	if (!writer_list) {
+		generic_if = 1;
+	} else {
+
+		/* Make sure it is safe for the writers to write to
+		 * ALU Result */
+		for (list_ptr = writer_list; list_ptr;
+						list_ptr = list_ptr->Next) {
+			struct rc_instruction * inst;
+			writer = list_ptr->Item;
+			/* We are going to modify the destination register
+			 * of writer, so if it has a reader other than
+			 * inst_if (aka ReaderCount > 1) we must fall back to
+			 * our generic IF.
+			 * If the writer has a lower IP than inst_if, this
+			 * means that inst_if is above the writer in a loop.
+			 * I'm not sure why this would ever happen, but
+			 * if it does we want to make sure we fall back
+			 * to our generic IF. */
+			if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
+				generic_if = 1;
+				break;
+			}
+
+			/* The ALU Result is not preserved across IF
+			 * instructions, so if there is another IF
+			 * instruction between writer and inst_if, then
+			 * we need to fall back to generic IF. */
+			for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
+				const struct rc_opcode_info * info =
+					rc_get_opcode_info(inst->U.I.Opcode);
+				if (info->IsFlowControl) {
+					generic_if = 1;
+					break;
+				}
+			}
+			if (generic_if) {
+				break;
+			}
+		}
+	}
+
+	if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
+		alu_chan = RC_ALURESULT_X;
+	} else {
+		alu_chan = RC_ALURESULT_W;
+	}
+	if (generic_if) {
+		struct rc_instruction * inst_mov =
+				rc_insert_new_instruction(c, inst_if->Prev);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.WriteMask = 0;
+		inst_mov->U.I.DstReg.File = RC_FILE_NONE;
+		inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+		inst_mov->U.I.WriteALUResult = alu_chan;
+		inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+		if (alu_chan == RC_ALURESULT_X) {
+			inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+					inst_mov->U.I.SrcReg[0].Swizzle,
+					RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+		} else {
+			inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+					inst_mov->U.I.SrcReg[0].Swizzle,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
+		}
+	} else {
+		rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
+		unsigned int reverse_srcs = 0;
+		unsigned int preserve_opcode = 0;
+		for (list_ptr = writer_list; list_ptr;
+						list_ptr = list_ptr->Next) {
+			writer = list_ptr->Item;
+			switch(writer->Inst->U.I.Opcode) {
+			case RC_OPCODE_SEQ:
+				compare_func = RC_COMPARE_FUNC_EQUAL;
+				break;
+			case RC_OPCODE_SNE:
+				compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+				break;
+			case RC_OPCODE_SLE:
+				reverse_srcs = 1;
+				/* Fall through */
+			case RC_OPCODE_SGE:
+				compare_func = RC_COMPARE_FUNC_GEQUAL;
+				break;
+			case RC_OPCODE_SGT:
+				reverse_srcs = 1;
+				/* Fall through */
+			case RC_OPCODE_SLT:
+				compare_func = RC_COMPARE_FUNC_LESS;
+				break;
+			default:
+				compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+				preserve_opcode = 1;
+				break;
+			}
+			if (!preserve_opcode) {
+				writer->Inst->U.I.Opcode = RC_OPCODE_SUB;
+			}
+			writer->Inst->U.I.DstReg.WriteMask = 0;
+			writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
+			writer->Inst->U.I.WriteALUResult = alu_chan;
+			writer->Inst->U.I.ALUResultCompare = compare_func;
+			if (reverse_srcs) {
+				struct rc_src_register temp_src;
+				temp_src = writer->Inst->U.I.SrcReg[0];
+				writer->Inst->U.I.SrcReg[0] =
+					writer->Inst->U.I.SrcReg[1];
+				writer->Inst->U.I.SrcReg[1] = temp_src;
+			}
+		}
+	}
+
+	inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+	inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+	inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
+				RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+	inst_if->U.I.SrcReg[0].Negate = 0;
+
+	return 1;
+}
+
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	unsigned int relevant;
+	int i;
+
+	if (opcode == RC_OPCODE_TEX ||
+	    opcode == RC_OPCODE_TXB ||
+	    opcode == RC_OPCODE_TXP ||
+	    opcode == RC_OPCODE_TXD ||
+	    opcode == RC_OPCODE_TXL ||
+	    opcode == RC_OPCODE_KIL) {
+		if (reg.Abs)
+			return 0;
+
+		if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+			return 0;
+
+		for(i = 0; i < 4; ++i) {
+			unsigned int swz = GET_SWZ(reg.Swizzle, i);
+			if (swz == RC_SWIZZLE_UNUSED) {
+				reg.Negate &= ~(1 << i);
+				continue;
+			}
+			if (swz >= 4)
+				return 0;
+		}
+
+		if (reg.Negate)
+			return 0;
+
+		return 1;
+	} else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
+		/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
+		 * if it doesn't fit perfectly into a .xyzw case... */
+		if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
+			return 1;
+
+		return 0;
+	} else {
+		/* ALU instructions support almost everything */
+		relevant = 0;
+		for(i = 0; i < 3; ++i) {
+			unsigned int swz = GET_SWZ(reg.Swizzle, i);
+			if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
+				relevant |= 1 << i;
+		}
+		if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+			return 0;
+
+		return 1;
+	}
+}
+
+/**
+ * Split source register access.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation.
+ */
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+		struct rc_swizzle_split * split)
+{
+	unsigned int negatebase[2] = { 0, 0 };
+	int i;
+
+	for(i = 0; i < 4; ++i) {
+		unsigned int swz = GET_SWZ(src.Swizzle, i);
+		if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
+			continue;
+		negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
+	}
+
+	split->NumPhases = 0;
+
+	for(i = 0; i <= 1; ++i) {
+		if (!negatebase[i])
+			continue;
+
+		split->Phase[split->NumPhases++] = negatebase[i];
+	}
+}
+
+struct rc_swizzle_caps r500_swizzle_caps = {
+	.IsNative = r500_swizzle_is_native,
+	.Split = r500_swizzle_split
+};
+
+static char *toswiz(int swiz_val) {
+  switch(swiz_val) {
+  case 0: return "R";
+  case 1: return "G";
+  case 2: return "B";
+  case 3: return "A";
+  case 4: return "0";
+  case 5: return "H";
+  case 6: return "1";
+  case 7: return "U";
+  }
+  return NULL;
+}
+
+static char *toop(int op_val)
+{
+  char *str = NULL;
+  switch (op_val) {
+  case 0: str = "MAD"; break;
+  case 1: str = "DP3"; break;
+  case 2: str = "DP4"; break;
+  case 3: str = "D2A"; break;
+  case 4: str = "MIN"; break;
+  case 5: str = "MAX"; break;
+  case 6: str = "Reserved"; break;
+  case 7: str = "CND"; break;
+  case 8: str = "CMP"; break;
+  case 9: str = "FRC"; break;
+  case 10: str = "SOP"; break;
+  case 11: str = "MDH"; break;
+  case 12: str = "MDV"; break;
+  }
+  return str;
+}
+
+static char *to_alpha_op(int op_val)
+{
+  char *str = NULL;
+  switch (op_val) {
+  case 0: str = "MAD"; break;
+  case 1: str = "DP"; break;
+  case 2: str = "MIN"; break;
+  case 3: str = "MAX"; break;
+  case 4: str = "Reserved"; break;
+  case 5: str = "CND"; break;
+  case 6: str = "CMP"; break;
+  case 7: str = "FRC"; break;
+  case 8: str = "EX2"; break;
+  case 9: str = "LN2"; break;
+  case 10: str = "RCP"; break;
+  case 11: str = "RSQ"; break;
+  case 12: str = "SIN"; break;
+  case 13: str = "COS"; break;
+  case 14: str = "MDH"; break;
+  case 15: str = "MDV"; break;
+  }
+  return str;
+}
+
+static char *to_mask(int val)
+{
+  char *str = NULL;
+  switch(val) {
+  case 0: str = "NONE"; break;
+  case 1: str = "R"; break;
+  case 2: str = "G"; break;
+  case 3: str = "RG"; break;
+  case 4: str = "B"; break;
+  case 5: str = "RB"; break;
+  case 6: str = "GB"; break;
+  case 7: str = "RGB"; break;
+  case 8: str = "A"; break;
+  case 9: str = "AR"; break;
+  case 10: str = "AG"; break;
+  case 11: str = "ARG"; break;
+  case 12: str = "AB"; break;
+  case 13: str = "ARB"; break;
+  case 14: str = "AGB"; break;
+  case 15: str = "ARGB"; break;
+  }
+  return str;
+}
+
+static char *to_texop(int val)
+{
+  switch(val) {
+  case 0: return "NOP";
+  case 1: return "LD";
+  case 2: return "TEXKILL";
+  case 3: return "PROJ";
+  case 4: return "LODBIAS";
+  case 5: return "LOD";
+  case 6: return "DXDY";
+  }
+  return NULL;
+}
+
+void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
+{
+  struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+  struct r500_fragment_program_code *code = &compiler->code->code.r500;
+  int n, i;
+  uint32_t inst;
+  uint32_t inst0;
+  char *str = NULL;
+  fprintf(stderr, "R500 Fragment Program:\n--------\n");
+
+  for (n = 0; n < code->inst_end+1; n++) {
+    inst0 = inst = code->inst[n].inst0;
+    fprintf(stderr,"%d\t0:CMN_INST   0x%08x:", n, inst);
+    switch(inst & 0x3) {
+    case R500_INST_TYPE_ALU: str = "ALU"; break;
+    case R500_INST_TYPE_OUT: str = "OUT"; break;
+    case R500_INST_TYPE_FC: str = "FC"; break;
+    case R500_INST_TYPE_TEX: str = "TEX"; break;
+    };
+    fprintf(stderr,"%s %s %s %s %s ", str,
+	    inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+	    inst & R500_INST_LAST ? "LAST" : "",
+	    inst & R500_INST_NOP ? "NOP" : "",
+	    inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+    fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+	    to_mask((inst >> 15) & 0xf));
+
+    switch(inst0 & 0x3) {
+    case R500_INST_TYPE_ALU:
+    case R500_INST_TYPE_OUT:
+      fprintf(stderr,"\t1:RGB_ADDR   0x%08x:", code->inst[n].inst1);
+      inst = code->inst[n].inst1;
+
+      fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+	      inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+	      (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+	      (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+	      (inst >> 30));
+
+      fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+      inst = code->inst[n].inst2;
+      fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+	      inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+	      (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+	      (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+	      (inst >> 30));
+      fprintf(stderr,"\t3 RGB_INST:  0x%08x:", code->inst[n].inst3);
+      inst = code->inst[n].inst3;
+      fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
+	      (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+	      (inst >> 11) & 0x3,
+	      (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+	      (inst >> 24) & 0x3, (inst >> 29) & 0x3);
+
+
+      fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+      inst = code->inst[n].inst4;
+      fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
+	      (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+	      (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+	      (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+	      (inst >> 29) & 0x3,
+	      (inst >> 31) & 0x1);
+
+      fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+      inst = code->inst[n].inst5;
+      fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+	      (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+	      (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+	      (inst >> 23) & 0x3,
+	      (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+      break;
+    case R500_INST_TYPE_FC:
+      fprintf(stderr, "\t2:FC_INST    0x%08x:", code->inst[n].inst2);
+      inst = code->inst[n].inst2;
+      /* JUMP_FUNC JUMP_ANY*/
+      fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
+          (inst & R500_FC_JUMP_ANY) >> 5);
+      
+      /* OP */
+      switch(inst & 0x7){
+      case R500_FC_OP_JUMP:
+      	fprintf(stderr, "JUMP");
+        break;
+      case R500_FC_OP_LOOP:
+        fprintf(stderr, "LOOP");
+        break;
+      case R500_FC_OP_ENDLOOP:
+        fprintf(stderr, "ENDLOOP");
+        break;
+      case R500_FC_OP_REP:
+        fprintf(stderr, "REP");
+        break;
+      case R500_FC_OP_ENDREP:
+        fprintf(stderr, "ENDREP");
+        break;
+      case R500_FC_OP_BREAKLOOP:
+        fprintf(stderr, "BREAKLOOP");
+        break;
+      case R500_FC_OP_BREAKREP:
+        fprintf(stderr, "BREAKREP");
+	break;
+      case R500_FC_OP_CONTINUE:
+        fprintf(stderr, "CONTINUE");
+        break;
+      }
+      fprintf(stderr," "); 
+      /* A_OP */
+      switch(inst & (0x3 << 6)){
+      case R500_FC_A_OP_NONE:
+        fprintf(stderr, "NONE");
+        break;
+      case R500_FC_A_OP_POP:
+	fprintf(stderr, "POP");
+        break;
+      case R500_FC_A_OP_PUSH:
+        fprintf(stderr, "PUSH");
+        break;
+      }
+      /* B_OP0 B_OP1 */
+      for(i=0; i<2; i++){
+        fprintf(stderr, " ");
+        switch(inst & (0x3 << (24 + (i * 2)))){
+        /* R500_FC_B_OP0_NONE 
+	 * R500_FC_B_OP1_NONE */
+	case 0:
+          fprintf(stderr, "NONE");
+          break;
+        case R500_FC_B_OP0_DECR:
+        case R500_FC_B_OP1_DECR:
+          fprintf(stderr, "DECR");
+          break;
+        case R500_FC_B_OP0_INCR:
+        case R500_FC_B_OP1_INCR:
+          fprintf(stderr, "INCR");
+          break;
+        }
+      }
+      /*POP_CNT B_ELSE */
+      fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
+      inst = code->inst[n].inst3;
+      /* JUMP_ADDR */
+      fprintf(stderr, " %d", inst >> 16);
+      
+      if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
+        fprintf(stderr, " IGN_UNC");
+      }
+      inst = code->inst[n].inst3;
+      fprintf(stderr, "\n\t3:FC_ADDR    0x%08x:", inst);
+      fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
+      inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); 
+      break;
+    case R500_INST_TYPE_TEX:
+      inst = code->inst[n].inst1;
+      fprintf(stderr,"\t1:TEX_INST:  0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+	      to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+	      (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+      inst = code->inst[n].inst2;
+      fprintf(stderr,"\t2:TEX_ADDR:  0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+	      inst & 127, inst & (1<<7) ? "(rel)" : "",
+	      toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+	      toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+	      (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+	      toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+	      toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+      fprintf(stderr,"\t3:TEX_DXDY:  0x%08x\n", code->inst[n].inst3);
+      break;
+    }
+    fprintf(stderr,"\n");
+  }
+
+}
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.h b/src/gallium/drivers/r300/compiler/r500_fragprog.h
new file mode 100644
index 0000000..6aa448c
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R500_FRAGPROG_H_
+#define __R500_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
+
+extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user);
+
+extern struct rc_swizzle_caps r500_swizzle_caps;
+
+extern int r500_transform_IF(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_if,
+	void* data);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
new file mode 100644
index 0000000..c30cd75
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \author Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+
+#define PROG_CODE \
+	struct r500_fragment_program_code *code = &c->code->code.r500
+
+#define error(fmt, args...) do {			\
+		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
+			__FILE__, __FUNCTION__, ##args);	\
+	} while(0)
+
+
+struct branch_info {
+	int If;
+	int Else;
+	int Endif;
+};
+
+struct r500_loop_info {
+	int BgnLoop;
+
+	int BranchDepth;
+	int * Brks;
+	int BrkCount;
+	int BrkReserved;
+
+	int * Conts;
+	int ContCount;
+	int ContReserved;
+};
+
+struct emit_state {
+	struct radeon_compiler * C;
+	struct r500_fragment_program_code * Code;
+
+	struct branch_info * Branches;
+	unsigned int CurrentBranchDepth;
+	unsigned int BranchesReserved;
+
+	struct r500_loop_info * Loops;
+	unsigned int CurrentLoopDepth;
+	unsigned int LoopsReserved;
+
+	unsigned int MaxBranchDepth;
+
+};
+
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+	case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
+	case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+	case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+	case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+	case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+	case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+	default:
+		error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+	case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+	case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+	case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+	}
+}
+
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+	case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
+	case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
+	case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
+	case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
+	case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
+	case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
+	case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
+	case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
+	case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
+	default:
+		error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
+	case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
+	case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
+	case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
+	case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+	case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
+	}
+}
+
+static unsigned int fix_hw_swizzle(unsigned int swz)
+{
+    switch (swz) {
+        case RC_SWIZZLE_ZERO:
+        case RC_SWIZZLE_UNUSED:
+            swz = 4;
+            break;
+        case RC_SWIZZLE_HALF:
+            swz = 5;
+            break;
+        case RC_SWIZZLE_ONE:
+            swz = 6;
+            break;
+    }
+
+	return swz;
+}
+
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
+{
+	unsigned int t = inst->RGB.Arg[arg].Source;
+	int comp;
+	t |= inst->RGB.Arg[arg].Negate << 11;
+	t |= inst->RGB.Arg[arg].Abs << 12;
+
+	for(comp = 0; comp < 3; ++comp)
+		t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
+
+	return t;
+}
+
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
+{
+	unsigned int t = inst->Alpha.Arg[i].Source;
+	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
+	t |= inst->Alpha.Arg[i].Negate << 5;
+	t |= inst->Alpha.Arg[i].Abs << 6;
+	return t;
+}
+
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+	switch(func) {
+	case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
+	case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
+	case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
+	case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
+	default:
+		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+		return 0;
+	}
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
+{
+	if (index > code->max_temp_idx)
+		code->max_temp_idx = index;
+}
+
+static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
+{
+	/* From docs:
+	 *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
+	 * MSB = 1 << 7 */
+	if (!src.Used)
+		return 1 << 7;
+
+	if (src.File == RC_FILE_CONSTANT) {
+		return src.Index | R500_RGB_ADDR0_CONST;
+	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
+		use_temporary(code, src.Index);
+		return src.Index;
+	}
+
+	return 0;
+}
+
+/**
+ * NOP the specified instruction if it is not a texture lookup.
+ */
+static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
+{
+	PROG_CODE;
+
+	if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
+		code->inst[ip].inst0 |= R500_INST_NOP;
+	}
+}
+
+/**
+ * Emit a paired ALU instruction.
+ */
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
+{
+	int ip;
+	PROG_CODE;
+
+	if (code->inst_end >= c->Base.max_alu_insts-1) {
+		error("emit_alu: Too many instructions");
+		return;
+	}
+
+	ip = ++code->inst_end;
+
+	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
+	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
+		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
+		if (ip > 0) {
+			alu_nop(c, ip - 1);
+		}
+	}
+
+	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
+
+	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
+		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
+		if (inst->WriteALUResult) {
+			error("Cannot write output and ALU result at the same time");
+			return;
+		}
+	} else {
+		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+	}
+	code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+
+	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
+	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
+	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+	if (inst->Nop) {
+		code->inst[ip].inst0 |= R500_INST_NOP;
+	}
+	if (inst->Alpha.DepthWriteMask) {
+		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
+		c->code->writes_depth = 1;
+	}
+
+	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
+	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
+	use_temporary(code, inst->Alpha.DestIndex);
+	use_temporary(code, inst->RGB.DestIndex);
+
+	if (inst->RGB.Saturate)
+		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
+	if (inst->Alpha.Saturate)
+		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+
+	/* Set the presubtract operation. */
+	switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+		case RC_PRESUB_BIAS:
+			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
+			break;
+		case RC_PRESUB_SUB:
+			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
+			break;
+		case RC_PRESUB_ADD:
+			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
+			break;
+		case RC_PRESUB_INV:
+			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
+			break;
+		default:
+			break;
+	}
+	switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+		case RC_PRESUB_BIAS:
+			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
+			break;
+		case RC_PRESUB_SUB:
+			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
+			break;
+		case RC_PRESUB_ADD:
+			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
+			break;
+		case RC_PRESUB_INV:
+			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
+			break;
+		default:
+			break;
+	}
+
+	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
+	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
+	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
+
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
+
+	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
+	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
+	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
+
+	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
+	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
+	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
+
+	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
+	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
+
+	if (inst->WriteALUResult) {
+		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+		if (inst->WriteALUResult == RC_ALURESULT_X)
+			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+		else
+			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+	}
+}
+
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
+{
+	unsigned int swiz = 0;
+	int i;
+	for (i = 0; i < 4; i++)
+		swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
+	return swiz;
+}
+
+/**
+ * Emit a single TEX instruction
+ */
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
+{
+	int ip;
+	PROG_CODE;
+
+	if (code->inst_end >= c->Base.max_alu_insts-1) {
+		error("emit_tex: Too many instructions");
+		return 0;
+	}
+
+	ip = ++code->inst_end;
+
+	code->inst[ip].inst0 = R500_INST_TYPE_TEX
+		| (inst->DstReg.WriteMask << 11)
+		| R500_INST_TEX_SEM_WAIT;
+	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
+		| R500_TEX_SEM_ACQUIRE;
+
+	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+
+	switch (inst->Opcode) {
+	case RC_OPCODE_KIL:
+		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
+		break;
+	case RC_OPCODE_TEX:
+		code->inst[ip].inst1 |= R500_TEX_INST_LD;
+		break;
+	case RC_OPCODE_TXB:
+		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
+		break;
+	case RC_OPCODE_TXP:
+		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
+		break;
+	case RC_OPCODE_TXD:
+		code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
+		break;
+	case RC_OPCODE_TXL:
+		code->inst[ip].inst1 |= R500_TEX_INST_LOD;
+		break;
+	default:
+		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
+	}
+
+	use_temporary(code, inst->SrcReg[0].Index);
+	if (inst->Opcode != RC_OPCODE_KIL)
+		use_temporary(code, inst->DstReg.Index);
+
+	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+		| R500_TEX_DST_ADDR(inst->DstReg.Index)
+		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
+		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
+		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
+		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
+		;
+
+	if (inst->Opcode == RC_OPCODE_TXD) {
+		use_temporary(code, inst->SrcReg[1].Index);
+		use_temporary(code, inst->SrcReg[2].Index);
+
+		/* DX and DY parameters are specified in a separate register. */
+		code->inst[ip].inst3 =
+			R500_DX_ADDR(inst->SrcReg[1].Index) |
+			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
+			R500_DY_ADDR(inst->SrcReg[2].Index) |
+			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
+	}
+
+	return 1;
+}
+
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+	unsigned int newip;
+
+	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
+		rc_error(s->C, "emit_tex: Too many instructions");
+		return;
+	}
+
+	newip = ++s->Code->inst_end;
+
+	/* Currently all loops use the same integer constant to intialize
+	 * the loop variables. */
+	if(!s->Code->int_constants[0]) {
+		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+		s->Code->int_constant_count = 1;
+	}
+	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+	switch(inst->U.I.Opcode){
+	struct branch_info * branch;
+	struct r500_loop_info * loop;
+	case RC_OPCODE_BGNLOOP:
+		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
+			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
+
+		loop = &s->Loops[s->CurrentLoopDepth++];
+		memset(loop, 0, sizeof(struct r500_loop_info));
+		loop->BranchDepth = s->CurrentBranchDepth;
+		loop->BgnLoop = newip;
+
+		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+			| R500_FC_JUMP_FUNC(0x00)
+			| R500_FC_IGNORE_UNCOVERED
+			;
+		break;
+	case RC_OPCODE_BRK:
+		loop = &s->Loops[s->CurrentLoopDepth - 1];
+		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+					loop->BrkCount, loop->BrkReserved, 1);
+
+		loop->Brks[loop->BrkCount++] = newip;
+		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+			| R500_FC_JUMP_FUNC(0xff)
+			| R500_FC_B_OP1_DECR
+			| R500_FC_B_POP_CNT(
+				s->CurrentBranchDepth - loop->BranchDepth)
+			| R500_FC_IGNORE_UNCOVERED
+			;
+		break;
+
+	case RC_OPCODE_CONT:
+		loop = &s->Loops[s->CurrentLoopDepth - 1];
+		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
+					loop->ContCount, loop->ContReserved, 1);
+		loop->Conts[loop->ContCount++] = newip;
+		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
+			| R500_FC_JUMP_FUNC(0xff)
+			| R500_FC_B_OP1_DECR
+			| R500_FC_B_POP_CNT(
+				s->CurrentBranchDepth -	loop->BranchDepth)
+			| R500_FC_IGNORE_UNCOVERED
+			;
+		break;
+
+	case RC_OPCODE_ENDLOOP:
+	{
+		loop = &s->Loops[s->CurrentLoopDepth - 1];
+		/* Emit ENDLOOP */
+		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+			| R500_FC_JUMP_FUNC(0xff)
+			| R500_FC_JUMP_ANY
+			| R500_FC_IGNORE_UNCOVERED
+			;
+		/* The constant integer at index 0 is used by all loops. */
+		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+			;
+
+		/* Set jump address and int constant for BGNLOOP */
+		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+			| R500_FC_JUMP_ADDR(newip)
+			;
+
+		/* Set jump address for the BRK instructions. */
+		while(loop->BrkCount--) {
+			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+						R500_FC_JUMP_ADDR(newip + 1);
+		}
+
+		/* Set jump address for CONT instructions. */
+		while(loop->ContCount--) {
+			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
+						R500_FC_JUMP_ADDR(newip);
+		}
+		s->CurrentLoopDepth--;
+		break;
+	}
+	case RC_OPCODE_IF:
+		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+			rc_error(s->C, "Branch depth exceeds hardware limit");
+			return;
+		}
+		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
+
+		branch = &s->Branches[s->CurrentBranchDepth++];
+		branch->If = newip;
+		branch->Else = -1;
+		branch->Endif = -1;
+
+		if (s->CurrentBranchDepth > s->MaxBranchDepth)
+			s->MaxBranchDepth = s->CurrentBranchDepth;
+
+		/* actual instruction is filled in at ENDIF time */
+		break;
+	
+	case RC_OPCODE_ELSE:
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+			return;
+		}
+
+		branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Else = newip;
+
+		/* actual instruction is filled in at ENDIF time */
+		break;
+
+	case RC_OPCODE_ENDIF:
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+			return;
+		}
+
+		branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Endif = newip;
+
+		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+			| R500_FC_B_OP1_NONE /* no branch counter if stay */
+			| R500_FC_B_POP_CNT(1)
+			;
+		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
+			| R500_FC_IGNORE_UNCOVERED
+		;
+
+		if (branch->Else >= 0) {
+			/* increment branch counter also if jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+				| R500_FC_A_OP_NONE /* no address stack */
+				| R500_FC_B_ELSE /* all active pixels want to jump */
+				| R500_FC_B_OP0_NONE /* no counter op if stay */
+				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+				| R500_FC_B_POP_CNT(1)
+			;
+			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		} else {
+			/* don't touch branch counter on jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		}
+
+
+		s->CurrentBranchDepth--;
+		break;
+	default:
+		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
+	}
+}
+
+void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
+{
+	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+	struct emit_state s;
+	struct r500_fragment_program_code *code = &compiler->code->code.r500;
+
+	memset(&s, 0, sizeof(s));
+	s.C = &compiler->Base;
+	s.Code = code;
+
+	memset(code, 0, sizeof(*code));
+	code->max_temp_idx = 1;
+	code->inst_end = -1;
+
+	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+	    inst = inst->Next) {
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+			if (opcode->IsFlowControl) {
+				emit_flowcontrol(&s, inst);
+			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+				continue;
+			} else {
+				emit_tex(compiler, &inst->U.I);
+			}
+		} else {
+			emit_paired(compiler, &inst->U.P);
+		}
+	}
+
+	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
+		rc_error(&compiler->Base, "Too many hardware temporaries used");
+
+	if (compiler->Base.Error)
+		return;
+
+	if (code->inst_end == -1 ||
+	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+		int ip;
+
+		/* This may happen when dead-code elimination is disabled or
+		 * when most of the fragment program logic is leading to a KIL */
+		if (code->inst_end >= compiler->Base.max_alu_insts-1) {
+			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+			return;
+		}
+
+		ip = ++code->inst_end;
+		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
+	}
+
+	/* Enable full flow control mode if we are using loops or have if
+	 * statements nested at least four deep. */
+	if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
+		if (code->max_temp_idx < 1)
+			code->max_temp_idx = 1;
+
+		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c
new file mode 100644
index 0000000..6842fb8
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_code.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_code.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "radeon_program.h"
+
+void rc_constants_init(struct rc_constant_list * c)
+{
+	memset(c, 0, sizeof(*c));
+}
+
+/**
+ * Copy a constants structure, assuming that the destination structure
+ * is not initialized.
+ */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+	dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
+	memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
+	dst->Count = src->Count;
+	dst->_Reserved = src->Count;
+}
+
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+	free(c->Constants);
+	memset(c, 0, sizeof(*c));
+}
+
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+	unsigned index = c->Count;
+
+	if (c->Count >= c->_Reserved) {
+		struct rc_constant * newlist;
+
+		c->_Reserved = c->_Reserved * 2;
+		if (!c->_Reserved)
+			c->_Reserved = 16;
+
+		newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
+		memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
+
+		free(c->Constants);
+		c->Constants = newlist;
+	}
+
+	c->Constants[index] = *constant;
+	c->Count++;
+
+	return index;
+}
+
+
+/**
+ * Add a state vector to the constant list, while trying to avoid duplicates.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+	unsigned index;
+	struct rc_constant constant;
+
+	for(index = 0; index < c->Count; ++index) {
+		if (c->Constants[index].Type == RC_CONSTANT_STATE) {
+			if (c->Constants[index].u.State[0] == state0 &&
+			    c->Constants[index].u.State[1] == state1)
+				return index;
+		}
+	}
+
+	memset(&constant, 0, sizeof(constant));
+	constant.Type = RC_CONSTANT_STATE;
+	constant.Size = 4;
+	constant.u.State[0] = state0;
+	constant.u.State[1] = state1;
+
+	return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate vector to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+	unsigned index;
+	struct rc_constant constant;
+
+	for(index = 0; index < c->Count; ++index) {
+		if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+			if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
+				return index;
+		}
+	}
+
+	memset(&constant, 0, sizeof(constant));
+	constant.Type = RC_CONSTANT_IMMEDIATE;
+	constant.Size = 4;
+	memcpy(constant.u.Immediate, data, sizeof(float) * 4);
+
+	return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate scalar to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+	unsigned index;
+	int free_index = -1;
+	struct rc_constant constant;
+
+	for(index = 0; index < c->Count; ++index) {
+		if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+			unsigned comp;
+			for(comp = 0; comp < c->Constants[index].Size; ++comp) {
+				if (c->Constants[index].u.Immediate[comp] == data) {
+					*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
+					return index;
+				}
+			}
+
+			if (c->Constants[index].Size < 4)
+				free_index = index;
+		}
+	}
+
+	if (free_index >= 0) {
+		unsigned comp = c->Constants[free_index].Size++;
+		c->Constants[free_index].u.Immediate[comp] = data;
+		*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
+		return free_index;
+	}
+
+	memset(&constant, 0, sizeof(constant));
+	constant.Type = RC_CONSTANT_IMMEDIATE;
+	constant.Size = 1;
+	constant.u.Immediate[0] = data;
+	*swizzle = RC_SWIZZLE_XXXX;
+
+	return rc_constants_add(c, &constant);
+}
+
+void rc_constants_print(struct rc_constant_list * c)
+{
+	unsigned int i;
+	for(i = 0; i < c->Count; i++) {
+		if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) {
+			float * values = c->Constants[i].u.Immediate;
+			fprintf(stderr, "CONST[%u] = "
+				"{ %10.4f %10.4f %10.4f %10.4f }\n",
+				i, values[0],values[1], values[2], values[3]);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h
new file mode 100644
index 0000000..67e6acf
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_code.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+#include <stdint.h>
+
+#define R300_PFS_MAX_ALU_INST     64
+#define R300_PFS_MAX_TEX_INST     32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS    32
+#define R300_PFS_NUM_CONST_REGS   32
+
+#define R400_PFS_MAX_ALU_INST     512
+#define R400_PFS_MAX_TEX_INST     512
+
+#define R500_PFS_MAX_INST         512
+#define R500_PFS_NUM_TEMP_REGS    128
+#define R500_PFS_NUM_CONST_REGS   256
+#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
+#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+
+enum {
+	/**
+	 * External constants are constants whose meaning is unknown to this
+	 * compiler. For example, a Mesa gl_program's constants are turned
+	 * into external constants.
+	 */
+	RC_CONSTANT_EXTERNAL = 0,
+
+	RC_CONSTANT_IMMEDIATE,
+
+	/**
+	 * Constant referring to state that is known by this compiler,
+	 * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
+	 */
+	RC_CONSTANT_STATE
+};
+
+enum {
+	RC_STATE_SHADOW_AMBIENT = 0,
+
+	RC_STATE_R300_WINDOW_DIMENSION,
+	RC_STATE_R300_TEXRECT_FACTOR,
+	RC_STATE_R300_TEXSCALE_FACTOR,
+	RC_STATE_R300_VIEWPORT_SCALE,
+	RC_STATE_R300_VIEWPORT_OFFSET
+};
+
+struct rc_constant {
+	unsigned Type:2; /**< RC_CONSTANT_xxx */
+	unsigned Size:3;
+
+	union {
+		unsigned External;
+		float Immediate[4];
+		unsigned State[2];
+	} u;
+};
+
+struct rc_constant_list {
+	struct rc_constant * Constants;
+	unsigned Count;
+
+	unsigned _Reserved;
+};
+
+void rc_constants_init(struct rc_constant_list * c);
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
+void rc_constants_destroy(struct rc_constant_list * c);
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+void rc_constants_print(struct rc_constant_list * c);
+
+/**
+ * Compare functions.
+ *
+ * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
+ * the correct GL compare function.
+ */
+typedef enum {
+	RC_COMPARE_FUNC_NEVER = 0,
+	RC_COMPARE_FUNC_LESS,
+	RC_COMPARE_FUNC_EQUAL,
+	RC_COMPARE_FUNC_LEQUAL,
+	RC_COMPARE_FUNC_GREATER,
+	RC_COMPARE_FUNC_NOTEQUAL,
+	RC_COMPARE_FUNC_GEQUAL,
+	RC_COMPARE_FUNC_ALWAYS
+} rc_compare_func;
+
+/**
+ * Coordinate wrapping modes.
+ *
+ * These are not quite the same as their GL counterparts yet.
+ */
+typedef enum {
+	RC_WRAP_NONE = 0,
+	RC_WRAP_REPEAT,
+	RC_WRAP_MIRRORED_REPEAT,
+	RC_WRAP_MIRRORED_CLAMP
+} rc_wrap_mode;
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+	struct {
+		/**
+		 * This field contains swizzle for some lowering passes
+		 * (shadow comparison, unorm->snorm conversion)
+		 */
+		unsigned texture_swizzle:12;
+
+		/**
+		 * If the sampler is used as a shadow sampler,
+		 * this field specifies the compare function.
+		 *
+		 * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
+		 * \sa rc_compare_func
+		 */
+		unsigned texture_compare_func : 3;
+
+		/**
+		 * No matter what the sampler type is,
+		 * this field turns it into a shadow sampler.
+		 */
+		unsigned compare_mode_enabled : 1;
+
+		/**
+		 * If the sampler will receive non-normalized coords,
+		 * this field is set. The scaling factor is given by
+		 * RC_STATE_R300_TEXRECT_FACTOR.
+		 */
+		unsigned non_normalized_coords : 1;
+
+		/**
+		 * This field specifies wrapping modes for the sampler.
+		 *
+		 * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
+		 * will be performed on the coordinates.
+		 */
+		unsigned wrap_mode : 3;
+
+		/**
+		 * The coords are scaled after applying the wrap mode emulation
+		 * and right before texture fetch. The scaling factor is given by
+		 * RC_STATE_R300_TEXSCALE_FACTOR. */
+		unsigned clamp_and_scale_before_fetch : 1;
+
+		/**
+		 * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
+		 * in the shader.
+		 */
+		unsigned convert_unorm_to_snorm:1;
+	} unit[16];
+
+	unsigned frag_clamp:1;
+};
+
+
+
+struct r300_fragment_program_node {
+	int tex_offset; /**< first tex instruction */
+	int tex_end; /**< last tex instruction, relative to tex_offset */
+	int alu_offset; /**< first ALU instruction */
+	int alu_end; /**< last ALU instruction, relative to alu_offset */
+	int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+	struct {
+		unsigned int length; /**< total # of texture instructions used */
+		uint32_t inst[R400_PFS_MAX_TEX_INST];
+	} tex;
+
+	struct {
+		unsigned int length; /**< total # of ALU instructions used */
+		struct {
+			uint32_t rgb_inst;
+			uint32_t rgb_addr;
+			uint32_t alpha_inst;
+			uint32_t alpha_addr;
+			uint32_t r400_ext_addr;
+		} inst[R400_PFS_MAX_ALU_INST];
+	} alu;
+
+	uint32_t config; /* US_CONFIG */
+	uint32_t pixsize; /* US_PIXSIZE */
+	uint32_t code_offset; /* US_CODE_OFFSET */
+	uint32_t r400_code_offset_ext; /* US_CODE_EXT */
+	uint32_t code_addr[4]; /* US_CODE_ADDR */
+	/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
+	 * for r400 cards */
+	unsigned int r390_mode:1;
+};
+
+
+struct r500_fragment_program_code {
+	struct {
+		uint32_t inst0;
+		uint32_t inst1;
+		uint32_t inst2;
+		uint32_t inst3;
+		uint32_t inst4;
+		uint32_t inst5;
+	} inst[R500_PFS_MAX_INST];
+
+	int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
+
+	int max_temp_idx;
+
+	uint32_t us_fc_ctrl;
+
+	uint32_t int_constants[32];
+	uint32_t int_constant_count;
+};
+
+struct rX00_fragment_program_code {
+	union {
+		struct r300_fragment_program_code r300;
+		struct r500_fragment_program_code r500;
+	} code;
+
+	unsigned writes_depth:1;
+
+	struct rc_constant_list constants;
+	unsigned *constants_remap_table;
+};
+
+
+#define R300_VS_MAX_ALU		256
+#define R300_VS_MAX_ALU_DWORDS  (R300_VS_MAX_ALU * 4)
+#define R500_VS_MAX_ALU	        1024
+#define R500_VS_MAX_ALU_DWORDS  (R500_VS_MAX_ALU * 4)
+#define R300_VS_MAX_TEMPS	32
+/* This is the max for all chipsets (r300-r500) */
+#define R300_VS_MAX_FC_OPS 16
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_VS_MAX_FC_DEPTH 8
+#define R300_VS_MAX_LOOP_DEPTH 1
+
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
+struct r300_vertex_program_code {
+	int length;
+	union {
+		uint32_t d[R500_VS_MAX_ALU_DWORDS];
+		float f[R500_VS_MAX_ALU_DWORDS];
+	} body;
+
+	int pos_end;
+	int num_temporaries;	/* Number of temp vars used by program */
+	int inputs[VSF_MAX_INPUTS];
+	int outputs[VSF_MAX_OUTPUTS];
+
+	struct rc_constant_list constants;
+	unsigned *constants_remap_table;
+
+	uint32_t InputsRead;
+	uint32_t OutputsWritten;
+
+	unsigned int num_fc_ops;
+	uint32_t fc_ops;
+	union {
+	        uint32_t r300[R300_VS_MAX_FC_OPS];
+		struct {
+			uint32_t lw;
+			uint32_t uw;
+		} r500[R300_VS_MAX_FC_OPS];
+	} fc_op_addrs;
+	int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
+};
+
+#endif /* RADEON_CODE_H */
+
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c
new file mode 100644
index 0000000..b793672
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+	memset(c, 0, sizeof(*c));
+
+	memory_pool_init(&c->Pool);
+	c->Program.Instructions.Prev = &c->Program.Instructions;
+	c->Program.Instructions.Next = &c->Program.Instructions;
+	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+	rc_constants_destroy(&c->Program.Constants);
+	memory_pool_destroy(&c->Pool);
+	free(c->ErrorMsg);
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+	va_list ap;
+
+	if (!(c->Debug & RC_DBG_LOG))
+		return;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+}
+
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+	va_list ap;
+
+	c->Error = 1;
+
+	if (!c->ErrorMsg) {
+		/* Only remember the first error */
+		char buf[1024];
+		int written;
+
+		va_start(ap, fmt);
+		written = vsnprintf(buf, sizeof(buf), fmt, ap);
+		va_end(ap);
+
+		if (written < sizeof(buf)) {
+			c->ErrorMsg = strdup(buf);
+		} else {
+			c->ErrorMsg = malloc(written + 1);
+
+			va_start(ap, fmt);
+			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
+			va_end(ap);
+		}
+	}
+
+	if (c->Debug & RC_DBG_LOG) {
+		fprintf(stderr, "r300compiler error: ");
+
+		va_start(ap, fmt);
+		vfprintf(stderr, fmt, ap);
+		va_end(ap);
+	}
+}
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+	return 1;
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+	struct rc_instruction *inst;
+
+	c->Program.InputsRead = 0;
+	c->Program.OutputsWritten = 0;
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+	{
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		int i;
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
+				c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
+				c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
+		}
+	}
+}
+
+/**
+ * Rewrite the program such that everything that source the given input
+ * register will source new_input instead.
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
+{
+	struct rc_instruction * inst;
+
+	c->Program.InputsRead &= ~(1 << input);
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for(i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
+				inst->U.I.SrcReg[i].File = new_input.File;
+				inst->U.I.SrcReg[i].Index = new_input.Index;
+				inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
+				if (!inst->U.I.SrcReg[i].Abs) {
+					inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
+					inst->U.I.SrcReg[i].Abs = new_input.Abs;
+				}
+
+				c->Program.InputsRead |= 1 << new_input.Index;
+			}
+		}
+	}
+}
+
+
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured.
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+	struct rc_instruction * inst;
+
+	c->Program.OutputsWritten &= ~(1 << output);
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+				inst->U.I.DstReg.Index = new_output;
+				inst->U.I.DstReg.WriteMask &= writemask;
+
+				c->Program.OutputsWritten |= 1 << new_output;
+			}
+		}
+	}
+}
+
+
+/**
+ * Rewrite the program such that a given output is duplicated.
+ */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+	unsigned tempreg = rc_find_free_temporary(c);
+	struct rc_instruction * inst;
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst->U.I.DstReg.Index = tempreg;
+			}
+		}
+	}
+
+	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+	inst->U.I.DstReg.Index = output;
+
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = tempreg;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+	inst->U.I.DstReg.Index = dup_output;
+
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = tempreg;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+	c->Program.OutputsWritten |= 1 << dup_output;
+}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position.
+ */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+                                int full_vtransform)
+{
+	unsigned tempregi = rc_find_free_temporary(c);
+	struct rc_instruction * inst_rcp;
+	struct rc_instruction * inst_mul;
+	struct rc_instruction * inst_mad;
+	struct rc_instruction * inst;
+
+	c->Program.InputsRead &= ~(1 << wpos);
+	c->Program.InputsRead |= 1 << new_input;
+
+	/* perspective divide */
+	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+
+	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_rcp->U.I.DstReg.Index = tempregi;
+	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+
+	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
+	inst_rcp->U.I.SrcReg[0].Index = new_input;
+	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+	inst_mul = rc_insert_new_instruction(c, inst_rcp);
+	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+
+	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.DstReg.Index = tempregi;
+	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
+	inst_mul->U.I.SrcReg[0].Index = new_input;
+
+	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.SrcReg[1].Index = tempregi;
+	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+	/* viewport transformation */
+	inst_mad = rc_insert_new_instruction(c, inst_mul);
+	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+
+	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mad->U.I.DstReg.Index = tempregi;
+	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst_mad->U.I.SrcReg[0].Index = tempregi;
+	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+
+	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
+	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
+
+	if (full_vtransform) {
+		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
+		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
+	} else {
+		inst_mad->U.I.SrcReg[1].Index =
+		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+	}
+
+	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for(i = 0; i < opcode->NumSrcRegs; i++) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+			    inst->U.I.SrcReg[i].Index == wpos) {
+				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+				inst->U.I.SrcReg[i].Index = tempregi;
+			}
+		}
+	}
+}
+
+
+/**
+ * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
+ * Gallium and OpenGL define it the other way around.
+ *
+ * So let's just negate FACE at the beginning of the shader and rewrite the rest
+ * of the shader to read from the newly allocated temporary.
+ */
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
+{
+	unsigned tempregi = rc_find_free_temporary(c);
+	struct rc_instruction *inst_add;
+	struct rc_instruction *inst;
+
+	/* perspective divide */
+	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
+	inst_add->U.I.Opcode = RC_OPCODE_ADD;
+
+	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_add->U.I.DstReg.Index = tempregi;
+	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
+
+	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
+	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+
+	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
+	inst_add->U.I.SrcReg[1].Index = face;
+	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for(i = 0; i < opcode->NumSrcRegs; i++) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+			    inst->U.I.SrcReg[i].Index == face) {
+				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+				inst->U.I.SrcReg[i].Index = tempregi;
+			}
+		}
+	}
+}
+
+static void reg_count_callback(void * userdata, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	int *max_reg = userdata;
+	if (file == RC_FILE_TEMPORARY)
+		(int)index > *max_reg ? *max_reg = index : 0;
+}
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
+{
+	int max_reg = -1;
+	struct rc_instruction * tmp;
+	memset(s, 0, sizeof(*s));
+
+	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
+							tmp = tmp->Next){
+		const struct rc_opcode_info * info;
+		rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
+		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+			info = rc_get_opcode_info(tmp->U.I.Opcode);
+			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
+				continue;
+			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+				s->num_presub_ops++;
+		} else {
+			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
+				s->num_presub_ops++;
+			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+				s->num_presub_ops++;
+			/* Assuming alpha will never be a flow control or
+			 * a tex instruction. */
+			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
+				s->num_alpha_insts++;
+			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
+				s->num_rgb_insts++;
+			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
+		}
+		if (info->IsFlowControl)
+			s->num_fc_insts++;
+		if (info->HasTexture)
+			s->num_tex_insts++;
+		s->num_insts++;
+	}
+	s->num_temp_regs = max_reg + 1;
+}
+
+static void print_stats(struct radeon_compiler * c)
+{
+	struct rc_program_stats s;
+
+	if (c->initial_num_insts <= 5)
+		return;
+
+	rc_get_stats(c, &s);
+
+	switch (c->type) {
+	case RC_VERTEX_PROGRAM:
+		fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
+			       "~%4u Instructions\n"
+			       "~%4u Flow Control Instructions\n"
+			       "~%4u Temporary Registers\n"
+			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+			       s.num_insts, s.num_fc_insts, s.num_temp_regs);
+		break;
+
+	case RC_FRAGMENT_PROGRAM:
+		fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
+			       "~%4u Instructions\n"
+			       "~%4u Vector Instructions (RGB)\n"
+			       "~%4u Scalar Instructions (Alpha)\n"
+			       "~%4u Flow Control Instructions\n"
+			       "~%4u Texture Instructions\n"
+			       "~%4u Presub Operations\n"
+			       "~%4u Temporary Registers\n"
+			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+			       s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
+			       s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
+			       s.num_temp_regs);
+		break;
+	default:
+		assert(0);
+	}
+}
+
+static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
+	"Vertex Program",
+	"Fragment Program"
+};
+
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+	for (unsigned i = 0; list[i].name; i++) {
+		if (list[i].predicate) {
+			list[i].run(c, list[i].user);
+
+			if (c->Error)
+				return;
+
+			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
+				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
+				rc_print_program(&c->Program);
+			}
+		}
+	}
+}
+
+/* Executes a list of compiler passes given in the parameter 'list'. */
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+	struct rc_program_stats s;
+
+	rc_get_stats(c, &s);
+	c->initial_num_insts = s.num_insts;
+
+	if (c->Debug & RC_DBG_LOG) {
+		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
+		rc_print_program(&c->Program);
+	}
+
+	rc_run_compiler_passes(c, list);
+
+	if (c->Debug & RC_DBG_STATS)
+		print_stats(c);
+}
+
+void rc_validate_final_shader(struct radeon_compiler *c, void *user)
+{
+	/* Check the number of constants. */
+	if (c->Program.Constants.Count > c->max_constants) {
+		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
+			 c->max_constants, c->Program.Constants.Count);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h
new file mode 100644
index 0000000..74594af
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "main/compiler.h"
+
+#include "memory_pool.h"
+#include "radeon_code.h"
+#include "radeon_program.h"
+#include "radeon_emulate_loops.h"
+
+#define RC_DBG_LOG        (1 << 0)
+#define RC_DBG_STATS      (1 << 1)
+
+struct rc_swizzle_caps;
+
+enum rc_program_type {
+	RC_VERTEX_PROGRAM,
+	RC_FRAGMENT_PROGRAM,
+	RC_NUM_PROGRAM_TYPES
+};
+
+struct radeon_compiler {
+	struct memory_pool Pool;
+	struct rc_program Program;
+	enum rc_program_type type;
+	unsigned Debug:2;
+	unsigned Error:1;
+	char * ErrorMsg;
+
+	/* Hardware specification. */
+	unsigned is_r400:1;
+	unsigned is_r500:1;
+	unsigned has_half_swizzles:1;
+	unsigned has_presub:1;
+	unsigned disable_optimizations:1;
+	unsigned max_temp_regs;
+	unsigned max_constants;
+	int max_alu_insts;
+	unsigned max_tex_insts;
+
+	/* Whether to remove unused constants and empty holes in constant space. */
+	unsigned remove_unused_constants:1;
+
+	/**
+	 * Variables used internally, not be touched by callers
+	 * of the compiler
+	 */
+	/*@{*/
+	struct rc_swizzle_caps * SwizzleCaps;
+	/*@}*/
+
+	struct emulate_loop_state loop_state;
+
+	unsigned initial_num_insts; /* Number of instructions at start. */
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * A typical use-case would be:
+ *
+ *  if (rc_assert(c, condition-that-must-be-true))
+ *  	return;
+ */
+#define rc_assert(c, cond) \
+	(!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
+
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+                                int full_vtransform);
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
+
+struct r300_fragment_program_compiler {
+	struct radeon_compiler Base;
+	struct rX00_fragment_program_code *code;
+	/* Optional transformations and features. */
+	struct r300_fragment_program_external_state state;
+	unsigned enable_shadow_ambient;
+	/* Register corresponding to the depthbuffer. */
+	unsigned OutputDepth;
+	/* Registers corresponding to the four colorbuffers. */
+	unsigned OutputColor[4];
+
+	void * UserData;
+	void (*AllocateHwInputs)(
+		struct r300_fragment_program_compiler * c,
+		void (*allocate)(void * data, unsigned input, unsigned hwreg),
+		void * mydata);
+};
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+struct r300_vertex_program_compiler {
+	struct radeon_compiler Base;
+	struct r300_vertex_program_code *code;
+	uint32_t RequiredOutputs;
+
+	void * UserData;
+	void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+
+	int PredicateIndex;
+	unsigned int PredicateMask;
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
+
+struct radeon_compiler_pass {
+	const char *name;	/* Name of the pass. */
+	int dump;		/* Dump the program if Debug == 1? */
+	int predicate;		/* Run this pass? */
+	void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */
+	void *user;		/* Optional parameter which is passed to the run function. */
+};
+
+struct rc_program_stats {
+	unsigned num_insts;
+	unsigned num_fc_insts;
+	unsigned num_tex_insts;
+	unsigned num_rgb_insts;
+	unsigned num_alpha_insts;
+	unsigned num_presub_ops;
+	unsigned num_temp_regs;
+};
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
+
+/* Executes a list of compiler passes given in the parameter 'list'. */
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_validate_final_shader(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c
new file mode 100644
index 0000000..2742721
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c
@@ -0,0 +1,701 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_compiler_util.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+/**
+ */
+unsigned int rc_swizzle_to_writemask(unsigned int swz)
+{
+	unsigned int mask = 0;
+	unsigned int i;
+
+	for(i = 0; i < 4; i++) {
+		mask |= 1 << GET_SWZ(swz, i);
+	}
+	mask &= RC_MASK_XYZW;
+
+	return mask;
+}
+
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+	if (idx & 0x4)
+		return idx;
+	return GET_SWZ(swz, idx);
+}
+
+/**
+ * The purpose of this function is to standardize the number channels used by
+ * swizzles.  All swizzles regardless of what instruction they are a part of
+ * should have 4 channels initialized with values.
+ * @param channels The number of channels in initial_value that have a
+ * meaningful value.
+ * @return An initialized swizzle that has all of the unused channels set to
+ * RC_SWIZZLE_UNUSED.
+ */
+unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
+{
+	unsigned int i;
+	for (i = channels; i < 4; i++) {
+		SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED);
+	}
+	return initial_value;
+}
+
+unsigned int combine_swizzles4(unsigned int src,
+		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+	unsigned int ret = 0;
+
+	ret |= get_swz(src, swz_x);
+	ret |= get_swz(src, swz_y) << 3;
+	ret |= get_swz(src, swz_z) << 6;
+	ret |= get_swz(src, swz_w) << 9;
+
+	return ret;
+}
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+	unsigned int ret = 0;
+
+	ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+	ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+	ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+	ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
+
+	return ret;
+}
+
+/**
+ * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
+ */
+rc_swizzle rc_mask_to_swizzle(unsigned int mask)
+{
+	switch (mask) {
+	case RC_MASK_X: return RC_SWIZZLE_X;
+	case RC_MASK_Y: return RC_SWIZZLE_Y;
+	case RC_MASK_Z: return RC_SWIZZLE_Z;
+	case RC_MASK_W: return RC_SWIZZLE_W;
+	}
+	return RC_SWIZZLE_UNUSED;
+}
+
+/* Reorder mask bits according to swizzle. */
+unsigned swizzle_mask(unsigned swizzle, unsigned mask)
+{
+	unsigned ret = 0;
+	for (unsigned chan = 0; chan < 4; ++chan) {
+		unsigned swz = GET_SWZ(swizzle, chan);
+		if (swz < 4)
+			ret |= GET_BIT(mask, swz) << chan;
+	}
+	return ret;
+}
+
+static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
+{
+	if (info->HasTexture) {
+		return 0;
+	}
+	switch (info->Opcode) {
+		case RC_OPCODE_DP2:
+		case RC_OPCODE_DP3:
+		case RC_OPCODE_DP4:
+		case RC_OPCODE_DDX:
+		case RC_OPCODE_DDY:
+			return 0;
+		default:
+			return 1;
+	}
+}
+
+/**
+ * @return A swizzle the results from converting old_swizzle using
+ * conversion_swizzle
+ */
+unsigned int rc_adjust_channels(
+	unsigned int old_swizzle,
+	unsigned int conversion_swizzle)
+{
+	unsigned int i;
+	unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+	for (i = 0; i < 4; i++) {
+		unsigned int new_chan = get_swz(conversion_swizzle, i);
+		if (new_chan == RC_SWIZZLE_UNUSED) {
+			continue;
+		}
+		SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i));
+	}
+	return new_swizzle;
+}
+
+static unsigned int rewrite_writemask(
+	unsigned int old_mask,
+	unsigned int conversion_swizzle)
+{
+	unsigned int new_mask = 0;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++) {
+		if (!GET_BIT(old_mask, i)
+		   || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) {
+			continue;
+		}
+		new_mask |= (1 << GET_SWZ(conversion_swizzle, i));
+	}
+
+	return new_mask;
+}
+
+/**
+ * This function rewrites the writemask of sub and adjusts the swizzles
+ * of all its source registers based on the conversion_swizzle.
+ * conversion_swizzle represents a mapping of the old writemask to the
+ * new writemask.  For a detailed description of how conversion swizzles
+ * work see rc_rewrite_swizzle().
+ */
+void rc_pair_rewrite_writemask(
+	struct rc_pair_sub_instruction * sub,
+	unsigned int conversion_swizzle)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	unsigned int i;
+
+	sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);
+
+	if (!srcs_need_rewrite(info)) {
+		return ;
+	}
+
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		sub->Arg[i].Swizzle =
+			rc_adjust_channels(sub->Arg[i].Swizzle,
+						conversion_swizzle);
+	}
+}
+
+static void normal_rewrite_writemask_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	unsigned int * new_mask = (unsigned int *)userdata;
+	src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask);
+}
+
+/**
+ * This function is the same as rc_pair_rewrite_writemask() except it
+ * operates on normal instructions.
+ */
+void rc_normal_rewrite_writemask(
+	struct rc_instruction * inst,
+	unsigned int conversion_swizzle)
+{
+	unsigned int new_mask;
+	struct rc_sub_instruction * sub = &inst->U.I;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	sub->DstReg.WriteMask =
+		rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);
+
+	if (info->HasTexture) {
+		unsigned int i;
+		assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
+		for (i = 0; i < 4; i++) {
+			unsigned int swz = GET_SWZ(conversion_swizzle, i);
+			if (swz > 3)
+				continue;
+			SET_SWZ(sub->TexSwizzle, swz, i);
+		}
+	}
+
+	if (!srcs_need_rewrite(info)) {
+		return;
+	}
+
+	new_mask = sub->DstReg.WriteMask;
+	rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask);
+}
+
+/**
+ * This function replaces each value 'swz' in swizzle with the value of
+ * GET_SWZ(conversion_swizzle, swz).  So, if you want to change all the X's
+ * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9).  If you want
+ * to change all the Y's in swizzle to X, then conversion_swizzle should be
+ * _X__ (0xfc7).  If you want to change the Y's to X and the X's to Y, then
+ * conversion swizzle should be YX__ (0xfc1).
+ * @param swizzle The swizzle to change
+ * @param conversion_swizzle Describes the conversion to perform on the swizzle
+ * @return A converted swizzle
+ */
+unsigned int rc_rewrite_swizzle(
+	unsigned int swizzle,
+	unsigned int conversion_swizzle)
+{
+	unsigned int chan;
+	unsigned int out_swizzle = swizzle;
+
+	for (chan = 0; chan < 4; chan++) {
+		unsigned int swz = GET_SWZ(swizzle, chan);
+		unsigned int new_swz;
+		if (swz > 3) {
+			SET_SWZ(out_swizzle, chan, swz);
+		} else {
+			new_swz = GET_SWZ(conversion_swizzle, swz);
+			if (new_swz != RC_SWIZZLE_UNUSED) {
+				SET_SWZ(out_swizzle, chan, new_swz);
+			} else {
+				SET_SWZ(out_swizzle, chan, swz);
+			}
+		}
+	}
+	return out_swizzle;
+}
+
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+	struct rc_src_register tmp = srcreg;
+	int i;
+	tmp.Swizzle = 0;
+	tmp.Negate = 0;
+	for(i = 0; i < 4; ++i) {
+		rc_swizzle swz = GET_SWZ(swizzle, i);
+		if (swz < 4) {
+			tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+			tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+		} else {
+			tmp.Swizzle |= swz << (i*3);
+		}
+	}
+	return tmp;
+}
+
+void reset_srcreg(struct rc_src_register* reg)
+{
+	memset(reg, 0, sizeof(struct rc_src_register));
+	reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
+unsigned int rc_src_reads_dst_mask(
+		rc_register_file src_file,
+		unsigned int src_idx,
+		unsigned int src_swz,
+		rc_register_file dst_file,
+		unsigned int dst_idx,
+		unsigned int dst_mask)
+{
+	if (src_file != dst_file || src_idx != dst_idx) {
+		return RC_MASK_NONE;
+	}
+	return dst_mask & rc_swizzle_to_writemask(src_swz);
+}
+
+/**
+ * @return A bit mask specifying whether this swizzle will select from an RGB
+ * source, an Alpha source, or both.
+ */
+unsigned int rc_source_type_swz(unsigned int swizzle)
+{
+	unsigned int chan;
+	unsigned int swz = RC_SWIZZLE_UNUSED;
+	unsigned int ret = RC_SOURCE_NONE;
+
+	for(chan = 0; chan < 4; chan++) {
+		swz = GET_SWZ(swizzle, chan);
+		if (swz == RC_SWIZZLE_W) {
+			ret |= RC_SOURCE_ALPHA;
+		} else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+						|| swz == RC_SWIZZLE_Z) {
+			ret |= RC_SOURCE_RGB;
+		}
+	}
+	return ret;
+}
+
+unsigned int rc_source_type_mask(unsigned int mask)
+{
+	unsigned int ret = RC_SOURCE_NONE;
+
+	if (mask & RC_MASK_XYZ)
+		ret |= RC_SOURCE_RGB;
+
+	if (mask & RC_MASK_W)
+		ret |= RC_SOURCE_ALPHA;
+
+	return ret;
+}
+
+struct src_select {
+	rc_register_file File;
+	int Index;
+	unsigned int SrcType;
+};
+
+struct can_use_presub_data {
+	struct src_select Selects[5];
+	unsigned int SelectCount;
+	const struct rc_src_register * ReplaceReg;
+	unsigned int ReplaceRemoved;
+};
+
+static void can_use_presub_data_add_select(
+	struct can_use_presub_data * data,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int src_type)
+{
+	struct src_select * select;
+
+	select = &data->Selects[data->SelectCount++];
+	select->File = file;
+	select->Index = index;
+	select->SrcType = src_type;
+}
+
+/**
+ * This callback function counts the number of sources in inst that are
+ * different from the sources in can_use_presub_data->RemoveSrcs.
+ */
+static void can_use_presub_read_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct can_use_presub_data * d = userdata;
+
+	if (!d->ReplaceRemoved && src == d->ReplaceReg) {
+		d->ReplaceRemoved = 1;
+		return;
+	}
+
+	if (src->File == RC_FILE_NONE)
+		return;
+
+	can_use_presub_data_add_select(d, src->File, src->Index,
+					rc_source_type_swz(src->Swizzle));
+}
+
+unsigned int rc_inst_can_use_presub(
+	struct rc_instruction * inst,
+	rc_presubtract_op presub_op,
+	unsigned int presub_writemask,
+	const struct rc_src_register * replace_reg,
+	const struct rc_src_register * presub_src0,
+	const struct rc_src_register * presub_src1)
+{
+	struct can_use_presub_data d;
+	unsigned int num_presub_srcs;
+	unsigned int i;
+	const struct rc_opcode_info * info =
+					rc_get_opcode_info(inst->U.I.Opcode);
+	int rgb_count = 0, alpha_count = 0;
+	unsigned int src_type0, src_type1;
+
+	if (presub_op == RC_PRESUB_NONE) {
+		return 1;
+	}
+
+	if (info->HasTexture) {
+		return 0;
+	}
+
+	/* We can't use more than one presubtract value in an
+	 * instruction, unless the two prsubtract operations
+	 * are the same and read from the same registers.
+	 * XXX For now we will limit instructions to only one presubtract
+	 * value.*/
+	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+		return 0;
+	}
+
+	memset(&d, 0, sizeof(d));
+	d.ReplaceReg = replace_reg;
+
+	rc_for_all_reads_src(inst, can_use_presub_read_cb, &d);
+
+	num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
+
+	src_type0 = rc_source_type_swz(presub_src0->Swizzle);
+	can_use_presub_data_add_select(&d,
+		presub_src0->File,
+		presub_src0->Index,
+		src_type0);
+
+	if (num_presub_srcs > 1) {
+		src_type1 = rc_source_type_swz(presub_src1->Swizzle);
+		can_use_presub_data_add_select(&d,
+			presub_src1->File,
+			presub_src1->Index,
+			src_type1);
+
+		/* Even if both of the presub sources read from the same
+		 * register, we still need to use 2 different source selects
+		 * for them, so we need to increment the count to compensate.
+		 */
+		if (presub_src0->File == presub_src1->File
+		    && presub_src0->Index == presub_src1->Index) {
+			if (src_type0 & src_type1 & RC_SOURCE_RGB) {
+				rgb_count++;
+			}
+			if (src_type0 & src_type1 & RC_SOURCE_ALPHA) {
+				alpha_count++;
+			}
+		}
+	}
+
+	/* Count the number of source selects for Alpha and RGB.  If we
+	 * encounter two of the same source selects then we can ignore the
+	 * first one. */
+	for (i = 0; i < d.SelectCount; i++) {
+		unsigned int j;
+		unsigned int src_type = d.Selects[i].SrcType;
+		for (j = i + 1; j < d.SelectCount; j++) {
+			if (d.Selects[i].File == d.Selects[j].File
+			    && d.Selects[i].Index == d.Selects[j].Index) {
+				src_type &= ~d.Selects[j].SrcType;
+			}
+		}
+		if (src_type & RC_SOURCE_RGB) {
+			rgb_count++;
+		}
+
+		if (src_type & RC_SOURCE_ALPHA) {
+			alpha_count++;
+		}
+	}
+
+	if (rgb_count > 3 || alpha_count > 3) {
+		return 0;
+	}
+
+	return 1;
+}
+
+struct max_data {
+	unsigned int Max;
+	unsigned int HasFileType;
+	rc_register_file File;
+};
+
+static void max_callback(
+	void * userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct max_data * d = (struct max_data*)userdata;
+	if (file == d->File && (!d->HasFileType || index > d->Max)) {
+		d->Max = index;
+		d->HasFileType = 1;
+	}
+}
+
+/**
+ * @return The maximum index of the specified register file used by the
+ * program.
+ */
+int rc_get_max_index(
+	struct radeon_compiler * c,
+	rc_register_file file)
+{
+	struct max_data data;
+	struct rc_instruction * inst;
+	data.Max = 0;
+	data.HasFileType = 0;
+	data.File = file;
+	for (inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+		rc_for_all_reads_mask(inst, max_callback, &data);
+		rc_for_all_writes_mask(inst, max_callback, &data);
+	}
+	if (!data.HasFileType) {
+		return -1;
+	} else {
+		return data.Max;
+	}
+}
+
+static unsigned int get_source_readmask(
+	struct rc_pair_sub_instruction * sub,
+	unsigned int source,
+	unsigned int src_type)
+{
+	unsigned int i;
+	unsigned int readmask = 0;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		if (sub->Arg[i].Source != source
+		    || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) {
+			continue;
+		}
+		readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle);
+	}
+	return readmask;
+}
+
+/**
+ * This function attempts to remove a source from a pair instructions.
+ * @param inst
+ * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
+ * @param source The index of the source to remove
+ * @param new_readmask A mask representing the components that are read by
+ * the source that is intended to replace the one you are removing.  If you
+ * want to remove a source only and not replace it, this parameter should be
+ * zero.
+ * @return 1 if the source was successfully removed, 0 if it was not
+ */
+unsigned int rc_pair_remove_src(
+	struct rc_instruction * inst,
+	unsigned int src_type,
+	unsigned int source,
+	unsigned int new_readmask)
+{
+	unsigned int readmask = 0;
+
+	readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type);
+	readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);
+
+	if ((new_readmask & readmask) != readmask)
+		return 0;
+
+	if (src_type & RC_SOURCE_RGB) {
+		memset(&inst->U.P.RGB.Src[source], 0,
+			sizeof(struct rc_pair_instruction_source));
+	}
+
+	if (src_type & RC_SOURCE_ALPHA) {
+		memset(&inst->U.P.Alpha.Src[source], 0,
+			sizeof(struct rc_pair_instruction_source));
+	}
+
+	return 1;
+}
+
+/**
+ * @return RC_OPCODE_NOOP if inst is not a flow control instruction.
+ * @return The opcode of inst if it is a flow control instruction.
+ */
+rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * info;
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		info = rc_get_opcode_info(inst->U.I.Opcode);
+	} else {
+		info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
+		/*A flow control instruction shouldn't have an alpha
+		 * instruction.*/
+		assert(!info->IsFlowControl ||
+				inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
+	}
+
+	if (info->IsFlowControl)
+		return info->Opcode;
+	else
+		return RC_OPCODE_NOP;
+
+}
+
+/**
+ * @return The BGNLOOP instruction that starts the loop ended by endloop.
+ */
+struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
+{
+	unsigned int endloop_count = 0;
+	struct rc_instruction * inst;
+	for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) {
+		rc_opcode op = rc_get_flow_control_inst(inst);
+		if (op == RC_OPCODE_ENDLOOP) {
+			endloop_count++;
+		} else if (op == RC_OPCODE_BGNLOOP) {
+			if (endloop_count == 0) {
+				return inst;
+			} else {
+				endloop_count--;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * @return The ENDLOOP instruction that ends the loop started by bgnloop.
+ */
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
+{
+	unsigned int bgnloop_count = 0;
+	struct rc_instruction * inst;
+	for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) {
+		rc_opcode op = rc_get_flow_control_inst(inst);
+		if (op == RC_OPCODE_BGNLOOP) {
+			bgnloop_count++;
+		} else if (op == RC_OPCODE_ENDLOOP) {
+			if (bgnloop_count == 0) {
+				return inst;
+			} else {
+				bgnloop_count--;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * @return A conversion swizzle for converting from old_mask->new_mask
+ */
+unsigned int rc_make_conversion_swizzle(
+	unsigned int old_mask,
+	unsigned int new_mask)
+{
+	unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+	unsigned int old_idx;
+	unsigned int new_idx = 0;
+	for (old_idx = 0; old_idx < 4; old_idx++) {
+		if (!GET_BIT(old_mask, old_idx))
+			continue;
+		for ( ; new_idx < 4; new_idx++) {
+			if (GET_BIT(new_mask, new_idx)) {
+				SET_SWZ(conversion_swizzle, old_idx, new_idx);
+				new_idx++;
+				break;
+			}
+		}
+	}
+	return conversion_swizzle;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h
new file mode 100644
index 0000000..3730aa8
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h
@@ -0,0 +1,89 @@
+#include "radeon_program_constants.h"
+
+#ifndef RADEON_PROGRAM_UTIL_H
+#define RADEON_PROGRAM_UTIL_H
+
+#include "radeon_opcodes.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_pair_instruction;
+struct rc_pair_sub_instruction;
+struct rc_src_register;
+
+unsigned int rc_swizzle_to_writemask(unsigned int swz);
+
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);
+
+unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels);
+
+unsigned int combine_swizzles4(unsigned int src,
+			       rc_swizzle swz_x, rc_swizzle swz_y,
+			       rc_swizzle swz_z, rc_swizzle swz_w);
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+
+unsigned swizzle_mask(unsigned swizzle, unsigned mask);
+
+unsigned int rc_adjust_channels(
+	unsigned int old_swizzle,
+	unsigned int conversion_swizzle);
+
+void rc_pair_rewrite_writemask(
+	struct rc_pair_sub_instruction * sub,
+	unsigned int conversion_swizzle);
+
+void rc_normal_rewrite_writemask(
+	struct rc_instruction * inst,
+	unsigned int conversion_swizzle);
+
+unsigned int rc_rewrite_swizzle(
+	unsigned int swizzle,
+	unsigned int new_mask);
+
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+void reset_srcreg(struct rc_src_register* reg);
+
+unsigned int rc_src_reads_dst_mask(
+		rc_register_file src_file,
+		unsigned int src_idx,
+		unsigned int src_swz,
+		rc_register_file dst_file,
+		unsigned int dst_idx,
+		unsigned int dst_mask);
+
+unsigned int rc_source_type_swz(unsigned int swizzle);
+
+unsigned int rc_source_type_mask(unsigned int mask);
+
+unsigned int rc_inst_can_use_presub(
+	struct rc_instruction * inst,
+	rc_presubtract_op presub_op,
+	unsigned int presub_writemask,
+	const struct rc_src_register * replace_reg,
+	const struct rc_src_register * presub_src0,
+	const struct rc_src_register * presub_src1);
+
+int rc_get_max_index(
+	struct radeon_compiler * c,
+	rc_register_file file);
+
+unsigned int rc_pair_remove_src(
+	struct rc_instruction * inst,
+	unsigned int src_type,
+	unsigned int source,
+	unsigned int new_readmask);
+
+rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);
+
+struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);
+
+unsigned int rc_make_conversion_swizzle(
+	unsigned int old_mask,
+	unsigned int new_mask);
+
+#endif /* RADEON_PROGRAM_UTIL_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c
new file mode 100644
index 0000000..a8decac
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,892 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_program.h"
+
+struct read_write_mask_data {
+	void * UserData;
+	rc_read_write_mask_fn Cb;
+};
+
+static void reads_normal_callback(
+	void * userdata,
+	struct rc_instruction * fullinst,
+	struct rc_src_register * src)
+{
+	struct read_write_mask_data * cb_data = userdata;
+	unsigned int refmask = 0;
+	unsigned int chan;
+	for(chan = 0; chan < 4; chan++) {
+		refmask |= 1 << GET_SWZ(src->Swizzle, chan);
+	}
+	refmask &= RC_MASK_XYZW;
+
+	if (refmask) {
+		cb_data->Cb(cb_data->UserData, fullinst, src->File,
+							src->Index, refmask);
+	}
+
+	if (refmask && src->RelAddr) {
+		cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0,
+								RC_MASK_X);
+	}
+}
+
+static void pair_get_src_refmasks(unsigned int * refmasks,
+					struct rc_pair_instruction * inst,
+					unsigned int swz, unsigned int src)
+{
+	if (swz >= 4)
+		return;
+
+	if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
+		if(src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			int srcp_regs =
+				rc_presubtract_src_reg_count(
+				inst->RGB.Src[src].Index);
+			for(i = 0; i < srcp_regs; i++) {
+				refmasks[i] |= 1 << swz;
+			}
+		}
+		else {
+			refmasks[src] |= 1 << swz;
+		}
+	}
+
+	if (swz == RC_SWIZZLE_W) {
+		if (src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			int srcp_regs = rc_presubtract_src_reg_count(
+					inst->Alpha.Src[src].Index);
+			for(i = 0; i < srcp_regs; i++) {
+				refmasks[i] |= 1 << swz;
+			}
+		}
+		else {
+			refmasks[src] |= 1 << swz;
+		}
+	}
+}
+
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+	unsigned int refmasks[3] = { 0, 0, 0 };
+
+	unsigned int arg;
+
+	for(arg = 0; arg < 3; ++arg) {
+		unsigned int chan;
+		for(chan = 0; chan < 3; ++chan) {
+			unsigned int swz_rgb =
+				GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+			unsigned int swz_alpha =
+				GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan);
+			pair_get_src_refmasks(refmasks, inst, swz_rgb,
+						inst->RGB.Arg[arg].Source);
+			pair_get_src_refmasks(refmasks, inst, swz_alpha,
+						inst->Alpha.Arg[arg].Source);
+		}
+	}
+
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+			cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+			   refmasks[src] & RC_MASK_XYZ);
+
+		if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+			cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
+	}
+}
+
+static void pair_sub_for_all_args(
+	struct rc_instruction * fullinst,
+	struct rc_pair_sub_instruction * sub,
+	rc_pair_read_arg_fn cb,
+	void * userdata)
+{
+	int i;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int src_type;
+
+		src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
+
+		if (src_type == RC_SOURCE_NONE)
+			continue;
+
+		if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) {
+			unsigned int presub_type;
+			unsigned int presub_src_count;
+			struct rc_pair_instruction_source * src_array;
+			unsigned int j;
+
+			if (src_type & RC_SOURCE_RGB) {
+				presub_type = fullinst->
+					U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
+				src_array = fullinst->U.P.RGB.Src;
+			} else {
+				presub_type = fullinst->
+					U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index;
+				src_array = fullinst->U.P.Alpha.Src;
+			}
+			presub_src_count
+				= rc_presubtract_src_reg_count(presub_type);
+			for(j = 0; j < presub_src_count; j++) {
+				cb(userdata, fullinst, &sub->Arg[i],
+								&src_array[j]);
+			}
+		} else {
+			struct rc_pair_instruction_source * src =
+				rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]);
+			if (src) {
+				cb(userdata, fullinst, &sub->Arg[i], src);
+			}
+		}
+	}
+}
+
+/* This function calls the callback function (cb) for each source used by
+ * the instruction.
+ * */
+void rc_for_all_reads_src(
+	struct rc_instruction * inst,
+	rc_read_src_fn cb,
+	void * userdata)
+{
+	const struct rc_opcode_info * opcode =
+					rc_get_opcode_info(inst->U.I.Opcode);
+
+	/* This function only works with normal instructions. */
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		assert(0);
+		return;
+	}
+
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+
+		if (inst->U.I.SrcReg[src].File == RC_FILE_NONE)
+			continue;
+
+		if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) {
+			unsigned int i;
+			unsigned int srcp_regs = rc_presubtract_src_reg_count(
+						inst->U.I.PreSub.Opcode);
+			for( i = 0; i < srcp_regs; i++) {
+				cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]);
+			}
+		} else {
+			cb(userdata, inst, &inst->U.I.SrcReg[src]);
+		}
+	}
+}
+
+/**
+ * This function calls the callback function (cb) for each arg of the RGB and
+ * alpha components.
+ */
+void rc_pair_for_all_reads_arg(
+	struct rc_instruction * inst,
+	rc_pair_read_arg_fn cb,
+	void * userdata)
+{
+	/* This function only works with pair instructions. */
+	if (inst->Type != RC_INSTRUCTION_PAIR) {
+		assert(0);
+		return;
+	}
+
+	pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata);
+	pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata);
+}
+
+/**
+ * Calls a callback function for all register reads.
+ *
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		struct read_write_mask_data cb_data;
+		cb_data.UserData = userdata;
+		cb_data.Cb = cb;
+
+		rc_for_all_reads_src(inst, reads_normal_callback, &cb_data);
+	} else {
+		reads_pair(inst, cb, userdata);
+	}
+}
+
+
+
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * inst = &fullinst->U.I;
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+	if (opcode->HasDstReg && inst->DstReg.WriteMask)
+		cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
+
+	if (inst->WriteALUResult)
+		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+
+	if (inst->RGB.WriteMask)
+		cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
+
+	if (inst->Alpha.WriteMask)
+		cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
+
+	if (inst->WriteALUResult)
+		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+/**
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		writes_normal(inst, cb, userdata);
+	} else {
+		writes_pair(inst, cb, userdata);
+	}
+}
+
+
+struct mask_to_chan_data {
+	void * UserData;
+	rc_read_write_chan_fn Fn;
+};
+
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct mask_to_chan_data * d = data;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		if (GET_BIT(mask, chan))
+			d->Fn(d->UserData, inst, file, index, chan);
+	}
+}
+
+/**
+ * Calls a callback function for all sourced register channels.
+ *
+ * This is conservative, i.e. channels may be called multiple times,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+	struct mask_to_chan_data d;
+	d.UserData = userdata;
+	d.Fn = cb;
+	rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d);
+}
+
+/**
+ * Calls a callback function for all written register channels.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+	struct mask_to_chan_data d;
+	d.UserData = userdata;
+	d.Fn = cb;
+	rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d);
+}
+
+static void remap_normal_instruction(struct rc_instruction * fullinst,
+		rc_remap_register_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * inst = &fullinst->U.I;
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+	unsigned int remapped_presub = 0;
+
+	if (opcode->HasDstReg) {
+		rc_register_file file = inst->DstReg.File;
+		unsigned int index = inst->DstReg.Index;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->DstReg.File = file;
+		inst->DstReg.Index = index;
+	}
+
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		rc_register_file file = inst->SrcReg[src].File;
+		unsigned int index = inst->SrcReg[src].Index;
+
+		if (file == RC_FILE_PRESUB) {
+			unsigned int i;
+			unsigned int srcp_srcs = rc_presubtract_src_reg_count(
+						inst->PreSub.Opcode);
+			/* Make sure we only remap presubtract sources once in
+			 * case more than one source register reads the
+			 * presubtract result. */
+			if (remapped_presub)
+				continue;
+
+			for(i = 0; i < srcp_srcs; i++) {
+				file = inst->PreSub.SrcReg[i].File;
+				index = inst->PreSub.SrcReg[i].Index;
+				cb(userdata, fullinst, &file, &index);
+				inst->PreSub.SrcReg[i].File = file;
+				inst->PreSub.SrcReg[i].Index = index;
+			}
+			remapped_presub = 1;
+		}
+		else {
+			cb(userdata, fullinst, &file, &index);
+
+			inst->SrcReg[src].File = file;
+			inst->SrcReg[src].Index = index;
+		}
+	}
+}
+
+static void remap_pair_instruction(struct rc_instruction * fullinst,
+		rc_remap_register_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+
+	if (inst->RGB.WriteMask) {
+		rc_register_file file = RC_FILE_TEMPORARY;
+		unsigned int index = inst->RGB.DestIndex;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->RGB.DestIndex = index;
+	}
+
+	if (inst->Alpha.WriteMask) {
+		rc_register_file file = RC_FILE_TEMPORARY;
+		unsigned int index = inst->Alpha.DestIndex;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->Alpha.DestIndex = index;
+	}
+
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used) {
+			rc_register_file file = inst->RGB.Src[src].File;
+			unsigned int index = inst->RGB.Src[src].Index;
+
+			cb(userdata, fullinst, &file, &index);
+
+			inst->RGB.Src[src].File = file;
+			inst->RGB.Src[src].Index = index;
+		}
+
+		if (inst->Alpha.Src[src].Used) {
+			rc_register_file file = inst->Alpha.Src[src].File;
+			unsigned int index = inst->Alpha.Src[src].Index;
+
+			cb(userdata, fullinst, &file, &index);
+
+			inst->Alpha.Src[src].File = file;
+			inst->Alpha.Src[src].Index = index;
+		}
+	}
+}
+
+
+/**
+ * Remap all register accesses according to the given function.
+ * That is, call the function \p cb for each referenced register (both read and written)
+ * and update the given instruction \p inst accordingly
+ * if it modifies its \ref pfile and \ref pindex contents.
+ */
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
+{
+	if (inst->Type == RC_INSTRUCTION_NORMAL)
+		remap_normal_instruction(inst, cb, userdata);
+	else
+		remap_pair_instruction(inst, cb, userdata);
+}
+
+struct branch_write_mask {
+	unsigned int IfWriteMask:4;
+	unsigned int ElseWriteMask:4;
+	unsigned int HasElse:1;
+};
+
+union get_readers_read_cb {
+	rc_read_src_fn I;
+	rc_pair_read_arg_fn P;
+};
+
+struct get_readers_callback_data {
+	struct radeon_compiler * C;
+	struct rc_reader_data * ReaderData;
+	rc_read_src_fn ReadNormalCB;
+	rc_pair_read_arg_fn ReadPairCB;
+	rc_read_write_mask_fn WriteCB;
+	rc_register_file DstFile;
+	unsigned int DstIndex;
+	unsigned int DstMask;
+	unsigned int AliveWriteMask;
+	/*  For convenience, this is indexed starting at 1 */
+	struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
+};
+
+static struct rc_reader * add_reader(
+	struct memory_pool * pool,
+	struct rc_reader_data * data,
+	struct rc_instruction * inst,
+	unsigned int mask)
+{
+	struct rc_reader * new;
+	memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
+				data->ReaderCount, data->ReadersReserved, 1);
+	new = &data->Readers[data->ReaderCount++];
+	new->Inst = inst;
+	new->WriteMask = mask;
+	return new;
+}
+
+static void add_reader_normal(
+	struct memory_pool * pool,
+	struct rc_reader_data * data,
+	struct rc_instruction * inst,
+	unsigned int mask,
+	struct rc_src_register * src)
+{
+	struct rc_reader * new = add_reader(pool, data, inst, mask);
+	new->U.I.Src = src;
+}
+
+
+static void add_reader_pair(
+	struct memory_pool * pool,
+	struct rc_reader_data * data,
+	struct rc_instruction * inst,
+	unsigned int mask,
+	struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src)
+{
+	struct rc_reader * new = add_reader(pool, data, inst, mask);
+	new->U.P.Src = src;
+	new->U.P.Arg = arg;
+}
+
+static unsigned int get_readers_read_callback(
+	struct get_readers_callback_data * cb_data,
+	unsigned int has_rel_addr,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int swizzle)
+{
+	unsigned int shared_mask, read_mask;
+
+	if (has_rel_addr) {
+		cb_data->ReaderData->Abort = 1;
+		return RC_MASK_NONE;
+	}
+
+	shared_mask = rc_src_reads_dst_mask(file, index, swizzle,
+		cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask);
+
+	if (shared_mask == RC_MASK_NONE)
+		return shared_mask;
+
+	/* If we make it this far, it means that this source reads from the
+	 * same register written to by d->ReaderData->Writer. */
+
+	read_mask = rc_swizzle_to_writemask(swizzle);
+	if (cb_data->ReaderData->AbortOnRead & read_mask) {
+		cb_data->ReaderData->Abort = 1;
+		return shared_mask;
+	}
+
+	if (cb_data->ReaderData->LoopDepth > 0) {
+		cb_data->ReaderData->AbortOnWrite |=
+				(read_mask & cb_data->AliveWriteMask);
+	}
+
+	/* XXX The behavior in this case should be configurable. */
+	if ((read_mask & cb_data->AliveWriteMask) != read_mask) {
+		cb_data->ReaderData->Abort = 1;
+		return shared_mask;
+	}
+
+	return shared_mask;
+}
+
+static void get_readers_pair_read_callback(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src)
+{
+	unsigned int shared_mask;
+	struct get_readers_callback_data * d = userdata;
+
+	shared_mask = get_readers_read_callback(d,
+				0 /*Pair Instructions don't use RelAddr*/,
+				src->File, src->Index, arg->Swizzle);
+
+	if (shared_mask == RC_MASK_NONE)
+		return;
+
+	if (d->ReadPairCB)
+		d->ReadPairCB(d->ReaderData, inst, arg, src);
+
+	if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
+		return;
+
+	add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src);
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine whether inst
+ * is a reader of userdata->ReaderData->Writer
+ */
+static void get_readers_normal_read_callback(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct get_readers_callback_data * d = userdata;
+	unsigned int shared_mask;
+
+	shared_mask = get_readers_read_callback(d,
+			src->RelAddr, src->File, src->Index, src->Swizzle);
+
+	if (shared_mask == RC_MASK_NONE)
+		return;
+	/* The callback function could potentially clear d->ReaderData->Abort,
+	 * so we need to call it before we return. */
+	if (d->ReadNormalCB)
+		d->ReadNormalCB(d->ReaderData, inst, src);
+
+	if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
+		return;
+
+	add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine when
+ * userdata->ReaderData->Writer is dead (i. e. All compontents of its
+ * destination register have been overwritten by other instructions).
+ */
+static void get_readers_write_callback(
+	void *userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct get_readers_callback_data * d = userdata;
+
+	if (index == d->DstIndex && file == d->DstFile) {
+		unsigned int shared_mask = mask & d->DstMask;
+		d->ReaderData->AbortOnRead &= ~shared_mask;
+		d->AliveWriteMask &= ~shared_mask;
+		if (d->ReaderData->AbortOnWrite & shared_mask) {
+			d->ReaderData->Abort = 1;
+		}
+	}
+
+	if(d->WriteCB)
+		d->WriteCB(d->ReaderData, inst, file, index, mask);
+}
+
+static void push_branch_mask(
+	struct get_readers_callback_data * d,
+	unsigned int * branch_depth)
+{
+	(*branch_depth)++;
+	if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+		d->ReaderData->Abort = 1;
+		return;
+	}
+	d->BranchMasks[*branch_depth].IfWriteMask =
+					d->AliveWriteMask;
+}
+
+static void pop_branch_mask(
+	struct get_readers_callback_data * d,
+	unsigned int * branch_depth)
+{
+	struct branch_write_mask * masks = &d->BranchMasks[*branch_depth];
+
+	if (masks->HasElse) {
+		/* Abort on read for components that were written in the IF
+		 * block. */
+		d->ReaderData->AbortOnRead |=
+				masks->IfWriteMask & ~masks->ElseWriteMask;
+		/* Abort on read for components that were written in the ELSE
+		 * block. */
+		d->ReaderData->AbortOnRead |=
+				masks->ElseWriteMask & ~d->AliveWriteMask;
+
+		d->AliveWriteMask = masks->IfWriteMask
+			^ ((masks->IfWriteMask ^ masks->ElseWriteMask)
+			& (masks->IfWriteMask ^ d->AliveWriteMask));
+	} else {
+		d->ReaderData->AbortOnRead |=
+				masks->IfWriteMask & ~d->AliveWriteMask;
+		d->AliveWriteMask = masks->IfWriteMask;
+
+	}
+	memset(masks, 0, sizeof(struct branch_write_mask));
+	(*branch_depth)--;
+}
+
+static void get_readers_for_single_write(
+	void * userdata,
+	struct rc_instruction * writer,
+	rc_register_file dst_file,
+	unsigned int dst_index,
+	unsigned int dst_mask)
+{
+	struct rc_instruction * tmp;
+	unsigned int branch_depth = 0;
+	struct rc_instruction * endloop = NULL;
+	unsigned int abort_on_read_at_endloop = 0;
+	struct get_readers_callback_data * d = userdata;
+
+	d->ReaderData->Writer = writer;
+	d->ReaderData->AbortOnRead = 0;
+	d->ReaderData->AbortOnWrite = 0;
+	d->ReaderData->LoopDepth = 0;
+	d->ReaderData->InElse = 0;
+	d->DstFile = dst_file;
+	d->DstIndex = dst_index;
+	d->DstMask = dst_mask;
+	d->AliveWriteMask = dst_mask;
+	memset(d->BranchMasks, 0, sizeof(d->BranchMasks));
+
+	if (!dst_mask)
+		return;
+
+	for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
+							tmp = tmp->Next){
+		rc_opcode opcode = rc_get_flow_control_inst(tmp);
+		switch(opcode) {
+		case RC_OPCODE_BGNLOOP:
+			d->ReaderData->LoopDepth++;
+			push_branch_mask(d, &branch_depth);
+			break;
+		case RC_OPCODE_ENDLOOP:
+			if (d->ReaderData->LoopDepth > 0) {
+				d->ReaderData->LoopDepth--;
+				if (d->ReaderData->LoopDepth == 0) {
+					d->ReaderData->AbortOnWrite = 0;
+				}
+				pop_branch_mask(d, &branch_depth);
+			} else {
+				/* Here we have reached an ENDLOOP without
+				 * seeing its BGNLOOP.  These means that
+				 * the writer was written inside of a loop,
+				 * so it could have readers that are above it
+				 * (i.e. they have a lower IP).  To find these
+				 * readers we jump to the BGNLOOP instruction
+				 * and check each instruction until we get
+				 * back to the writer.
+				 */
+				endloop = tmp;
+				tmp = rc_match_endloop(tmp);
+				if (!tmp) {
+					rc_error(d->C, "Failed to match endloop.\n");
+					d->ReaderData->Abort = 1;
+					return;
+				}
+				abort_on_read_at_endloop = d->ReaderData->AbortOnRead;
+				d->ReaderData->AbortOnRead |= d->AliveWriteMask;
+				continue;
+			}
+			break;
+		case RC_OPCODE_IF:
+			push_branch_mask(d, &branch_depth);
+			break;
+		case RC_OPCODE_ELSE:
+			if (branch_depth == 0) {
+				d->ReaderData->InElse = 1;
+			} else {
+				unsigned int temp_mask = d->AliveWriteMask;
+				d->AliveWriteMask =
+					d->BranchMasks[branch_depth].IfWriteMask;
+				d->BranchMasks[branch_depth].ElseWriteMask =
+								temp_mask;
+				d->BranchMasks[branch_depth].HasElse = 1;
+			}
+			break;
+		case RC_OPCODE_ENDIF:
+			if (branch_depth == 0) {
+				d->ReaderData->AbortOnRead = d->AliveWriteMask;
+				d->ReaderData->InElse = 0;
+			}
+			else {
+				pop_branch_mask(d, &branch_depth);
+			}
+			break;
+		default:
+			break;
+		}
+
+		if (d->ReaderData->InElse)
+			continue;
+
+		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+			rc_for_all_reads_src(tmp,
+				get_readers_normal_read_callback, d);
+		} else {
+			rc_pair_for_all_reads_arg(tmp,
+				get_readers_pair_read_callback, d);
+		}
+
+		/* This can happen when we jump from an ENDLOOP to BGNLOOP */
+		if (tmp == writer) {
+			tmp = endloop;
+			endloop = NULL;
+			d->ReaderData->AbortOnRead = abort_on_read_at_endloop;
+			continue;
+		}
+		rc_for_all_writes_mask(tmp, get_readers_write_callback, d);
+
+		if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
+			return;
+
+		if (branch_depth == 0 && !d->AliveWriteMask)
+			return;
+	}
+}
+
+static void init_get_readers_callback_data(
+	struct get_readers_callback_data * d,
+	struct rc_reader_data * reader_data,
+	struct radeon_compiler * c,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb)
+{
+	reader_data->Abort = 0;
+	reader_data->ReaderCount = 0;
+	reader_data->ReadersReserved = 0;
+	reader_data->Readers = NULL;
+
+	d->C = c;
+	d->ReaderData = reader_data;
+	d->ReadNormalCB = read_normal_cb;
+	d->ReadPairCB = read_pair_cb;
+	d->WriteCB = write_cb;
+}
+
+/**
+ * This function will create a list of readers via the rc_reader_data struct.
+ * This function will abort (set the flag data->Abort) and return if it
+ * encounters an instruction that reads from @param writer and also a different
+ * instruction.  Here are some examples:
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0].xy, TEMP[1].xy
+ * 1 MOV TEMP[0].zw, TEMP[2].xy
+ * 2 MOV TEMP[3], TEMP[0]
+ * The Abort flag will be set on instruction 2, because it reads values written
+ * by instructions 0 and 1.
+ *
+ * writer = instruction 1;
+ * 0 IF TEMP[0].x
+ * 1 MOV TEMP[1], TEMP[2]
+ * 2 ELSE
+ * 3 MOV TEMP[1], TEMP[2]
+ * 4 ENDIF
+ * 5 MOV TEMP[3], TEMP[1]
+ * The Abort flag will be set on instruction 5, because it could read from the
+ * value written by either instruction 1 or 3, depending on the jump decision
+ * made at instruction 0.
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0], TEMP[1]
+ * 2 BGNLOOP
+ * 3 ADD TEMP[0], TEMP[0], none.1
+ * 4 ENDLOOP
+ * The Abort flag will be set on instruction 3, because in the first iteration
+ * of the loop it reads the value written by instruction 0 and in all other
+ * iterations it reads the value written by instruction 3.
+ *
+ * @param read_cb This function will be called for for every instruction that
+ * has been determined to be a reader of writer.
+ * @param write_cb This function will be called for every instruction after
+ * writer.
+ */
+void rc_get_readers(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb)
+{
+	struct get_readers_callback_data d;
+
+	init_get_readers_callback_data(&d, data, c, read_normal_cb,
+						read_pair_cb, write_cb);
+
+	rc_for_all_writes_mask(writer, get_readers_for_single_write, &d);
+}
+
+void rc_get_readers_sub(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_pair_sub_instruction * sub_writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb)
+{
+	struct get_readers_callback_data d;
+
+	init_get_readers_callback_data(&d, data, c, read_normal_cb,
+						read_pair_cb, write_cb);
+
+	if (sub_writer->WriteMask) {
+		get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY,
+			sub_writer->DestIndex, sub_writer->WriteMask);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h
new file mode 100644
index 0000000..d8a6272
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+struct rc_src_register;
+struct rc_pair_instruction_arg;
+struct rc_pair_instruction_source;
+struct rc_pair_sub_instruction;
+struct rc_compiler;
+
+
+/**
+ * Help analyze and modify the register accesses of instructions.
+ */
+/*@{*/
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file file, unsigned int index, unsigned int chan);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+
+typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst,
+			struct rc_src_register * src);
+void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
+			void * userdata);
+
+typedef void (*rc_pair_read_arg_fn)(void * userdata,
+	struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src);
+void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
+					rc_pair_read_arg_fn cb, void * userdata);
+
+typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file * pfile, unsigned int * pindex);
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
+/*@}*/
+
+struct rc_reader {
+	struct rc_instruction * Inst;
+	unsigned int WriteMask;
+	union {
+		struct {
+			struct rc_src_register * Src;
+		} I;
+		struct {
+			struct rc_pair_instruction_arg * Arg;
+			struct rc_pair_instruction_source * Src;
+		} P;
+	} U;
+};
+
+struct rc_reader_data {
+	unsigned int Abort;
+	unsigned int AbortOnRead;
+	unsigned int AbortOnWrite;
+	unsigned int LoopDepth;
+	unsigned int InElse;
+	struct rc_instruction * Writer;
+
+	unsigned int ReaderCount;
+	unsigned int ReadersReserved;
+	struct rc_reader * Readers;
+
+	/* If this flag is enabled, rc_get_readers will exit as soon possbile
+	 * after the Abort flag is set.*/
+	unsigned int ExitOnAbort;
+	void * CbData;
+};
+
+void rc_get_readers(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb);
+
+void rc_get_readers_sub(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_pair_sub_instruction * sub_writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb);
+/**
+ * Compiler passes based on dataflow analysis.
+ */
+/*@{*/
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+			void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_deadcode(struct radeon_compiler * c, void *user);
+void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
+/*@}*/
+
+void rc_optimize(struct radeon_compiler * c, void *user);
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
new file mode 100644
index 0000000..678e147
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct updatemask_state {
+	unsigned char Output[RC_REGISTER_MAX_INDEX];
+	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
+	unsigned char Address;
+	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
+};
+
+struct instruction_state {
+	unsigned char WriteMask:4;
+	unsigned char WriteALUResult:1;
+	unsigned char SrcReg[3];
+};
+
+struct loopinfo {
+	struct updatemask_state * Breaks;
+	unsigned int BreakCount;
+	unsigned int BreaksReserved;
+};
+
+struct branchinfo {
+	unsigned int HaveElse:1;
+
+	struct updatemask_state StoreEndif;
+	struct updatemask_state StoreElse;
+};
+
+struct deadcode_state {
+	struct radeon_compiler * C;
+	struct instruction_state * Instructions;
+
+	struct updatemask_state R;
+
+	struct branchinfo * BranchStack;
+	unsigned int BranchStackSize;
+	unsigned int BranchStackReserved;
+
+	struct loopinfo * LoopStack;
+	unsigned int LoopStackSize;
+	unsigned int LoopStackReserved;
+};
+
+
+static void or_updatemasks(
+	struct updatemask_state * dst,
+	struct updatemask_state * a,
+	struct updatemask_state * b)
+{
+	for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
+		dst->Output[i] = a->Output[i] | b->Output[i];
+		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
+	}
+
+	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
+		dst->Special[i] = a->Special[i] | b->Special[i];
+
+	dst->Address = a->Address | b->Address;
+}
+
+static void push_break(struct deadcode_state *s)
+{
+	struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
+	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
+		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
+
+	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
+}
+
+static void push_loop(struct deadcode_state * s)
+{
+	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
+			s->LoopStackSize, s->LoopStackReserved, 1);
+	memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
+}
+
+static void push_branch(struct deadcode_state * s)
+{
+	struct branchinfo * branch;
+
+	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
+			s->BranchStackSize, s->BranchStackReserved, 1);
+
+	branch = &s->BranchStack[s->BranchStackSize++];
+	branch->HaveElse = 0;
+	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
+}
+
+static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
+{
+	if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
+		if (index >= RC_REGISTER_MAX_INDEX) {
+			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
+			return 0;
+		}
+
+		if (file == RC_FILE_OUTPUT)
+			return &s->R.Output[index];
+		else
+			return &s->R.Temporary[index];
+	} else if (file == RC_FILE_ADDRESS) {
+		return &s->R.Address;
+	} else if (file == RC_FILE_SPECIAL) {
+		if (index >= RC_NUM_SPECIAL_REGISTERS) {
+			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+			return 0;
+		}
+
+		return &s->R.Special[index];
+	}
+
+	return 0;
+}
+
+static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
+{
+	unsigned char * pused = get_used_ptr(s, file, index);
+	if (pused)
+		*pused |= mask;
+}
+
+static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	struct instruction_state * insts = &s->Instructions[inst->IP];
+	unsigned int usedmask = 0;
+	unsigned int srcmasks[3];
+
+	if (opcode->HasDstReg) {
+		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
+		if (pused) {
+			usedmask = *pused & inst->U.I.DstReg.WriteMask;
+			*pused &= ~usedmask;
+		}
+	}
+
+	insts->WriteMask |= usedmask;
+
+	if (inst->U.I.WriteALUResult) {
+		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+		if (pused && *pused) {
+			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+				usedmask |= RC_MASK_X;
+			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+				usedmask |= RC_MASK_W;
+
+			*pused = 0;
+			insts->WriteALUResult = 1;
+		}
+	}
+
+	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
+
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		unsigned int refmask = 0;
+		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
+		insts->SrcReg[src] |= newsrcmask;
+
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			if (GET_BIT(newsrcmask, chan))
+				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+		}
+
+		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+		refmask &= RC_MASK_XYZW;
+
+		if (!refmask)
+			continue;
+
+		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
+
+		if (inst->U.I.SrcReg[src].RelAddr)
+			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
+	}
+}
+
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+	struct deadcode_state * s = data;
+
+	mark_used(s, RC_FILE_OUTPUT, index, mask);
+}
+
+void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
+{
+	struct deadcode_state s;
+	unsigned int nr_instructions;
+	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
+	unsigned int ip;
+
+	memset(&s, 0, sizeof(s));
+	s.C = c;
+
+	nr_instructions = rc_recompute_ips(c);
+	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
+	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
+
+	dce(c, &s, &mark_output_use);
+
+	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Prev) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		switch(opcode->Opcode){
+		/* Mark all sources in the loop body as used before doing
+		 * normal deadcode analysis.  This is probably not optimal.
+		 */
+		case RC_OPCODE_ENDLOOP:
+		{
+			int endloops = 1;
+			struct rc_instruction *ptr;
+			for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
+				opcode = rc_get_opcode_info(ptr->U.I.Opcode);
+				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+					endloops--;
+					continue;
+				}
+				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
+					endloops++;
+					continue;
+				}
+				if(opcode->HasDstReg){
+					int src = 0;
+					unsigned int srcmasks[3];
+					rc_compute_sources_for_writemask(ptr,
+						ptr->U.I.DstReg.WriteMask, srcmasks);
+					for(src=0; src < opcode->NumSrcRegs; src++){
+						mark_used(&s,
+							ptr->U.I.SrcReg[src].File,
+							ptr->U.I.SrcReg[src].Index,
+							srcmasks[src]);
+					}
+				}
+			}
+			push_loop(&s);
+			break;
+		}
+		case RC_OPCODE_BRK:
+			push_break(&s);
+			break;
+		case RC_OPCODE_BGNLOOP:
+		{
+			unsigned int i;
+			struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
+			for(i = 0; i < loop->BreakCount; i++) {
+				or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
+			}
+			break;
+		}
+		case RC_OPCODE_CONT:
+			break;
+		case RC_OPCODE_ENDIF:
+			push_branch(&s);
+			break;
+		default:
+			if (opcode->IsFlowControl && s.BranchStackSize) {
+				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+				if (opcode->Opcode == RC_OPCODE_IF) {
+					or_updatemasks(&s.R,
+							&s.R,
+							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+					s.BranchStackSize--;
+				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
+					if (branch->HaveElse) {
+						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
+					} else {
+						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+						branch->HaveElse = 1;
+					}
+				} else {
+					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+				}
+			}
+		}
+
+		update_instruction(&s, inst);
+	}
+
+	ip = 0;
+	for(struct rc_instruction * inst = c->Program.Instructions.Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next, ++ip) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		int dead = 1;
+		unsigned int srcmasks[3];
+		unsigned int usemask;
+
+		if (!opcode->HasDstReg) {
+			dead = 0;
+		} else {
+			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+			if (s.Instructions[ip].WriteMask)
+				dead = 0;
+
+			if (s.Instructions[ip].WriteALUResult)
+				dead = 0;
+			else
+				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
+		}
+
+		if (dead) {
+			struct rc_instruction * todelete = inst;
+			inst = inst->Prev;
+			rc_remove_instruction(todelete);
+			continue;
+		}
+
+		usemask = s.Instructions[ip].WriteMask;
+
+		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+			usemask |= RC_MASK_X;
+		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+			usemask |= RC_MASK_W;
+
+		rc_compute_sources_for_writemask(inst, usemask, srcmasks);
+
+		for(unsigned int src = 0; src < 3; ++src) {
+			for(unsigned int chan = 0; chan < 4; ++chan) {
+				if (!GET_BIT(srcmasks[src], chan))
+					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+			}
+		}
+	}
+
+	rc_calculate_inputs_outputs(c);
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 0000000..133a9f7
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static void rewrite_source(struct radeon_compiler * c,
+		struct rc_instruction * inst, unsigned src)
+{
+	struct rc_swizzle_split split;
+	unsigned int tempreg = rc_find_free_temporary(c);
+	unsigned int usemask;
+
+	usemask = 0;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+			usemask |= 1 << chan;
+	}
+
+	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
+
+	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+		unsigned int phase_refmask;
+		unsigned int masked_negate;
+
+		mov->U.I.Opcode = RC_OPCODE_MOV;
+		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		mov->U.I.DstReg.Index = tempreg;
+		mov->U.I.DstReg.WriteMask = split.Phase[phase];
+		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+		mov->U.I.PreSub = inst->U.I.PreSub;
+
+		phase_refmask = 0;
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			if (!GET_BIT(split.Phase[phase], chan))
+				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+			else
+				phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan);
+		}
+
+		phase_refmask &= RC_MASK_XYZW;
+
+		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
+		if (masked_negate == 0)
+			mov->U.I.SrcReg[0].Negate = 0;
+		else if (masked_negate == split.Phase[phase])
+			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+	}
+
+	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[src].Index = tempreg;
+	inst->U.I.SrcReg[src].Swizzle = 0;
+	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
+	inst->U.I.SrcReg[src].Abs = 0;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+	}
+}
+
+void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
+{
+	struct rc_instruction * inst;
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned int src;
+
+		for(src = 0; src < opcode->NumSrcRegs; ++src) {
+			if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+				rewrite_source(c, inst, src);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c
new file mode 100644
index 0000000..7bede34
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_emulate_branches.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct proxy_info {
+	unsigned int Proxied:1;
+	unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct register_proxies {
+	struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
+};
+
+struct branch_info {
+	struct rc_instruction * If;
+	struct rc_instruction * Else;
+};
+
+struct emulate_branch_state {
+	struct radeon_compiler * C;
+
+	struct branch_info * Branches;
+	unsigned int BranchCount;
+	unsigned int BranchReserved;
+};
+
+
+static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	struct branch_info * branch;
+	struct rc_instruction * inst_mov;
+
+	memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+			s->Branches, s->BranchCount, s->BranchReserved, 1);
+
+	DBG("%s\n", __FUNCTION__);
+
+	branch = &s->Branches[s->BranchCount++];
+	memset(branch, 0, sizeof(struct branch_info));
+	branch->If = inst;
+
+	/* Make a safety copy of the decision register, because we will need
+	 * it at ENDIF time and it might be overwritten in both branches. */
+	inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
+	inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
+	inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
+	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+	inst->U.I.SrcReg[0].Swizzle = 0;
+	inst->U.I.SrcReg[0].Abs = 0;
+	inst->U.I.SrcReg[0].Negate = 0;
+}
+
+static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	struct branch_info * branch;
+
+	if (!s->BranchCount) {
+		rc_error(s->C, "Encountered ELSE outside of branches");
+		return;
+	}
+
+	DBG("%s\n", __FUNCTION__);
+
+	branch = &s->Branches[s->BranchCount - 1];
+	branch->Else = inst;
+}
+
+
+struct state_and_proxies {
+	struct emulate_branch_state * S;
+	struct register_proxies * Proxies;
+};
+
+static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
+			rc_register_file file, unsigned int index)
+{
+	if (file == RC_FILE_TEMPORARY) {
+		return &sap->Proxies->Temporary[index];
+	} else {
+		return 0;
+	}
+}
+
+static void scan_write(void * userdata, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int comp)
+{
+	struct state_and_proxies * sap = userdata;
+	struct proxy_info * proxy = get_proxy_info(sap, file, index);
+
+	if (proxy && !proxy->Proxied) {
+		proxy->Proxied = 1;
+		proxy->Index = rc_find_free_temporary(sap->S->C);
+	}
+}
+
+static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
+		rc_register_file * pfile, unsigned int * pindex)
+{
+	struct state_and_proxies * sap = userdata;
+	struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex);
+
+	if (proxy && proxy->Proxied) {
+		*pfile = RC_FILE_TEMPORARY;
+		*pindex = proxy->Index;
+	}
+}
+
+/**
+ * Redirect all writes in the instruction range [begin, end) to proxy
+ * temporary registers.
+ */
+static void allocate_and_insert_proxies(struct emulate_branch_state * s,
+		struct register_proxies * proxies,
+		struct rc_instruction * begin,
+		struct rc_instruction * end)
+{
+	struct state_and_proxies sap;
+
+	sap.S = s;
+	sap.Proxies = proxies;
+
+	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+		rc_for_all_writes_mask(inst, scan_write, &sap);
+		rc_remap_registers(inst, remap_proxy_function, &sap);
+	}
+
+	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+		if (proxies->Temporary[index].Proxied) {
+			struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
+			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+			inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.SrcReg[0].Index = index;
+		}
+	}
+}
+
+
+static void inject_cmp(struct emulate_branch_state * s,
+		struct rc_instruction * inst_if,
+		struct rc_instruction * inst_endif,
+		rc_register_file file, unsigned int index,
+		struct proxy_info ifproxy,
+		struct proxy_info elseproxy)
+{
+	struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif);
+	inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+	inst_cmp->U.I.DstReg.File = file;
+	inst_cmp->U.I.DstReg.Index = index;
+	inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+	inst_cmp->U.I.SrcReg[0].Abs = 1;
+	inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+	inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+	inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index;
+	inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+	inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index;
+}
+
+static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	struct branch_info * branch;
+	struct register_proxies IfProxies;
+	struct register_proxies ElseProxies;
+
+	if (!s->BranchCount) {
+		rc_error(s->C, "Encountered ENDIF outside of branches");
+		return;
+	}
+
+	DBG("%s\n", __FUNCTION__);
+
+	branch = &s->Branches[s->BranchCount - 1];
+
+	memset(&IfProxies, 0, sizeof(IfProxies));
+	memset(&ElseProxies, 0, sizeof(ElseProxies));
+
+	allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
+
+	if (branch->Else)
+		allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
+
+	/* Insert the CMP instructions at the end. */
+	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+		if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
+			inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
+					IfProxies.Temporary[index], ElseProxies.Temporary[index]);
+		}
+	}
+
+	/* Remove all traces of the branch instructions */
+	rc_remove_instruction(branch->If);
+	if (branch->Else)
+		rc_remove_instruction(branch->Else);
+	rc_remove_instruction(inst);
+
+	s->BranchCount--;
+
+	if (VERBOSE) {
+		DBG("Program after ENDIF handling:\n");
+		rc_print_program(&s->C->Program);
+	}
+}
+
+
+struct remap_output_data {
+	unsigned int Output:RC_REGISTER_INDEX_BITS;
+	unsigned int Temporary:RC_REGISTER_INDEX_BITS;
+};
+
+static void remap_output_function(void * userdata, struct rc_instruction * inst,
+		rc_register_file * pfile, unsigned int * pindex)
+{
+	struct remap_output_data * data = userdata;
+
+	if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) {
+		*pfile = RC_FILE_TEMPORARY;
+		*pindex = data->Temporary;
+	}
+}
+
+
+/**
+ * Output registers cannot be read from and so cannot be dealt with like
+ * temporary registers.
+ *
+ * We do the simplest thing: If an output registers is written within
+ * a branch, then *all* writes to this register are proxied to a
+ * temporary register, and a final MOV is appended to the end of
+ * the program.
+ */
+static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode;
+
+	if (!s->BranchCount)
+		return;
+
+	opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+	if (!opcode->HasDstReg)
+		return;
+
+	if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
+		struct remap_output_data remap;
+		struct rc_instruction * inst_mov;
+
+		remap.Output = inst->U.I.DstReg.Index;
+		remap.Temporary = rc_find_free_temporary(s->C);
+
+		for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+		    inst != &s->C->Program.Instructions;
+		    inst = inst->Next) {
+			rc_remap_registers(inst, &remap_output_function, &remap);
+		}
+
+		inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
+		inst_mov->U.I.DstReg.Index = remap.Output;
+		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.SrcReg[0].Index = remap.Temporary;
+	}
+}
+
+/**
+ * Remove branch instructions; instead, execute both branches
+ * on different register sets and choose between their results
+ * using CMP instructions in place of the original ENDIF.
+ */
+void rc_emulate_branches(struct radeon_compiler *c, void *user)
+{
+	struct emulate_branch_state s;
+	struct rc_instruction * ptr;
+
+	memset(&s, 0, sizeof(s));
+	s.C = c;
+
+	/* Untypical loop because we may remove the current instruction */
+	ptr = c->Program.Instructions.Next;
+	while(ptr != &c->Program.Instructions) {
+		struct rc_instruction * inst = ptr;
+		ptr = ptr->Next;
+
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			switch(inst->U.I.Opcode) {
+			case RC_OPCODE_IF:
+				handle_if(&s, inst);
+				break;
+			case RC_OPCODE_ELSE:
+				handle_else(&s, inst);
+				break;
+			case RC_OPCODE_ENDIF:
+				handle_endif(&s, inst);
+				break;
+			default:
+				fix_output_writes(&s, inst);
+				break;
+			}
+		} else {
+			rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h
new file mode 100644
index 0000000..818ab84
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_EMULATE_BRANCHES_H
+#define RADEON_EMULATE_BRANCHES_H
+
+struct radeon_compiler;
+
+void rc_emulate_branches(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_EMULATE_BRANCHES_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 0000000..205eecd
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,522 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct const_value {
+	struct radeon_compiler * C;
+	struct rc_src_register * Src;
+	float Value;
+	int HasValue;
+};
+
+struct count_inst {
+	struct radeon_compiler * C;
+	int Index;
+	rc_swizzle Swz;
+	float Amount;
+	int Unknown;
+};
+
+static float get_constant_value(struct radeon_compiler * c,
+						struct rc_src_register * src,
+						int chan)
+{
+	float base = 1.0f;
+	int swz = GET_SWZ(src->Swizzle, chan);
+	if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
+		rc_error(c, "get_constant_value: Can't find a value.\n");
+		return 0.0f;
+	}
+	if(GET_BIT(src->Negate, chan)){
+		base = -1.0f;
+	}
+	return base *
+		c->Program.Constants.Constants[src->Index].u.Immediate[swz];
+}
+
+static int src_reg_is_immediate(struct rc_src_register * src,
+						struct radeon_compiler * c)
+{
+	return src->File == RC_FILE_CONSTANT &&
+	c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
+}
+
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+			struct loop_info * loop)
+{
+	unsigned int total_i = rc_recompute_ips(c);
+	unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
+	/* +1 because the program already has one iteration of the loop. */
+	return 1 + ((c->max_alu_insts - total_i) / loop_i);
+}
+
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+						unsigned int iterations)
+{
+	unsigned int i;
+	struct rc_instruction * ptr;
+	struct rc_instruction * first = loop->BeginLoop->Next;
+	struct rc_instruction * last = loop->EndLoop->Prev;
+	struct rc_instruction * append_to = last;
+	rc_remove_instruction(loop->BeginLoop);
+	rc_remove_instruction(loop->EndLoop);
+	for( i = 1; i < iterations; i++){
+		for(ptr = first; ptr != last->Next; ptr = ptr->Next){
+			struct rc_instruction *new = rc_alloc_instruction(c);
+			memcpy(new, ptr, sizeof(struct rc_instruction));
+			rc_insert_instruction(append_to, new);
+			append_to = new;
+		}
+	}
+}
+
+
+static void update_const_value(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct const_value * value = data;
+	if(value->Src->File != file ||
+	   value->Src->Index != index ||
+	   !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
+		return;
+	}
+	switch(inst->U.I.Opcode){
+	case RC_OPCODE_MOV:
+		if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
+			return;
+		}
+		value->HasValue = 1;
+		value->Value =
+			get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
+		break;
+	}
+}
+
+static void get_incr_amount(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct count_inst * count_inst = data;
+	int amnt_src_index;
+	const struct rc_opcode_info * opcode;
+	float amount;
+
+	if(file != RC_FILE_TEMPORARY ||
+	   count_inst->Index != index ||
+	   (1 << GET_SWZ(count_inst->Swz,0) != mask)){
+		return;
+	}
+	/* Find the index of the counter register. */
+	opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	if(opcode->NumSrcRegs != 2){
+		count_inst->Unknown = 1;
+		return;
+	}
+	if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
+	   inst->U.I.SrcReg[0].Index == count_inst->Index &&
+	   inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
+		amnt_src_index = 1;
+	} else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
+		   inst->U.I.SrcReg[1].Index == count_inst->Index &&
+		   inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
+		amnt_src_index = 0;
+	}
+	else{
+		count_inst->Unknown = 1;
+		return;
+	}
+	if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
+							count_inst->C)){
+		amount = get_constant_value(count_inst->C,
+				&inst->U.I.SrcReg[amnt_src_index], 0);
+	}
+	else{
+		count_inst->Unknown = 1 ;
+		return;
+	}
+	switch(inst->U.I.Opcode){
+	case RC_OPCODE_ADD:
+		count_inst->Amount += amount;
+		break;
+	case RC_OPCODE_SUB:
+		if(amnt_src_index == 0){
+			count_inst->Unknown = 0;
+			return;
+		}
+		count_inst->Amount -= amount;
+		break;
+	default:
+		count_inst->Unknown = 1;
+		return;
+	}
+}
+
+/**
+ * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless
+ * of how many iterations they have.
+ */
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
+{
+	int end_loops;
+	int iterations;
+	struct count_inst count_inst;
+	float limit_value;
+	struct rc_src_register * counter;
+	struct rc_src_register * limit;
+	struct const_value counter_value;
+	struct rc_instruction * inst;
+
+	/* Find the counter and the upper limit */
+
+	if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
+		limit = &loop->Cond->U.I.SrcReg[0];
+		counter = &loop->Cond->U.I.SrcReg[1];
+	}
+	else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
+		limit = &loop->Cond->U.I.SrcReg[1];
+		counter = &loop->Cond->U.I.SrcReg[0];
+	}
+	else{
+		DBG("No constant limit.\n");
+		return 0;
+	}
+
+	/* Find the initial value of the counter */
+	counter_value.Src = counter;
+	counter_value.Value = 0.0f;
+	counter_value.HasValue = 0;
+	counter_value.C = c;
+	for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
+							inst = inst->Next){
+		rc_for_all_writes_mask(inst, update_const_value, &counter_value);
+	}
+	if(!counter_value.HasValue){
+		DBG("Initial counter value cannot be determined.\n");
+		return 0;
+	}
+	DBG("Initial counter value is %f\n", counter_value.Value);
+	/* Determine how the counter is modified each loop */
+	count_inst.C = c;
+	count_inst.Index = counter->Index;
+	count_inst.Swz = counter->Swizzle;
+	count_inst.Amount = 0.0f;
+	count_inst.Unknown = 0;
+	end_loops = 1;
+	for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
+		switch(inst->U.I.Opcode){
+		/* XXX In the future we might want to try to unroll nested
+		 * loops here.*/
+		case RC_OPCODE_BGNLOOP:
+			end_loops++;
+			break;
+		case RC_OPCODE_ENDLOOP:
+			loop->EndLoop = inst;
+			end_loops--;
+			break;
+		case RC_OPCODE_BRK:
+			/* Don't unroll loops if it has a BRK instruction
+			 * other one used when testing the main conditional
+			 * of the loop. */
+
+			/* Make sure we haven't entered a nested loops. */
+			if(inst != loop->Brk && end_loops == 1) {
+				return 0;
+			}
+			break;
+		/* XXX Check if the counter is modified within an if statement.
+		 */
+		case RC_OPCODE_IF:
+			break;
+		default:
+			rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
+			if(count_inst.Unknown){
+				return 0;
+			}
+			break;
+		}
+	}
+	/* Infinite loop */
+	if(count_inst.Amount == 0.0f){
+		return 0;
+	}
+	DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
+	/* Calculate the number of iterations of this loop.  Keeping this
+	 * simple, since we only support increment and decrement loops.
+	 */
+	limit_value = get_constant_value(c, limit, 0);
+	DBG("Limit is %f.\n", limit_value);
+	/* The iteration calculations are opposite of what you would expect.
+	 * In a normal loop, if the condition is met, then loop continues, but
+	 * with our loops, if the condition is met, the is exited. */
+	switch(loop->Cond->U.I.Opcode){
+	case RC_OPCODE_SGE:
+	case RC_OPCODE_SLE:
+		iterations = (int) ceilf((limit_value - counter_value.Value) /
+							count_inst.Amount);
+		break;
+
+	case RC_OPCODE_SGT:
+	case RC_OPCODE_SLT:
+		iterations = (int) floorf((limit_value - counter_value.Value) /
+							count_inst.Amount) + 1;
+		break;
+	default:
+		return 0;
+	}
+
+	if (c->max_alu_insts > 0
+		&& iterations > loop_max_possible_iterations(c, loop)) {
+		return 0;
+	}
+
+	DBG("Loop will have %d iterations.\n", iterations);
+
+	/* Prepare loop for unrolling */
+	rc_remove_instruction(loop->Cond);
+	rc_remove_instruction(loop->If);
+	rc_remove_instruction(loop->Brk);
+	rc_remove_instruction(loop->EndIf);
+
+	unroll_loop(c, loop, iterations);
+	loop->EndLoop = NULL;
+	return 1;
+}
+
+/**
+ * @param c
+ * @param loop
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 if all of the members of loop where set.
+ * @return 0 if there was an error and some members of loop are still NULL.
+ */
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
+						struct rc_instruction * inst)
+{
+	struct rc_instruction * ptr;
+
+	if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
+		rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
+		return 0;
+	}
+
+	memset(loop, 0, sizeof(struct loop_info));
+
+	loop->BeginLoop = inst;
+
+	for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
+
+		if (ptr == &c->Program.Instructions) {
+			rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+								__FUNCTION__);
+			return 0;
+		}
+
+		switch(ptr->U.I.Opcode){
+		case RC_OPCODE_BGNLOOP:
+		{
+			/* Nested loop, skip ahead to the end. */
+			unsigned int loop_depth = 1;
+			for(ptr = ptr->Next; ptr != &c->Program.Instructions;
+							ptr = ptr->Next){
+				if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+					loop_depth++;
+				} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+					if (!--loop_depth) {
+						break;
+					}
+				}
+			}
+			if (ptr == &c->Program.Instructions) {
+				rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+								__FUNCTION__);
+					return 0;
+			}
+			break;
+		}
+		case RC_OPCODE_BRK:
+			if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
+					|| ptr->Prev->U.I.Opcode != RC_OPCODE_IF
+					|| loop->Brk){
+				continue;
+			}
+			loop->Brk = ptr;
+			loop->If = ptr->Prev;
+			loop->EndIf = ptr->Next;
+			switch(loop->If->Prev->U.I.Opcode){
+			case RC_OPCODE_SLT:
+			case RC_OPCODE_SGE:
+			case RC_OPCODE_SGT:
+			case RC_OPCODE_SLE:
+			case RC_OPCODE_SEQ:
+			case RC_OPCODE_SNE:
+				break;
+			default:
+				return 0;
+			}
+			loop->Cond = loop->If->Prev;
+			break;
+
+		case RC_OPCODE_ENDLOOP:
+			loop->EndLoop = ptr;
+			break;
+		}
+	}
+
+	if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+					&& loop->Cond && loop->EndLoop) {
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement.  Here is an outline of the conversion process:
+ * BGNLOOP;                         	-> BGNLOOP;
+ * <Additional conditional code>	-> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2];	-> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0];                      	-> IF temp[0];
+ * BRK;                             	->
+ * ENDIF;                           	-> <Loop Body>
+ * <Loop Body>                      	-> ENDIF;
+ * ENDLOOP;                         	-> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 for success, 0 for failure
+ */
+static int transform_loop(struct emulate_loop_state * s,
+						struct rc_instruction * inst)
+{
+	struct loop_info * loop;
+
+	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+			s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+	loop = &s->Loops[s->LoopCount++];
+
+	if (!build_loop_info(s->C, loop, inst)) {
+		rc_error(s->C, "Failed to build loop info\n");
+		return 0;
+	}
+
+	if(try_unroll_loop(s->C, loop)){
+		return 1;
+	}
+
+	/* Reverse the conditional instruction */
+	switch(loop->Cond->U.I.Opcode){
+	case RC_OPCODE_SGE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
+		break;
+	case RC_OPCODE_SLT:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
+		break;
+	case RC_OPCODE_SLE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
+		break;
+	case RC_OPCODE_SGT:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
+		break;
+	case RC_OPCODE_SEQ:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
+		break;
+	case RC_OPCODE_SNE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
+		break;
+	default:
+		rc_error(s->C, "loop->Cond is not a conditional.\n");
+		return 0;
+	}
+
+	/* Prepare the loop to be emulated */
+	rc_remove_instruction(loop->Brk);
+	rc_remove_instruction(loop->EndIf);
+	rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
+	return 1;
+}
+
+void rc_transform_loops(struct radeon_compiler *c, void *user)
+{
+	struct emulate_loop_state * s = &c->loop_state;
+	struct rc_instruction * ptr;
+
+	memset(s, 0, sizeof(struct emulate_loop_state));
+	s->C = c;
+	for(ptr = s->C->Program.Instructions.Next;
+			ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
+		if(ptr->Type == RC_INSTRUCTION_NORMAL &&
+					ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+			if (!transform_loop(s, ptr))
+				return;
+		}
+	}
+}
+
+void rc_unroll_loops(struct radeon_compiler *c, void *user)
+{
+	struct rc_instruction * inst;
+	struct loop_info loop;
+
+	for(inst = c->Program.Instructions.Next;
+			inst != &c->Program.Instructions; inst = inst->Next) {
+
+		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+			if (build_loop_info(c, &loop, inst)) {
+				try_unroll_loop(c, &loop);
+			}
+		}
+	}
+}
+
+void rc_emulate_loops(struct radeon_compiler *c, void *user)
+{
+	struct emulate_loop_state * s = &c->loop_state;
+	int i;
+	/* Iterate backwards of the list of loops so that loops that nested
+	 * loops are unrolled first.
+	 */
+	for( i = s->LoopCount - 1; i >= 0; i-- ){
+		unsigned int iterations;
+
+		if(!s->Loops[i].EndLoop){
+			continue;
+		}
+		iterations = loop_max_possible_iterations(s->C, &s->Loops[i]);
+		unroll_loop(s->C, &s->Loops[i], iterations);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 0000000..cd800c0
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,32 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+struct loop_info {
+	struct rc_instruction * BeginLoop;
+	struct rc_instruction * Cond;
+	struct rc_instruction * If;
+	struct rc_instruction * Brk;
+	struct rc_instruction * EndIf;
+	struct rc_instruction * EndLoop;
+};
+
+struct emulate_loop_state {
+	struct radeon_compiler * C;
+	struct loop_info * Loops;
+	unsigned int LoopCount;
+	unsigned int LoopReserved;
+};
+
+void rc_transform_loops(struct radeon_compiler *c, void *user);
+
+void rc_unroll_loops(struct radeon_compiler * c, void *user);
+
+void rc_emulate_loops(struct radeon_compiler * c, void *user);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_list.c b/src/gallium/drivers/r300/compiler/radeon_list.c
new file mode 100644
index 0000000..811c908
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_list.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_list.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "memory_pool.h"
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item)
+{
+	struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list));
+	new->Item = item;
+	new->Next = NULL;
+	new->Prev = NULL;
+
+	return new;
+}
+
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
+{
+	struct rc_list * temp;
+
+	if (*list == NULL) {
+		*list = new_value;
+		return;
+	}
+
+	for (temp = *list; temp->Next; temp = temp->Next);
+
+	temp->Next = new_value;
+	new_value->Prev = temp;
+}
+
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
+{
+	if (*list == rm_value) {
+		*list = rm_value->Next;
+		return;
+	}
+
+	rm_value->Prev->Next = rm_value->Next;
+	if (rm_value->Next) {
+		rm_value->Next->Prev = rm_value->Prev;
+	}
+}
+
+unsigned int rc_list_count(struct rc_list * list)
+{
+	unsigned int count = 0;
+	while (list) {
+		count++;
+		list = list->Next;
+	}
+	return count;
+}
+
+void rc_list_print(struct rc_list * list)
+{
+	while(list) {
+		fprintf(stderr, "%p->", list->Item);
+		list = list->Next;
+	}
+	fprintf(stderr, "\n");
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_list.h b/src/gallium/drivers/r300/compiler/radeon_list.h
new file mode 100644
index 0000000..b3c8f89
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_list.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_LIST_H
+#define RADEON_LIST_H
+
+struct memory_pool;
+
+struct rc_list {
+	void * Item;
+	struct rc_list * Prev;
+	struct rc_list * Next;
+};
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item);
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
+unsigned int rc_list_count(struct rc_list * list);
+void rc_list_print(struct rc_list * list);
+
+#endif /* RADEON_LIST_H */
+
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
new file mode 100644
index 0000000..afd78ad
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+#include "radeon_program_constants.h"
+
+struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
+	{
+		.Opcode = RC_OPCODE_NOP,
+		.Name = "NOP"
+	},
+	{
+		.Opcode = RC_OPCODE_ILLEGAL_OPCODE,
+		.Name = "ILLEGAL OPCODE"
+	},
+	{
+		.Opcode = RC_OPCODE_ABS,
+		.Name = "ABS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ADD,
+		.Name = "ADD",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ARL,
+		.Name = "ARL",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CEIL,
+		.Name = "CEIL",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CLAMP,
+		.Name = "CLAMP",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CMP,
+		.Name = "CMP",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CND,
+		.Name = "CND",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_COS,
+		.Name = "COS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DDX,
+		.Name = "DDX",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DDY,
+		.Name = "DDY",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DP2,
+		.Name = "DP2",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DP3,
+		.Name = "DP3",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DP4,
+		.Name = "DP4",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DPH,
+		.Name = "DPH",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DST,
+		.Name = "DST",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_EX2,
+		.Name = "EX2",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_EXP,
+		.Name = "EXP",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_FLR,
+		.Name = "FLR",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_FRC,
+		.Name = "FRC",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_KIL,
+		.Name = "KIL",
+		.NumSrcRegs = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LG2,
+		.Name = "LG2",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LIT,
+		.Name = "LIT",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LOG,
+		.Name = "LOG",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LRP,
+		.Name = "LRP",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MAD,
+		.Name = "MAD",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MAX,
+		.Name = "MAX",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MIN,
+		.Name = "MIN",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MOV,
+		.Name = "MOV",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MUL,
+		.Name = "MUL",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_POW,
+		.Name = "POW",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_RCP,
+		.Name = "RCP",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_RSQ,
+		.Name = "RSQ",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SCS,
+		.Name = "SCS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SEQ,
+		.Name = "SEQ",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SFL,
+		.Name = "SFL",
+		.NumSrcRegs = 0,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SGE,
+		.Name = "SGE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SGT,
+		.Name = "SGT",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SIN,
+		.Name = "SIN",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SLE,
+		.Name = "SLE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SLT,
+		.Name = "SLT",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SNE,
+		.Name = "SNE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SSG,
+		.Name = "SSG",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SUB,
+		.Name = "SUB",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SWZ,
+		.Name = "SWZ",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_XPD,
+		.Name = "XPD",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TEX,
+		.Name = "TEX",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXB,
+		.Name = "TXB",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXD,
+		.Name = "TXD",
+		.HasTexture = 1,
+		.NumSrcRegs = 3,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXL,
+		.Name = "TXL",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXP,
+		.Name = "TXP",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_IF,
+		.Name = "IF",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ELSE,
+		.Name = "ELSE",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_ENDIF,
+		.Name = "ENDIF",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_BGNLOOP,
+		.Name = "BGNLOOP",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_BRK,
+		.Name = "BRK",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_ENDLOOP,
+		.Name = "ENDLOOP",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0,
+	},
+	{
+		.Opcode = RC_OPCODE_CONT,
+		.Name = "CONT",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_REPL_ALPHA,
+		.Name = "REPL_ALPHA",
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_BEGIN_TEX,
+		.Name = "BEGIN_TEX"
+	},
+	{
+		.Opcode = RC_OPCODE_KILP,
+		.Name = "KILP",
+	}
+};
+
+void rc_compute_sources_for_writemask(
+		const struct rc_instruction *inst,
+		unsigned int writemask,
+		unsigned int *srcmasks)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	srcmasks[0] = 0;
+	srcmasks[1] = 0;
+	srcmasks[2] = 0;
+
+	if (opcode->Opcode == RC_OPCODE_KIL)
+		srcmasks[0] |= RC_MASK_XYZW;
+	else if (opcode->Opcode == RC_OPCODE_IF)
+		srcmasks[0] |= RC_MASK_X;
+
+	if (!writemask)
+		return;
+
+	if (opcode->IsComponentwise) {
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+			srcmasks[src] |= writemask;
+	} else if (opcode->IsStandardScalar) {
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+			srcmasks[src] |= RC_MASK_X;
+	} else {
+		switch(opcode->Opcode) {
+		case RC_OPCODE_ARL:
+			srcmasks[0] |= RC_MASK_X;
+			break;
+		case RC_OPCODE_DP2:
+			srcmasks[0] |= RC_MASK_XY;
+			srcmasks[1] |= RC_MASK_XY;
+			break;
+		case RC_OPCODE_DP3:
+		case RC_OPCODE_XPD:
+			srcmasks[0] |= RC_MASK_XYZ;
+			srcmasks[1] |= RC_MASK_XYZ;
+			break;
+		case RC_OPCODE_DP4:
+			srcmasks[0] |= RC_MASK_XYZW;
+			srcmasks[1] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_DPH:
+			srcmasks[0] |= RC_MASK_XYZ;
+			srcmasks[1] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_TXB:
+		case RC_OPCODE_TXP:
+		case RC_OPCODE_TXL:
+			srcmasks[0] |= RC_MASK_W;
+			/* Fall through */
+		case RC_OPCODE_TEX:
+			switch (inst->U.I.TexSrcTarget) {
+				case RC_TEXTURE_1D:
+					srcmasks[0] |= RC_MASK_X;
+					break;
+				case RC_TEXTURE_2D:
+				case RC_TEXTURE_RECT:
+				case RC_TEXTURE_1D_ARRAY:
+					srcmasks[0] |= RC_MASK_XY;
+					break;
+				case RC_TEXTURE_3D:
+				case RC_TEXTURE_CUBE:
+				case RC_TEXTURE_2D_ARRAY:
+					srcmasks[0] |= RC_MASK_XYZ;
+					break;
+			}
+			break;
+		case RC_OPCODE_TXD:
+			switch (inst->U.I.TexSrcTarget) {
+				case RC_TEXTURE_1D_ARRAY:
+					srcmasks[0] |= RC_MASK_Y;
+					/* Fall through. */
+				case RC_TEXTURE_1D:
+					srcmasks[0] |= RC_MASK_X;
+					srcmasks[1] |= RC_MASK_X;
+					srcmasks[2] |= RC_MASK_X;
+					break;
+				case RC_TEXTURE_2D_ARRAY:
+					srcmasks[0] |= RC_MASK_Z;
+					/* Fall through. */
+				case RC_TEXTURE_2D:
+				case RC_TEXTURE_RECT:
+					srcmasks[0] |= RC_MASK_XY;
+					srcmasks[1] |= RC_MASK_XY;
+					srcmasks[2] |= RC_MASK_XY;
+					break;
+				case RC_TEXTURE_3D:
+				case RC_TEXTURE_CUBE:
+					srcmasks[0] |= RC_MASK_XYZ;
+					srcmasks[1] |= RC_MASK_XYZ;
+					srcmasks[2] |= RC_MASK_XYZ;
+					break;
+			}
+			break;
+		case RC_OPCODE_DST:
+			srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
+			srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
+			break;
+		case RC_OPCODE_EXP:
+		case RC_OPCODE_LOG:
+			srcmasks[0] |= RC_MASK_XY;
+			break;
+		case RC_OPCODE_LIT:
+			srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
+			break;
+		default:
+			break;
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
new file mode 100644
index 0000000..b586882
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_OPCODES_H
+#define RADEON_OPCODES_H
+
+#include <assert.h>
+
+/**
+ * Opcodes understood by the Radeon compiler.
+ */
+typedef enum {
+	RC_OPCODE_NOP = 0,
+	RC_OPCODE_ILLEGAL_OPCODE,
+
+	/** vec4 instruction: dst.c = abs(src0.c); */
+	RC_OPCODE_ABS,
+
+	/** vec4 instruction: dst.c = src0.c + src1.c; */
+	RC_OPCODE_ADD,
+
+	/** special instruction: load address register
+	 * dst.x = floor(src.x), where dst must be an address register */
+	RC_OPCODE_ARL,
+
+	/** vec4 instruction: dst.c = ceil(src0.c) */
+	RC_OPCODE_CEIL,
+
+	/** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
+	RC_OPCODE_CLAMP,
+
+	/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
+	RC_OPCODE_CMP,
+
+	/** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */
+	RC_OPCODE_CND,
+
+	/** scalar instruction: dst = cos(src0.x) */
+	RC_OPCODE_COS,
+
+	/** special instruction: take vec4 partial derivative in X direction
+	 * dst.c = d src0.c / dx */
+	RC_OPCODE_DDX,
+
+	/** special instruction: take vec4 partial derivative in Y direction
+	 * dst.c = d src0.c / dy */
+	RC_OPCODE_DDY,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
+	RC_OPCODE_DP2,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
+	RC_OPCODE_DP3,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
+	RC_OPCODE_DP4,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+	RC_OPCODE_DPH,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_DST,
+
+	/** scalar instruction: dst = 2**src0.x */
+	RC_OPCODE_EX2,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_EXP,
+
+	/** vec4 instruction: dst.c = floor(src0.c) */
+	RC_OPCODE_FLR,
+
+	/** vec4 instruction: dst.c = src0.c - floor(src0.c) */
+	RC_OPCODE_FRC,
+
+	/** special instruction: stop execution if any component of src0 is negative */
+	RC_OPCODE_KIL,
+
+	/** scalar instruction: dst = log_2(src0.x) */
+	RC_OPCODE_LG2,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_LIT,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_LOG,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
+	RC_OPCODE_LRP,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
+	RC_OPCODE_MAD,
+
+	/** vec4 instruction: dst.c = max(src0.c, src1.c) */
+	RC_OPCODE_MAX,
+
+	/** vec4 instruction: dst.c = min(src0.c, src1.c) */
+	RC_OPCODE_MIN,
+
+	/** vec4 instruction: dst.c = src0.c */
+	RC_OPCODE_MOV,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c */
+	RC_OPCODE_MUL,
+
+	/** scalar instruction: dst = src0.x ** src1.x */
+	RC_OPCODE_POW,
+
+	/** scalar instruction: dst = 1 / src0.x */
+	RC_OPCODE_RCP,
+
+	/** scalar instruction: dst = 1 / sqrt(src0.x) */
+	RC_OPCODE_RSQ,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_SCS,
+
+	/** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SEQ,
+
+	/** vec4 instruction: dst.c = 0.0 */
+	RC_OPCODE_SFL,
+
+	/** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SGE,
+
+	/** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SGT,
+
+	/** scalar instruction: dst = sin(src0.x) */
+	RC_OPCODE_SIN,
+
+	/** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SLE,
+
+	/** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SLT,
+
+	/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SNE,
+
+	/** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */
+	RC_OPCODE_SSG,
+
+	/** vec4 instruction: dst.c = src0.c - src1.c */
+	RC_OPCODE_SUB,
+
+	/** vec4 instruction: dst.c = src0.c */
+	RC_OPCODE_SWZ,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_XPD,
+
+	RC_OPCODE_TEX,
+	RC_OPCODE_TXB,
+	RC_OPCODE_TXD,
+	RC_OPCODE_TXL,
+	RC_OPCODE_TXP,
+
+	/** branch instruction:
+	 * If src0.x != 0.0, continue with the next instruction;
+	 * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
+	 */
+	RC_OPCODE_IF,
+
+	/** branch instruction: jump to matching RC_OPCODE_ENDIF */
+	RC_OPCODE_ELSE,
+
+	/** branch instruction: has no effect */
+	RC_OPCODE_ENDIF,
+	
+	RC_OPCODE_BGNLOOP,
+
+	RC_OPCODE_BRK,
+
+	RC_OPCODE_ENDLOOP,
+
+	RC_OPCODE_CONT,
+
+	/** special instruction, used in R300-R500 fragment program pair instructions
+	 * indicates that the result of the alpha operation shall be replicated
+	 * across all other channels */
+	RC_OPCODE_REPL_ALPHA,
+
+	/** special instruction, used in R300-R500 fragment programs
+	 * to indicate the start of a block of texture instructions that
+	 * can run simultaneously. */
+	RC_OPCODE_BEGIN_TEX,
+
+	/** Stop execution of the shader (GLSL discard) */
+	RC_OPCODE_KILP,
+
+	MAX_RC_OPCODE
+} rc_opcode;
+
+
+struct rc_opcode_info {
+	rc_opcode Opcode;
+	const char * Name;
+
+	/** true if the instruction reads from a texture.
+	 *
+	 * \note This is false for the KIL instruction, even though KIL is
+	 * a texture instruction from a hardware point of view. */
+	unsigned int HasTexture:1;
+
+	unsigned int NumSrcRegs:2;
+	unsigned int HasDstReg:1;
+
+	/** true if this instruction affects control flow */
+	unsigned int IsFlowControl:1;
+
+	/** true if this is a vector instruction that operates on components in parallel
+	 * without any cross-component interaction */
+	unsigned int IsComponentwise:1;
+
+	/** true if this instruction sources only its operands X components
+	 * to compute one result which is smeared across all output channels */
+	unsigned int IsStandardScalar:1;
+};
+
+extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
+
+static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
+{
+	assert((unsigned int)opcode < MAX_RC_OPCODE);
+	assert(rc_opcodes[opcode].Opcode == opcode);
+
+	return &rc_opcodes[opcode];
+}
+
+struct rc_instruction;
+
+void rc_compute_sources_for_writemask(
+		const struct rc_instruction *inst,
+		unsigned int writemask,
+		unsigned int *srcmasks);
+
+#endif /* RADEON_OPCODES_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
new file mode 100644
index 0000000..39dcb21
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@@ -0,0 +1,700 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_swizzle.h"
+
+struct src_clobbered_reads_cb_data {
+	rc_register_file File;
+	unsigned int Index;
+	unsigned int Mask;
+	struct rc_reader_data * ReaderData;
+};
+
+typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
+						struct rc_instruction *,
+						unsigned int);
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+	struct rc_src_register combine;
+	combine.File = inner.File;
+	combine.Index = inner.Index;
+	combine.RelAddr = inner.RelAddr;
+	if (outer.Abs) {
+		combine.Abs = 1;
+		combine.Negate = outer.Negate;
+	} else {
+		combine.Abs = inner.Abs;
+		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
+		combine.Negate ^= outer.Negate;
+	}
+	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+	return combine;
+}
+
+static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
+						struct rc_src_register * src)
+{
+	rc_register_file file = src->File;
+	struct rc_reader_data * reader_data = data;
+
+	if(!rc_inst_can_use_presub(inst,
+				reader_data->Writer->U.I.PreSub.Opcode,
+				rc_swizzle_to_writemask(src->Swizzle),
+				src,
+				&reader_data->Writer->U.I.PreSub.SrcReg[0],
+				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* XXX This could probably be handled better. */
+	if (file == RC_FILE_ADDRESS) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* These instructions cannot read from the constants file.
+	 * see radeonTransformTEX()
+	 */
+	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+				(inst->U.I.Opcode == RC_OPCODE_TEX ||
+				inst->U.I.Opcode == RC_OPCODE_TXB ||
+				inst->U.I.Opcode == RC_OPCODE_TXP ||
+				inst->U.I.Opcode == RC_OPCODE_TXD ||
+				inst->U.I.Opcode == RC_OPCODE_TXL ||
+				inst->U.I.Opcode == RC_OPCODE_KIL)){
+		reader_data->Abort = 1;
+		return;
+	}
+}
+
+static void src_clobbered_reads_cb(
+	void * data,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct src_clobbered_reads_cb_data * sc_data = data;
+
+	if (src->File == sc_data->File
+	    && src->Index == sc_data->Index
+	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
+
+		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+	}
+
+	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
+		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+	}
+}
+
+static void is_src_clobbered_scan_write(
+	void * data,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct src_clobbered_reads_cb_data sc_data;
+	struct rc_reader_data * reader_data = data;
+	sc_data.File = file;
+	sc_data.Index = index;
+	sc_data.Mask = mask;
+	sc_data.ReaderData = reader_data;
+	rc_for_all_reads_src(reader_data->Writer,
+					src_clobbered_reads_cb, &sc_data);
+}
+
+static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+	struct rc_reader_data reader_data;
+	unsigned int i;
+
+	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+	    inst_mov->U.I.WriteALUResult ||
+	    inst_mov->U.I.SaturateMode)
+		return;
+
+	/* Get a list of all the readers of this MOV instruction. */
+	reader_data.ExitOnAbort = 1;
+	rc_get_readers(c, inst_mov, &reader_data,
+		       copy_propagate_scan_read, NULL,
+		       is_src_clobbered_scan_write);
+
+	if (reader_data.Abort || reader_data.ReaderCount == 0)
+		return;
+
+	/* Propagate the MOV instruction. */
+	for (i = 0; i < reader_data.ReaderCount; i++) {
+		struct rc_instruction * inst = reader_data.Readers[i].Inst;
+		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
+
+		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+			inst->U.I.PreSub = inst_mov->U.I.PreSub;
+	}
+
+	/* Finally, remove the original MOV instruction */
+	rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check if a source register is actually always the same
+ * swizzle constant.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+		rc_swizzle * pswz, unsigned int * pnegate)
+{
+	int have_used = 0;
+
+	if (src.File != RC_FILE_NONE) {
+		*pswz = 0;
+		return 0;
+	}
+
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		unsigned int swz = GET_SWZ(src.Swizzle, chan);
+		if (swz < 4) {
+			*pswz = 0;
+			return 0;
+		}
+		if (swz == RC_SWIZZLE_UNUSED)
+			continue;
+
+		if (!have_used) {
+			*pswz = swz;
+			*pnegate = GET_BIT(src.Negate, chan);
+			have_used = 1;
+		} else {
+			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+				*pswz = 0;
+				return 0;
+			}
+		}
+	}
+
+	return 1;
+}
+
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+	rc_swizzle swz = 0;
+	unsigned int negate= 0;
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MUL;
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_ADD;
+			if (negate)
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_ADD;
+			if (negate)
+				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		}
+	}
+}
+
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+	rc_swizzle swz = 0;
+	unsigned int negate = 0;
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+			if (negate)
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			if (negate)
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+			return;
+		}
+	}
+}
+
+static void constant_folding_add(struct rc_instruction * inst)
+{
+	rc_swizzle swz = 0;
+	unsigned int negate = 0;
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			return;
+		}
+	}
+}
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int i;
+
+	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		struct rc_constant * constant;
+		struct rc_src_register newsrc;
+		int have_real_reference;
+		unsigned int chan;
+
+		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
+		for (chan = 0; chan < 4; ++chan)
+			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
+				break;
+		if (chan == 4) {
+			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
+			continue;
+		}
+
+		/* Convert immediates to swizzles. */
+		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+		    inst->U.I.SrcReg[src].RelAddr ||
+		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+			continue;
+
+		constant =
+			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+		if (constant->Type != RC_CONSTANT_IMMEDIATE)
+			continue;
+
+		newsrc = inst->U.I.SrcReg[src];
+		have_real_reference = 0;
+		for (chan = 0; chan < 4; ++chan) {
+			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+			unsigned int newswz;
+			float imm;
+			float baseimm;
+
+			if (swz >= 4)
+				continue;
+
+			imm = constant->u.Immediate[swz];
+			baseimm = imm;
+			if (imm < 0.0)
+				baseimm = -baseimm;
+
+			if (baseimm == 0.0) {
+				newswz = RC_SWIZZLE_ZERO;
+			} else if (baseimm == 1.0) {
+				newswz = RC_SWIZZLE_ONE;
+			} else if (baseimm == 0.5 && c->has_half_swizzles) {
+				newswz = RC_SWIZZLE_HALF;
+			} else {
+				have_real_reference = 1;
+				continue;
+			}
+
+			SET_SWZ(newsrc.Swizzle, chan, newswz);
+			if (imm < 0.0 && !newsrc.Abs)
+				newsrc.Negate ^= 1 << chan;
+		}
+
+		if (!have_real_reference) {
+			newsrc.File = RC_FILE_NONE;
+			newsrc.Index = 0;
+		}
+
+		/* don't make the swizzle worse */
+		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+			continue;
+
+		inst->U.I.SrcReg[src] = newsrc;
+	}
+
+	/* Simplify instructions based on constants */
+	if (inst->U.I.Opcode == RC_OPCODE_MAD)
+		constant_folding_mad(inst);
+
+	/* note: MAD can simplify to MUL or ADD */
+	if (inst->U.I.Opcode == RC_OPCODE_MUL)
+		constant_folding_mul(inst);
+	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+		constant_folding_add(inst);
+
+	/* In case this instruction has been converted, make sure all of the
+	 * registers that are no longer used are empty. */
+	opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	for(i = opcode->NumSrcRegs; i < 3; i++) {
+		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
+	}
+}
+
+/**
+ * If src and dst use the same register, this function returns a writemask that
+ * indicates wich components are read by src.  Otherwise zero is returned.
+ */
+static unsigned int src_reads_dst_mask(struct rc_src_register src,
+						struct rc_dst_register dst)
+{
+	if (dst.File != src.File || dst.Index != src.Index) {
+		return 0;
+	}
+	return rc_swizzle_to_writemask(src.Swizzle);
+}
+
+/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
+ * in any of its channels.  Return 0 otherwise. */
+static int src_has_const_swz(struct rc_src_register src) {
+	int chan;
+	for(chan = 0; chan < 4; chan++) {
+		unsigned int swz = GET_SWZ(src.Swizzle, chan);
+		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
+						|| swz == RC_SWIZZLE_ONE) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static void presub_scan_read(
+	void * data,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct rc_reader_data * reader_data = data;
+	rc_presubtract_op * presub_opcode = reader_data->CbData;
+
+	if (!rc_inst_can_use_presub(inst, *presub_opcode,
+			reader_data->Writer->U.I.DstReg.WriteMask,
+			src,
+			&reader_data->Writer->U.I.SrcReg[0],
+			&reader_data->Writer->U.I.SrcReg[1])) {
+		reader_data->Abort = 1;
+		return;
+	}
+}
+
+static int presub_helper(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add,
+	rc_presubtract_op presub_opcode,
+	rc_presub_replace_fn presub_replace)
+{
+	struct rc_reader_data reader_data;
+	unsigned int i;
+	rc_presubtract_op cb_op = presub_opcode;
+
+	reader_data.CbData = &cb_op;
+	reader_data.ExitOnAbort = 1;
+	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
+						is_src_clobbered_scan_write);
+
+	if (reader_data.Abort || reader_data.ReaderCount == 0)
+		return 0;
+
+	for(i = 0; i < reader_data.ReaderCount; i++) {
+		unsigned int src_index;
+		struct rc_reader reader = reader_data.Readers[i];
+		const struct rc_opcode_info * info =
+				rc_get_opcode_info(reader.Inst->U.I.Opcode);
+
+		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
+			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
+				presub_replace(inst_add, reader.Inst, src_index);
+		}
+	}
+	return 1;
+}
+
+/* This function assumes that inst_add->U.I.SrcReg[0] and
+ * inst_add->U.I.SrcReg[1] aren't both negative. */
+static void presub_replace_add(
+	struct rc_instruction * inst_add,
+	struct rc_instruction * inst_reader,
+	unsigned int src_index)
+{
+	rc_presubtract_op presub_opcode;
+	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
+		presub_opcode = RC_PRESUB_SUB;
+	else
+		presub_opcode = RC_PRESUB_ADD;
+
+	if (inst_add->U.I.SrcReg[1].Negate) {
+		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
+	} else {
+		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
+		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
+	}
+	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
+	inst_reader->U.I.PreSub.Opcode = presub_opcode;
+	inst_reader->U.I.SrcReg[src_index] =
+			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+					inst_reader->U.I.PreSub.SrcReg[0]);
+	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
+}
+
+static int is_presub_candidate(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int i;
+	unsigned int is_constant[2] = {0, 0};
+
+	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
+
+	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
+			|| inst->U.I.SaturateMode
+			|| inst->U.I.WriteALUResult) {
+		return 0;
+	}
+
+	/* If both sources use a constant swizzle, then we can't convert it to
+	 * a presubtract operation.  In fact for the ADD and SUB presubtract
+	 * operations neither source can contain a constant swizzle.  This
+	 * specific case is checked in peephole_add_presub_add() when
+	 * we make sure the swizzles for both sources are equal, so we
+	 * don't need to worry about it here. */
+	for (i = 0; i < 2; i++) {
+		int chan;
+		for (chan = 0; chan < 4; chan++) {
+			rc_swizzle swz =
+				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
+			if (swz == RC_SWIZZLE_ONE
+					|| swz == RC_SWIZZLE_ZERO
+					|| swz == RC_SWIZZLE_HALF) {
+				is_constant[i] = 1;
+			}
+		}
+	}
+	if (is_constant[0] && is_constant[1])
+		return 0;
+
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		struct rc_src_register src = inst->U.I.SrcReg[i];
+		if (src_reads_dst_mask(src, inst->U.I.DstReg))
+			return 0;
+
+		src.File = RC_FILE_PRESUB;
+		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
+			return 0;
+	}
+	return 1;
+}
+
+static int peephole_add_presub_add(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add)
+{
+	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
+        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
+
+	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
+		return 0;
+
+	/* src0 and src1 can't have absolute values */
+	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+	        return 0;
+
+	/* presub_replace_add() assumes only one is negative */
+	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+	        return 0;
+
+        /* if src0 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+	        return 0;
+
+        /* if src1 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+	        return 0;
+
+	if (!is_presub_candidate(c, inst_add))
+		return 0;
+
+	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
+		rc_remove_instruction(inst_add);
+		return 1;
+	}
+	return 0;
+}
+
+static void presub_replace_inv(
+	struct rc_instruction * inst_add,
+	struct rc_instruction * inst_reader,
+	unsigned int src_index)
+{
+	/* We must be careful not to modify inst_add, since it
+	 * is possible it will remain part of the program.*/
+	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
+	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+						inst_reader->U.I.PreSub.SrcReg[0]);
+
+	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
+ * of the add instruction must have the constatnt 1 swizzle.  This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return
+ * 	0 if the ADD instruction is still part of the program.
+ * 	1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add)
+{
+	unsigned int i, swz;
+
+	if (!is_presub_candidate(c, inst_add))
+		return 0;
+
+	/* Check if src0 is 1. */
+	/* XXX It would be nice to use is_src_uniform_constant here, but that
+	 * function only works if the register's file is RC_FILE_NONE */
+	for(i = 0; i < 4; i++ ) {
+		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+						&& swz != RC_SWIZZLE_ONE) {
+			return 0;
+		}
+	}
+
+	/* Check src1. */
+	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+						inst_add->U.I.DstReg.WriteMask
+		|| inst_add->U.I.SrcReg[1].Abs
+		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+		return 0;
+	}
+
+	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
+		rc_remove_instruction(inst_add);
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * @return
+ * 	0 if inst is still part of the program.
+ * 	1 if inst is no longer part of the program.
+ */
+static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	switch(inst->U.I.Opcode){
+	case RC_OPCODE_ADD:
+		if (c->has_presub) {
+			if(peephole_add_presub_inv(c, inst))
+				return 1;
+			if(peephole_add_presub_add(c, inst))
+				return 1;
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+void rc_optimize(struct radeon_compiler * c, void *user)
+{
+	struct rc_instruction * inst = c->Program.Instructions.Next;
+	while(inst != &c->Program.Instructions) {
+		struct rc_instruction * cur = inst;
+		inst = inst->Next;
+
+		constant_folding(c, cur);
+
+		if(peephole(c, cur))
+			continue;
+
+		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+			copy_propagate(c, cur);
+			/* cur may no longer be part of the program */
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c
new file mode 100644
index 0000000..1e9a2c0
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c
@@ -0,0 +1,62 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_pair.h"
+
+static void mark_used_presub(struct rc_pair_sub_instruction * sub)
+{
+	if (sub->Src[RC_PAIR_PRESUB_SRC].Used) {
+		unsigned int presub_reg_count = rc_presubtract_src_reg_count(
+					sub->Src[RC_PAIR_PRESUB_SRC].Index);
+		unsigned int i;
+		for (i = 0; i < presub_reg_count; i++) {
+			sub->Src[i].Used = 1;
+		}
+	}
+}
+
+static void mark_used(
+	struct rc_instruction * inst,
+	struct rc_pair_sub_instruction * sub)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
+		if (src_type & RC_SOURCE_RGB) {
+			inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1;
+		}
+
+		if (src_type & RC_SOURCE_ALPHA) {
+			inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1;
+		}
+	}
+}
+
+/**
+ * This pass finds sources that are not used by their instruction and marks
+ * them as unused. 
+ */
+void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user)
+{
+	struct rc_instruction * inst;
+	for (inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+		unsigned int i;
+		if (inst->Type == RC_INSTRUCTION_NORMAL)
+			continue;
+
+		/* Mark all sources as unused */
+		for (i = 0; i < 4; i++) {
+			inst->U.P.RGB.Src[i].Used = 0;
+			inst->U.P.Alpha.Src[i].Used = 0;
+		}
+		mark_used(inst, &inst->U.P.RGB);
+		mark_used(inst, &inst->U.P.Alpha);
+
+		mark_used_presub(&inst->U.P.RGB);
+		mark_used_presub(&inst->U.P.Alpha);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
new file mode 100644
index 0000000..49983d6
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
@@ -0,0 +1,706 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "program/register_allocate.h"
+#include "ralloc.h"
+
+#include "r300_fragprog_swizzle.h"
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+
+struct register_info {
+	struct live_intervals Live[4];
+
+	unsigned int Used:1;
+	unsigned int Allocated:1;
+	unsigned int File:3;
+	unsigned int Index:RC_REGISTER_INDEX_BITS;
+	unsigned int Writemask;
+};
+
+struct regalloc_state {
+	struct radeon_compiler * C;
+
+	struct register_info * Input;
+	unsigned int NumInputs;
+
+	struct register_info * Temporary;
+	unsigned int NumTemporaries;
+
+	unsigned int Simple;
+	int LoopEnd;
+};
+
+enum rc_reg_class {
+	RC_REG_CLASS_SINGLE,
+	RC_REG_CLASS_DOUBLE,
+	RC_REG_CLASS_TRIPLE,
+	RC_REG_CLASS_ALPHA,
+	RC_REG_CLASS_SINGLE_PLUS_ALPHA,
+	RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
+	RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
+	RC_REG_CLASS_X,
+	RC_REG_CLASS_Y,
+	RC_REG_CLASS_Z,
+	RC_REG_CLASS_XY,
+	RC_REG_CLASS_YZ,
+	RC_REG_CLASS_XZ,
+	RC_REG_CLASS_XW,
+	RC_REG_CLASS_YW,
+	RC_REG_CLASS_ZW,
+	RC_REG_CLASS_XYW,
+	RC_REG_CLASS_YZW,
+	RC_REG_CLASS_XZW,
+	RC_REG_CLASS_COUNT
+};
+
+struct rc_class {
+	enum rc_reg_class Class;
+
+	unsigned int WritemaskCount;
+
+	/** This is 1 if this class is being used by the register allocator
+	 * and 0 otherwise */
+	unsigned int Used;
+
+	/** This is the ID number assigned to this class by ra. */
+	unsigned int Id;
+
+	/** List of writemasks that belong to this class */
+	unsigned int Writemasks[3];
+
+
+};
+
+static void print_live_intervals(struct live_intervals * src)
+{
+	if (!src || !src->Used) {
+		DBG("(null)");
+		return;
+	}
+
+	DBG("(%i,%i)", src->Start, src->End);
+}
+
+static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
+{
+	if (VERBOSE) {
+		DBG("overlap_live_intervals: ");
+		print_live_intervals(a);
+		DBG(" to ");
+		print_live_intervals(b);
+		DBG("\n");
+	}
+
+	if (!a->Used || !b->Used) {
+		DBG("    unused interval\n");
+		return 0;
+	}
+
+	if (a->Start > b->Start) {
+		if (a->Start < b->End) {
+			DBG("    overlap\n");
+			return 1;
+		}
+	} else if (b->Start > a->Start) {
+		if (b->Start < a->End) {
+			DBG("    overlap\n");
+			return 1;
+		}
+	} else { /* a->Start == b->Start */
+		if (a->Start != a->End && b->Start != b->End) {
+			DBG("    overlap\n");
+			return 1;
+		}
+	}
+
+	DBG("    no overlap\n");
+
+	return 0;
+}
+
+static void scan_read_callback(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct regalloc_state * s = data;
+	struct register_info * reg;
+	unsigned int i;
+
+	if (file != RC_FILE_INPUT)
+		return;
+
+	s->Input[index].Used = 1;
+	reg = &s->Input[index];
+
+	for (i = 0; i < 4; i++) {
+		if (!((mask >> i) & 0x1)) {
+			continue;
+		}
+		reg->Live[i].Used = 1;
+		reg->Live[i].Start = 0;
+		reg->Live[i].End =
+			s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
+	}
+}
+
+static void remap_register(void * data, struct rc_instruction * inst,
+		rc_register_file * file, unsigned int * index)
+{
+	struct regalloc_state * s = data;
+	const struct register_info * reg;
+
+	if (*file == RC_FILE_TEMPORARY && s->Simple)
+		reg = &s->Temporary[*index];
+	else if (*file == RC_FILE_INPUT)
+		reg = &s->Input[*index];
+	else
+		return;
+
+	if (reg->Allocated) {
+		*index = reg->Index;
+	}
+}
+
+static void alloc_input_simple(void * data, unsigned int input,
+							unsigned int hwreg)
+{
+	struct regalloc_state * s = data;
+
+	if (input >= s->NumInputs)
+		return;
+
+	s->Input[input].Allocated = 1;
+	s->Input[input].File = RC_FILE_TEMPORARY;
+	s->Input[input].Index = hwreg;
+}
+
+/* This functions offsets the temporary register indices by the number
+ * of input registers, because input registers are actually temporaries and
+ * should not occupy the same space.
+ *
+ * This pass is supposed to be used to maintain correct allocation of inputs
+ * if the standard register allocation is disabled. */
+static void do_regalloc_inputs_only(struct regalloc_state * s)
+{
+	for (unsigned i = 0; i < s->NumTemporaries; i++) {
+		s->Temporary[i].Allocated = 1;
+		s->Temporary[i].File = RC_FILE_TEMPORARY;
+		s->Temporary[i].Index = i + s->NumInputs;
+	}
+}
+
+static unsigned int is_derivative(rc_opcode op)
+{
+	return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
+}
+
+static int find_class(
+	struct rc_class * classes,
+	unsigned int writemask,
+	unsigned int max_writemask_count)
+{
+	unsigned int i;
+	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+		unsigned int j;
+		if (classes[i].WritemaskCount > max_writemask_count) {
+			continue;
+		}
+		for (j = 0; j < 3; j++) {
+			if (classes[i].Writemasks[j] == writemask) {
+				return i;
+			}
+		}
+	}
+	return -1;
+}
+
+static enum rc_reg_class variable_get_class(
+	struct rc_variable * variable,
+	struct rc_class * classes)
+{
+	unsigned int i;
+	unsigned int can_change_writemask= 1;
+	unsigned int writemask = rc_variable_writemask_sum(variable);
+	struct rc_list * readers = rc_variable_readers_union(variable);
+	int class_index;
+
+	if (!variable->C->is_r500) {
+		struct rc_class c;
+		/* The assumption here is that if an instruction has type
+		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
+		 * r300 and r400 can't swizzle the result of a TEX lookup. */
+		if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
+			writemask = RC_MASK_XYZW;
+		}
+
+		/* Check if it is possible to do swizzle packing for r300/r400
+		 * without creating non-native swizzles. */
+		class_index = find_class(classes, writemask, 3);
+		if (class_index < 0) {
+			goto error;
+		}
+		c = classes[class_index];
+		for (i = 0; i < c.WritemaskCount; i++) {
+			int j;
+			unsigned int conversion_swizzle =
+						rc_make_conversion_swizzle(
+						writemask, c.Writemasks[i]);
+			for (j = 0; j < variable->ReaderCount; j++) {
+				unsigned int old_swizzle;
+				unsigned int new_swizzle;
+				struct rc_reader r = variable->Readers[j];
+				if (r.Inst->Type == RC_INSTRUCTION_PAIR ) {
+					old_swizzle = r.U.P.Arg->Swizzle;
+				} else {
+					old_swizzle = r.U.I.Src->Swizzle;
+				}
+				new_swizzle = rc_adjust_channels(
+					old_swizzle, conversion_swizzle);
+				if (!r300_swizzle_is_native_basic(new_swizzle)) {
+					can_change_writemask = 0;
+					break;
+				}
+			}
+			if (!can_change_writemask) {
+				break;
+			}
+		}
+	}
+
+	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
+		/* DDX/DDY seem to always fail when their writemasks are
+		 * changed.*/
+		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
+		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
+			can_change_writemask = 0;
+		}
+	}
+	for ( ; readers; readers = readers->Next) {
+		struct rc_reader * r = readers->Item;
+		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
+			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
+				can_change_writemask = 0;
+				break;
+			}
+			/* DDX/DDY also fail when their swizzles are changed. */
+			if (is_derivative(r->Inst->U.P.RGB.Opcode)
+			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
+				can_change_writemask = 0;
+				break;
+			}
+		}
+	}
+
+	class_index = find_class(classes, writemask,
+						can_change_writemask ? 3 : 1);
+	if (class_index > -1) {
+		return classes[class_index].Class;
+	} else {
+error:
+		rc_error(variable->C,
+				"Could not find class for index=%u mask=%u\n",
+				variable->Dst.Index, writemask);
+		return 0;
+	}
+}
+
+static unsigned int overlap_live_intervals_array(
+	struct live_intervals * a,
+	struct live_intervals * b)
+{
+	unsigned int a_chan, b_chan;
+	for (a_chan = 0; a_chan < 4; a_chan++) {
+		for (b_chan = 0; b_chan < 4; b_chan++) {
+			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
+					return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+static unsigned int reg_get_index(int reg)
+{
+	return reg / RC_MASK_XYZW;
+}
+
+static unsigned int reg_get_writemask(int reg)
+{
+	return (reg % RC_MASK_XYZW) + 1;
+}
+
+static int get_reg_id(unsigned int index, unsigned int writemask)
+{
+	assert(writemask);
+	if (writemask == 0) {
+		return 0;
+	}
+	return (index * RC_MASK_XYZW) + (writemask - 1);
+}
+
+#if VERBOSE
+static void print_reg(int reg)
+{
+	unsigned int index = reg_get_index(reg);
+	unsigned int mask = reg_get_writemask(reg);
+	fprintf(stderr, "Temp[%u].%c%c%c%c", index,
+		mask & RC_MASK_X ? 'x' : '_',
+		mask & RC_MASK_Y ? 'y' : '_',
+		mask & RC_MASK_Z ? 'z' : '_',
+		mask & RC_MASK_W ? 'w' : '_');
+}
+#endif
+
+static void add_register_conflicts(
+	struct ra_regs * regs,
+	unsigned int max_temp_regs)
+{
+	unsigned int index, a_mask, b_mask;
+	for (index = 0; index < max_temp_regs; index++) {
+		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
+			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
+								b_mask++) {
+				if (a_mask & b_mask) {
+					ra_add_reg_conflict(regs,
+						get_reg_id(index, a_mask),
+						get_reg_id(index, b_mask));
+				}
+			}
+		}
+	}
+}
+
+static void do_advanced_regalloc(struct regalloc_state * s)
+{
+	struct rc_class rc_class_list [] = {
+		{RC_REG_CLASS_SINGLE, 3, 0, 0,
+			{RC_MASK_X,
+			 RC_MASK_Y,
+			 RC_MASK_Z}},
+		{RC_REG_CLASS_DOUBLE, 3, 0, 0,
+			{RC_MASK_X | RC_MASK_Y,
+			 RC_MASK_X | RC_MASK_Z,
+			 RC_MASK_Y | RC_MASK_Z}},
+		{RC_REG_CLASS_TRIPLE, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
+			 RC_MASK_NONE,
+			 RC_MASK_NONE}},
+		{RC_REG_CLASS_ALPHA, 1, 0, 0,
+			{RC_MASK_W,
+			 RC_MASK_NONE,
+			 RC_MASK_NONE}},
+		{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
+			{RC_MASK_X | RC_MASK_W,
+			 RC_MASK_Y | RC_MASK_W,
+			 RC_MASK_Z | RC_MASK_W}},
+		{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
+			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+			 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+			 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
+		{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_X, 1, 0, 0,
+			{RC_MASK_X,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_Y, 1, 0, 0,
+			{RC_MASK_Y,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_Z, 1, 0, 0,
+			{RC_MASK_Z,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XY, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Y,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_YZ, 1, 0, 0,
+			{RC_MASK_Y | RC_MASK_Z,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XZ, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Z,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XW, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_YW, 1, 0, 0,
+			{RC_MASK_Y | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_ZW, 1, 0, 0,
+			{RC_MASK_Z | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XYW, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_YZW, 1, 0, 0,
+			{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XZW, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}}
+	};
+
+	unsigned int i, j, index, input_node, node_count, node_index;
+	unsigned int * node_classes;
+	unsigned int * input_classes;
+	struct rc_instruction * inst;
+	struct rc_list * var_ptr;
+	struct rc_list * variables;
+	struct ra_regs * regs;
+	struct ra_graph * graph;
+
+	/* Allocate the main ra data structure */
+	regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW);
+
+	/* Get list of program variables */
+	variables = rc_get_variables(s->C);
+	node_count = rc_list_count(variables);
+	node_classes = memory_pool_malloc(&s->C->Pool,
+			node_count * sizeof(unsigned int));
+	input_classes = memory_pool_malloc(&s->C->Pool,
+			s->NumInputs * sizeof(unsigned int));
+
+	for (var_ptr = variables, node_index = 0; var_ptr;
+					var_ptr = var_ptr->Next, node_index++) {
+		unsigned int class_index;
+		/* Compute the live intervals */
+		rc_variable_compute_live_intervals(var_ptr->Item);
+
+		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
+
+		/* If we haven't used this register class yet, mark it
+		 * as used and allocate space for it. */
+		if (!rc_class_list[class_index].Used) {
+			rc_class_list[class_index].Used = 1;
+			rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
+		}
+
+		node_classes[node_index] = rc_class_list[class_index].Id;
+	}
+
+
+	/* Assign registers to the classes */
+	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+		struct rc_class class = rc_class_list[i];
+		if (!class.Used) {
+			continue;
+		}
+
+		for (index = 0; index < s->C->max_temp_regs; index++) {
+			for (j = 0; j < class.WritemaskCount; j++) {
+				int reg_id = get_reg_id(index,
+							class.Writemasks[j]);
+				ra_class_add_reg(regs, class.Id, reg_id);
+			}
+		}
+	}
+
+	/* Add register conflicts */
+	add_register_conflicts(regs, s->C->max_temp_regs);
+
+	/* Calculate live intervals for input registers */
+	for (inst = s->C->Program.Instructions.Next;
+					inst != &s->C->Program.Instructions;
+					inst = inst->Next) {
+		rc_opcode op = rc_get_flow_control_inst(inst);
+		if (op == RC_OPCODE_BGNLOOP) {
+			struct rc_instruction * endloop =
+							rc_match_bgnloop(inst);
+			if (endloop->IP > s->LoopEnd) {
+				s->LoopEnd = endloop->IP;
+			}
+		}
+		rc_for_all_reads_mask(inst, scan_read_callback, s);
+	}
+
+	/* Create classes for input registers */
+	for (i = 0; i < s->NumInputs; i++) {
+		unsigned int chan, class_id, writemask = 0;
+		for (chan = 0; chan < 4; chan++) {
+			if (s->Input[i].Live[chan].Used) {
+				writemask |= (1 << chan);
+			}
+		}
+		s->Input[i].Writemask = writemask;
+		if (!writemask) {
+			continue;
+		}
+
+		class_id = ra_alloc_reg_class(regs);
+		input_classes[i] = class_id;
+		ra_class_add_reg(regs, class_id,
+				get_reg_id(s->Input[i].Index, writemask));
+	}
+
+	ra_set_finalize(regs);
+
+	graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
+
+	/* Build the interference graph */
+	for (var_ptr = variables, node_index = 0; var_ptr;
+					var_ptr = var_ptr->Next,node_index++) {
+		struct rc_list * a, * b;
+		unsigned int b_index;
+
+		ra_set_node_class(graph, node_index, node_classes[node_index]);
+
+		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
+						b; b = b->Next, b_index++) {
+			struct rc_variable * var_a = a->Item;
+			while (var_a) {
+				struct rc_variable * var_b = b->Item;
+				while (var_b) {
+					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
+						ra_add_node_interference(graph,
+							node_index, b_index);
+					}
+					var_b = var_b->Friend;
+				}
+				var_a = var_a->Friend;
+			}
+		}
+	}
+
+	/* Add input registers to the interference graph */
+	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
+		if (!s->Input[i].Writemask) {
+			continue;
+		}
+		ra_set_node_class(graph, node_count + input_node,
+							input_classes[i]);
+		for (var_ptr = variables, node_index = 0;
+				var_ptr; var_ptr = var_ptr->Next, node_index++) {
+			struct rc_variable * var = var_ptr->Item;
+			if (overlap_live_intervals_array(s->Input[i].Live,
+								var->Live)) {
+				ra_add_node_interference(graph, node_index,
+						node_count + input_node);
+			}
+		}
+		/* Manually allocate a register for this input */
+		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
+				s->Input[i].Index, s->Input[i].Writemask));
+		input_node++;
+	}
+
+	if (!ra_allocate_no_spills(graph)) {
+		rc_error(s->C, "Ran out of hardware temporaries\n");
+		return;
+	}
+
+	/* Rewrite the registers */
+	for (var_ptr = variables, node_index = 0; var_ptr;
+				var_ptr = var_ptr->Next, node_index++) {
+		int reg = ra_get_node_reg(graph, node_index);
+		unsigned int writemask = reg_get_writemask(reg);
+		unsigned int index = reg_get_index(reg);
+		struct rc_variable * var = var_ptr->Item;
+
+		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
+			writemask = rc_variable_writemask_sum(var);
+		}
+
+		if (var->Dst.File == RC_FILE_INPUT) {
+			continue;
+		}
+		rc_variable_change_dst(var, index, writemask);
+	}
+
+	ralloc_free(graph);
+	ralloc_free(regs);
+}
+
+/**
+ * @param user This parameter should be a pointer to an integer value.  If this
+ * integer value is zero, then a simple register allocator will be used that
+ * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
+ * user is non-zero, then the regular register allocator will be used
+ * (\sa do_regalloc).
+  */
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+{
+	struct r300_fragment_program_compiler *c =
+				(struct r300_fragment_program_compiler*)cc;
+	struct regalloc_state s;
+	int * do_full_regalloc = (int*)user;
+
+	memset(&s, 0, sizeof(s));
+	s.C = cc;
+	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
+	s.Input = memory_pool_malloc(&cc->Pool,
+			s.NumInputs * sizeof(struct register_info));
+	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
+
+	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
+	s.Temporary = memory_pool_malloc(&cc->Pool,
+			s.NumTemporaries * sizeof(struct register_info));
+	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
+
+	rc_recompute_ips(s.C);
+
+	c->AllocateHwInputs(c, &alloc_input_simple, &s);
+	if (*do_full_regalloc) {
+		do_advanced_regalloc(&s);
+	} else {
+		s.Simple = 1;
+		do_regalloc_inputs_only(&s);
+	}
+
+	/* Rewrite inputs and if we are doing the simple allocation, rewrite
+	 * temporaries too. */
+	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
+					inst != &s.C->Program.Instructions;
+					inst = inst->Next) {
+		rc_remap_registers(inst, &remap_register, &s);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
new file mode 100644
index 0000000..25cd52c
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
@@ -0,0 +1,1010 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct schedule_instruction {
+	struct rc_instruction * Instruction;
+
+	/** Next instruction in the linked list of ready instructions. */
+	struct schedule_instruction *NextReady;
+
+	/** Values that this instruction reads and writes */
+	struct reg_value * WriteValues[4];
+	struct reg_value * ReadValues[12];
+	unsigned int NumWriteValues:3;
+	unsigned int NumReadValues:4;
+
+	/**
+	 * Number of (read and write) dependencies that must be resolved before
+	 * this instruction can be scheduled.
+	 */
+	unsigned int NumDependencies:5;
+
+	/** List of all readers (see rc_get_readers() for the definition of
+	 * "all readers"), even those outside the basic block this instruction
+	 * lives in. */
+	struct rc_reader_data GlobalReaders;
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ */
+struct reg_value_reader {
+	struct schedule_instruction *Reader;
+	struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * RC_FILE_TEMPORARY.
+ */
+struct reg_value {
+	struct schedule_instruction * Writer;
+
+	/**
+	 * Unordered linked list of instructions that read from this value.
+	 * When this value becomes available, we increase all readers'
+	 * dependency count.
+	 */
+	struct reg_value_reader *Readers;
+
+	/**
+	 * Number of readers of this value. This is decremented each time
+	 * a reader of the value is committed.
+	 * When the reader cound reaches zero, the dependency count
+	 * of the instruction writing \ref Next is decremented.
+	 */
+	unsigned int NumReaders;
+
+	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
+};
+
+struct register_state {
+	struct reg_value * Values[4];
+};
+
+struct remap_reg {
+	struct rc_instruciont * Inst;
+	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int OldSwizzle:3;
+	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int NewSwizzle:3;
+	unsigned int OnlyTexReads:1;
+	struct remap_reg * Next;
+};
+
+struct schedule_state {
+	struct radeon_compiler * C;
+	struct schedule_instruction * Current;
+
+	struct register_state Temporary[RC_REGISTER_MAX_INDEX];
+
+	/**
+	 * Linked lists of instructions that can be scheduled right now,
+	 * based on which ALU/TEX resources they require.
+	 */
+	/*@{*/
+	struct schedule_instruction *ReadyFullALU;
+	struct schedule_instruction *ReadyRGB;
+	struct schedule_instruction *ReadyAlpha;
+	struct schedule_instruction *ReadyTEX;
+	/*@}*/
+};
+
+static struct reg_value ** get_reg_valuep(struct schedule_state * s,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	if (file != RC_FILE_TEMPORARY)
+		return 0;
+
+	if (index >= RC_REGISTER_MAX_INDEX) {
+		rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
+		return 0;
+	}
+
+	return &s->Temporary[index].Values[chan];
+}
+
+static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
+{
+	inst->NextReady = *list;
+	*list = inst;
+}
+
+static void add_inst_to_list_end(struct schedule_instruction ** list,
+					struct schedule_instruction * inst)
+{
+	if(!*list){
+		*list = inst;
+	}else{
+		struct schedule_instruction * temp = *list;
+		while(temp->NextReady){
+			temp = temp->NextReady;
+		}
+		temp->NextReady = inst;
+	}
+}
+
+static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	DBG("%i is now ready\n", sinst->Instruction->IP);
+
+	/* Adding Ready TEX instructions to the end of the "Ready List" helps
+	 * us emit TEX instructions in blocks without losing our place. */
+	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
+		add_inst_to_list_end(&s->ReadyTEX, sinst);
+	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
+		add_inst_to_list(&s->ReadyRGB, sinst);
+	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
+		add_inst_to_list(&s->ReadyAlpha, sinst);
+	else
+		add_inst_to_list(&s->ReadyFullALU, sinst);
+}
+
+static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	assert(sinst->NumDependencies > 0);
+	sinst->NumDependencies--;
+	if (!sinst->NumDependencies)
+		instruction_ready(s, sinst);
+}
+
+/**
+ * This function decreases the dependencies of the next instruction that
+ * wants to write to each of sinst's read values.
+ */
+static void commit_update_reads(struct schedule_state * s,
+					struct schedule_instruction * sinst){
+	unsigned int i;
+	for(i = 0; i < sinst->NumReadValues; ++i) {
+		struct reg_value * v = sinst->ReadValues[i];
+		assert(v->NumReaders > 0);
+		v->NumReaders--;
+		if (!v->NumReaders) {
+			if (v->Next)
+				decrease_dependencies(s, v->Next->Writer);
+		}
+	}
+}
+
+static void commit_update_writes(struct schedule_state * s,
+					struct schedule_instruction * sinst){
+	unsigned int i;
+	for(i = 0; i < sinst->NumWriteValues; ++i) {
+		struct reg_value * v = sinst->WriteValues[i];
+		if (v->NumReaders) {
+			for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
+				decrease_dependencies(s, r->Reader);
+			}
+		} else {
+			/* This happens in instruction sequences of the type
+			 *  OP r.x, ...;
+			 *  OP r.x, r.x, ...;
+			 * See also the subtlety in how instructions that both
+			 * read and write the same register are scanned.
+			 */
+			if (v->Next)
+				decrease_dependencies(s, v->Next->Writer);
+		}
+	}
+}
+
+static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	DBG("%i: commit\n", sinst->Instruction->IP);
+
+	commit_update_reads(s, sinst);
+
+	commit_update_writes(s, sinst);
+}
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ */
+static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
+{
+	struct schedule_instruction *readytex;
+	struct rc_instruction * inst_begin;
+
+	assert(s->ReadyTEX);
+
+	/* Node marker for R300 */
+	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
+
+	/* Link texture instructions back in */
+	readytex = s->ReadyTEX;
+	while(readytex) {
+		rc_insert_instruction(before->Prev, readytex->Instruction);
+		DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
+
+		/* All of the TEX instructions in the same TEX block have
+		 * their source registers read from before any of the
+		 * instructions in that block write to their destination
+		 * registers.  This means that when we commit a TEX
+		 * instruction, any other TEX instruction that wants to write
+		 * to one of the committed instruction's source register can be
+		 * marked as ready and should be emitted in the same TEX
+		 * block. This prevents the following sequence from being
+		 * emitted in two different TEX blocks:
+		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
+		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
+		 */
+		commit_update_reads(s, readytex);
+		readytex = readytex->NextReady;
+	}
+	readytex = s->ReadyTEX;
+	s->ReadyTEX = 0;
+	while(readytex){
+		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
+		commit_update_writes(s, readytex);
+		readytex = readytex->NextReady;
+	}
+}
+
+/* This is a helper function for destructive_merge_instructions().  It helps
+ * merge presubtract sources from two instructions and makes sure the
+ * presubtract sources end up in the correct spot.  This function assumes that
+ * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
+ * but no scalar instruction (alpha).
+ * @return 0 if merging the presubtract sources fails.
+ * @retrun 1 if merging the presubtract sources succeeds.
+ */
+static int merge_presub_sources(
+	struct rc_pair_instruction * dst_full,
+	struct rc_pair_sub_instruction src,
+	unsigned int type)
+{
+	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
+	struct rc_pair_sub_instruction * dst_sub;
+	const struct rc_opcode_info * info;
+
+	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
+
+	switch(type) {
+	case RC_SOURCE_RGB:
+		is_rgb = 1;
+		is_alpha = 0;
+		dst_sub = &dst_full->RGB;
+		break;
+	case RC_SOURCE_ALPHA:
+		is_rgb = 0;
+		is_alpha = 1;
+		dst_sub = &dst_full->Alpha;
+		break;
+	default:
+		assert(0);
+		return 0;
+	}
+
+	info = rc_get_opcode_info(dst_full->RGB.Opcode);
+
+	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
+		return 0;
+
+	srcp_regs = rc_presubtract_src_reg_count(
+					src.Src[RC_PAIR_PRESUB_SRC].Index);
+	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
+		unsigned int arg;
+		int free_source;
+		unsigned int one_way = 0;
+		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
+		struct rc_pair_instruction_source temp;
+
+		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
+							srcp.File, srcp.Index);
+
+		/* If free_source < 0 then there are no free source
+		 * slots. */
+		if (free_source < 0)
+			return 0;
+
+		temp = dst_sub->Src[srcp_src];
+		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
+
+		/* srcp needs src0 and src1 to be the same */
+		if (free_source < srcp_src) {
+			if (!temp.Used)
+				continue;
+			free_source = rc_pair_alloc_source(dst_full, is_rgb,
+					is_alpha, temp.File, temp.Index);
+			if (free_source < 0)
+				return 0;
+			one_way = 1;
+		} else {
+			dst_sub->Src[free_source] = temp;
+		}
+
+		/* If free_source == srcp_src, then the presubtract
+		 * source is already in the correct place. */
+		if (free_source == srcp_src)
+			continue;
+
+		/* Shuffle the sources, so we can put the
+		 * presubtract source in the correct place. */
+		for(arg = 0; arg < info->NumSrcRegs; arg++) {
+			/*If this arg does not read from an rgb source,
+			 * do nothing. */
+			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
+								& type)) {
+				continue;
+			}
+
+			if (dst_full->RGB.Arg[arg].Source == srcp_src)
+				dst_full->RGB.Arg[arg].Source = free_source;
+			/* We need to do this just in case register
+			 * is one of the sources already, but in the
+			 * wrong spot. */
+			else if(dst_full->RGB.Arg[arg].Source == free_source
+							&& !one_way) {
+				dst_full->RGB.Arg[arg].Source = srcp_src;
+			}
+		}
+	}
+	return 1;
+}
+
+
+/* This function assumes that rgb.Alpha and alpha.RGB are unused */
+static int destructive_merge_instructions(
+		struct rc_pair_instruction * rgb,
+		struct rc_pair_instruction * alpha)
+{
+	const struct rc_opcode_info * opcode;
+
+	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
+	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+
+	/* Presubtract registers need to be merged first so that registers
+	 * needed by the presubtract operation can be placed in src0 and/or
+	 * src1. */
+
+	/* Merge the rgb presubtract registers. */
+	if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
+			return 0;
+		}
+	}
+	/* Merge the alpha presubtract registers */
+	if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+		if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
+			return 0;
+		}
+	}
+
+	/* Copy alpha args into rgb */
+	opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+
+	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+		unsigned int srcrgb = 0;
+		unsigned int srcalpha = 0;
+		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
+		rc_register_file file = 0;
+		unsigned int index = 0;
+		int source;
+
+		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
+			srcrgb = 1;
+			file = alpha->RGB.Src[oldsrc].File;
+			index = alpha->RGB.Src[oldsrc].Index;
+		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
+			srcalpha = 1;
+			file = alpha->Alpha.Src[oldsrc].File;
+			index = alpha->Alpha.Src[oldsrc].Index;
+		}
+
+		source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+		if (source < 0)
+			return 0;
+
+		rgb->Alpha.Arg[arg].Source = source;
+		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
+		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
+		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
+	}
+
+	/* Copy alpha opcode into rgb */
+	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
+	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
+	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
+	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
+	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
+	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+
+	/* Merge ALU result writing */
+	if (alpha->WriteALUResult) {
+		if (rgb->WriteALUResult)
+			return 0;
+
+		rgb->WriteALUResult = alpha->WriteALUResult;
+		rgb->ALUResultCompare = alpha->ALUResultCompare;
+	}
+
+	return 1;
+}
+
+/**
+ * Try to merge the given instructions into the rgb instructions.
+ *
+ * Return true on success; on failure, return false, and keep
+ * the instructions untouched.
+ */
+static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
+{
+	struct rc_pair_instruction backup;
+
+	/*Instructions can't write output registers and ALU result at the
+	 * same time. */
+	if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
+		|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
+		return 0;
+	}
+	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
+
+	if (destructive_merge_instructions(rgb, alpha))
+		return 1;
+
+	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
+	return 0;
+}
+
+static void presub_nop(struct rc_instruction * emitted) {
+	int prev_rgb_index, prev_alpha_index, i, num_src;
+
+	/* We don't need a nop if the previous instruction is a TEX. */
+	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
+		return;
+	}
+	if (emitted->Prev->U.P.RGB.WriteMask)
+		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
+	else
+		prev_rgb_index = -1;
+	if (emitted->Prev->U.P.Alpha.WriteMask)
+		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
+	else
+		prev_alpha_index = 1;
+
+	/* Check the previous rgb instruction */
+	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		num_src = rc_presubtract_src_reg_count(
+				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+		for (i = 0; i < num_src; i++) {
+			unsigned int index = emitted->U.P.RGB.Src[i].Index;
+			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
+			    && (index  == prev_rgb_index
+				|| index == prev_alpha_index)) {
+				emitted->Prev->U.P.Nop = 1;
+				return;
+			}
+		}
+	}
+
+	/* Check the previous alpha instruction. */
+	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+		return;
+
+	num_src = rc_presubtract_src_reg_count(
+				emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+	for (i = 0; i < num_src; i++) {
+		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
+		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
+		   && (index == prev_rgb_index || index == prev_alpha_index)) {
+			emitted->Prev->U.P.Nop = 1;
+			return;
+		}
+	}
+}
+
+static void rgb_to_alpha_remap (
+	struct rc_instruction * inst,
+	struct rc_pair_instruction_arg * arg,
+	rc_register_file old_file,
+	rc_swizzle old_swz,
+	unsigned int new_index)
+{
+	int new_src_index;
+	unsigned int i;
+
+	for (i = 0; i < 3; i++) {
+		if (get_swz(arg->Swizzle, i) == old_swz) {
+			SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
+		}
+	}
+	new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
+							old_file, new_index);
+	/* This conversion is not possible, we must have made a mistake in
+	 * is_rgb_to_alpha_possible. */
+	if (new_src_index < 0) {
+		assert(0);
+		return;
+	}
+
+	arg->Source = new_src_index;
+}
+
+static int can_remap(unsigned int opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_DDX:
+	case RC_OPCODE_DDY:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static int can_convert_opcode_to_alpha(unsigned int opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_DDX:
+	case RC_OPCODE_DDY:
+	case RC_OPCODE_DP2:
+	case RC_OPCODE_DP3:
+	case RC_OPCODE_DP4:
+	case RC_OPCODE_DPH:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static void is_rgb_to_alpha_possible(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src)
+{
+	unsigned int chan_count = 0;
+	unsigned int alpha_sources = 0;
+	unsigned int i;
+	struct rc_reader_data * reader_data = userdata;
+
+	if (!can_remap(inst->U.P.RGB.Opcode)
+	    || !can_remap(inst->U.P.Alpha.Opcode)) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	if (!src)
+		return;
+
+	/* XXX There are some cases where we can still do the conversion if
+	 * a reader reads from a presubtract source, but for now we'll prevent
+	 * it. */
+	if (arg->Source == RC_PAIR_PRESUB_SRC) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* Make sure the source only reads from one component.
+	 * XXX We should allow the source to read from the same component twice.
+	 * XXX If the index we will be converting to is the same as the
+	 * current index, then it is OK to read from more than one component.
+	 */
+	for (i = 0; i < 3; i++) {
+		rc_swizzle swz = get_swz(arg->Swizzle, i);
+		switch(swz) {
+		case RC_SWIZZLE_X:
+		case RC_SWIZZLE_Y:
+		case RC_SWIZZLE_Z:
+		case RC_SWIZZLE_W:
+			chan_count++;
+			break;
+		default:
+			break;
+		}
+	}
+	if (chan_count > 1) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* Make sure there are enough alpha sources.
+	 * XXX If we know what register all the readers are going
+	 * to be remapped to, then in some situations we can still do
+	 * the subsitution, even if all 3 alpha sources are being used.*/
+	for (i = 0; i < 3; i++) {
+		if (inst->U.P.Alpha.Src[i].Used) {
+			alpha_sources++;
+		}
+	}
+	if (alpha_sources > 2) {
+		reader_data->Abort = 1;
+		return;
+	}
+}
+
+static int convert_rgb_to_alpha(
+	struct schedule_state * s,
+	struct schedule_instruction * sched_inst)
+{
+	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
+	unsigned int old_mask = pair_inst->RGB.WriteMask;
+	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair_inst->RGB.Opcode);
+	int new_index = -1;
+	unsigned int i;
+
+	if (sched_inst->GlobalReaders.Abort)
+		return 0;
+
+	if (!pair_inst->RGB.WriteMask)
+		return 0;
+
+	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
+	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
+		return 0;
+	}
+
+	assert(sched_inst->NumWriteValues == 1);
+
+	if (!sched_inst->WriteValues[0]) {
+		assert(0);
+		return 0;
+	}
+
+	/* We start at the old index, because if we can reuse the same
+	 * register and just change the swizzle then it is more likely we
+	 * will be able to convert all the readers. */
+	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
+		struct reg_value ** new_regvalp = get_reg_valuep(
+						s, RC_FILE_TEMPORARY, i, 3);
+		if (!*new_regvalp) {
+			struct reg_value ** old_regvalp =
+				get_reg_valuep(s,
+					RC_FILE_TEMPORARY,
+					pair_inst->RGB.DestIndex,
+					rc_mask_to_swizzle(old_mask));
+			new_index = i;
+			*new_regvalp = *old_regvalp;
+			*old_regvalp = NULL;
+			new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
+			break;
+		}
+	}
+	if (new_index < 0) {
+		return 0;
+	}
+
+	pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
+	pair_inst->Alpha.DestIndex = new_index;
+	pair_inst->Alpha.WriteMask = RC_MASK_W;
+	pair_inst->Alpha.Target = pair_inst->RGB.Target;
+	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
+	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
+	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
+	memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
+						sizeof(pair_inst->Alpha.Arg));
+	/* Move the swizzles into the first chan */
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int j;
+		for (j = 0; j < 3; j++) {
+			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
+			if (swz != RC_SWIZZLE_UNUSED) {
+				pair_inst->Alpha.Arg[i].Swizzle =
+							rc_init_swizzle(swz, 1);
+				break;
+			}
+		}
+	}
+	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
+	pair_inst->RGB.DestIndex = 0;
+	pair_inst->RGB.WriteMask = 0;
+	pair_inst->RGB.Target = 0;
+	pair_inst->RGB.OutputWriteMask = 0;
+	pair_inst->RGB.DepthWriteMask = 0;
+	pair_inst->RGB.Saturate = 0;
+	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
+
+	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
+		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
+		rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
+					RC_FILE_TEMPORARY, old_swz, new_index);
+	}
+	return 1;
+}
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ */
+static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
+{
+	struct schedule_instruction * sinst;
+
+	if (s->ReadyFullALU) {
+		sinst = s->ReadyFullALU;
+		s->ReadyFullALU = s->ReadyFullALU->NextReady;
+		rc_insert_instruction(before->Prev, sinst->Instruction);
+		commit_alu_instruction(s, sinst);
+	} else {
+		struct schedule_instruction **prgb;
+		struct schedule_instruction **palpha;
+		struct schedule_instruction *prev;
+pair:
+		/* Some pairings might fail because they require too
+		 * many source slots; try all possible pairings if necessary */
+		for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+			for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+				struct schedule_instruction * psirgb = *prgb;
+				struct schedule_instruction * psialpha = *palpha;
+
+				if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
+					continue;
+
+				*prgb = (*prgb)->NextReady;
+				*palpha = (*palpha)->NextReady;
+				rc_insert_instruction(before->Prev, psirgb->Instruction);
+				commit_alu_instruction(s, psirgb);
+				commit_alu_instruction(s, psialpha);
+				goto success;
+			}
+		}
+		prev = NULL;
+		/* No success in pairing, now try to convert one of the RGB
+		 * instructions to an Alpha so we can pair it with another RGB.
+		 */
+		if (s->ReadyRGB && s->ReadyRGB->NextReady) {
+		for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+			if ((*prgb)->NumWriteValues == 1) {
+				struct schedule_instruction * prgb_next;
+				if (!convert_rgb_to_alpha(s, *prgb))
+					goto cont_loop;
+				prgb_next = (*prgb)->NextReady;
+				/* Add instruction to the Alpha ready list. */
+				(*prgb)->NextReady = s->ReadyAlpha;
+				s->ReadyAlpha = *prgb;
+				/* Remove instruction from the RGB ready list.*/
+				if (prev)
+					prev->NextReady = prgb_next;
+				else
+					s->ReadyRGB = prgb_next;
+				goto pair;
+			}
+cont_loop:
+			prev = *prgb;
+		}
+		}
+		/* Still no success in pairing, just take the first RGB
+		 * or alpha instruction. */
+		if (s->ReadyRGB) {
+			sinst = s->ReadyRGB;
+			s->ReadyRGB = s->ReadyRGB->NextReady;
+		} else if (s->ReadyAlpha) {
+			sinst = s->ReadyAlpha;
+			s->ReadyAlpha = s->ReadyAlpha->NextReady;
+		} else {
+			/*XXX Something real bad has happened. */
+			assert(0);
+		}
+
+		rc_insert_instruction(before->Prev, sinst->Instruction);
+		commit_alu_instruction(s, sinst);
+	success: ;
+	}
+	/* If the instruction we just emitted uses a presubtract value, and
+	 * the presubtract sources were written by the previous intstruction,
+	 * the previous instruction needs a nop. */
+	presub_nop(before->Prev);
+}
+
+static void scan_read(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	struct schedule_state * s = data;
+	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
+	struct reg_value_reader * reader;
+
+	if (!v)
+		return;
+
+	if (*v && (*v)->Writer == s->Current) {
+		/* The instruction reads and writes to a register component.
+		 * In this case, we only want to increment dependencies by one.
+		 */
+		return;
+	}
+
+	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+	reader->Reader = s->Current;
+	if (!*v) {
+		/* In this situation, the instruction reads from a register
+		 * that hasn't been written to or read from in the current
+		 * block. */
+		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
+		memset(*v, 0, sizeof(struct reg_value));
+		(*v)->Readers = reader;
+	} else {
+		reader->Next = (*v)->Readers;
+		(*v)->Readers = reader;
+		/* Only update the current instruction's dependencies if the
+		 * register it reads from has been written to in this block. */
+		if ((*v)->Writer) {
+			s->Current->NumDependencies++;
+		}
+	}
+	(*v)->NumReaders++;
+
+	if (s->Current->NumReadValues >= 12) {
+		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
+	} else {
+		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
+	}
+}
+
+static void scan_write(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	struct schedule_state * s = data;
+	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+	struct reg_value * newv;
+
+	if (!pv)
+		return;
+
+	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+	newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+	memset(newv, 0, sizeof(*newv));
+
+	newv->Writer = s->Current;
+
+	if (*pv) {
+		(*pv)->Next = newv;
+		s->Current->NumDependencies++;
+	}
+
+	*pv = newv;
+
+	if (s->Current->NumWriteValues >= 4) {
+		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
+	} else {
+		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
+	}
+}
+
+static void is_rgb_to_alpha_possible_normal(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct rc_reader_data * reader_data = userdata;
+	reader_data->Abort = 1;
+
+}
+
+static void schedule_block(struct r300_fragment_program_compiler * c,
+		struct rc_instruction * begin, struct rc_instruction * end)
+{
+	struct schedule_state s;
+	unsigned int ip;
+
+	memset(&s, 0, sizeof(s));
+	s.C = &c->Base;
+
+	/* Scan instructions for data dependencies */
+	ip = 0;
+	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+		s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
+		memset(s.Current, 0, sizeof(struct schedule_instruction));
+
+		s.Current->Instruction = inst;
+		inst->IP = ip++;
+
+		DBG("%i: Scanning\n", inst->IP);
+
+		/* The order of things here is subtle and maybe slightly
+		 * counter-intuitive, to account for the case where an
+		 * instruction writes to the same register as it reads
+		 * from. */
+		rc_for_all_writes_chan(inst, &scan_write, &s);
+		rc_for_all_reads_chan(inst, &scan_read, &s);
+
+		DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+
+		if (!s.Current->NumDependencies)
+			instruction_ready(&s, s.Current);
+
+		/* Get global readers for possible RGB->Alpha conversion. */
+		s.Current->GlobalReaders.ExitOnAbort = 1;
+		rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
+				is_rgb_to_alpha_possible_normal,
+				is_rgb_to_alpha_possible, NULL);
+	}
+
+	/* Temporarily unlink all instructions */
+	begin->Prev->Next = end;
+	end->Prev = begin->Prev;
+
+	/* Schedule instructions back */
+	while(!s.C->Error &&
+	      (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+		if (s.ReadyTEX)
+			emit_all_tex(&s, end);
+
+		while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
+			emit_one_alu(&s, end);
+	}
+}
+
+static int is_controlflow(struct rc_instruction * inst)
+{
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		return opcode->IsFlowControl;
+	}
+	return 0;
+}
+
+void rc_pair_schedule(struct radeon_compiler *cc, void *user)
+{
+	struct schedule_state s;
+
+	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+
+	memset(&s, 0, sizeof(s));
+	s.C = &c->Base;
+	while(inst != &c->Base.Program.Instructions) {
+		struct rc_instruction * first;
+
+		if (is_controlflow(inst)) {
+			inst = inst->Next;
+			continue;
+		}
+
+		first = inst;
+
+		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
+			inst = inst->Next;
+
+		DBG("Schedule one block\n");
+		schedule_block(c, first, inst);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
new file mode 100644
index 0000000..2dae56a
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+
+
+/**
+ * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
+ * and reverse the order of arguments for CMP.
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+	struct rc_src_register tmp;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_ADD:
+		inst->SrcReg[2] = inst->SrcReg[1];
+		inst->SrcReg[1].File = RC_FILE_NONE;
+		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+		inst->SrcReg[1].Negate = RC_MASK_NONE;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	case RC_OPCODE_CMP:
+		tmp = inst->SrcReg[2];
+		inst->SrcReg[2] = inst->SrcReg[0];
+		inst->SrcReg[0] = tmp;
+		break;
+	case RC_OPCODE_MOV:
+		/* AMD say we should use CMP.
+		 * However, when we transform
+		 *  KIL -r0;
+		 * into
+		 *  CMP tmp, -r0, -r0, 0;
+		 *  KIL tmp;
+		 * we get incorrect behaviour on R500 when r0 == 0.0.
+		 * It appears that the R500 KIL hardware treats -0.0 as less
+		 * than zero.
+		 */
+		inst->SrcReg[1].File = RC_FILE_NONE;
+		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+		inst->SrcReg[2].File = RC_FILE_NONE;
+		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	case RC_OPCODE_MUL:
+		inst->SrcReg[2].File = RC_FILE_NONE;
+		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	default:
+		/* nothing to do */
+		break;
+	}
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+	int * needrgb, int * needalpha, int * istranscendent)
+{
+	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
+	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
+	*istranscendent = 0;
+
+	if (inst->WriteALUResult == RC_ALURESULT_X)
+		*needrgb = 1;
+	else if (inst->WriteALUResult == RC_ALURESULT_W)
+		*needalpha = 1;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_ADD:
+	case RC_OPCODE_CMP:
+	case RC_OPCODE_CND:
+	case RC_OPCODE_DDX:
+	case RC_OPCODE_DDY:
+	case RC_OPCODE_FRC:
+	case RC_OPCODE_MAD:
+	case RC_OPCODE_MAX:
+	case RC_OPCODE_MIN:
+	case RC_OPCODE_MOV:
+	case RC_OPCODE_MUL:
+		break;
+	case RC_OPCODE_COS:
+	case RC_OPCODE_EX2:
+	case RC_OPCODE_LG2:
+	case RC_OPCODE_RCP:
+	case RC_OPCODE_RSQ:
+	case RC_OPCODE_SIN:
+		*istranscendent = 1;
+		*needalpha = 1;
+		break;
+	case RC_OPCODE_DP4:
+		*needalpha = 1;
+		/* fall through */
+	case RC_OPCODE_DP3:
+		*needrgb = 1;
+		break;
+	default:
+		break;
+	}
+}
+
+static void src_uses(struct rc_src_register src, unsigned int * rgb,
+							unsigned int * alpha)
+{
+	int j;
+	for(j = 0; j < 4; ++j) {
+		unsigned int swz = GET_SWZ(src.Swizzle, j);
+		if (swz < 3)
+			*rgb = 1;
+		else if (swz < 4)
+			*alpha = 1;
+	}
+}
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+	struct rc_pair_instruction * pair,
+	struct rc_sub_instruction * inst)
+{
+	int needrgb, needalpha, istranscendent;
+	const struct rc_opcode_info * opcode;
+	int i;
+
+	memset(pair, 0, sizeof(struct rc_pair_instruction));
+
+	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+	if (needrgb) {
+		if (istranscendent)
+			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
+		else
+			pair->RGB.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->RGB.Saturate = 1;
+	}
+	if (needalpha) {
+		pair->Alpha.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->Alpha.Saturate = 1;
+	}
+
+	opcode = rc_get_opcode_info(inst->Opcode);
+
+	/* Presubtract handling:
+	 * We need to make sure that the values used by the presubtract
+	 * operation end up in src0 or src1. */
+	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
+		/* rc_pair_alloc_source() will fill in data for
+		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
+		int j;
+		for(j = 0; j < 3; j++) {
+			int src_regs;
+			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
+				continue;
+
+			src_regs = rc_presubtract_src_reg_count(
+							inst->PreSub.Opcode);
+			for(i = 0; i < src_regs; i++) {
+				unsigned int rgb = 0;
+				unsigned int alpha = 0;
+				src_uses(inst->SrcReg[j], &rgb, &alpha);
+				if(rgb) {
+					pair->RGB.Src[i].File =
+						inst->PreSub.SrcReg[i].File;
+					pair->RGB.Src[i].Index =
+						inst->PreSub.SrcReg[i].Index;
+					pair->RGB.Src[i].Used = 1;
+				}
+				if(alpha) {
+					pair->Alpha.Src[i].File =
+						inst->PreSub.SrcReg[i].File;
+					pair->Alpha.Src[i].Index =
+						inst->PreSub.SrcReg[i].Index;
+					pair->Alpha.Src[i].Used = 1;
+				}
+			}
+		}
+	}
+
+	for(i = 0; i < opcode->NumSrcRegs; ++i) {
+		int source;
+		if (needrgb && !istranscendent) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			unsigned int srcmask = 0;
+			int j;
+			/* We don't care about the alpha channel here.  We only
+			 * want the part of the swizzle that writes to rgb,
+			 * since we are creating an rgb instruction. */
+			for(j = 0; j < 3; ++j) {
+				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+
+				if (swz < RC_SWIZZLE_W)
+					srcrgb = 1;
+				else if (swz == RC_SWIZZLE_W)
+					srcalpha = 1;
+
+				if (swz < RC_SWIZZLE_UNUSED)
+					srcmask |= 1 << j;
+			}
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			if (source < 0) {
+				rc_error(&c->Base, "Failed to translate "
+							"rgb instruction.\n");
+				return;
+			}
+			pair->RGB.Arg[i].Source = source;
+			pair->RGB.Arg[i].Swizzle =
+				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
+			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+		}
+		if (needalpha) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
+			if (swz < 3)
+				srcrgb = 1;
+			else if (swz < 4)
+				srcalpha = 1;
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			if (source < 0) {
+				rc_error(&c->Base, "Failed to translate "
+							"alpha instruction.\n");
+				return;
+			}
+			pair->Alpha.Arg[i].Source = source;
+			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
+			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+		}
+	}
+
+	/* Destination handling */
+	if (inst->DstReg.File == RC_FILE_OUTPUT) {
+        if (inst->DstReg.Index == c->OutputDepth) {
+            pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+        } else {
+            for (i = 0; i < 4; i++) {
+                if (inst->DstReg.Index == c->OutputColor[i]) {
+                    pair->RGB.Target = i;
+                    pair->Alpha.Target = i;
+                    pair->RGB.OutputWriteMask |=
+                        inst->DstReg.WriteMask & RC_MASK_XYZ;
+                    pair->Alpha.OutputWriteMask |=
+                        GET_BIT(inst->DstReg.WriteMask, 3);
+                    break;
+                }
+            }
+        }
+	} else {
+		if (needrgb) {
+			pair->RGB.DestIndex = inst->DstReg.Index;
+			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+		}
+
+		if (needalpha) {
+			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
+			if (pair->Alpha.WriteMask) {
+				pair->Alpha.DestIndex = inst->DstReg.Index;
+			}
+		}
+	}
+
+	if (inst->WriteALUResult) {
+		pair->WriteALUResult = inst->WriteALUResult;
+		pair->ALUResultCompare = inst->ALUResultCompare;
+	}
+}
+
+
+static void check_opcode_support(struct r300_fragment_program_compiler *c,
+				 struct rc_sub_instruction *inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+	if (opcode->HasDstReg) {
+		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
+			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
+			return;
+		}
+	}
+
+	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+		if (inst->SrcReg[i].RelAddr) {
+			rc_error(&c->Base, "Fragment program does not support relative addressing "
+				 " of source operands.\n");
+			return;
+		}
+	}
+}
+
+
+/**
+ * Translate all ALU instructions into corresponding pair instructions,
+ * performing no other changes.
+ */
+void rc_pair_translate(struct radeon_compiler *cc, void *user)
+{
+	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+
+	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+	    inst != &c->Base.Program.Instructions;
+	    inst = inst->Next) {
+		const struct rc_opcode_info * opcode;
+		struct rc_sub_instruction copy;
+
+		if (inst->Type != RC_INSTRUCTION_NORMAL)
+			continue;
+
+		opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
+			continue;
+
+		copy = inst->U.I;
+
+		check_opcode_support(c, &copy);
+
+		final_rewrite(&copy);
+		inst->Type = RC_INSTRUCTION_PAIR;
+		set_pair_instruction(c, &inst->U.P, &copy);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.c b/src/gallium/drivers/r300/compiler/radeon_program.c
new file mode 100644
index 0000000..fe5756e
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ *  1. Replace it with an empty clause
+ *  2. For every instruction in the original clause, try the given
+ *     transformations in order.
+ *  3. If one of the transformations returns GL_TRUE, assume that it
+ *     has emitted the appropriate instruction(s) into the new clause;
+ *     otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void rc_local_transform(
+	struct radeon_compiler * c,
+	void *user)
+{
+	struct radeon_program_transformation *transformations =
+		(struct radeon_program_transformation*)user;
+	struct rc_instruction * inst = c->Program.Instructions.Next;
+
+	while(inst != &c->Program.Instructions) {
+		struct rc_instruction * current = inst;
+		int i;
+
+		inst = inst->Next;
+
+		for(i = 0; transformations[i].function; ++i) {
+			struct radeon_program_transformation* t = transformations + i;
+
+			if (t->function(c, current, t->userData))
+				break;
+		}
+	}
+}
+
+struct get_used_temporaries_data {
+	unsigned char * Used;
+	unsigned int UsedLength;
+};
+
+static void get_used_temporaries_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct get_used_temporaries_data * d = userdata;
+
+	if (file != RC_FILE_TEMPORARY)
+		return;
+
+	if (index >= d->UsedLength)
+		return;
+
+	d->Used[index] |= mask;
+}
+
+/**
+ * This function fills in the parameter 'used' with a writemask that
+ * represent which components of each temporary register are used by the
+ * program.  This is meant to be combined with rc_find_free_temporary_list as a
+ * more efficient version of rc_find_free_temporary.
+ * @param used The function does not initialize this parameter.
+ */
+void rc_get_used_temporaries(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length)
+{
+	struct rc_instruction * inst;
+	struct get_used_temporaries_data d;
+	d.Used = used;
+	d.UsedLength = used_length;
+
+	for(inst = c->Program.Instructions.Next;
+			inst != &c->Program.Instructions; inst = inst->Next) {
+
+		rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d);
+		rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d);
+	}
+}
+
+/* Search a list of used temporaries for a free one
+ * \sa rc_get_used_temporaries
+ * @note If this functions finds a free temporary, it will mark it as used
+ * in the used temporary list (param 'used')
+ * @param used list of used temporaries
+ * @param used_length number of items in param 'used'
+ * @param mask which components must be free in the temporary index that is
+ * returned.
+ * @return -1 If there are no more free temporaries, otherwise the index of
+ * a temporary register where the components specified in param 'mask' are
+ * not being used.
+ */
+int rc_find_free_temporary_list(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length,
+	unsigned int mask)
+{
+	int i;
+	for(i = 0; i < used_length; i++) {
+		if ((~used[i] & mask) == mask) {
+			used[i] |= mask;
+			return i;
+		}
+	}
+	return -1;
+}
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+{
+	unsigned char used[RC_REGISTER_MAX_INDEX];
+	int free;
+
+	memset(used, 0, sizeof(used));
+
+	rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX);
+
+	free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX,
+								RC_MASK_XYZW);
+	if (free < 0) {
+		rc_error(c, "Ran out of temporary registers\n");
+		return 0;
+	}
+	return free;
+}
+
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+	struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
+
+	memset(inst, 0, sizeof(struct rc_instruction));
+
+	inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+	inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
+	inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
+
+	return inst;
+}
+
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
+{
+	inst->Prev = after;
+	inst->Next = after->Next;
+
+	inst->Prev->Next = inst;
+	inst->Next->Prev = inst;
+}
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+	struct rc_instruction * inst = rc_alloc_instruction(c);
+
+	rc_insert_instruction(after, inst);
+
+	return inst;
+}
+
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+	inst->Prev->Next = inst->Next;
+	inst->Next->Prev = inst->Prev;
+}
+
+/**
+ * Return the number of instructions in the program.
+ */
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
+{
+	unsigned int ip = 0;
+	struct rc_instruction * inst;
+
+	for(inst = c->Program.Instructions.Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next) {
+		inst->IP = ip++;
+	}
+
+	c->Program.Instructions.IP = 0xcafedead;
+
+	return ip;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h
new file mode 100644
index 0000000..b899ecc
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_H_
+#define __RADEON_PROGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "radeon_opcodes.h"
+#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_program_pair.h"
+
+struct radeon_compiler;
+
+struct rc_src_register {
+	unsigned int File:4;
+
+	/** Negative values may be used for relative addressing. */
+	signed int Index:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int RelAddr:1;
+
+	unsigned int Swizzle:12;
+
+	/** Take the component-wise absolute value */
+	unsigned int Abs:1;
+
+	/** Post-Abs negation. */
+	unsigned int Negate:4;
+};
+
+struct rc_dst_register {
+	unsigned int File:3;
+	unsigned int Index:RC_REGISTER_INDEX_BITS;
+	unsigned int WriteMask:4;
+};
+
+struct rc_presub_instruction {
+	rc_presubtract_op Opcode;
+	struct rc_src_register SrcReg[2];
+};
+
+/**
+ * Instructions are maintained by the compiler in a doubly linked list
+ * of these structures.
+ *
+ * This instruction format is intended to be expanded for hardware-specific
+ * trickery. At different stages of compilation, a different set of
+ * instruction types may be valid.
+ */
+struct rc_sub_instruction {
+	struct rc_src_register SrcReg[3];
+	struct rc_dst_register DstReg;
+
+	/**
+	 * Opcode of this instruction, according to \ref rc_opcode enums.
+	 */
+	unsigned int Opcode:8;
+
+	/**
+	 * Saturate each value of the result to the range [0,1] or [-1,1],
+	 * according to \ref rc_saturate_mode enums.
+	 */
+	unsigned int SaturateMode:2;
+
+	/**
+	 * Writing to the special register RC_SPECIAL_ALU_RESULT
+	 */
+	/*@{*/
+	unsigned int WriteALUResult:2;
+	unsigned int ALUResultCompare:3;
+	/*@}*/
+
+	/**
+	 * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+	 */
+	/*@{*/
+	/** Source texture unit. */
+	unsigned int TexSrcUnit:5;
+
+	/** Source texture target, one of the \ref rc_texture_target enums */
+	unsigned int TexSrcTarget:3;
+
+	/** True if tex instruction should do shadow comparison */
+	unsigned int TexShadow:1;
+
+	/**R500 Only.  How to swizzle the result of a TEX lookup*/
+	unsigned int TexSwizzle:12;
+	/*@}*/
+
+	/** This holds information about the presubtract operation used by
+	 * this instruction. */
+	struct rc_presub_instruction PreSub;
+};
+
+typedef enum {
+	RC_INSTRUCTION_NORMAL = 0,
+	RC_INSTRUCTION_PAIR
+} rc_instruction_type;
+
+struct rc_instruction {
+	struct rc_instruction * Prev;
+	struct rc_instruction * Next;
+
+	rc_instruction_type Type;
+	union {
+		struct rc_sub_instruction I;
+		struct rc_pair_instruction P;
+	} U;
+
+	/**
+	 * Warning: IPs are not stable. If you want to use them,
+	 * you need to recompute them at the beginning of each pass
+	 * using \ref rc_recompute_ips
+	 */
+	unsigned int IP;
+};
+
+struct rc_program {
+	/**
+	 * Instructions.Next points to the first instruction,
+	 * Instructions.Prev points to the last instruction.
+	 */
+	struct rc_instruction Instructions;
+
+	/* Long term, we should probably remove InputsRead & OutputsWritten,
+	 * since updating dependent state can be fragile, and they aren't
+	 * actually used very often. */
+	uint32_t InputsRead;
+	uint32_t OutputsWritten;
+	uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+	struct rc_constant_list Constants;
+};
+
+/**
+ * A transformation that can be passed to \ref rc_local_transform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return true, or return false if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+	int (*function)(
+		struct radeon_compiler*,
+		struct rc_instruction*,
+		void*);
+	void *userData;
+};
+
+void rc_local_transform(
+	struct radeon_compiler *c,
+	void *user);
+
+void rc_get_used_temporaries(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length);
+
+int rc_find_free_temporary_list(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length,
+	unsigned int mask);
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
+void rc_remove_instruction(struct rc_instruction * inst);
+
+unsigned int rc_recompute_ips(struct radeon_compiler * c);
+
+void rc_print_program(const struct rc_program *prog);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+#endif
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
new file mode 100644
index 0000000..e273bc4
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -0,0 +1,1154 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Shareable transformations that transform "special" ALU instructions
+ * into ALU instructions that are supported by hardware.
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+
+
+static struct rc_instruction *emit1(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg;
+	return fpi;
+}
+
+static struct rc_instruction *emit2(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg0;
+	fpi->U.I.SrcReg[1] = SrcReg1;
+	return fpi;
+}
+
+static struct rc_instruction *emit3(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+	struct rc_src_register SrcReg2)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg0;
+	fpi->U.I.SrcReg[1] = SrcReg1;
+	fpi->U.I.SrcReg[2] = SrcReg2;
+	return fpi;
+}
+
+static struct rc_dst_register dstregtmpmask(int index, int mask)
+{
+	struct rc_dst_register dst = {0, 0, 0};
+	dst.File = RC_FILE_TEMPORARY;
+	dst.Index = index;
+	dst.WriteMask = mask;
+	return dst;
+}
+
+static const struct rc_src_register builtin_zero = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_0000
+};
+static const struct rc_src_register builtin_one = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_1111
+};
+static const struct rc_src_register srcreg_undefined = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_XYZW
+};
+
+static struct rc_src_register srcreg(int file, int index)
+{
+	struct rc_src_register src = srcreg_undefined;
+	src.File = file;
+	src.Index = index;
+	return src;
+}
+
+static struct rc_src_register srcregswz(int file, int index, int swz)
+{
+	struct rc_src_register src = srcreg_undefined;
+	src.File = file;
+	src.Index = index;
+	src.Swizzle = swz;
+	return src;
+}
+
+static struct rc_src_register absolute(struct rc_src_register reg)
+{
+	struct rc_src_register newreg = reg;
+	newreg.Abs = 1;
+	newreg.Negate = RC_MASK_NONE;
+	return newreg;
+}
+
+static struct rc_src_register negate(struct rc_src_register reg)
+{
+	struct rc_src_register newreg = reg;
+	newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
+	return newreg;
+}
+
+static struct rc_src_register swizzle(struct rc_src_register reg,
+		rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
+{
+	struct rc_src_register swizzled = reg;
+	swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
+	return swizzled;
+}
+
+static struct rc_src_register swizzle_smear(struct rc_src_register reg,
+		rc_swizzle x)
+{
+	return swizzle(reg, x, x, x, x);
+}
+
+static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_X);
+}
+
+static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_Y);
+}
+
+static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_Z);
+}
+
+static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_W);
+}
+
+static int is_dst_safe_to_reuse(struct rc_instruction *inst)
+{
+	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned i;
+
+	assert(info->HasDstReg);
+
+	if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
+		return 0;
+
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+		    inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index)
+			return 0;
+	}
+
+	return 1;
+}
+
+static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
+					       struct rc_instruction *inst)
+{
+	unsigned tmp;
+
+	if (is_dst_safe_to_reuse(inst))
+		tmp = inst->U.I.DstReg.Index;
+	else
+		tmp = rc_find_free_temporary(c);
+
+	return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
+}
+
+static void transform_ABS(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src = inst->U.I.SrcReg[0];
+	src.Abs = 1;
+	src.Negate = RC_MASK_NONE;
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+	rc_remove_instruction(inst);
+}
+
+static void transform_CEIL(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Assuming:
+	 *     ceil(x) = -floor(-x)
+	 *
+	 * After inlining floor:
+	 *     ceil(x) = -(-x-frac(-x))
+	 *
+	 * After simplification:
+	 *     ceil(x) = x+frac(-x)
+	 */
+
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
+	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
+	rc_remove_instruction(inst);
+}
+
+static void transform_CLAMP(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* CLAMP dst, src, min, max
+	 *    into:
+	 * MIN tmp, src, max
+	 * MAX dst, tmp, min
+	 */
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
+		inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
+	emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
+	rc_remove_instruction(inst);
+}
+
+static void transform_DP2(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	struct rc_src_register src1 = inst->U.I.SrcReg[1];
+	src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+	src0.Swizzle &= ~(63 << (3 * 2));
+	src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+	src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+	src1.Swizzle &= ~(63 << (3 * 2));
+	src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+	emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	rc_remove_instruction(inst);
+}
+
+static void transform_DPH(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	src0.Negate &= ~RC_MASK_W;
+	src0.Swizzle &= ~(7 << (3 * 3));
+	src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
+	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+	rc_remove_instruction(inst);
+}
+
+/**
+ * [1, src0.y*src1.y, src0.z, src1.w]
+ * So basically MUL with lotsa swizzling.
+ */
+static void transform_DST(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
+	rc_remove_instruction(inst);
+}
+
+static void transform_FLR(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
+	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
+	rc_remove_instruction(inst);
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ *  tmp = VectorLoad(op0);
+ *  if (tmp.x < 0) tmp.x = 0;
+ *  if (tmp.y < 0) tmp.y = 0;
+ *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ *  result.x = 1.0;
+ *  result.y = tmp.x;
+ *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ *  result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	unsigned int constant;
+	unsigned int constant_swizzle;
+	unsigned int temp;
+	struct rc_src_register srctemp;
+
+	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
+
+	if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
+		struct rc_instruction * inst_mov;
+
+		inst_mov = emit1(c, inst,
+			RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+			srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
+
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	}
+
+	temp = inst->U.I.DstReg.Index;
+	srctemp = srcreg(RC_FILE_TEMPORARY, temp);
+
+	/* tmp.x = max(0.0, Src.x); */
+	/* tmp.y = max(0.0, Src.y); */
+	/* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+		dstregtmpmask(temp, RC_MASK_XYW),
+		inst->U.I.SrcReg[0],
+		swizzle(srcreg(RC_FILE_CONSTANT, constant),
+			RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
+	emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
+		dstregtmpmask(temp, RC_MASK_Z),
+		swizzle_wwww(srctemp),
+		negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
+
+	/* tmp.w = Pow(tmp.y, tmp.w) */
+	emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_yyyy(srctemp));
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_wwww(srctemp),
+		swizzle_zzzz(srctemp));
+	emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_wwww(srctemp));
+
+	/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+		dstregtmpmask(temp, RC_MASK_Z),
+		negate(swizzle_xxxx(srctemp)),
+		swizzle_wwww(srctemp),
+		builtin_zero);
+
+	/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+		dstregtmpmask(temp, RC_MASK_XYW),
+		swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_LRP(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+		dst,
+		inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+		inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_POW(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
+	struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
+	tempdst.WriteMask = RC_MASK_W;
+	tempsrc.Swizzle = RC_SWIZZLE_WWWW;
+
+	emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
+	emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_RSQ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
+}
+
+static void transform_SEQ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SFL(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+	rc_remove_instruction(inst);
+}
+
+static void transform_SGE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SGT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SLE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SLT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SNE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SSG(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* result = sign(x)
+	 *
+	 *   CMP tmp0, -x, 1, 0
+	 *   CMP tmp1, x, 1, 0
+	 *   ADD result, tmp0, -tmp1;
+	 */
+	struct rc_dst_register dst0;
+	unsigned tmp1;
+
+	/* 0 < x */
+	dst0 = try_to_reuse_dst(c, inst);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+	      dst0,
+	      negate(inst->U.I.SrcReg[0]),
+	      builtin_one,
+	      builtin_zero);
+
+	/* x < 0 */
+	tmp1 = rc_find_free_temporary(c);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+	      dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      builtin_one,
+	      builtin_zero);
+
+	/* Either both are zero, or one of them is one and the other is zero. */
+	/* result = tmp0 - tmp1 */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, dst0.Index),
+	      negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SUB(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.Opcode = RC_OPCODE_ADD;
+	inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
+}
+
+static void transform_SWZ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+}
+
+static void transform_XPD(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+		negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
+
+	rc_remove_instruction(inst);
+}
+
+
+/**
+ * Can be used as a transformation for @ref radeonClauseLocalTransform,
+ * no userData necessary.
+ *
+ * Eliminates the following ALU instructions:
+ *  ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
+ * using:
+ *  MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
+ *
+ * Transforms RSQ to Radeon's native RSQ by explicitly setting
+ * absolute value.
+ *
+ * @note should be applicable to R300 and R500 fragment programs.
+ */
+int radeonTransformALU(
+	struct radeon_compiler * c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
+	case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+	case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
+	case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
+	case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+	case RC_OPCODE_DST: transform_DST(c, inst); return 1;
+	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+	case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
+	case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+	case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+	case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
+	case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
+	case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+	case RC_OPCODE_SGE: transform_SGE(c, inst); return 1;
+	case RC_OPCODE_SGT: transform_SGT(c, inst); return 1;
+	case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
+	case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
+	case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+	case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
+	case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+	case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+	case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
+	default:
+		return 0;
+	}
+}
+
+
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Note: r500 can take absolute values, but r300 cannot. */
+	inst->U.I.Opcode = RC_OPCODE_MAX;
+	inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* There is no decent CMP available, so let's rig one up.
+	 * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+	 * The following sequence consumes zero to two temps and two extra slots
+	 * (the second temp and the second slot is consumed by transform_LRP),
+	 * but should be equivalent:
+	 *
+	 * SLT tmp0, src0, 0.0
+	 * LRP dst, tmp0, src1, src2
+	 *
+	 * Yes, I know, I'm a mad scientist. ~ C. & M. */
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	/* SLT tmp0, src0, 0.0 */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+		dst,
+		inst->U.I.SrcReg[0], builtin_zero);
+
+	/* LRP dst, tmp0, src1, src2 */
+	transform_LRP(c,
+		emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+		      inst->U.I.DstReg,
+		      srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1],  inst->U.I.SrcReg[2]));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_DP2(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_instruction *next_inst = inst->Next;
+	transform_DP2(c, inst);
+	next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
+}
+
+static void transform_r300_vertex_DP3(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	struct rc_src_register src1 = inst->U.I.SrcReg[1];
+	src0.Negate &= ~RC_MASK_W;
+	src0.Swizzle &= ~(7 << (3 * 3));
+	src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+	src1.Negate &= ~RC_MASK_W;
+	src1.Swizzle &= ~(7 << (3 * 3));
+	src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	unsigned constant_swizzle;
+	int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
+							 0.0000000000000000001,
+							 &constant_swizzle);
+
+	/* MOV dst, src */
+	dst.WriteMask = RC_MASK_XYZW;
+	emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
+		dst,
+		inst->U.I.SrcReg[0]);
+
+	/* MAX dst.y, src, 0.00...001 */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+		dstregtmpmask(dst.Index, RC_MASK_Y),
+		srcreg(RC_FILE_TEMPORARY, dst.Index),
+		srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+
+	inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
+}
+
+static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* x = y  <==>  x >= y && y >= x */
+	int tmp = rc_find_free_temporary(c);
+
+	/* x <= y */
+	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+	      dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      inst->U.I.SrcReg[1]);
+
+	/* y <= x */
+	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+	      inst->U.I.DstReg,
+	      inst->U.I.SrcReg[1],
+	      inst->U.I.SrcReg[0]);
+
+	/* x && y  =  x * y */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, tmp),
+	      srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SNE(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* x != y  <==>  x < y || y < x */
+	int tmp = rc_find_free_temporary(c);
+
+	/* x < y */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      inst->U.I.SrcReg[1]);
+
+	/* y < x */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      inst->U.I.DstReg,
+	      inst->U.I.SrcReg[1],
+	      inst->U.I.SrcReg[0]);
+
+	/* x || y  =  max(x, y) */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, tmp),
+	      srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SGT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* x > y  <==>  -x < -y */
+	inst->U.I.Opcode = RC_OPCODE_SLT;
+	inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_SLE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* x <= y  <==>  -x >= -y */
+	inst->U.I.Opcode = RC_OPCODE_SGE;
+	inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_SSG(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* result = sign(x)
+	 *
+	 *   SLT tmp0, 0, x;
+	 *   SLT tmp1, x, 0;
+	 *   ADD result, tmp0, -tmp1;
+	 */
+	struct rc_dst_register dst0 = try_to_reuse_dst(c, inst);
+	unsigned tmp1;
+
+	/* 0 < x */
+	dst0 = try_to_reuse_dst(c, inst);
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      dst0,
+	      builtin_zero,
+	      inst->U.I.SrcReg[0]);
+
+	/* x < 0 */
+	tmp1 = rc_find_free_temporary(c);
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      builtin_zero);
+
+	/* Either both are zero, or one of them is one and the other is zero. */
+	/* result = tmp0 - tmp1 */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, dst0.Index),
+	      negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+	rc_remove_instruction(inst);
+}
+
+/**
+ * For use with rc_local_transform, this transforms non-native ALU
+ * instructions of the r300 up to r500 vertex engine.
+ */
+int r300_transform_vertex_alu(
+	struct radeon_compiler * c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+	case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+	case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
+	case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
+	case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
+	case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
+	case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+	case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
+	case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+	case RC_OPCODE_SEQ:
+		if (!c->is_r500) {
+			transform_r300_vertex_SEQ(c, inst);
+			return 1;
+		}
+		return 0;
+	case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+	case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
+	case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
+	case RC_OPCODE_SNE:
+		if (!c->is_r500) {
+			transform_r300_vertex_SNE(c, inst);
+			return 1;
+		}
+		return 0;
+	case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
+	case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+	case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+	case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
+	default:
+		return 0;
+	}
+}
+
+static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
+{
+	static const float SinCosConsts[2][4] = {
+		{
+			1.273239545,		/* 4/PI */
+			-0.405284735,		/* -4/(PI*PI) */
+			3.141592654,		/* PI */
+			0.2225			/* weight */
+		},
+		{
+			0.75,
+			0.5,
+			0.159154943,		/* 1/(2*PI) */
+			6.283185307		/* 2*PI */
+		}
+	};
+	int i;
+
+	for(i = 0; i < 2; ++i)
+		constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(
+	struct radeon_compiler* c, struct rc_instruction * inst,
+	struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
+{
+	unsigned int tempreg = rc_find_free_temporary(c);
+
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+		swizzle_xxxx(src),
+		srcreg(RC_FILE_CONSTANT, constants[0]));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
+		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		absolute(swizzle_xxxx(src)),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
+		negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
+		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS
+ * using only the basic instructions
+ *  MOV, ADD, MUL, MAD, FRC
+ */
+int r300_transform_trig_simple(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	unsigned int constants[2];
+	unsigned int tempreg;
+
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	tempreg = rc_find_free_temporary(c);
+
+	sincos_constants(c, constants);
+
+	if (inst->U.I.Opcode == RC_OPCODE_COS) {
+		/* MAD tmp.x, src, 1/(2*PI), 0.75 */
+		/* FRC tmp.x, tmp.x */
+		/* MAD tmp.z, tmp.x, 2*PI, -PI */
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+		sin_approx(c, inst, inst->U.I.DstReg,
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+		sin_approx(c, inst, inst->U.I.DstReg,
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+	} else {
+		struct rc_dst_register dst;
+
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			srcreg(RC_FILE_TEMPORARY, tempreg));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			srcreg(RC_FILE_TEMPORARY, tempreg),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+		dst = inst->U.I.DstReg;
+
+		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
+		sin_approx(c, inst, dst,
+			swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+
+		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
+		sin_approx(c, inst, dst,
+			swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+	}
+
+	rc_remove_instruction(inst);
+
+	return 1;
+}
+
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	unsigned srctmp)
+{
+	if (inst->U.I.Opcode == RC_OPCODE_COS) {
+		emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+			srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+		emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+			inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+	} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+		struct rc_dst_register moddst = inst->U.I.DstReg;
+
+		if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+			moddst.WriteMask = RC_MASK_X;
+			emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+		}
+		if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+			moddst.WriteMask = RC_MASK_Y;
+			emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+		}
+	}
+
+	rc_remove_instruction(inst);
+}
+
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * to include pre-scaling by 1/(2*PI) and taking the fractional
+ * part, so that the input to COS and SIN is always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ *
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+int radeonTransformTrigScale(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	static const float RCP_2PI = 0.15915494309189535;
+	unsigned int temp;
+	unsigned int constant;
+	unsigned int constant_swizzle;
+
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	temp = rc_find_free_temporary(c);
+	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
+
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
+		swizzle_xxxx(inst->U.I.SrcReg[0]),
+		srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp));
+
+	r300_transform_SIN_COS_SCS(c, inst, temp);
+	return 1;
+}
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * so that the input to COS and SIN is always in the range [-PI, PI].
+ * SCS is replaced by one COS and one SIN instruction.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void *unused)
+{
+	static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+	unsigned int temp;
+	unsigned int constant;
+
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	/* Repeat x in the range [-PI, PI]:
+	 *
+	 *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+	 */
+
+	temp = rc_find_free_temporary(c);
+	constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+		swizzle_xxxx(inst->U.I.SrcReg[0]),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
+
+	r300_transform_SIN_COS_SCS(c, inst, temp);
+	return 1;
+}
+
+/**
+ * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
+ * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
+ *
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+
+int radeonTransformDeriv(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
+		return 0;
+
+	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+	inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+	return 1;
+}
+
+/**
+ * IF Temp[0].x -\
+ * KILP         - > KIL -abs(Temp[0].x)
+ * ENDIF        -/
+ *
+ * This needs to be done in its own pass, because it modifies the instructions
+ * before and after KILP.
+ */
+void rc_transform_KILP(struct radeon_compiler * c, void *user)
+{
+	struct rc_instruction * inst;
+	for (inst = c->Program.Instructions.Next;
+			inst != &c->Program.Instructions; inst = inst->Next) {
+
+		if (inst->U.I.Opcode != RC_OPCODE_KILP)
+			continue;
+
+		inst->U.I.Opcode = RC_OPCODE_KIL;
+
+		if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
+				|| inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+			inst->U.I.SrcReg[0] = negate(builtin_one);
+		} else {
+
+			inst->U.I.SrcReg[0] =
+				negate(absolute(inst->Prev->U.I.SrcReg[0]));
+			/* Remove IF */
+			rc_remove_instruction(inst->Prev);
+			/* Remove ENDIF */
+			rc_remove_instruction(inst->Next);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.h b/src/gallium/drivers/r300/compiler/radeon_program_alu.h
new file mode 100644
index 0000000..b5f361e
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_ALU_H_
+#define __RADEON_PROGRAM_ALU_H_
+
+#include "radeon_program.h"
+
+int radeonTransformALU(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+int r300_transform_vertex_alu(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+int r300_transform_trig_simple(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+int radeonTransformTrigScale(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+int r300_transform_trig_scale_vertex(
+	struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void*);
+
+int radeonTransformDeriv(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+void rc_transform_KILP(struct radeon_compiler * c,
+		       void *user);
+
+#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
new file mode 100644
index 0000000..2457733
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+typedef enum {
+	RC_SATURATE_NONE = 0,
+	RC_SATURATE_ZERO_ONE,
+	RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+typedef enum {
+	RC_TEXTURE_2D_ARRAY,
+	RC_TEXTURE_1D_ARRAY,
+	RC_TEXTURE_CUBE,
+	RC_TEXTURE_3D,
+	RC_TEXTURE_RECT,
+	RC_TEXTURE_2D,
+	RC_TEXTURE_1D
+} rc_texture_target;
+
+typedef enum {
+	/**
+	 * Used to indicate unused register descriptions and
+	 * source register that use a constant swizzle.
+	 */
+	RC_FILE_NONE = 0,
+	RC_FILE_TEMPORARY,
+
+	/**
+	 * Input register.
+	 *
+	 * \note The compiler attaches no implicit semantics to input registers.
+	 * Fragment/vertex program specific semantics must be defined explicitly
+	 * using the appropriate compiler interfaces.
+	 */
+	RC_FILE_INPUT,
+
+	/**
+	 * Output register.
+	 *
+	 * \note The compiler attaches no implicit semantics to input registers.
+	 * Fragment/vertex program specific semantics must be defined explicitly
+	 * using the appropriate compiler interfaces.
+	 */
+	RC_FILE_OUTPUT,
+	RC_FILE_ADDRESS,
+
+	/**
+	 * Indicates a constant from the \ref rc_constant_list .
+	 */
+	RC_FILE_CONSTANT,
+
+	/**
+	 * Indicates a special register, see RC_SPECIAL_xxx.
+	 */
+	RC_FILE_SPECIAL,
+
+	/**
+	 * Indicates this register should use the result of the presubtract
+	 * operation.
+	 */
+	RC_FILE_PRESUB
+} rc_register_file;
+
+enum {
+	/** R500 fragment program ALU result "register" */
+	RC_SPECIAL_ALU_RESULT = 0,
+
+	/** Must be last */
+	RC_NUM_SPECIAL_REGISTERS
+};
+
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+typedef enum {
+	RC_SWIZZLE_X = 0,
+	RC_SWIZZLE_Y,
+	RC_SWIZZLE_Z,
+	RC_SWIZZLE_W,
+	RC_SWIZZLE_ZERO,
+	RC_SWIZZLE_ONE,
+	RC_SWIZZLE_HALF,
+	RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx)      (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx)      (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+	do { \
+		(swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+	} while(0)
+
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+typedef enum {
+	RC_ALURESULT_NONE = 0,
+	RC_ALURESULT_X,
+	RC_ALURESULT_W
+} rc_write_aluresult;
+
+typedef enum {
+	RC_PRESUB_NONE = 0,
+
+	/** 1 - 2 * src0 */
+	RC_PRESUB_BIAS,
+
+	/** src1 - src0 */
+	RC_PRESUB_SUB,
+
+	/** src1 + src0 */
+	RC_PRESUB_ADD,
+
+	/** 1 - src0 */
+	RC_PRESUB_INV
+} rc_presubtract_op;
+
+static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
+	switch(op){
+	case RC_PRESUB_BIAS:
+	case RC_PRESUB_INV:
+		return 1;
+	case RC_PRESUB_ADD:
+	case RC_PRESUB_SUB:
+		return 2;
+	default:
+		return 0;
+	}
+}
+
+#define RC_SOURCE_NONE  0x0
+#define RC_SOURCE_RGB   0x1
+#define RC_SOURCE_ALPHA 0x2
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.c b/src/gallium/drivers/r300/compiler/radeon_program_pair.c
new file mode 100644
index 0000000..5231595
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2008-2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler_util.h"
+
+#include <stdlib.h>
+
+/**
+ * Return the source slot where we installed the given register access,
+ * or -1 if no slot was free anymore.
+ */
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+	unsigned int rgb, unsigned int alpha,
+	rc_register_file file, unsigned int index)
+{
+	int candidate = -1;
+	int candidate_quality = -1;
+	unsigned int alpha_used = 0;
+	unsigned int rgb_used = 0;
+	int i;
+
+	if ((!rgb && !alpha) || file == RC_FILE_NONE)
+		return 0;
+
+	/* Make sure only one presubtract operation is used per instruction. */
+	if (file == RC_FILE_PRESUB) {
+		if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used
+			&& index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+				return -1;
+		}
+
+		if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used
+			&& index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+				return -1;
+		}
+	}
+
+	for(i = 0; i < 3; ++i) {
+		int q = 0;
+		if (rgb) {
+			if (pair->RGB.Src[i].Used) {
+				if (pair->RGB.Src[i].File != file ||
+				    pair->RGB.Src[i].Index != index) {
+					rgb_used++;
+					continue;
+				}
+				q++;
+			}
+		}
+		if (alpha) {
+			if (pair->Alpha.Src[i].Used) {
+				if (pair->Alpha.Src[i].File != file ||
+				    pair->Alpha.Src[i].Index != index) {
+					alpha_used++;
+					continue;
+				}
+				q++;
+			}
+		}
+		if (q > candidate_quality) {
+			candidate_quality = q;
+			candidate = i;
+		}
+	}
+
+	if (file == RC_FILE_PRESUB) {
+		candidate = RC_PAIR_PRESUB_SRC;
+	} else if (candidate < 0 || (rgb && rgb_used > 2)
+			|| (alpha && alpha_used > 2)) {
+		return -1;
+	}
+
+	/* candidate >= 0 */
+
+	if (rgb) {
+		pair->RGB.Src[candidate].Used = 1;
+		pair->RGB.Src[candidate].File = file;
+		pair->RGB.Src[candidate].Index = index;
+		if (candidate == RC_PAIR_PRESUB_SRC) {
+			/* For registers with the RC_FILE_PRESUB file,
+			 * the index stores the presubtract op. */
+			int src_regs = rc_presubtract_src_reg_count(index);
+			for(i = 0; i < src_regs; i++) {
+				pair->RGB.Src[i].Used = 1;
+			}
+		}
+	}
+	if (alpha) {
+		pair->Alpha.Src[candidate].Used = 1;
+		pair->Alpha.Src[candidate].File = file;
+		pair->Alpha.Src[candidate].Index = index;
+		if (candidate == RC_PAIR_PRESUB_SRC) {
+			/* For registers with the RC_FILE_PRESUB file,
+			 * the index stores the presubtract op. */
+			int src_regs = rc_presubtract_src_reg_count(index);
+			for(i=0; i < src_regs; i++) {
+				pair->Alpha.Src[i].Used = 1;
+			}
+		}
+	}
+
+	return candidate;
+}
+
+static void pair_foreach_source_callback(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb,
+	unsigned int swz,
+	unsigned int src)
+{
+	/* swz > 3 means that the swizzle is either not used, or a constant
+	 * swizzle (e.g. 0, 1, 0.5). */
+	if(swz > 3)
+		return;
+
+	if(swz == RC_SWIZZLE_W) {
+		if (src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			unsigned int src_count = rc_presubtract_src_reg_count(
+				pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+			for(i = 0; i < src_count; i++) {
+				cb(data, &pair->Alpha.Src[i]);
+			}
+		} else {
+			cb(data, &pair->Alpha.Src[src]);
+		}
+	} else {
+		if (src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			unsigned int src_count = rc_presubtract_src_reg_count(
+				pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+			for(i = 0; i < src_count; i++) {
+				cb(data, &pair->RGB.Src[i]);
+			}
+		}
+		else {
+			cb(data, &pair->RGB.Src[src]);
+		}
+	}
+}
+
+void rc_pair_foreach_source_that_alpha_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair->Alpha.Opcode);
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		pair_foreach_source_callback(pair, data, cb,
+					GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0),
+					pair->Alpha.Arg[i].Source);
+	}
+}
+
+void rc_pair_foreach_source_that_rgb_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair->RGB.Opcode);
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int chan;
+		unsigned int swz = RC_SWIZZLE_UNUSED;
+		/* Find a swizzle that is either X,Y,Z,or W.  We assume here
+		 * that if one channel swizzles X,Y, or Z, then none of the
+		 * other channels swizzle W, and vice-versa. */
+		for(chan = 0; chan < 4; chan++) {
+			swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
+			if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+			|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
+				continue;
+		}
+		pair_foreach_source_callback(pair, data, cb,
+					swz,
+					pair->RGB.Arg[i].Source);
+	}
+}
+
+struct rc_pair_instruction_source * rc_pair_get_src(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_arg * arg)
+{
+	unsigned int type;
+
+	type = rc_source_type_swz(arg->Swizzle);
+
+	if (type & RC_SOURCE_RGB) {
+		return &pair_inst->RGB.Src[arg->Source];
+	} else if (type & RC_SOURCE_ALPHA) {
+		return &pair_inst->Alpha.Src[arg->Source];
+	} else {
+		return NULL;
+	}
+}
+
+int rc_pair_get_src_index(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_source * src)
+{
+	int i;
+	for (i = 0; i < 3; i++) {
+		if (&pair_inst->RGB.Src[i] == src
+			|| &pair_inst->Alpha.Src[i] == src) {
+			return i;
+		}
+	}
+	return -1;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
new file mode 100644
index 0000000..a957ea9
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+
+
+/**
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
+ * fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
+ */
+
+/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
+ * the presubtract value will be used, and
+ * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
+ */
+#define RC_PAIR_PRESUB_SRC 3
+
+struct rc_pair_instruction_source {
+	unsigned int Used:1;
+	unsigned int File:3;
+	unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct rc_pair_instruction_arg {
+	unsigned int Source:2;
+	unsigned int Swizzle:12;
+	unsigned int Abs:1;
+	unsigned int Negate:1;
+};
+
+struct rc_pair_sub_instruction {
+	unsigned int Opcode:8;
+	unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+	unsigned int WriteMask:4;
+	unsigned int Target:2;
+	unsigned int OutputWriteMask:3;
+	unsigned int DepthWriteMask:1;
+	unsigned int Saturate:1;
+
+	struct rc_pair_instruction_source Src[4];
+	struct rc_pair_instruction_arg Arg[3];
+};
+
+struct rc_pair_instruction {
+	struct rc_pair_sub_instruction RGB;
+	struct rc_pair_sub_instruction Alpha;
+
+	unsigned int WriteALUResult:2;
+	unsigned int ALUResultCompare:3;
+	unsigned int Nop:1;
+};
+
+typedef void (*rc_pair_foreach_src_fn)
+			(void *, struct rc_pair_instruction_source *);
+
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+	unsigned int rgb, unsigned int alpha,
+	rc_register_file file, unsigned int index);
+
+void rc_pair_foreach_source_that_alpha_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb);
+
+void rc_pair_foreach_source_that_rgb_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb);
+
+struct rc_pair_instruction_source * rc_pair_get_src(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_arg * arg);
+
+int rc_pair_get_src_index(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_source * src);
+/*@}*/
+
+
+/**
+ * Compiler passes that operate with the paired format.
+ */
+/*@{*/
+struct radeon_pair_handler;
+
+void rc_pair_translate(struct radeon_compiler *cc, void *user);
+void rc_pair_schedule(struct radeon_compiler *cc, void *user);
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user);
+void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user);
+void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user);
+/*@}*/
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c
new file mode 100644
index 0000000..390d131
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+	switch(target) {
+	case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+	case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+	case RC_TEXTURE_CUBE: return "CUBE";
+	case RC_TEXTURE_3D: return "3D";
+	case RC_TEXTURE_RECT: return "RECT";
+	case RC_TEXTURE_2D: return "2D";
+	case RC_TEXTURE_1D: return "1D";
+	default: return "BAD_TEXTURE_TARGET";
+	}
+}
+
+static const char * presubtract_op_to_string(rc_presubtract_op op)
+{
+	switch(op) {
+	case RC_PRESUB_NONE:
+		return "NONE";
+	case RC_PRESUB_BIAS:
+		return "(1 - 2 * src0)";
+	case RC_PRESUB_SUB:
+		return "(src1 - src0)";
+	case RC_PRESUB_ADD:
+		return "(src1 + src0)";
+	case RC_PRESUB_INV:
+		return "(1 - src0)";
+	default:
+		return "BAD_PRESUBTRACT_OP";
+	}
+}
+
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+	if (func == RC_COMPARE_FUNC_NEVER) {
+		fprintf(f, "false");
+	} else if (func == RC_COMPARE_FUNC_ALWAYS) {
+		fprintf(f, "true");
+	} else {
+		const char * op;
+		switch(func) {
+		case RC_COMPARE_FUNC_LESS: op = "<"; break;
+		case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
+		case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
+		case RC_COMPARE_FUNC_GREATER: op = ">"; break;
+		case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
+		case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
+		default: op = "???"; break;
+		}
+		fprintf(f, "%s %s %s", lhs, op, rhs);
+	}
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+	if (file == RC_FILE_NONE) {
+		fprintf(f, "none");
+	} else if (file == RC_FILE_SPECIAL) {
+		switch(index) {
+		case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
+		default: fprintf(f, "special[%i]", index); break;
+		}
+	} else {
+		const char * filename;
+		switch(file) {
+		case RC_FILE_TEMPORARY: filename = "temp"; break;
+		case RC_FILE_INPUT: filename = "input"; break;
+		case RC_FILE_OUTPUT: filename = "output"; break;
+		case RC_FILE_ADDRESS: filename = "addr"; break;
+		case RC_FILE_CONSTANT: filename = "const"; break;
+		default: filename = "BAD FILE"; break;
+		}
+		fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+	}
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+	if (mask & RC_MASK_X) fprintf(f, "x");
+	if (mask & RC_MASK_Y) fprintf(f, "y");
+	if (mask & RC_MASK_Z) fprintf(f, "z");
+	if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+	rc_print_register(f, dst.File, dst.Index, 0);
+	if (dst.WriteMask != RC_MASK_XYZW) {
+		fprintf(f, ".");
+		rc_print_mask(f, dst.WriteMask);
+	}
+}
+
+static char rc_swizzle_char(unsigned int swz)
+{
+	switch(swz) {
+	case RC_SWIZZLE_X: return 'x';
+	case RC_SWIZZLE_Y: return 'y';
+	case RC_SWIZZLE_Z: return 'z';
+	case RC_SWIZZLE_W: return 'w';
+	case RC_SWIZZLE_ZERO: return '0';
+	case RC_SWIZZLE_ONE: return '1';
+	case RC_SWIZZLE_HALF: return 'H';
+	case RC_SWIZZLE_UNUSED: return '_';
+	}
+	fprintf(stderr, "bad swz: %u\n", swz);
+	return '?';
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+	unsigned int comp;
+	for(comp = 0; comp < 4; ++comp) {
+		rc_swizzle swz = GET_SWZ(swizzle, comp);
+		if (GET_BIT(negate, comp))
+			fprintf(f, "-");
+		fprintf(f, "%c", rc_swizzle_char(swz));
+	}
+}
+
+static void rc_print_presub_instruction(FILE * f,
+					struct rc_presub_instruction inst)
+{
+	fprintf(f,"(");
+	switch(inst.Opcode){
+	case RC_PRESUB_BIAS:
+		fprintf(f, "1 - 2 * ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_SUB:
+		rc_print_register(f, inst.SrcReg[1].File,
+				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+		fprintf(f, " - ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_ADD:
+		rc_print_register(f, inst.SrcReg[1].File,
+				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+		fprintf(f, " + ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_INV:
+		fprintf(f, "1 - ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	default:
+		break;
+	}
+	fprintf(f, ")");
+}
+
+static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
+						struct rc_src_register src)
+{
+	int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
+
+	if (src.Negate == RC_MASK_XYZW)
+		fprintf(f, "-");
+	if (src.Abs)
+		fprintf(f, "|");
+
+	if(src.File == RC_FILE_PRESUB)
+		rc_print_presub_instruction(f, inst->U.I.PreSub);
+	else
+		rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+	if (src.Abs && !trivial_negate)
+		fprintf(f, "|");
+
+	if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+		fprintf(f, ".");
+		rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+	}
+
+	if (src.Abs && trivial_negate)
+		fprintf(f, "|");
+}
+
+static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
+{
+	switch (opcode) {
+	case RC_OPCODE_IF:
+	case RC_OPCODE_BGNLOOP:
+		return (*branch_depth)++ * 2;
+
+	case RC_OPCODE_ENDIF:
+	case RC_OPCODE_ENDLOOP:
+		assert(*branch_depth > 0);
+		return --(*branch_depth) * 2;
+
+	case RC_OPCODE_ELSE:
+		assert(*branch_depth > 0);
+		return (*branch_depth - 1) * 2;
+
+	default:
+		return *branch_depth * 2;
+	}
+}
+
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int reg;
+	unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth);
+
+	for (unsigned i = 0; i < spaces; i++)
+		fprintf(f, " ");
+
+	fprintf(f, "%s", opcode->Name);
+
+	switch(inst->U.I.SaturateMode) {
+	case RC_SATURATE_NONE: break;
+	case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+	case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+	default: fprintf(f, "_BAD_SAT"); break;
+	}
+
+	if (opcode->HasDstReg) {
+		fprintf(f, " ");
+		rc_print_dst_register(f, inst->U.I.DstReg);
+		if (opcode->NumSrcRegs)
+			fprintf(f, ",");
+	}
+
+	for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+		if (reg > 0)
+			fprintf(f, ",");
+		fprintf(f, " ");
+		rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
+	}
+
+	if (opcode->HasTexture) {
+		fprintf(f, ", %s%s[%u]",
+			textarget_to_string(inst->U.I.TexSrcTarget),
+			inst->U.I.TexShadow ? "SHADOW" : "",
+			inst->U.I.TexSrcUnit);
+	}
+
+	fprintf(f, ";");
+
+	if (inst->U.I.WriteALUResult) {
+		fprintf(f, " [aluresult = (");
+		rc_print_comparefunc(f,
+			(inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
+			inst->U.I.ALUResultCompare, "0");
+		fprintf(f, ")]");
+	}
+
+	fprintf(f, "\n");
+}
+
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+	int printedsrc = 0;
+	unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ?
+					      inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth);
+
+	for (unsigned i = 0; i < spaces; i++)
+		fprintf(f, " ");
+
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used) {
+			if (printedsrc)
+				fprintf(f, ", ");
+			fprintf(f, "src%i.xyz = ", src);
+			rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+			printedsrc = 1;
+		}
+		if (inst->Alpha.Src[src].Used) {
+			if (printedsrc)
+				fprintf(f, ", ");
+			fprintf(f, "src%i.w = ", src);
+			rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+			printedsrc = 1;
+		}
+	}
+	if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		fprintf(f, ", srcp.xyz = %s",
+			presubtract_op_to_string(
+					inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
+	}
+	if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+		fprintf(f, ", srcp.w = %s",
+			presubtract_op_to_string(
+					inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
+	}
+	fprintf(f, "\n");
+
+	if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, "     %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+		if (inst->RGB.WriteMask)
+			fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+				(inst->RGB.WriteMask & 1) ? "x" : "",
+				(inst->RGB.WriteMask & 2) ? "y" : "",
+				(inst->RGB.WriteMask & 4) ? "z" : "");
+		if (inst->RGB.OutputWriteMask)
+			fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
+				(inst->RGB.OutputWriteMask & 1) ? "x" : "",
+				(inst->RGB.OutputWriteMask & 2) ? "y" : "",
+				(inst->RGB.OutputWriteMask & 4) ? "z" : "");
+		if (inst->WriteALUResult == RC_ALURESULT_X)
+			fprintf(f, " aluresult");
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+			const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+			fprintf(f, ", %s%ssrc", neg, abs);
+			if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+				fprintf(f,"p");
+			else
+				fprintf(f,"%d", inst->RGB.Arg[arg].Source);
+			fprintf(f,".%c%c%c%s",
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+				abs);
+		}
+		fprintf(f, "\n");
+	}
+
+	if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, "     %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+		if (inst->Alpha.WriteMask)
+			fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+		if (inst->Alpha.OutputWriteMask)
+			fprintf(f, " color[%i].w", inst->Alpha.Target);
+		if (inst->Alpha.DepthWriteMask)
+			fprintf(f, " depth.w");
+		if (inst->WriteALUResult == RC_ALURESULT_W)
+			fprintf(f, " aluresult");
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+			const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+			fprintf(f, ", %s%ssrc", neg, abs);
+			if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+				fprintf(f,"p");
+			else
+				fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
+			fprintf(f,".%c%s",
+				rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
+		}
+		fprintf(f, "\n");
+	}
+
+	if (inst->WriteALUResult) {
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, "      [aluresult = (");
+		rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+		fprintf(f, ")]\n");
+	}
+}
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+	unsigned int linenum = 0;
+	unsigned branch_depth = 0;
+	struct rc_instruction *inst;
+
+	fprintf(stderr, "# Radeon Compiler Program\n");
+
+	for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+		fprintf(stderr, "%3d: ", linenum);
+
+		if (inst->Type == RC_INSTRUCTION_PAIR)
+			rc_print_pair_instruction(stderr, inst, &branch_depth);
+		else
+			rc_print_normal_instruction(stderr, inst, &branch_depth);
+
+		linenum++;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
new file mode 100644
index 0000000..9d69ebd
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_tex.h"
+
+#include "radeon_compiler_util.h"
+
+/* Series of transformations to be done on textures. */
+
+static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
+						int tmu)
+{
+	struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };
+
+	if (compiler->enable_shadow_ambient) {
+		reg.File = RC_FILE_CONSTANT;
+		reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
+						   RC_STATE_SHADOW_AMBIENT, tmu);
+		reg.Swizzle = RC_SWIZZLE_WWWW;
+	} else {
+		reg.File = RC_FILE_NONE;
+		reg.Swizzle = RC_SWIZZLE_0000;
+	}
+
+	reg.Swizzle = combine_swizzles(reg.Swizzle,
+				compiler->state.unit[tmu].texture_swizzle);
+	return reg;
+}
+
+static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
+						int tmu)
+{
+	struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };
+
+	reg.File = RC_FILE_NONE;
+	reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
+				compiler->state.unit[tmu].texture_swizzle);
+	return reg;
+}
+
+static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
+			    struct rc_instruction *inst,
+			    unsigned state_constant)
+{
+	struct rc_instruction *inst_mov;
+
+	unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+	inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+
+	inst_mov->U.I.Opcode = RC_OPCODE_MUL;
+	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mov->U.I.DstReg.Index = temp;
+	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+	inst_mov->U.I.SrcReg[1].Index =
+			rc_constants_add_state(&compiler->Base.Program.Constants,
+					       state_constant, inst->U.I.TexSrcUnit);
+
+	reset_srcreg(&inst->U.I.SrcReg[0]);
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = temp;
+}
+
+static void projective_divide(struct r300_fragment_program_compiler *compiler,
+			      struct rc_instruction *inst)
+{
+	struct rc_instruction *inst_mul, *inst_rcp;
+
+	unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+	inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_rcp->U.I.DstReg.Index = temp;
+	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+	inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	/* Because the input can be arbitrarily swizzled,
+	 * read the component mapped to W. */
+	inst_rcp->U.I.SrcReg[0].Swizzle =
+		RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
+
+	inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.DstReg.Index = temp;
+	inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.SrcReg[1].Index = temp;
+	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+	reset_srcreg(&inst->U.I.SrcReg[0]);
+	inst->U.I.Opcode = RC_OPCODE_TEX;
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = temp;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
+ *  - implement texture compare (shadow extensions)
+ *  - extract non-native source / destination operands
+ *  - premultiply texture coordinates for RECT
+ *  - extract operand swizzles
+ *  - introduce a temporary register when write masks are needed
+ */
+int radeonTransformTEX(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data)
+{
+	struct r300_fragment_program_compiler *compiler =
+		(struct r300_fragment_program_compiler*)data;
+	rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
+	int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+		      compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;
+
+	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+		inst->U.I.Opcode != RC_OPCODE_TXB &&
+		inst->U.I.Opcode != RC_OPCODE_TXP &&
+		inst->U.I.Opcode != RC_OPCODE_TXD &&
+		inst->U.I.Opcode != RC_OPCODE_TXL &&
+		inst->U.I.Opcode != RC_OPCODE_KIL)
+		return 0;
+
+	/* ARB_shadow & EXT_shadow_funcs */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+		((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
+		 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
+		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+
+		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+
+			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+				inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
+			} else {
+				inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
+			}
+
+			return 1;
+		} else {
+			struct rc_instruction * inst_rcp = NULL;
+			struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
+			unsigned tmp_texsample;
+			unsigned tmp_sum;
+			int pass, fail;
+
+			/* Save the output register. */
+			struct rc_dst_register output_reg = inst->U.I.DstReg;
+			unsigned saturate_mode = inst->U.I.SaturateMode;
+
+			/* Redirect TEX to a new temp. */
+			tmp_texsample = rc_find_free_temporary(c);
+			inst->U.I.SaturateMode = 0;
+			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst->U.I.DstReg.Index = tmp_texsample;
+			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+			tmp_sum = rc_find_free_temporary(c);
+
+			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+				/* Compute 1/W. */
+				inst_rcp = rc_insert_new_instruction(c, inst);
+				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_rcp->U.I.DstReg.Index = tmp_sum;
+				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+				inst_rcp->U.I.SrcReg[0].Swizzle =
+					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
+			}
+
+			/* Divide Z by W (if it's TXP) and saturate. */
+			inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
+			inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
+			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mul->U.I.DstReg.Index = tmp_sum;
+			inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
+			inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+			inst_mul->U.I.SrcReg[0].Swizzle =
+				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
+			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+				inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+				inst_mul->U.I.SrcReg[1].Index = tmp_sum;
+				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+			}
+
+			/* Add the depth texture value. */
+			inst_add = rc_insert_new_instruction(c, inst_mul);
+			inst_add->U.I.Opcode = RC_OPCODE_ADD;
+			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_add->U.I.DstReg.Index = tmp_sum;
+			inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
+			inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_add->U.I.SrcReg[0].Index = tmp_sum;
+			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+			inst_add->U.I.SrcReg[1].Index = tmp_texsample;
+			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+
+			/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
+			 *   LESS:    r  < tex  <=>      -tex+r < 0
+			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
+			 *   GREATER: r  > tex  <=>       tex-r < 0
+			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
+			 *   EQUAL:   GEQUAL
+			 *   NOTEQUAL:LESS
+			 */
+
+			/* This negates either r or tex: */
+			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
+			    comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
+				inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
+			else
+				inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
+
+			/* This negates the whole expresion: */
+			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
+			    comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
+				pass = 1;
+				fail = 2;
+			} else {
+				pass = 2;
+				fail = 1;
+			}
+
+			inst_cmp = rc_insert_new_instruction(c, inst_add);
+			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+			inst_cmp->U.I.SaturateMode = saturate_mode;
+			inst_cmp->U.I.DstReg = output_reg;
+			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
+			inst_cmp->U.I.SrcReg[0].Swizzle =
+					combine_swizzles(RC_SWIZZLE_WWWW,
+							 compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
+			inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
+			inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
+
+			assert(tmp_texsample != tmp_sum);
+		}
+	}
+
+	/* R300 cannot sample from rectangles and the wrap mode fallback needs
+	 * normalized coordinates anyway. */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+	    is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
+		scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
+		inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+	}
+
+	/* Divide by W if needed. */
+	if (inst->U.I.Opcode == RC_OPCODE_TXP &&
+	    (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
+	     compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
+		projective_divide(compiler, inst);
+	}
+
+	/* Texture wrap modes don't work on NPOT textures.
+	 *
+	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
+	 * mirroring are not. If we need to repeat, we do:
+	 *
+	 * MUL temp, texcoord, <scaling factor constant>
+	 * FRC temp, temp ; Discard integer portion of coords
+	 *
+	 * This gives us coords in [0, 1].
+	 *
+	 * Mirroring is trickier. We're going to start out like repeat:
+	 *
+	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
+	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
+	 *                            ; so scale to [0, 1]
+	 * FRC temp, temp ; Make the pattern repeat
+	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
+	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
+	 *				; The pattern is backwards, so reverse it (1-x).
+	 *
+	 * This gives us coords in [0, 1].
+	 *
+	 * ~ C & M. ;)
+	 */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+	    wrapmode != RC_WRAP_NONE) {
+		struct rc_instruction *inst_mov;
+		unsigned temp = rc_find_free_temporary(c);
+
+		if (wrapmode == RC_WRAP_REPEAT) {
+			/* Both instructions will be paired up. */
+			struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_frc->U.I.DstReg.Index = temp;
+			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+		} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
+			/*
+			 * Function:
+			 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+			 *
+			 * Code:
+			 *   MUL temp, src0, 0.5
+			 *   FRC temp, temp
+			 *   MAD temp, temp, 2, -1
+			 *   ADD temp, 1, -abs(temp)
+			 */
+
+			struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+			unsigned two, two_swizzle;
+
+			inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mul->U.I.DstReg.Index = temp;
+			inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+			inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+			inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_frc->U.I.DstReg.Index = temp;
+			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_frc->U.I.SrcReg[0].Index = temp;
+			inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+			two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+			inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mad->U.I.DstReg.Index = temp;
+			inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_mad->U.I.SrcReg[0].Index = temp;
+			inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+			inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+			inst_mad->U.I.SrcReg[1].Index = two;
+			inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+			inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+			inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+			inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_add->U.I.Opcode = RC_OPCODE_ADD;
+			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_add->U.I.DstReg.Index = temp;
+			inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+			inst_add->U.I.SrcReg[1].Index = temp;
+			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+			inst_add->U.I.SrcReg[1].Abs = 1;
+			inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+		} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+			/*
+			 * Mirrored clamp modes are bloody simple, we just use abs
+			 * to mirror [0, 1] into [-1, 0]. This works for
+			 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+			 */
+			struct rc_instruction *inst_mov;
+
+			inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = temp;
+			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+			inst_mov->U.I.SrcReg[0].Abs = 1;
+		}
+
+		/* Preserve W for TXP/TXB. */
+		inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = temp;
+		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		reset_srcreg(&inst->U.I.SrcReg[0]);
+		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst->U.I.SrcReg[0].Index = temp;
+	}
+
+	/* NPOT -> POT conversion for 3D textures. */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+	    compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
+		struct rc_instruction *inst_mov;
+		unsigned temp = rc_find_free_temporary(c);
+
+		/* Saturate XYZ. */
+		inst_mov = rc_insert_new_instruction(c, inst->Prev);
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = temp;
+		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		/* Copy W. */
+		inst_mov = rc_insert_new_instruction(c, inst->Prev);
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = temp;
+		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		reset_srcreg(&inst->U.I.SrcReg[0]);
+		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst->U.I.SrcReg[0].Index = temp;
+
+		scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
+	}
+
+	/* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
+	 * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
+	 */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+	    compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
+		unsigned two, two_swizzle;
+		struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;
+
+		two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle);
+
+		inst_mul = rc_insert_new_instruction(c, inst);
+		inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+		inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
+		inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
+		inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
+		inst_mul->U.I.SrcReg[1].Index = two;
+		inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;
+
+		inst_mad = rc_insert_new_instruction(c, inst_mul);
+		inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+		inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+		inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
+		inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
+		inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
+		inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;
+
+		inst_cnd = rc_insert_new_instruction(c, inst_mad);
+		inst_cnd->U.I.Opcode = RC_OPCODE_CND;
+		inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
+		inst_cnd->U.I.DstReg = inst->U.I.DstReg;
+		inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
+		inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
+		inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+		inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
+		inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
+		inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
+
+		inst->U.I.SaturateMode = 0;
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	}
+
+	/* Cannot write texture to output registers or with saturate (all chips),
+	 * or with masks (non-r500). */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+		 inst->U.I.SaturateMode ||
+		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
+		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
+		inst_mov->U.I.DstReg = inst->U.I.DstReg;
+		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+		inst->U.I.SaturateMode = 0;
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	}
+
+	/* Cannot read texture coordinate from constants file */
+	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
+		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		reset_srcreg(&inst->U.I.SrcReg[0]);
+		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+	}
+
+	return 1;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.h b/src/gallium/drivers/r300/compiler/radeon_program_tex.h
new file mode 100644
index 0000000..a010505
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_TEX_H_
+#define __RADEON_PROGRAM_TEX_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+int radeonTransformTEX(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data);
+
+#endif /* __RADEON_PROGRAM_TEX_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
new file mode 100644
index 0000000..7d76585
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_remove_constants.h"
+#include "radeon_dataflow.h"
+
+struct mark_used_data {
+	unsigned char * const_used;
+	unsigned * has_rel_addr;
+};
+
+static void remap_regs(void * userdata, struct rc_instruction * inst,
+			rc_register_file * pfile, unsigned int * pindex)
+{
+	unsigned *inv_remap_table = userdata;
+
+	if (*pfile == RC_FILE_CONSTANT) {
+		*pindex = inv_remap_table[*pindex];
+	}
+}
+
+static void mark_used(void * userdata, struct rc_instruction * inst,
+						struct rc_src_register * src)
+{
+	struct mark_used_data * d = userdata;
+
+	if (src->File == RC_FILE_CONSTANT) {
+		if (src->RelAddr) {
+			*d->has_rel_addr = 1;
+		} else {
+			d->const_used[src->Index] = 1;
+		}
+	}
+}
+
+void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
+{
+	unsigned **out_remap_table = (unsigned**)user;
+	unsigned char *const_used;
+	unsigned *remap_table;
+	unsigned *inv_remap_table;
+	unsigned has_rel_addr = 0;
+	unsigned is_identity = 1;
+	unsigned are_externals_remapped = 0;
+	struct rc_constant *constants = c->Program.Constants.Constants;
+	struct mark_used_data d;
+	unsigned new_count;
+
+	if (!c->Program.Constants.Count) {
+		*out_remap_table = NULL;
+		return;
+	}
+
+	const_used = malloc(c->Program.Constants.Count);
+	memset(const_used, 0, c->Program.Constants.Count);
+
+	d.const_used = const_used;
+	d.has_rel_addr = &has_rel_addr;
+
+	/* Pass 1: Mark used constants. */
+	for (struct rc_instruction *inst = c->Program.Instructions.Next;
+	     inst != &c->Program.Instructions; inst = inst->Next) {
+		rc_for_all_reads_src(inst, mark_used, &d);
+	}
+
+	/* Pass 2: If there is relative addressing or dead constant elimination
+	 * is disabled, mark all externals as used. */
+	if (has_rel_addr || !c->remove_unused_constants) {
+		for (unsigned i = 0; i < c->Program.Constants.Count; i++)
+			if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+				const_used[i] = 1;
+	}
+
+	/* Pass 3: Make the remapping table and remap constants.
+	 * This pass removes unused constants simply by overwriting them by other constants. */
+	remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+	inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+	new_count = 0;
+
+	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
+		if (const_used[i]) {
+			remap_table[new_count] = i;
+			inv_remap_table[i] = new_count;
+
+			if (i != new_count) {
+				if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+					are_externals_remapped = 1;
+
+				constants[new_count] = constants[i];
+				is_identity = 0;
+			}
+			new_count++;
+		}
+	}
+
+	/*  is_identity ==> new_count == old_count
+	 * !is_identity ==> new_count <  old_count */
+	assert( is_identity || new_count <  c->Program.Constants.Count);
+	assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
+
+	/* Pass 4: Redirect reads of all constants to their new locations. */
+	if (!is_identity) {
+		for (struct rc_instruction *inst = c->Program.Instructions.Next;
+		     inst != &c->Program.Instructions; inst = inst->Next) {
+			rc_remap_registers(inst, remap_regs, inv_remap_table);
+		}
+	}
+
+	/* Set the new constant count. Note that new_count may be less than
+	 * Count even though the remapping function is identity. In that case,
+	 * the constants have been removed at the end of the array. */
+	c->Program.Constants.Count = new_count;
+
+	if (are_externals_remapped) {
+		*out_remap_table = remap_table;
+	} else {
+		*out_remap_table = NULL;
+		free(remap_table);
+	}
+
+	free(const_used);
+	free(inv_remap_table);
+
+	if (c->Debug & RC_DBG_LOG)
+		rc_constants_print(&c->Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.h b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h
new file mode 100644
index 0000000..f29113b
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_REMOVE_CONSTANTS_H
+#define RADEON_REMOVE_CONSTANTS_H
+
+#include "radeon_compiler.h"
+
+void rc_remove_unused_constants(struct radeon_compiler *c, void *user);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c
new file mode 100644
index 0000000..cafa057
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_rename_regs.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+
+/**
+ * This function renames registers in an attempt to get the code close to
+ * SSA form.  After this function has completed, most of the register are only
+ * written to one time, with a few exceptions.
+ *
+ * This function assumes all the instructions are still of type
+ * RC_INSTRUCTION_NORMAL.
+ */
+void rc_rename_regs(struct radeon_compiler *c, void *user)
+{
+	unsigned int i, used_length;
+	int new_index;
+	struct rc_instruction * inst;
+	struct rc_reader_data reader_data;
+	unsigned char * used;
+
+	/* XXX Remove this once the register allocation works with flow control. */
+	for(inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
+			return;
+	}
+
+	used_length = 2 * rc_recompute_ips(c);
+	used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
+	memset(used, 0, sizeof(unsigned char) * used_length);
+
+	rc_get_used_temporaries(c, used, used_length);
+	for(inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+
+		if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
+			continue;
+
+		reader_data.ExitOnAbort = 1;
+		rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+
+		if (reader_data.Abort || reader_data.ReaderCount == 0)
+			continue;
+
+		new_index = rc_find_free_temporary_list(c, used, used_length,
+						RC_MASK_XYZW);
+		if (new_index < 0) {
+			rc_error(c, "Ran out of temporary registers\n");
+			return;
+		}
+
+		reader_data.Writer->U.I.DstReg.Index = new_index;
+		for(i = 0; i < reader_data.ReaderCount; i++) {
+			reader_data.Readers[i].U.I.Src->Index = new_index;
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.h b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h
new file mode 100644
index 0000000..3baf29f
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h
@@ -0,0 +1,9 @@
+
+#ifndef RADEON_RENAME_REGS_H
+#define RADEON_RENAME_REGS_H
+
+struct radeon_compiler;
+
+void rc_rename_regs(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_RENAME_REGS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_swizzle.h b/src/gallium/drivers/r300/compiler/radeon_swizzle.h
new file mode 100644
index 0000000..c81d5f7
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split {
+	unsigned char NumPhases;
+	unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ */
+struct rc_swizzle_caps {
+	/**
+	 * Check whether the given swizzle, absolute and negate combination
+	 * can be implemented natively by the hardware for this opcode.
+	 *
+	 * \return 1 if the swizzle is native for the given opcode
+	 */
+	int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+	/**
+	 * Determine how to split access to the masked channels of the
+	 * given source register to obtain ALU-native swizzles.
+	 */
+	void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c
new file mode 100644
index 0000000..938fb84
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_variable.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_variable.h"
+
+#include "memory_pool.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+/**
+ * Rewrite the index and writemask for the destination register of var
+ * and its friends to new_index and new_writemask.  This function also takes
+ * care of rewriting the swizzles for the sources of var.
+ */
+void rc_variable_change_dst(
+	struct rc_variable * var,
+	unsigned int new_index,
+	unsigned int new_writemask)
+{
+	struct rc_variable * var_ptr;
+	struct rc_list * readers;
+	unsigned int old_mask = rc_variable_writemask_sum(var);
+	unsigned int conversion_swizzle =
+			rc_make_conversion_swizzle(old_mask, new_writemask);
+
+	for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
+		if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
+			rc_normal_rewrite_writemask(var_ptr->Inst,
+							conversion_swizzle);
+			var_ptr->Inst->U.I.DstReg.Index = new_index;
+		} else {
+			struct rc_pair_sub_instruction * sub;
+			if (var_ptr->Dst.WriteMask == RC_MASK_W) {
+				assert(new_writemask & RC_MASK_W);
+				sub = &var_ptr->Inst->U.P.Alpha;
+			} else {
+				sub = &var_ptr->Inst->U.P.RGB;
+				rc_pair_rewrite_writemask(sub,
+							conversion_swizzle);
+			}
+			sub->DestIndex = new_index;
+		}
+	}
+
+	readers = rc_variable_readers_union(var);
+
+	for ( ; readers; readers = readers->Next) {
+		struct rc_reader * reader = readers->Item;
+		if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
+			reader->U.I.Src->Index = new_index;
+			reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
+				reader->U.I.Src->Swizzle, conversion_swizzle);
+		} else {
+			struct rc_pair_instruction * pair_inst =
+							&reader->Inst->U.P;
+			unsigned int src_type = rc_source_type_swz(
+							reader->U.P.Arg->Swizzle);
+
+			int src_index = reader->U.P.Arg->Source;
+			if (src_index == RC_PAIR_PRESUB_SRC) {
+				src_index = rc_pair_get_src_index(
+						pair_inst, reader->U.P.Src);
+			}
+			/* Try to delete the old src, it is OK if this fails,
+			 * because rc_pair_alloc_source might be able to
+			 * find a source the ca be reused.
+			 */
+			if (rc_pair_remove_src(reader->Inst, src_type,
+							src_index, old_mask)) {
+				/* Reuse the source index of the source that
+				 * was just deleted and set its register
+				 * index.  We can't use rc_pair_alloc_source
+				 * for this becuase it might return a source
+				 * index that is already being used. */
+				if (src_type & RC_SOURCE_RGB) {
+					pair_inst->RGB.Src[src_index]
+						.Used =	1;
+					pair_inst->RGB.Src[src_index]
+						.Index = new_index;
+					pair_inst->RGB.Src[src_index]
+						.File = RC_FILE_TEMPORARY;
+				}
+				if (src_type & RC_SOURCE_ALPHA) {
+					pair_inst->Alpha.Src[src_index]
+						.Used = 1;
+					pair_inst->Alpha.Src[src_index]
+						.Index = new_index;
+					pair_inst->Alpha.Src[src_index]
+						.File = RC_FILE_TEMPORARY;
+				}
+			} else {
+				src_index = rc_pair_alloc_source(
+						&reader->Inst->U.P,
+						src_type & RC_SOURCE_RGB,
+						src_type & RC_SOURCE_ALPHA,
+						RC_FILE_TEMPORARY,
+						new_index);
+				if (src_index < 0) {
+					rc_error(var->C, "Rewrite of inst %u failed "
+						"Can't allocate source for "
+						"Inst %u src_type=%x "
+						"new_index=%u new_mask=%u\n",
+						var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask);
+						continue;
+				}
+			}
+			reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
+				reader->U.P.Arg->Swizzle, conversion_swizzle);
+			if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
+				reader->U.P.Arg->Source = src_index;
+			}
+		}
+	}
+}
+
+/**
+ * Compute the live intervals for var and its friends.
+ */
+void rc_variable_compute_live_intervals(struct rc_variable * var)
+{
+	while(var) {
+		unsigned int i;
+		unsigned int start = var->Inst->IP;
+
+		for (i = 0; i < var->ReaderCount; i++) {
+			unsigned int chan;
+			unsigned int chan_start = start;
+			unsigned int chan_end = var->Readers[i].Inst->IP;
+			unsigned int mask = var->Readers[i].WriteMask;
+			struct rc_instruction * inst;
+
+			/* Extend the live interval of T0 to the start of the
+			 * loop for sequences like:
+			 * BGNLOOP
+			 * read T0
+			 * ...
+			 * write T0
+			 * ENDLOOP
+			 */
+			if (var->Readers[i].Inst->IP < start) {
+				struct rc_instruction * bgnloop =
+					rc_match_endloop(var->Readers[i].Inst);
+				chan_start = bgnloop->IP;
+			}
+
+			/* Extend the live interval of T0 to the start of the
+			 * loop in case there is a BRK instruction in the loop
+			 * (we don't actually check for a BRK instruction we
+			 * assume there is one somewhere in the loop, which
+			 * there usually is) for sequences like:
+			 * BGNLOOP
+			 * ...
+			 * conditional BRK
+			 * ...
+			 * write T0
+			 * ENDLOOP
+			 * read T0
+			 ***************************************************
+			 * Extend the live interval of T0 to the end of the
+			 * loop for sequences like:
+			 * write T0
+			 * BGNLOOP
+			 * ...
+			 * read T0
+			 * ENDLOOP
+			 */
+			for (inst = var->Inst; inst != var->Readers[i].Inst;
+							inst = inst->Next) {
+				rc_opcode op = rc_get_flow_control_inst(inst);
+				if (op == RC_OPCODE_ENDLOOP) {
+					struct rc_instruction * bgnloop =
+						rc_match_endloop(inst);
+					if (bgnloop->IP < chan_start) {
+						chan_start = bgnloop->IP;
+					}
+				} else if (op == RC_OPCODE_BGNLOOP) {
+					struct rc_instruction * endloop =
+						rc_match_bgnloop(inst);
+					if (endloop->IP > chan_end) {
+						chan_end = endloop->IP;
+					}
+				}
+			}
+
+			for (chan = 0; chan < 4; chan++) {
+				if ((mask >> chan) & 0x1) {
+					if (!var->Live[chan].Used
+					|| chan_start < var->Live[chan].Start) {
+						var->Live[chan].Start =
+								chan_start;
+					}
+					if (!var->Live[chan].Used
+					|| chan_end > var->Live[chan].End) {
+						var->Live[chan].End = chan_end;
+					}
+					var->Live[chan].Used = 1;
+				}
+			}
+		}
+		var = var->Friend;
+	}
+}
+
+/**
+ * @return 1 if a and b share a reader
+ * @return 0 if they do not
+ */
+static unsigned int readers_intersect(
+	struct rc_variable * a,
+	struct rc_variable * b)
+{
+	unsigned int a_index, b_index;
+	for (a_index = 0; a_index < a->ReaderCount; a_index++) {
+		struct rc_reader reader_a = a->Readers[a_index];
+		for (b_index = 0; b_index < b->ReaderCount; b_index++) {
+			struct rc_reader reader_b = b->Readers[b_index];
+			if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL
+				&& reader_b.Inst->Type == RC_INSTRUCTION_NORMAL
+				&& reader_a.U.I.Src == reader_b.U.I.Src) {
+
+				return 1;
+			}
+			if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
+				&& reader_b.Inst->Type == RC_INSTRUCTION_PAIR
+				&& reader_a.U.P.Src == reader_b.U.P.Src) {
+
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+void rc_variable_add_friend(
+	struct rc_variable * var,
+	struct rc_variable * friend)
+{
+	assert(var->Dst.Index == friend->Dst.Index);
+	while(var->Friend) {
+		var = var->Friend;
+	}
+	var->Friend = friend;
+}
+
+struct rc_variable * rc_variable(
+	struct radeon_compiler * c,
+	unsigned int DstFile,
+	unsigned int DstIndex,
+	unsigned int DstWriteMask,
+	struct rc_reader_data * reader_data)
+{
+	struct rc_variable * new =
+			memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
+	memset(new, 0, sizeof(struct rc_variable));
+	new->C = c;
+	new->Dst.File = DstFile;
+	new->Dst.Index = DstIndex;
+	new->Dst.WriteMask = DstWriteMask;
+	if (reader_data) {
+		new->Inst = reader_data->Writer;
+		new->ReaderCount = reader_data->ReaderCount;
+		new->Readers = reader_data->Readers;
+	}
+	return new;
+}
+
+static void get_variable_helper(
+	struct rc_list ** variable_list,
+	struct rc_variable * variable)
+{
+	struct rc_list * list_ptr;
+	for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) {
+		if (readers_intersect(variable, list_ptr->Item)) {
+			rc_variable_add_friend(list_ptr->Item, variable);
+			return;
+		}
+	}
+	rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
+}
+
+static void get_variable_pair_helper(
+	struct rc_list ** variable_list,
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	struct rc_pair_sub_instruction * sub_inst)
+{
+	struct rc_reader_data reader_data;
+	struct rc_variable * new_var;
+	rc_register_file file;
+	unsigned int writemask;
+
+	if (sub_inst->Opcode == RC_OPCODE_NOP) {
+		return;
+	}
+	memset(&reader_data, 0, sizeof(struct rc_reader_data));
+	rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
+
+	if (reader_data.ReaderCount == 0) {
+		return;
+	}
+
+	if (sub_inst->WriteMask) {
+		file = RC_FILE_TEMPORARY;
+		writemask = sub_inst->WriteMask;
+	} else if (sub_inst->OutputWriteMask) {
+		file = RC_FILE_OUTPUT;
+		writemask = sub_inst->OutputWriteMask;
+	} else {
+		writemask = 0;
+		file = RC_FILE_NONE;
+	}
+	new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
+								&reader_data);
+	get_variable_helper(variable_list, new_var);
+}
+
+/**
+ * Generate a list of variables used by the shader program.  Each instruction
+ * that writes to a register is considered a variable.  The struct rc_variable
+ * data structure includes a list of readers and is essentially a
+ * definition-use chain.  Any two variables that share a reader are considered
+ * "friends" and they are linked together via the Friend attribute.
+ */
+struct rc_list * rc_get_variables(struct radeon_compiler * c)
+{
+	struct rc_instruction * inst;
+	struct rc_list * variable_list = NULL;
+
+	for (inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+		struct rc_reader_data reader_data;
+		struct rc_variable * new_var;
+		memset(&reader_data, 0, sizeof(reader_data));
+
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+			if (reader_data.ReaderCount == 0) {
+				continue;
+			}
+			new_var = rc_variable(c, inst->U.I.DstReg.File,
+				inst->U.I.DstReg.Index,
+				inst->U.I.DstReg.WriteMask, &reader_data);
+			get_variable_helper(&variable_list, new_var);
+		} else {
+			get_variable_pair_helper(&variable_list, c, inst,
+							&inst->U.P.RGB);
+			get_variable_pair_helper(&variable_list, c, inst,
+							&inst->U.P.Alpha);
+		}
+	}
+
+	return variable_list;
+}
+
+/**
+ * @return The bitwise or of the writemasks of a variable and all of its
+ * friends.
+ */
+unsigned int rc_variable_writemask_sum(struct rc_variable * var)
+{
+	unsigned int writemask = 0;
+	while(var) {
+		writemask |= var->Dst.WriteMask;
+		var = var->Friend;
+	}
+	return writemask;
+}
+
+/*
+ * @return A list of readers for a variable and its friends.  Readers
+ * that read from two different variable friends are only included once in
+ * this list.
+ */
+struct rc_list * rc_variable_readers_union(struct rc_variable * var)
+{
+	struct rc_list * list = NULL;
+	while (var) {
+		unsigned int i;
+		for (i = 0; i < var->ReaderCount; i++) {
+			struct rc_list * temp;
+			struct rc_reader * a = &var->Readers[i];
+			unsigned int match = 0;
+			for (temp = list; temp; temp = temp->Next) {
+				struct rc_reader * b = temp->Item;
+				if (a->Inst->Type != b->Inst->Type) {
+					continue;
+				}
+				if (a->Inst->Type == RC_INSTRUCTION_NORMAL) {
+					if (a->U.I.Src == b->U.I.Src) {
+						match = 1;
+						break;
+					}
+				}
+				if (a->Inst->Type == RC_INSTRUCTION_PAIR) {
+					if (a->U.P.Arg == b->U.P.Arg
+					    && a->U.P.Src == b->U.P.Src) {
+						match = 1;
+						break;
+					}
+				}
+			}
+			if (match) {
+				continue;
+			}
+			rc_list_add(&list, rc_list(&var->C->Pool, a));
+		}
+		var = var->Friend;
+	}
+	return list;
+}
+
+static unsigned int reader_equals_src(
+	struct rc_reader reader,
+	unsigned int src_type,
+	void * src)
+{
+	if (reader.Inst->Type != src_type) {
+		return 0;
+	}
+	if (src_type == RC_INSTRUCTION_NORMAL) {
+		return reader.U.I.Src == src;
+	} else {
+		return reader.U.P.Src == src;
+	}
+}
+
+static unsigned int variable_writes_src(
+	struct rc_variable * var,
+	unsigned int src_type,
+	void * src)
+{
+	unsigned int i;
+	for (i = 0; i < var->ReaderCount; i++) {
+		if (reader_equals_src(var->Readers[i], src_type, src)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+
+struct rc_list * rc_variable_list_get_writers(
+	struct rc_list * var_list,
+	unsigned int src_type,
+	void * src)
+{
+	struct rc_list * list_ptr;
+	struct rc_list * writer_list = NULL;
+	for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) {
+		struct rc_variable * var = list_ptr->Item;
+		if (variable_writes_src(var, src_type, src)) {
+			struct rc_variable * friend;
+			rc_list_add(&writer_list, rc_list(&var->C->Pool, var));
+			for (friend = var->Friend; friend;
+						friend = friend->Friend) {
+				if (variable_writes_src(friend, src_type, src)) {
+					rc_list_add(&writer_list,
+						rc_list(&var->C->Pool, friend));
+				}
+			}
+			/* Once we have indentifed the variable and its
+			 * friends that write this source, we can stop
+			 * stop searching, because we know know of the
+			 * other variables in the list will write this source.
+			 * If they did they would be friends of var.
+			 */
+			break;
+		}
+	}
+	return writer_list;
+}
+
+void rc_variable_print(struct rc_variable * var)
+{
+	unsigned int i;
+	while (var) {
+		fprintf(stderr, "%u: TEMP[%u].%u: ",
+			var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);
+		for (i = 0; i < 4; i++) {
+			fprintf(stderr, "chan %u: start=%u end=%u ", i,
+					var->Live[i].Start, var->Live[i].End);
+		}
+		fprintf(stderr, "%u readers\n", var->ReaderCount);
+		if (var->Friend) {
+			fprintf(stderr, "Friend: \n\t");
+		}
+		var = var->Friend;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.h b/src/gallium/drivers/r300/compiler/radeon_variable.h
new file mode 100644
index 0000000..9427bee
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_variable.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_VARIABLE_H
+#define RADEON_VARIABLE_H
+
+#include "radeon_compiler.h"
+
+struct radeon_compiler;
+struct rc_list;
+struct rc_reader_data;
+struct rc_readers;
+
+struct live_intervals {
+	int Start;
+	int End;
+	int Used;
+};
+
+struct rc_variable {
+	struct radeon_compiler * C;
+	struct rc_dst_register Dst;
+
+	struct rc_instruction * Inst;
+	unsigned int ReaderCount;
+	struct rc_reader * Readers;
+	struct live_intervals Live[4];
+
+	/* A friend is a variable that shares a reader with another variable.
+	 */
+	struct rc_variable * Friend;
+};
+
+void rc_variable_change_dst(
+	struct rc_variable * var,
+	unsigned int new_index,
+	unsigned int new_writemask);
+
+void rc_variable_compute_live_intervals(struct rc_variable * var);
+
+void rc_variable_add_friend(
+	struct rc_variable * var,
+	struct rc_variable * friend);
+
+struct rc_variable * rc_variable(
+	struct radeon_compiler * c,
+	unsigned int DstFile,
+	unsigned int DstIndex,
+	unsigned int DstWriteMask,
+	struct rc_reader_data * reader_data);
+
+struct rc_list * rc_get_variables(struct radeon_compiler * c);
+
+unsigned int rc_variable_writemask_sum(struct rc_variable * var);
+
+struct rc_list * rc_variable_readers_union(struct rc_variable * var);
+
+struct rc_list * rc_variable_list_get_writers(
+	struct rc_list * var_list,
+	unsigned int src_type,
+	void * src);
+
+void rc_variable_print(struct rc_variable * var);
+
+#endif /* RADEON_VARIABLE_H */
diff --git a/src/gallium/drivers/r300/compiler/tests/.gitignore b/src/gallium/drivers/r300/compiler/tests/.gitignore
new file mode 100644
index 0000000..85672fe
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/.gitignore
@@ -0,0 +1 @@
+radeon_compiler_util_tests
diff --git a/src/gallium/drivers/r300/compiler/tests/Makefile b/src/gallium/drivers/r300/compiler/tests/Makefile
new file mode 100644
index 0000000..6eda34a
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/Makefile
@@ -0,0 +1,53 @@
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+CFLAGS += -Wall -Werror
+
+### Basic defines ###
+TESTS =	radeon_compiler_util_tests
+
+TEST_SOURCES := $(TESTS:=.c)
+
+SHARED_SOURCES =		\
+	rc_test_helpers.c	\
+	unit_test.c
+
+C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES)
+
+INCLUDES = \
+	-I. \
+	-I..
+
+COMPILER_LIB = ../../libr300.a
+
+##### TARGETS #####
+
+default: depend run_tests
+
+depend: $(C_SOURCES)
+	rm -f depend
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null
+
+# Remove .o and backup files
+clean:
+	rm -f $(TESTS) depend depend.bak
+
+$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB)
+	$(APP_CC) -o $@ $^
+
+run_tests: $(TESTS)
+	@echo "RUNNING TESTS:"
+	@echo ""
+	$(foreach test, $^, @./$(test))
+
+.PHONY: $(COMPILER_LIB)
+$(COMPILER_LIB):
+	$(MAKE) -C ../..
+
+##### RULES #####
+.c.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+
+sinclude depend
diff --git a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c
new file mode 100644
index 0000000..a2e3f2a
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c
@@ -0,0 +1,76 @@
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_program.h"
+
+#include "rc_test_helpers.h"
+#include "unit_test.h"
+
+static void test_rc_inst_can_use_presub(
+	struct test_result * result,
+	int expected,
+	const char * add_str,
+	const char * replace_str)
+{
+	struct rc_instruction add_inst, replace_inst;
+	int ret;
+
+	test_begin(result);
+	init_rc_normal_instruction(&add_inst, add_str);
+	init_rc_normal_instruction(&replace_inst, replace_str);
+
+	ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0,
+			&replace_inst.U.I.SrcReg[0],
+			&add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]);
+
+	test_check(result, ret == expected);
+}
+
+static void test_runner_rc_inst_can_use_presub(struct test_result * result)
+{
+
+	/* This tests the case where the source being replace has the same
+	 * register file and register index as another source register in the
+	 * CMP instruction.  A previous version of this function was ignoring
+	 * all registers that shared the same file and index as the replacement
+	 * register when counting the number of source selects.
+	 *
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+	 */
+	test_rc_inst_can_use_presub(result, 0,
+		"ADD temp[0].z, temp[6].__x_, const[1].__x_;",
+		"CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;");
+
+
+	/* Testing a random case that should fail
+	 *
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+	 */
+	test_rc_inst_can_use_presub(result, 0,
+		"ADD temp[3], temp[1], temp[2];",
+		"MAD temp[1], temp[0], const[0].xxxx, -temp[3];");
+
+	/* This tests the case where the arguments of the ADD
+	 * instruction share the same register file and index.  Normally, we
+	 * would need only one source select for these two arguments, but since
+	 * they will be part of a presubtract operation we need to use the two
+	 * source selects that the presubtract instruction expects
+	 * (src0 and src1).
+	 *
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+	 */
+	test_rc_inst_can_use_presub(result, 0,
+		"ADD temp[3].x, temp[0].x___, temp[0].x___;",
+		"MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;");
+}
+
+int main(int argc, char ** argv)
+{
+	struct test tests[] = {
+		{"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub},
+		{NULL, NULL}
+	};
+	run_tests(tests);
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
new file mode 100644
index 0000000..ca4738a
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
@@ -0,0 +1,380 @@
+#include <errno.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "../radeon_compiler_util.h"
+#include "../radeon_opcodes.h"
+#include "../radeon_program.h"
+
+#include "rc_test_helpers.h"
+
+/* This file contains some helper functions for filling out the rc_instruction
+ * data structures.  These functions take a string as input based on the format
+ * output by rc_program_print().
+ */
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+#define REGEX_ERR_BUF_SIZE 50
+
+struct match_info {
+	const char * String;
+	int Length;
+};
+
+static int match_length(regmatch_t * matches, int index)
+{
+	return matches[index].rm_eo - matches[index].rm_so;
+}
+
+static int regex_helper(
+	const char * regex_str,
+	const char * search_str,
+	regmatch_t * matches,
+	int num_matches)
+{
+	char err_buf[REGEX_ERR_BUF_SIZE];
+	regex_t regex;
+	int err_code;
+	unsigned int i;
+
+	err_code = regcomp(&regex, regex_str, REG_EXTENDED);
+	if (err_code) {
+		regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
+		fprintf(stderr, "Failed to compile regex: %s\n", err_buf);
+		return 0;
+	}
+
+	err_code = regexec(&regex, search_str, num_matches, matches, 0);
+	DBG("Search string: '%s'\n", search_str);
+	for (i = 0; i < num_matches; i++) {
+		DBG("Match %u start = %d end = %d\n", i,
+					matches[i].rm_so, matches[i].rm_eo);
+	}
+	if (err_code) {
+		regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
+		fprintf(stderr, "Failed to match regex: %s\n", err_buf);
+		return 0;
+	}
+	return 1;
+}
+
+#define REGEX_SRC_MATCHES 6
+
+struct src_tokens {
+	struct match_info Negate;
+	struct match_info Abs;
+	struct match_info File;
+	struct match_info Index;
+	struct match_info Swizzle;
+};
+
+/**
+ * Initialize the source register at index src_index for the instruction based
+ * on src_str.
+ *
+ * NOTE: Warning in init_rc_normal_instruction() applies to this function as
+ * well.
+ *
+ * @param src_str A string that represents the source register.  The format for
+ * this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_src(
+	struct rc_instruction * inst,
+	unsigned int src_index,
+	const char * src_str)
+{
+	const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]])\\](\\.*[[:lower:]-]*)";
+	regmatch_t matches[REGEX_SRC_MATCHES];
+	struct src_tokens tokens;
+	struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index];
+	unsigned int i;
+
+	/* Execute the regex */
+	if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) {
+		fprintf(stderr, "Failed to execute regex for src register.\n");
+		return 0;
+	}
+
+	/* Create Tokens */
+	tokens.Negate.String = src_str + matches[1].rm_so;
+	tokens.Negate.Length = match_length(matches, 1);
+	tokens.Abs.String = src_str + matches[2].rm_so;
+	tokens.Abs.Length = match_length(matches, 2);
+	tokens.File.String = src_str + matches[3].rm_so;
+	tokens.File.Length = match_length(matches, 3);
+	tokens.Index.String = src_str + matches[4].rm_so;
+	tokens.Index.Length = match_length(matches, 4);
+	tokens.Swizzle.String = src_str + matches[5].rm_so;
+	tokens.Swizzle.Length = match_length(matches, 5);
+
+	/* Negate */
+	if (tokens.Negate.Length  > 0) {
+		src_reg->Negate = RC_MASK_XYZW;
+	}
+
+	/* Abs */
+	if (tokens.Abs.Length > 0) {
+		src_reg->Abs = 1;
+	}
+
+	/* File */
+	if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) {
+		src_reg->File = RC_FILE_TEMPORARY;
+	} else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) {
+		src_reg->File = RC_FILE_INPUT;
+	} else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) {
+		src_reg->File = RC_FILE_CONSTANT;
+	} else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) {
+		src_reg->File = RC_FILE_NONE;
+	}
+
+	/* Index */
+	errno = 0;
+	src_reg->Index = strtol(tokens.Index.String, NULL, 10);
+	if (errno > 0) {
+		fprintf(stderr, "Could not convert src register index.\n");
+		return 0;
+	}
+
+	/* Swizzle */
+	if (tokens.Swizzle.Length == 0) {
+		src_reg->Swizzle = RC_SWIZZLE_XYZW;
+	} else {
+		int str_index = 1;
+		src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED);
+		if (tokens.Swizzle.String[0] != '.') {
+			fprintf(stderr, "First char of swizzle is not valid.\n");
+			return 0;
+		}
+		for (i = 0; i < 4; i++, str_index++) {
+			if (tokens.Swizzle.String[str_index] == '-') {
+				src_reg->Negate |= (1 << i);
+				str_index++;
+			}
+			switch(tokens.Swizzle.String[str_index]) {
+			case 'x':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X);
+				break;
+			case 'y':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y);
+				break;
+			case 'z':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z);
+				break;
+			case 'w':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W);
+				break;
+			case '1':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE);
+				break;
+			case '0':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO);
+				break;
+			case 'H':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF);
+				break;
+			case '_':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED);
+				break;
+			default:
+				fprintf(stderr, "Unknown src register swizzle.\n");
+				return 0;
+			}
+		}
+	}
+	DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n",
+			src_reg->File, src_reg->Index, src_reg->Swizzle,
+			src_reg->Negate, src_reg->Abs);
+	return 1;
+}
+
+#define REGEX_DST_MATCHES 4
+
+struct dst_tokens {
+	struct match_info File;
+	struct match_info Index;
+	struct match_info WriteMask;
+};
+
+/**
+ * Initialize the destination for the instruction based on dst_str.
+ *
+ * NOTE: Warning in init_rc_normal_instruction() applies to this function as
+ * well.
+ *
+ * @param dst_str A string that represents the destination register.  The format
+ * for this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_dst(
+	struct rc_instruction * inst,
+	const char * dst_str)
+{
+	const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)";
+	regmatch_t matches[REGEX_DST_MATCHES];
+	struct dst_tokens tokens;
+	unsigned int i;
+
+	/* Execute the regex */
+	if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) {
+		fprintf(stderr, "Failed to execute regex for dst register.\n");
+		return 0;
+	}
+
+	/* Create Tokens */
+	tokens.File.String = dst_str + matches[1].rm_so;
+	tokens.File.Length = match_length(matches, 1);
+	tokens.Index.String = dst_str + matches[2].rm_so;
+	tokens.Index.Length = match_length(matches, 2);
+	tokens.WriteMask.String = dst_str + matches[3].rm_so;
+	tokens.WriteMask.Length = match_length(matches, 3);
+
+	/* File Type */
+	if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) {
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	} else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) {
+		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+	} else {
+		fprintf(stderr, "Unknown dst register file type.\n");
+		return 0;
+	}
+
+	/* File Index */
+	errno = 0;
+	inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10);
+
+	if (errno > 0) {
+		fprintf(stderr, "Could not convert dst register index\n");
+		return 0;
+	}
+
+	/* WriteMask */
+	if (tokens.WriteMask.Length == 0) {
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	} else {
+		/* The first character should be '.' */
+		if (tokens.WriteMask.String[0] != '.') {
+			fprintf(stderr, "1st char of writemask is not valid.\n");
+			return 0;
+		}
+		for (i = 1; i < tokens.WriteMask.Length; i++) {
+			switch(tokens.WriteMask.String[i]) {
+			case 'x':
+				inst->U.I.DstReg.WriteMask |= RC_MASK_X;
+				break;
+			case 'y':
+				inst->U.I.DstReg.WriteMask |= RC_MASK_Y;
+				break;
+			case 'z':
+				inst->U.I.DstReg.WriteMask |= RC_MASK_Z;
+				break;
+			case 'w':
+				inst->U.I.DstReg.WriteMask |= RC_MASK_W;
+				break;
+			default:
+				fprintf(stderr, "Unknown swizzle in writemask.\n");
+				return 0;
+			}
+		}
+	}
+	DBG("Dst Reg File=%u Index=%d Writemask=%d\n",
+			inst->U.I.DstReg.File,
+			inst->U.I.DstReg.Index,
+			inst->U.I.DstReg.WriteMask);
+	return 1;
+}
+
+#define REGEX_INST_MATCHES 7
+
+struct inst_tokens {
+	struct match_info Opcode;
+	struct match_info Sat;
+	struct match_info Dst;
+	struct match_info Srcs[3];
+};
+
+/**
+ * Initialize a normal instruction based on inst_str.
+ *
+ * WARNING: This function might not be able to handle every kind of format that
+ * rc_program_print() can output.  If you are having problems with a
+ * particular string, you may need to add support for it to this functions.
+ *
+ * @param inst_str A string that represents the source register.  The format for
+ * this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_instruction(
+	struct rc_instruction * inst,
+	const char * inst_str)
+{
+	const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)";
+	int i;
+	regmatch_t matches[REGEX_INST_MATCHES];
+	struct inst_tokens tokens;
+
+	/* Initialize inst */
+	memset(inst, 0, sizeof(struct rc_instruction));
+	inst->Type = RC_INSTRUCTION_NORMAL;
+
+	/* Execute the regex */
+	if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) {
+		return 0;
+	}
+	memset(&tokens, 0, sizeof(tokens));
+
+	/* Create Tokens */
+	tokens.Opcode.String = inst_str + matches[1].rm_so;
+	tokens.Opcode.Length = match_length(matches, 1);
+	if (matches[2].rm_so > -1) {
+		tokens.Sat.String = inst_str + matches[2].rm_so;
+		tokens.Sat.Length = match_length(matches, 2);
+	}
+
+
+	/* Fill out the rest of the instruction. */
+	for (i = 0; i < MAX_RC_OPCODE; i++) {
+		const struct rc_opcode_info * info = rc_get_opcode_info(i);
+		unsigned int first_src = 3;
+		unsigned int j;
+		if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) {
+			continue;
+		}
+		inst->U.I.Opcode = info->Opcode;
+		if (info->HasDstReg) {
+			char * dst_str;
+			tokens.Dst.String = inst_str + matches[3].rm_so;
+			tokens.Dst.Length = match_length(matches, 3);
+			first_src++;
+
+			dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1));
+			strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length);
+			dst_str[tokens.Dst.Length] = '\0';
+			init_rc_normal_dst(inst, dst_str);
+			free(dst_str);
+		}
+		for (j = 0; j < info->NumSrcRegs; j++) {
+			char * src_str;
+			tokens.Srcs[j].String =
+				inst_str + matches[first_src + j].rm_so;
+			tokens.Srcs[j].Length =
+				match_length(matches, first_src + j);
+
+			src_str = malloc(sizeof(char) *
+						(tokens.Srcs[j].Length + 1));
+			strncpy(src_str, tokens.Srcs[j].String,
+						tokens.Srcs[j].Length);
+			src_str[tokens.Srcs[j].Length] = '\0';
+			init_rc_normal_src(inst, j, src_str);
+		}
+		break;
+	}
+	return 1;
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h
new file mode 100644
index 0000000..1a6bf96
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h
@@ -0,0 +1,13 @@
+
+int init_rc_normal_src(
+	struct rc_instruction * inst,
+	unsigned int src_index,
+	const char * src_str);
+
+int init_rc_normal_dst(
+	struct rc_instruction * inst,
+	const char * dst_str);
+
+int init_rc_normal_instruction(
+	struct rc_instruction * inst,
+	const char * inst_str);
diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.c b/src/gallium/drivers/r300/compiler/tests/unit_test.c
new file mode 100644
index 0000000..266f336
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/unit_test.c
@@ -0,0 +1,35 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "unit_test.h"
+
+void run_tests(struct test tests[])
+{
+	int i;
+	for (i = 0; tests[i].name; i++) {
+		printf("Test %s\n", tests[i].name);
+		memset(&tests[i].result, 0, sizeof(tests[i].result));
+		tests[i].test_func(&tests[i].result);
+		printf("Test %s (%d/%d) pass\n", tests[i].name,
+			tests[i].result.pass, tests[i].result.test_count);
+	}
+}
+
+void test_begin(struct test_result * result)
+{
+	result->test_count++;
+}
+
+void test_check(struct test_result * result, int cond)
+{
+	printf("Subtest %u -> ", result->test_count);
+	if (cond) {
+		result->pass++;
+		printf("Pass");
+	} else {
+		result->fail++;
+		printf("Fail");
+	}
+	printf("\n");
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.h b/src/gallium/drivers/r300/compiler/tests/unit_test.h
new file mode 100644
index 0000000..441e8b6
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/unit_test.h
@@ -0,0 +1,17 @@
+
+struct test_result {
+	unsigned int test_count;
+	unsigned int pass;
+	unsigned int fail;
+};
+
+struct test {
+	const char * name;
+	void (*test_func)(struct test_result * result);
+	struct test_result result;
+};
+
+void run_tests(struct test tests[]);
+
+void test_begin(struct test_result * result);
+void test_check(struct test_result * result, int cond);
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 388ebcd..ddf5448 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -247,7 +247,7 @@
             if (!r300->hyperz_enabled) {
                 r300->hyperz_enabled =
                     r300->rws->cs_request_feature(r300->cs,
-                                                RADEON_FID_HYPERZ_RAM_ACCESS,
+                                                RADEON_FID_R300_HYPERZ_ACCESS,
                                                 TRUE);
                 if (r300->hyperz_enabled) {
                    /* Need to emit HyperZ buffer regs for the first time. */
@@ -409,10 +409,11 @@
 
 void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
 {
-    struct pipe_framebuffer_state fb = {0};
+    struct pipe_framebuffer_state fb;
+
+    memset(&fb, 0, sizeof(fb));
     fb.width = r300->locked_zbuffer->width;
     fb.height = r300->locked_zbuffer->height;
-    fb.nr_cbufs = 0;
     fb.zsbuf = r300->locked_zbuffer;
 
     r300->context.set_framebuffer_state(&r300->context, &fb);
@@ -421,8 +422,9 @@
 
 void r300_decompress_zmask_locked(struct r300_context *r300)
 {
-    struct pipe_framebuffer_state saved_fb = {0};
+    struct pipe_framebuffer_state saved_fb;
 
+    memset(&saved_fb, 0, sizeof(saved_fb));
     util_copy_framebuffer_state(&saved_fb, r300->fb_state.state);
     r300_decompress_zmask_locked_unsafe(r300);
     r300->context.set_framebuffer_state(&r300->context, &saved_fb);
@@ -443,8 +445,8 @@
     struct r300_context* r300 = r300_context(pipe);
 
     r300_blitter_begin(r300, R300_COPY);
-    util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz,
-                             src, src_level, src_box, TRUE);
+    util_blitter_copy_texture(r300->blitter, dst, dst_level, dstx, dsty, dstz,
+                              src, src_level, src_box, TRUE);
     r300_blitter_end(r300);
 }
 
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 571986c..80148b8 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -31,9 +31,9 @@
  * Radeons. */
 
 /* Parse a PCI ID and fill an r300_capabilities struct with information. */
-void r300_parse_chipset(struct r300_capabilities* caps)
+void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps)
 {
-    switch (caps->pci_id) {
+    switch (pci_id) {
 #define CHIPSET(pci_id, name, chipfamily) \
         case pci_id: \
             caps->family = CHIP_FAMILY_##chipfamily; \
@@ -43,7 +43,7 @@
 
     default:
         fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...",
-                caps->pci_id);
+                pci_id);
         abort();
     }
 
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index 4df6b5b..f96cdaf 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -43,16 +43,10 @@
 /* Structure containing all the possible information about a specific Radeon
  * in the R3xx, R4xx, and R5xx families. */
 struct r300_capabilities {
-    /* PCI ID */
-    uint32_t pci_id;
     /* Chipset family */
     int family;
     /* The number of vertex floating-point units */
     unsigned num_vert_fpus;
-    /* The number of fragment pipes */
-    unsigned num_frag_pipes;
-    /* The number of z pipes */
-    unsigned num_z_pipes;
     /* The number of texture units. */
     unsigned num_tex_units;
     /* Whether or not TCL is physically present */
@@ -121,6 +115,6 @@
     CHIP_FAMILY_RV570
 };
 
-void r300_parse_chipset(struct r300_capabilities* caps);
+void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps);
 
 #endif /* R300_CHIPSET_H */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index d94ac74..b304999 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -99,7 +99,7 @@
     struct r300_context* r300 = r300_context(context);
 
     if (r300->cs && r300->hyperz_enabled) {
-        r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, FALSE);
+        r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE);
     }
 
     if (r300->blitter)
@@ -173,7 +173,7 @@
     boolean is_rv350 = r300->screen->caps.is_rv350;
     boolean is_r500 = r300->screen->caps.is_r500;
     boolean has_tcl = r300->screen->caps.has_tcl;
-    boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0);
+    boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6;
 
     /* Create the actual atom list.
      *
@@ -380,7 +380,7 @@
 
         if (r300->screen->caps.is_r500 ||
             (r300->screen->caps.is_rv350 &&
-             r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0))) {
+             r300->screen->info.drm_minor >= 6)) {
             OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0);
         }
         END_CB;
@@ -457,7 +457,7 @@
     r300_init_render_functions(r300);
     r300_init_states(&r300->context);
 
-    rws->cs_set_flush(r300->cs, r300_flush_callback, r300);
+    rws->cs_set_flush_callback(r300->cs, r300_flush_callback, r300);
 
     /* The KIL opcode needs the first texture unit to be enabled
      * on r3xx-r4xx. In order to calm down the CS checker, we bind this
@@ -520,15 +520,15 @@
                 "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n"
                 "r300: GART size: %d MB, VRAM size: %d MB\n"
                 "r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n",
-                rws->get_value(rws, RADEON_VID_DRM_MAJOR),
-                rws->get_value(rws, RADEON_VID_DRM_MINOR),
-                rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL),
+                r300->screen->info.drm_major,
+                r300->screen->info.drm_minor,
+                r300->screen->info.drm_patchlevel,
                 screen->get_name(screen),
-                rws->get_value(rws, RADEON_VID_PCI_ID),
-                rws->get_value(rws, RADEON_VID_R300_GB_PIPES),
-                rws->get_value(rws, RADEON_VID_R300_Z_PIPES),
-                rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20,
-                rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20,
+                r300->screen->info.pci_id,
+                r300->screen->info.r300_num_gb_pipes,
+                r300->screen->info.r300_num_z_pipes,
+                r300->screen->info.gart_size >> 20,
+                r300->screen->info.vram_size >> 20,
                 "YES", /* XXX really? */
                 r300->screen->caps.zmask_ram ? "YES" : "NO",
                 r300->screen->caps.hiz_ram ? "YES" : "NO");
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index d214af4..b953bd1 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -574,11 +574,12 @@
                                            struct r300_query *query)
 {
     struct r300_capabilities* caps = &r300->screen->caps;
+    uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes;
     CS_LOCALS(r300);
 
-    assert(caps->num_frag_pipes);
+    assert(gb_pipes);
 
-    BEGIN_CS(6 * caps->num_frag_pipes + 2);
+    BEGIN_CS(6 * gb_pipes + 2);
     /* I'm not so sure I like this switch, but it's hard to be elegant
      * when there's so many special cases...
      *
@@ -587,7 +588,7 @@
      * 4-byte offset for each pipe. RV380 and older are special; they have
      * only two pipes, and the second pipe's enable is on bit 3, not bit 1,
      * so there's a chipset cap for that. */
-    switch (caps->num_frag_pipes) {
+    switch (gb_pipes) {
         case 4:
             /* pipe 3 only */
             OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
@@ -613,7 +614,7 @@
             break;
         default:
             fprintf(stderr, "r300: Implementation error: Chipset reports %d"
-                    " pixel pipes!\n", caps->num_frag_pipes);
+                    " pixel pipes!\n", gb_pipes);
             abort();
     }
 
@@ -663,7 +664,7 @@
         return;
 
     if (caps->family == CHIP_FAMILY_RV530) {
-        if (caps->num_z_pipes == 2)
+        if (r300->screen->info.r300_num_z_pipes == 2)
             rv530_emit_query_end_double_z(r300, query);
         else
             rv530_emit_query_end_single_z(r300, query);
@@ -1237,13 +1238,12 @@
         r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
                                 r300_resource(index_buffer)->domain, 0);
 
-    /* Now do the validation. */
+    /* Now do the validation (flush is called inside cs_validate on failure). */
     if (!r300->rws->cs_validate(r300->cs)) {
         /* Ooops, an infinite loop, give up. */
         if (flushed)
             return FALSE;
 
-        r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
         flushed = TRUE;
         goto validate;
     }
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 6c1c9d2..234e043 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -24,7 +24,6 @@
 #define R300_EMIT_H
 
 #include "r300_context.h"
-#include "radeon_code.h"
 
 struct rX00_fragment_program_code;
 struct r300_vertex_program_code;
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 34f5419..dc596c4 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -76,7 +76,6 @@
         /* Create a fence, which is a dummy BO. */
         *rfence = r300->rws->buffer_create(r300->rws, 1, 1,
                                            PIPE_BIND_VERTEX_BUFFER,
-                                           PIPE_USAGE_STATIC,
                                            RADEON_DOMAIN_GTT);
         /* Add the fence as a dummy relocation. */
         r300->rws->cs_add_reloc(r300->cs,
@@ -121,7 +120,7 @@
         }
 
         /* Release HyperZ. */
-        r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS,
+        r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS,
                                       FALSE);
     }
     r300->num_z_clears = 0;
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index e3a1bc4..6f21125 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -38,8 +38,7 @@
 #include "r300_texture.h"
 #include "r300_tgsi_to_rc.h"
 
-#include "radeon_code.h"
-#include "radeon_compiler.h"
+#include "compiler/radeon_compiler.h"
 
 /* Convert info about FS input semantics to r300_shader_semantics. */
 void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
@@ -181,9 +180,10 @@
             v->base.format == PIPE_FORMAT_LATC1_SNORM) {
             unsigned char swizzle[4];
 
-            util_format_combine_swizzles(swizzle,
+            util_format_compose_swizzles(
                             util_format_description(v->base.format)->swizzle,
-                            v->swizzle);
+                            v->swizzle,
+                            swizzle);
 
             state->unit[i].texture_swizzle =
                     RC_MAKE_SWIZZLE(swizzle[0], swizzle[1],
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index c86a90b..45c9e88 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -27,7 +27,7 @@
 
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
-#include "radeon_code.h"
+#include "compiler/radeon_code.h"
 #include "r300_shader_semantics.h"
 
 struct r300_fragment_shader_code {
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 782f041..c0357f9 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -49,16 +49,15 @@
     q->buffer_size = 4096;
 
     if (r300screen->caps.family == CHIP_FAMILY_RV530)
-        q->num_pipes = r300screen->caps.num_z_pipes;
+        q->num_pipes = r300screen->info.r300_num_z_pipes;
     else
-        q->num_pipes = r300screen->caps.num_frag_pipes;
+        q->num_pipes = r300screen->info.r300_num_gb_pipes;
 
     insert_at_tail(&r300->query_list, q);
 
     /* Open up the occlusion query buffer. */
     q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
-                                         PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM,
-                                         q->domain);
+                                         PIPE_BIND_CUSTOM, q->domain);
     q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf);
 
     return (struct pipe_query*)q;
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index bb30b1a..5edbb22 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2078,7 +2078,7 @@
 #       define R300_ALU_OUTC_D2A                (3 << 23)
 #       define R300_ALU_OUTC_MIN                (4 << 23)
 #       define R300_ALU_OUTC_MAX                (5 << 23)
-#       define R300_ALU_OUTC_CMPH               (7 << 23)
+#       define R300_ALU_OUTC_CND                (7 << 23)
 #       define R300_ALU_OUTC_CMP                (8 << 23)
 #       define R300_ALU_OUTC_FRC                (9 << 23)
 #       define R300_ALU_OUTC_REPL_ALPHA         (10 << 23)
@@ -2944,6 +2944,23 @@
 
 /*\}*/
 
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class)	\
+	 (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT)	\
+	 | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT)	\
+	 | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT)	\
+	 | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT)	\
+	 | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT)	/* X Y Z W */	\
+	 | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate)	\
+	(((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT)				\
+	 | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT)			\
+	 | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT)			\
+	 | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT)			\
+	 | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT)			\
+	 | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT)	/* X Y Z W */				\
+	 | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
 /* BEGIN: Packet 3 commands */
 
 /* A primitive emission dword. */
@@ -3249,6 +3266,8 @@
 #   define R500_INST_RGB_CLAMP				(1 << 19)
 #   define R500_INST_ALPHA_CLAMP			(1 << 20)
 #   define R500_INST_ALU_RESULT_SEL			(1 << 21)
+#   define R500_INST_ALU_RESULT_SEL_RED			(0 << 21)
+#   define R500_INST_ALU_RESULT_SEL_ALPHA		(1 << 21)
 #   define R500_INST_ALPHA_PRED_INV			(1 << 22)
 #   define R500_INST_ALU_RESULT_OP_EQ			(0 << 23)
 #   define R500_INST_ALU_RESULT_OP_LT			(1 << 23)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b31141a..d69b4cf 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -1267,33 +1267,31 @@
     r300->sprite_coord_enable = last_sprite_coord_enable;
 }
 
-static void r300_resource_resolve(struct pipe_context* pipe,
-                                  struct pipe_resource* dest,
-                                  unsigned dst_layer,
-                                  struct pipe_resource* src,
-                                  unsigned src_layer)
+static void r300_resource_resolve(struct pipe_context *pipe,
+                                  const struct pipe_resolve_info *info)
 {
-    struct r300_context* r300 = r300_context(pipe);
-    struct pipe_surface* srcsurf, surf_tmpl;
+    struct r300_context *r300 = r300_context(pipe);
+    struct pipe_surface *srcsurf, *dstsurf, surf_tmpl;
     struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
     float color[] = {0, 0, 0, 0};
 
     memset(&surf_tmpl, 0, sizeof(surf_tmpl));
-    surf_tmpl.format = src->format;
-    surf_tmpl.usage = 0; /* not really a surface hence no bind flags */
-    surf_tmpl.u.tex.level = 0; /* msaa resources cannot have mipmaps */
-    surf_tmpl.u.tex.first_layer = src_layer;
-    surf_tmpl.u.tex.last_layer = src_layer;
-    srcsurf = pipe->create_surface(pipe, src, &surf_tmpl);
-    surf_tmpl.format = dest->format;
-    surf_tmpl.u.tex.first_layer = dst_layer;
-    surf_tmpl.u.tex.last_layer = dst_layer;
+    surf_tmpl.format = info->src.res->format;
+    surf_tmpl.u.tex.first_layer =
+    surf_tmpl.u.tex.last_layer = info->src.layer;
+    srcsurf = pipe->create_surface(pipe, info->src.res, &surf_tmpl);
+    /* XXX Offset both surfaces by x0,y1. */
+
+    surf_tmpl.format = info->dst.res->format;
+    surf_tmpl.u.tex.level = info->dst.level;
+    surf_tmpl.u.tex.first_layer =
+    surf_tmpl.u.tex.last_layer = info->dst.layer;
+    dstsurf = pipe->create_surface(pipe, info->dst.res, &surf_tmpl);
 
     DBG(r300, DBG_DRAW, "r300: Resolving resource...\n");
 
     /* Enable AA resolve. */
-    aa->dest = r300_surface(pipe->create_surface(pipe, dest, &surf_tmpl));
-
+    aa->dest = r300_surface(dstsurf);
     aa->aaresolve_ctl =
         R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
         R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE;
@@ -1301,16 +1299,19 @@
     r300_mark_atom_dirty(r300, &r300->aa_state);
 
     /* Resolve the surface. */
+    /* XXX: y1 < 0 ==> Y flip */
     r300->context.clear_render_target(pipe,
-        srcsurf, color, 0, 0, src->width0, src->height0);
+                                      srcsurf, color, 0, 0,
+                                      info->dst.x1 - info->dst.x0,
+                                      info->dst.y1 - info->dst.y0);
 
     /* Disable AA resolve. */
     aa->aaresolve_ctl = 0;
     r300->aa_state.size = 4;
     r300_mark_atom_dirty(r300, &r300->aa_state);
 
-    pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
-    pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL);
+    pipe_surface_reference(&srcsurf, NULL);
+    pipe_surface_reference(&dstsurf, NULL);
 }
 
 void r300_init_render_functions(struct r300_context *r300)
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index c8df45f..8c0500c 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -142,6 +142,7 @@
         case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
         case PIPE_CAP_SEAMLESS_CUBE_MAP:
         case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+        case PIPE_CAP_SCALED_RESOLVE:
             return 0;
 
         /* SWTCL-only features. */
@@ -211,13 +212,12 @@
         case PIPE_SHADER_CAP_MAX_PREDS:
             return is_r500 ? 1 : 0;
         case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
-            return 0;
         case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-            return 0;
         case PIPE_SHADER_CAP_SUBROUTINES:
+        case PIPE_SHADER_CAP_INTEGERS:
             return 0;
         }
         break;
@@ -248,18 +248,15 @@
             return 1; /* XXX guessed */
         case PIPE_SHADER_CAP_MAX_PREDS:
             return is_r500 ? 4 : 0; /* XXX guessed. */
+        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+            return 1;
         case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
-            return 0;
         case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
-            return 0;
-        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-            return 1;
         case PIPE_SHADER_CAP_SUBROUTINES:
+        case PIPE_SHADER_CAP_INTEGERS:
             return 0;
-        default:
-            break;
         }
         break;
     default:
@@ -316,6 +313,8 @@
       case PIPE_VIDEO_CAP_MAX_WIDTH:
       case PIPE_VIDEO_CAP_MAX_HEIGHT:
          return vl_video_buffer_max_size(screen);
+      case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+         return vl_num_buffers_desired(screen, profile);
       default:
          return 0;
    }
@@ -327,9 +326,8 @@
                                         unsigned sample_count,
                                         unsigned usage)
 {
-    struct radeon_winsys *rws = r300_screen(screen)->rws;
     uint32_t retval = 0;
-    boolean drm_2_8_0 = rws->get_value(rws, RADEON_VID_DRM_2_8_0);
+    boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8;
     boolean is_r500 = r300_screen(screen)->caps.is_r500;
     boolean is_r400 = r300_screen(screen)->caps.is_r400;
     boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
@@ -458,7 +456,7 @@
     struct radeon_winsys *rws = r300_screen(screen)->rws;
     struct pb_buffer *rfence = (struct pb_buffer*)fence;
 
-    return !rws->buffer_is_busy(rfence);
+    return !rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);
 }
 
 static boolean r300_fence_finish(struct pipe_screen *screen,
@@ -475,7 +473,7 @@
         timeout /= 1000;
 
         /* Wait in a loop. */
-        while (rws->buffer_is_busy(rfence)) {
+        while (rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
             if (os_time_get() - start_time >= timeout) {
                 return FALSE;
             }
@@ -484,7 +482,7 @@
         return TRUE;
     }
 
-    rws->buffer_wait(rfence);
+    rws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
     return TRUE;
 }
 
@@ -497,19 +495,17 @@
         return NULL;
     }
 
-    r300screen->caps.pci_id = rws->get_value(rws, RADEON_VID_PCI_ID);
-    r300screen->caps.num_frag_pipes = rws->get_value(rws, RADEON_VID_R300_GB_PIPES);
-    r300screen->caps.num_z_pipes = rws->get_value(rws, RADEON_VID_R300_Z_PIPES);
+    rws->query_info(rws, &r300screen->info);
 
     r300_init_debug(r300screen);
-    r300_parse_chipset(&r300screen->caps);
+    r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps);
 
     if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK))
         r300screen->caps.zmask_ram = 0;
     if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ))
         r300screen->caps.hiz_ram = 0;
 
-    if (!rws->get_value(rws, RADEON_VID_DRM_2_8_0))
+    if (r300screen->info.drm_minor < 8)
         r300screen->caps.has_us_format = FALSE;
 
     pipe_mutex_init(r300screen->num_contexts_mutex);
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index e5c53bf..82b2068 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -24,23 +24,20 @@
 #ifndef R300_SCREEN_H
 #define R300_SCREEN_H
 
-#include "pipe/p_screen.h"
-
 #include "r300_chipset.h"
-
+#include "../../winsys/radeon/drm/radeon_winsys.h"
+#include "pipe/p_screen.h"
 #include "util/u_slab.h"
-
 #include <stdio.h>
 
-struct radeon_winsys;
-
 struct r300_screen {
     /* Parent class */
     struct pipe_screen screen;
 
     struct radeon_winsys *rws;
 
-    /* Chipset capabilities */
+    /* Chipset info and capabilities. */
+    struct radeon_info info;
     struct r300_capabilities caps;
 
     /* Memory pools. */
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index 4154c81..c751a94 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -201,8 +201,7 @@
     rbuf->buf =
         r300screen->rws->buffer_create(r300screen->rws,
                                        rbuf->b.b.b.width0, alignment,
-                                       rbuf->b.b.b.bind, rbuf->b.b.b.usage,
-                                       rbuf->domain);
+                                       rbuf->b.b.b.bind, rbuf->domain);
     if (!rbuf->buf) {
         util_slab_free(&r300screen->pool_buffers, rbuf);
         return NULL;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index f63114e..45c11fc 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -605,7 +605,6 @@
 {
     const struct util_format_description *desc;
     float border_swizzled[4] = {0};
-    unsigned i;
     union util_color uc = {0};
 
     desc = util_format_description(format);
@@ -629,22 +628,7 @@
     }
 
     /* Apply inverse swizzle of the format. */
-    for (i = 0; i < 4; i++) {
-        switch (desc->swizzle[i]) {
-        case UTIL_FORMAT_SWIZZLE_X:
-            border_swizzled[0] = border[i];
-            break;
-        case UTIL_FORMAT_SWIZZLE_Y:
-            border_swizzled[1] = border[i];
-            break;
-        case UTIL_FORMAT_SWIZZLE_Z:
-            border_swizzled[2] = border[i];
-            break;
-        case UTIL_FORMAT_SWIZZLE_W:
-            border_swizzled[3] = border[i];
-            break;
-        }
-    }
+    util_format_unswizzle_4f(border_swizzled, border, desc->swizzle);
 
     /* Compressed formats. */
     if (util_format_is_compressed(format)) {
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 62c2f1f..fc84004 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -38,18 +38,6 @@
 
 #include "pipe/p_screen.h"
 
-void util_format_combine_swizzles(unsigned char *dst,
-                                  const unsigned char *swz1,
-                                  const unsigned char *swz2)
-{
-    unsigned i;
-
-    for (i = 0; i < 4; i++) {
-        dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ?
-                 swz1[swz2[i]] : swz2[i];
-    }
-}
-
 unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
                                    const unsigned char *swizzle_view,
                                    boolean dxtc_swizzle)
@@ -72,7 +60,7 @@
 
     if (swizzle_view) {
         /* Combine two sets of swizzles. */
-        util_format_combine_swizzles(swizzle, swizzle_format, swizzle_view);
+        util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
     } else {
         memcpy(swizzle, swizzle_format, 4);
     }
@@ -926,7 +914,7 @@
     if (!buffer) {
         tex->buf_size = tex->tex.size_in_bytes;
         tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048,
-                                         base->bind, base->usage, tex->domain);
+                                      base->bind, tex->domain);
 
         if (!tex->buf) {
             FREE(tex);
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 4586bb2..158a387 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -35,10 +35,6 @@
 struct r300_resource;
 struct r300_screen;
 
-void util_format_combine_swizzles(unsigned char *dst,
-                                  const unsigned char *swz1,
-                                  const unsigned char *swz2);
-
 unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
                                    const unsigned char *swizzle_view,
                                    boolean dxtc_swizzle);
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index da5778b..fe4f8dd 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -360,9 +360,9 @@
         unsigned i, pipes;
 
         if (screen->caps.family == CHIP_FAMILY_RV530) {
-            pipes = screen->caps.num_z_pipes;
+            pipes = screen->info.r300_num_z_pipes;
         } else {
-            pipes = screen->caps.num_frag_pipes;
+            pipes = screen->info.r300_num_gb_pipes;
         }
 
         for (i = 0; i <= tex->b.b.b.last_level; i++) {
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 0561ab9..07a3f3c 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -22,8 +22,7 @@
 
 #include "r300_tgsi_to_rc.h"
 
-#include "radeon_compiler.h"
-#include "radeon_program.h"
+#include "compiler/radeon_compiler.h"
 
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index e2ea4cb..6596402 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -97,7 +97,7 @@
         referenced_hw = TRUE;
     } else {
         referenced_hw =
-            r300->rws->buffer_is_busy(tex->buf);
+            r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE);
     }
 
     blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index b319890..a5e8fd6 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -32,7 +32,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
 
-#include "radeon_compiler.h"
+#include "compiler/radeon_compiler.h"
 
 /* Convert info about VS output semantics into r300_shader_semantics. */
 static void r300_shader_read_vs_outputs(
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 170de6c..a482ddc 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -26,7 +26,7 @@
 
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
-#include "radeon_code.h"
+#include "compiler/radeon_code.h"
 
 #include "r300_context.h"
 #include "r300_shader_semantics.h"
diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk
new file mode 100644
index 0000000..994ae07
--- /dev/null
+++ b/src/gallium/drivers/r600/Android.mk
@@ -0,0 +1,42 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_CFLAGS := -std=c99
+
+LOCAL_C_INCLUDES := \
+	$(DRM_TOP) \
+	$(DRM_TOP)/include/drm
+
+LOCAL_MODULE := libmesa_pipe_r600
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index 7e21e3e..0e68fe9 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -6,20 +6,7 @@
 LIBRARY_INCLUDES = \
 	$(shell pkg-config libdrm --cflags-only-I)
 
-C_SOURCES = \
-	r600_asm.c \
-	r600_blit.c \
-	r600_buffer.c \
-	r600_pipe.c \
-	r600_query.c \
-	r600_resource.c \
-	r600_shader.c \
-	r600_state.c \
-	r600_texture.c \
-	r700_asm.c \
-	evergreen_state.c \
-	eg_asm.c \
-	r600_translate.c \
-	r600_state_common.c
+# get C_SOURCES
+include Makefile.sources
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources
new file mode 100644
index 0000000..0366394
--- /dev/null
+++ b/src/gallium/drivers/r600/Makefile.sources
@@ -0,0 +1,15 @@
+C_SOURCES := \
+	r600_asm.c \
+	r600_blit.c \
+	r600_buffer.c \
+	r600_pipe.c \
+	r600_query.c \
+	r600_resource.c \
+	r600_shader.c \
+	r600_state.c \
+	r600_texture.c \
+	r700_asm.c \
+	evergreen_state.c \
+	eg_asm.c \
+	r600_translate.c \
+	r600_state_common.c
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 19f07b2..be12255 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -11,22 +11,8 @@
 
 r600 = env.ConvenienceLibrary(
     target = 'r600',
-    source = [
-        'r600_asm.c',
-        'r600_buffer.c',
-        'r600_blit.c',
-        'r600_pipe.c',
-        'r600_query.c',
-        'r600_resource.c',
-        'r600_shader.c',
-        'r600_state.c',
-        'r600_state_common.c',
-        'r600_texture.c',
-        'r600_translate.c',
-        'r700_asm.c',
-        'evergreen_state.c',
-        'eg_asm.c',
-    ])
+    source = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
+    )
 
 env.Alias('r600', r600)
 
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index c95872b..ca25b34 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -29,7 +29,7 @@
 #include "r600_opcodes.h"
 #include "evergreend.h"
 
-int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 {
 	unsigned id = cf->id;
 
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 4605c83..2135b8a 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -262,22 +262,16 @@
 	case PIPE_FORMAT_Z16_UNORM:
 		return V_028040_Z_16;
 	case PIPE_FORMAT_Z24X8_UNORM:
-		return V_028040_Z_24;
 	case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
 		return V_028040_Z_24;
+	case PIPE_FORMAT_Z32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+		return V_028040_Z_32_FLOAT;
 	default:
 		return ~0U;
 	}
 }
 
-static uint32_t r600_translate_stencilformat(enum pipe_format format)
-{
-	if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
-		return 1;
-	else
-		return 0;
-}
-
 static uint32_t r600_translate_colorswap(enum pipe_format format)
 {
 	switch (format) {
@@ -360,6 +354,7 @@
 
 	case PIPE_FORMAT_R11G11B10_FLOAT:
 	case PIPE_FORMAT_R32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT:
 	case PIPE_FORMAT_R16G16_FLOAT:
 	case PIPE_FORMAT_R16G16_UNORM:
 		return V_028C70_SWAP_STD;
@@ -369,6 +364,7 @@
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
 	case PIPE_FORMAT_R16G16B16A16_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
 
 	/* 128-bit buffers. */
 	case PIPE_FORMAT_R32G32B32A32_FLOAT:
@@ -453,7 +449,11 @@
 	case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
 		return V_028C70_COLOR_24_8;
 
+	case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+		return V_028C70_COLOR_X24_8_32_FLOAT;
+
 	case PIPE_FORMAT_R32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT:
 		return V_028C70_COLOR_32_FLOAT;
 
 	case PIPE_FORMAT_R16G16_FLOAT:
@@ -541,6 +541,7 @@
 
 		case V_028C70_COLOR_32_32_FLOAT:
 		case V_028C70_COLOR_32_32:
+		case V_028C70_COLOR_X24_8_32_FLOAT:
 			return ENDIAN_8IN32;
 
 		/* 96-bit buffers. */
@@ -638,10 +639,10 @@
 		return;
 
 	rstate->id = R600_PIPE_STATE_BLEND_COLOR;
-	r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]);
 	rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate;
@@ -686,13 +687,13 @@
 	blend->cb_target_mask = target_mask;
 	
 	r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
-				color_control, 0xFFFFFFFD, NULL);
+				color_control, 0xFFFFFFFD, NULL, 0);
 
 	if (rctx->chip_class != CAYMAN)
-		r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
 	else {
-		r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
 	}
 
 	for (int i = 0; i < 8; i++) {
@@ -723,7 +724,7 @@
 		}
 	}
 	for (int i = 0; i < 8; i++) {
-		r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL, 0);
 	}
 
 	return rstate;
@@ -791,27 +792,27 @@
 		S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
 		S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
 	/* TODO db_render_override depends on query */
-	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028430_DB_STENCILREFMASK, stencil_ref_mask,
-				0xFFFFFFFF & C_028430_STENCILREF, NULL);
+				0xFFFFFFFF & C_028430_STENCILREF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf,
-				0xFFFFFFFF & C_028434_STENCILREF_BF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0);
 	/* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
 	 * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
 	 * evergreen_pipe_shader_ps().*/
-	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
-	r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0);
 
 	return rstate;
 }
@@ -856,7 +857,7 @@
 			tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
 		}
 	}
-	r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0);
 
 	polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
 				state->fill_back != PIPE_POLYGON_MODE_FILL);
@@ -870,44 +871,44 @@
 		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
 		S_028814_POLY_MODE(polygon_dual_mode) |
 		S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
-		S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL);
+		S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL,
 			S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
-			S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+			S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	/* point size 12.4 fixed point */
 	tmp = (unsigned)(state->point_size * 8.0);
-	r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0);
 
 	tmp = (unsigned)state->line_width * 8;
-	r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0);
 
 	if (rctx->chip_class == CAYMAN) {
-		r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL,
 					S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
-					0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
 
 
 	} else {
-		r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0);
 
-		r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
 
 		r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL,
 					S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 	}
-	r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0);
 	return rstate;
 }
 
@@ -933,22 +934,22 @@
 			S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
 			S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
 			S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
-			S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
+			S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
 			S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
 			S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)),
-			0xFFFFFFFF, NULL);
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
 					S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
 					(state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
 					S_03C008_TYPE(1),
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 
 	if (uc.ui) {
-		r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0);
 	}
 	return rstate;
 }
@@ -957,43 +958,39 @@
 							struct pipe_resource *texture,
 							const struct pipe_sampler_view *state)
 {
-	struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
+	struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view);
 	struct r600_pipe_resource_state *rstate;
-	const struct util_format_description *desc;
-	struct r600_resource_texture *tmp;
+	struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture;
 	struct r600_resource *rbuffer;
 	unsigned format, endian;
 	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
 	unsigned char swizzle[4], array_mode = 0, tile_type = 0;
 	struct r600_bo *bo[2];
 
-	if (resource == NULL)
+	if (view == NULL)
 		return NULL;
-	rstate = &resource->state;
+	rstate = &view->state;
 
 	/* initialize base object */
-	resource->base = *state;
-	resource->base.texture = NULL;
+	view->base = *state;
+	view->base.texture = NULL;
 	pipe_reference(NULL, &texture->reference);
-	resource->base.texture = texture;
-	resource->base.reference.count = 1;
-	resource->base.context = ctx;
+	view->base.texture = texture;
+	view->base.reference.count = 1;
+	view->base.context = ctx;
 
 	swizzle[0] = state->swizzle_r;
 	swizzle[1] = state->swizzle_g;
 	swizzle[2] = state->swizzle_b;
 	swizzle[3] = state->swizzle_a;
+
 	format = r600_translate_texformat(ctx->screen, state->format,
 					  swizzle,
 					  &word4, &yuv_format);
 	if (format == ~0) {
 		format = 0;
 	}
-	desc = util_format_description(state->format);
-	if (desc == NULL) {
-		R600_ERR("unknow format %d\n", state->format);
-	}
-	tmp = (struct r600_resource_texture *)texture;
+
 	if (tmp->depth && !tmp->is_flushing_texture) {
 		r600_texture_depth_flush(ctx, texture, TRUE);
 		tmp = tmp->flushed_depth_texture;
@@ -1016,6 +1013,8 @@
 
 	rstate->bo[0] = bo[0];
 	rstate->bo[1] = bo[1];
+	rstate->bo_usage[0] = RADEON_USAGE_READ;
+	rstate->bo_usage[1] = RADEON_USAGE_READ;
 	rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) |
 			  S_030000_PITCH((pitch / 8) - 1) |
 			  S_030000_NON_DISP_TILING_ORDER(tile_type) |
@@ -1023,8 +1022,8 @@
 	rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) |
 			  S_030004_TEX_DEPTH(texture->depth0 - 1) |
 			  S_030004_ARRAY_MODE(array_mode));
-	rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8;
-	rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8;
+	rstate->val[2] = tmp->offset[0] >> 8;
+	rstate->val[3] = tmp->offset[1] >> 8;
 	rstate->val[4] = (word4 |
 			  S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 			  S_030010_ENDIAN_SWAP(endian) |
@@ -1036,7 +1035,7 @@
 	rstate->val[7] = (S_03001C_DATA_FORMAT(format) |
 			  S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE));
 
-	return &resource->base;
+	return &view->base;
 }
 
 static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
@@ -1131,21 +1130,21 @@
 	for (int i = 0; i < state->nr; i++) {
 		r600_pipe_state_add_reg(rstate,
 					R_0285BC_PA_CL_UCP0_X + i * 16,
-					fui(state->ucp[i][0]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_0285C0_PA_CL_UCP0_Y + i * 16,
-					fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_0285C4_PA_CL_UCP0_Z + i * 16,
-					fui(state->ucp[i][2]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_0285C8_PA_CL_UCP0_W + i * 16,
-					fui(state->ucp[i][3]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0);
 	}
 	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
 			S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) |
 			S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) |
-			S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL);
+			S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_CLIP]);
 	rctx->states[R600_PIPE_STATE_CLIP] = rstate;
@@ -1176,28 +1175,28 @@
 	br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
 	r600_pipe_state_add_reg(rstate,
 				R_028210_PA_SC_CLIPRECT_0_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028214_PA_SC_CLIPRECT_0_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028218_PA_SC_CLIPRECT_1_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02821C_PA_SC_CLIPRECT_1_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028220_PA_SC_CLIPRECT_2_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028224_PA_SC_CLIPRECT_2_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028228_PA_SC_CLIPRECT_3_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02822C_PA_SC_CLIPRECT_3_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_SCISSOR]);
 	rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
@@ -1219,11 +1218,11 @@
 	tmp = S_028430_STENCILREF(state->ref_value[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_028430_DB_STENCILREFMASK, tmp,
-				~C_028430_STENCILREF, NULL);
+				~C_028430_STENCILREF, NULL, 0);
 	tmp = S_028434_STENCILREF_BF(state->ref_value[1]);
 	r600_pipe_state_add_reg(rstate,
 				R_028434_DB_STENCILREFMASK_BF, tmp,
-				~C_028434_STENCILREF_BF, NULL);
+				~C_028434_STENCILREF_BF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_STENCIL_REF]);
 	rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate;
@@ -1241,15 +1240,15 @@
 
 	rctx->viewport = *state;
 	rstate->id = R600_PIPE_STATE_VIEWPORT;
-	r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_VIEWPORT]);
 	rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate;
@@ -1354,86 +1353,82 @@
 	/* FIXME handle enabling of CB beyond BASE8 which has different offset */
 	r600_pipe_state_add_reg(rstate,
 				R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-				(offset +  r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+				offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028C78_CB_COLOR0_DIM + cb * 0x3C,
-				0x0, 0xFFFFFFFF, NULL);
+				0x0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028C70_CB_COLOR0_INFO + cb * 0x3C,
-				color_info, 0xFFFFFFFF, bo[0]);
+				color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028C64_CB_COLOR0_PITCH + cb * 0x3C,
 				S_028C64_PITCH_TILE_MAX(pitch),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028C68_CB_COLOR0_SLICE + cb * 0x3C,
 				S_028C68_SLICE_TILE_MAX(slice),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028C6C_CB_COLOR0_VIEW + cb * 0x3C,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C,
 				S_028C74_NON_DISP_TILING_ORDER(tile_type),
-				0xFFFFFFFF, bo[0]);
+				0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 }
 
 static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
-			const struct pipe_framebuffer_state *state)
+			 const struct pipe_framebuffer_state *state)
 {
 	struct r600_resource_texture *rtex;
-	struct r600_resource *rbuffer;
 	struct r600_surface *surf;
-	unsigned level;
-	unsigned pitch, slice, format, stencil_format;
+	unsigned level, first_layer;
+	unsigned pitch, slice, format;
 	unsigned offset;
 
 	if (state->zsbuf == NULL)
 		return;
 
-	level = state->zsbuf->u.tex.level;
-
 	surf = (struct r600_surface *)state->zsbuf;
-	rtex = (struct r600_resource_texture*)state->zsbuf->texture;
+	rtex = (struct r600_resource_texture*)surf->base.texture;
 
-	rbuffer = &rtex->resource;
-
-	/* XXX quite sure for dx10+ hw don't need any offset hacks */
-	offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
-					 level, state->zsbuf->u.tex.first_layer);
+	level = surf->base.u.tex.level;
+	first_layer = surf->base.u.tex.first_layer;
+	offset = r600_texture_get_offset(rtex, level, first_layer);
 	pitch = rtex->pitch_in_blocks[level] / 8 - 1;
 	slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
-	format = r600_translate_dbformat(state->zsbuf->texture->format);
-	stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
+	format = r600_translate_dbformat(rtex->real_format);
 
 	r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
-				(offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
-				(offset  + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE);
+	r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
 
-	if (stencil_format) {
-		uint32_t stencil_offset;
+	if (rtex->stencil) {
+		uint32_t stencil_offset =
+			r600_texture_get_offset(rtex->stencil, level, first_layer);
 
-		stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
 		r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
-					(offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+					stencil_offset >> 8, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE);
 		r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
-					(offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+					stencil_offset >> 8, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE);
+		r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
+					1, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE);
+	} else {
+		r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
+					0, 0xFFFFFFFF, NULL, RADEON_USAGE_READWRITE);
 	}
 
-	r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
-				S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo);
-
 	r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO,
 				S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format),
-				0xFFFFFFFF, rbuffer->bo);
+				0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
 				S_028058_PITCH_TILE_MAX(pitch),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE,
 				S_02805C_SLICE_TILE_MAX(slice),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
@@ -1492,49 +1487,49 @@
 
 	r600_pipe_state_add_reg(rstate,
 				R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028244_PA_SC_GENERIC_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
 	r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK,
-				0x00000000, target_mask, NULL);
+				0x00000000, target_mask, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK,
-				shader_mask, 0xFFFFFFFF, NULL);
+				shader_mask, 0xFFFFFFFF, NULL, 0);
 
 
 	if (rctx->chip_class == CAYMAN) {
 		r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG,
-					0x00000000, 0xFFFFFFFF, NULL);
+					0x00000000, 0xFFFFFFFF, NULL, 0);
 	} else {
 		r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
-					0x00000000, 0xFFFFFFFF, NULL);
+					0x00000000, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
-					0x00000000, 0xFFFFFFFF, NULL);
+					0x00000000, 0xFFFFFFFF, NULL, 0);
 	}
 
 	free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
@@ -1609,78 +1604,78 @@
 
 	tmp = 0x00000000;
 	tmp |= S_008C00_EXPORT_SRC_C(1);
-	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* always set the temp clauses */
-	r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0);
-	r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0);
+	r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, NULL, 0);
+	r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL);
+	r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL);
+	r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xFFFFFFFF, NULL, 0);
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
 }
 
@@ -1964,39 +1959,39 @@
 	tmp |= S_008C00_VS_PRIO(vs_prio);
 	tmp |= S_008C00_GS_PRIO(gs_prio);
 	tmp |= S_008C00_ES_PRIO(es_prio);
-	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* enable dynamic GPR resource management */
 	if (r600_get_minor_version(rctx->radeon) >= 7) {
 		/* always set temp clauses */
 		r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1,
-					S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL);
+					S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
 					S_028838_PS_GPRS(0x1e) |
 					S_028838_VS_GPRS(0x1e) |
 					S_028838_GS_GPRS(0x1e) |
 					S_028838_ES_GPRS(0x1e) |
 					S_028838_HS_GPRS(0x1e) |
-					S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
+					S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL, 0); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
 	} else {
 		tmp = 0;
 		tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
 		tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
 		tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-		r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 		tmp = 0;
 		tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
 		tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
-		r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
 		tmp = 0;
 		tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs);
 		tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
-		r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0);
 	}
 
 	tmp = 0;
@@ -2004,109 +1999,109 @@
 	tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads);
 	tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads);
 	tmp |= S_008C18_NUM_ES_THREADS(num_es_threads);
-	r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads);
 	tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads);
-	r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
 	tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
 	tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries);
 	tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008E2C_NUM_PS_LDS(0x1000);
 	tmp |= S_008E2C_NUM_LS_LDS(0x1000);
-	r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL, 0);
 
 #if 0
-	r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL, 0);
 #endif
-	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
 
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
 }
@@ -2128,6 +2123,7 @@
 			offset_units *= 2.0f;
 			break;
 		case PIPE_FORMAT_Z32_FLOAT:
+		case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
 			depth = -23;
 			offset_units *= 1.0f;
 			offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
@@ -2143,19 +2139,19 @@
 		offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
 		r600_pipe_state_add_reg(&state,
 				R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
-				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
-				fui(offset_units), 0xFFFFFFFF, NULL);
+				fui(offset_units), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
-				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
-				fui(offset_units), 0xFFFFFFFF, NULL);
+				fui(offset_units), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
-				offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+				offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0);
 		r600_context_pipe_state_set(&rctx->ctx, &state);
 	}
 }
@@ -2252,32 +2248,32 @@
 				  S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
 
 	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0,
-				spi_ps_in_control_0, 0xFFFFFFFF, NULL);
+				spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1,
-				spi_ps_in_control_1, 0xFFFFFFFF, NULL);
+				spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2,
-				0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+				0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_0286E0_SPI_BARYC_CNTL,
 				spi_baryc_cntl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
 	r600_pipe_state_add_reg(rstate,
 				R_028840_SQ_PGM_START_PS,
-				(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+				0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 	r600_pipe_state_add_reg(rstate,
 				R_028844_SQ_PGM_RESOURCES_PS,
 				S_028844_NUM_GPRS(rshader->bc.ngpr) |
 				S_028844_PRIME_CACHE_ON_DRAW(1) |
 				S_028844_STACK_SIZE(rshader->bc.nstack),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028848_SQ_PGM_RESOURCES_2_PS,
-				0x0, 0xFFFFFFFF, NULL);
+				0x0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02884C_SQ_PGM_EXPORTS_PS,
-				exports_ps, 0xFFFFFFFF, NULL);
+				exports_ps, 0xFFFFFFFF, NULL, 0);
 	/* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */
 	/* only set some bits here, the other bits are set in the dsa state */
 	r600_pipe_state_add_reg(rstate,
@@ -2286,10 +2282,10 @@
 				S_02880C_Z_EXPORT_ENABLE(1) |
 				S_02880C_STENCIL_EXPORT_ENABLE(1) |
 				S_02880C_KILL_ENABLE(1),
-				NULL);
+				NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_03A200_SQ_LOOP_CONST_0, 0x01000FFF,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
@@ -2298,7 +2294,7 @@
 	struct r600_pipe_state *rstate = &shader->rstate;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned spi_vs_out_id[10];
-	unsigned i, tmp;
+	unsigned i, tmp, nparams;
 
 	/* clear previous register */
 	rstate->nregs = 0;
@@ -2314,28 +2310,36 @@
 	for (i = 0; i < 10; i++) {
 		r600_pipe_state_add_reg(rstate,
 					R_02861C_SPI_VS_OUT_ID_0 + i * 4,
-					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
+					spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0);
 	}
 
+	/* Certain attributes (position, psize, etc.) don't count as params.
+	 * VS is required to export at least one param and r600_shader_from_tgsi()
+	 * takes care of adding a dummy export.
+	 */
+	nparams = rshader->noutput - rshader->npos;
+	if (nparams < 1)
+		nparams = 1;
+
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
-			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
-			0xFFFFFFFF, NULL);
+			S_0286C4_VS_EXPORT_COUNT(nparams - 1),
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_028860_SQ_PGM_RESOURCES_VS,
 			S_028860_NUM_GPRS(rshader->bc.ngpr) |
 			S_028860_STACK_SIZE(rshader->bc.nstack),
-			0xFFFFFFFF, NULL);
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028864_SQ_PGM_RESOURCES_2_VS,
-				0x0, 0xFFFFFFFF, NULL);
+				0x0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_02885C_SQ_PGM_START_VS,
-			(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+			0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 void evergreen_fetch_shader(struct pipe_context *ctx,
@@ -2346,10 +2350,10 @@
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
 	rstate->nregs = 0;
 	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
-				(r600_bo_offset(ve->fetch_shader)) >> 8,
-				0xFFFFFFFF, ve->fetch_shader);
+				0,
+				0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ);
 }
 
 void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
@@ -2363,7 +2367,7 @@
 	r600_pipe_state_add_reg(rstate,
 				R_02880C_DB_SHADER_CONTROL,
 				0x0,
-				S_02880C_DUAL_EXPORT_ENABLE(1), NULL);
+				S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028000_DB_RENDER_CONTROL,
 				S_028000_DEPTH_COPY_ENABLE(1) |
@@ -2371,7 +2375,7 @@
 				S_028000_COPY_CENTROID(1),
 				S_028000_DEPTH_COPY_ENABLE(1) |
 				S_028000_STENCIL_COPY_ENABLE(1) |
-				S_028000_COPY_CENTROID(1), NULL);
+				S_028000_COPY_CENTROID(1), NULL, 0);
 	return rstate;
 }
 
@@ -2397,9 +2401,11 @@
 
 void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
 					struct r600_resource *rbuffer,
-					unsigned offset, unsigned stride)
+					unsigned offset, unsigned stride,
+					enum radeon_bo_usage usage)
 {
 	rstate->bo[0] = rbuffer->bo;
+	rstate->bo_usage[0] = usage;
 	rstate->val[0] = offset;
 	rstate->val[1] = rbuffer->bo_size - offset - 1;
 	rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 96dbd4d..9a8c353 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1697,6 +1697,10 @@
 #define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL           0x00028A3C
 #define R_028A48_PA_SC_MODE_CNTL_0                   0x00028A48
 #define R_028A4C_PA_SC_MODE_CNTL_1                   0x00028A4C
+#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN          0x00028A94
+#define   S_028A94_RESET_EN(x)                         (((x) & 0x1) << 0)
+#define   G_028A94_RESET_EN(x)                         (((x) >> 0) & 0x1)
+#define   C_028A94_RESET_EN                            0xFFFFFFFE
 #define R_028AB4_VGT_REUSE_OFF                       0x00028AB4
 #define R_028AB8_VGT_VTX_CNT_EN                      0x00028AB8
 #define R_028ABC_DB_HTILE_SURFACE                    0x00028ABC
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 61adc7e..f24146e 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -26,14 +26,8 @@
 #ifndef R600_H
 #define R600_H
 
-#include <assert.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <util/u_double_list.h>
-#include <util/u_inlines.h>
-#include <pipe/p_compiler.h>
-
-#define RADEON_CTX_MAX_PM4	(64 * 1024 / 4)
+#include "../../winsys/radeon/drm/radeon_winsys.h"
+#include "util/u_double_list.h"
 
 #define R600_ERR(fmt, args...) \
 	fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
@@ -94,31 +88,32 @@
 unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
 unsigned r600_get_minor_version(struct radeon *radeon);
 unsigned r600_get_num_backends(struct radeon *radeon);
+unsigned r600_get_num_tile_pipes(struct radeon *radeon);
+unsigned r600_get_backend_map(struct radeon *radeon);
 
 /* r600_bo.c */
 struct r600_bo;
+struct radeon_winsys_cs;
+
 struct r600_bo *r600_bo(struct radeon *radeon,
 			unsigned size, unsigned alignment,
 			unsigned binding, unsigned usage);
-struct r600_bo *r600_bo_handle(struct radeon *radeon,
-				unsigned handle, unsigned *array_mode);
-void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
+struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle,
+				unsigned *stride, unsigned *array_mode);
+void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage);
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
 boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo,
-				unsigned stride, struct winsys_handle *whandle);
-static INLINE unsigned r600_bo_offset(struct r600_bo *bo)
-{
-	return 0;
-}
-void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
+				  unsigned stride, struct winsys_handle *whandle);
+
+void r600_bo_destroy(struct r600_bo *bo);
 
 /* this relies on the pipe_reference being the first member of r600_bo */
-static INLINE void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src)
+static INLINE void r600_bo_reference(struct r600_bo **dst, struct r600_bo *src)
 {
 	struct r600_bo *old = *dst;
 
 	if (pipe_reference((struct pipe_reference *)(*dst), (struct pipe_reference *)src)) {
-		r600_bo_destroy(radeon, old);
+		r600_bo_destroy(old);
 	}
 	*dst = src;
 }
@@ -145,6 +140,7 @@
 	u32				mask;
 	struct r600_block 		*block;
 	struct r600_bo			*bo;
+	enum radeon_bo_usage		bo_usage;
 	u32				id;
 };
 
@@ -157,7 +153,8 @@
 struct r600_pipe_resource_state {
 	unsigned			id;
 	u32                             val[8];
-	struct r600_bo *bo[2];
+	struct r600_bo			*bo[2];
+	enum radeon_bo_usage		bo_usage[2]; /* XXX set these */
 };
 
 #define R600_BLOCK_STATUS_ENABLED	(1 << 0)
@@ -168,6 +165,7 @@
 
 struct r600_block_reloc {
 	struct r600_bo		*bo;
+	enum radeon_bo_usage	bo_usage;
 	unsigned		flush_flags;
 	unsigned		flush_mask;
 	unsigned		bo_pm4_index;
@@ -195,18 +193,6 @@
 };
 
 /*
- * relocation
- */
-#pragma pack(1)
-struct r600_reloc {
-	uint32_t	handle;
-	uint32_t	read_domain;
-	uint32_t	write_domain;
-	uint32_t	flags;
-};
-#pragma pack()
-
-/*
  * query
  */
 struct r600_query {
@@ -243,6 +229,8 @@
 
 struct r600_context {
 	struct radeon		*radeon;
+	struct radeon_winsys_cs	*cs;
+
 	struct r600_range	*range;
 	unsigned		nblocks;
 	struct r600_block	**blocks;
@@ -250,18 +238,19 @@
 	struct list_head	resource_dirty;
 	struct list_head	enable_list;
 	unsigned		pm4_ndwords;
-	unsigned		pm4_cdwords;
 	unsigned		pm4_dirty_cdwords;
 	unsigned		ctx_pm4_ndwords;
 	unsigned		init_dwords;
-	unsigned		nreloc;
+
 	unsigned		creloc;
-	struct r600_reloc	*reloc;
-	struct radeon_bo	**bo;
+	struct r600_bo		**bo;
+
 	u32			*pm4;
+	unsigned		pm4_cdwords;
+
 	struct list_head	query_list;
 	unsigned		num_query_running;
-	struct list_head	fenced_bo;
+	unsigned		backend_mask;
 	unsigned                max_db; /* for OQ */
 	unsigned                num_dest_buffers;
 	unsigned		flags;
@@ -282,6 +271,7 @@
 	struct r600_bo		*indices;
 };
 
+void r600_get_backend_mask(struct r600_context *ctx);
 int r600_context_init(struct r600_context *ctx, struct radeon *radeon);
 void r600_context_fini(struct r600_context *ctx);
 void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state);
@@ -290,8 +280,7 @@
 void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
-void r600_context_flush(struct r600_context *ctx);
-void r600_context_dump_bof(struct r600_context *ctx, const char *file);
+void r600_context_flush(struct r600_context *ctx, unsigned flags);
 void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
 
 struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type);
@@ -319,18 +308,21 @@
 void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 
-struct radeon *radeon_decref(struct radeon *radeon);
+struct radeon *radeon_destroy(struct radeon *radeon);
 
 void _r600_pipe_state_add_reg(struct r600_context *ctx,
 			      struct r600_pipe_state *state,
 			      u32 offset, u32 value, u32 mask,
 			      u32 range_id, u32 block_id,
-			      struct r600_bo *bo);
+			      struct r600_bo *bo,
+			      enum radeon_bo_usage usage);
 
 void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state,
 				     u32 offset, u32 value, u32 mask,
-				     struct r600_bo *bo);
-#define r600_pipe_state_add_reg(state, offset, value, mask, bo) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo)
+				     struct r600_bo *bo,
+				     enum radeon_bo_usage usage);
+
+#define r600_pipe_state_add_reg(state, offset, value, mask, bo, usage) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo, usage)
 
 static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state,
 					   u32 value)
@@ -340,10 +332,12 @@
 }
 
 static inline void r600_pipe_state_mod_reg_bo(struct r600_pipe_state *state,
-					   u32 value, struct r600_bo *bo)
+					      u32 value, struct r600_bo *bo,
+					      enum radeon_bo_usage usage)
 {
 	state->regs[state->nregs].value = value;
 	state->regs[state->nregs].bo = bo;
+	state->regs[state->nregs].bo_usage = usage;
 	state->nregs++;
 }
 
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 5fae2b0..27febdf 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -36,7 +36,7 @@
 #define NUM_OF_CYCLES 3
 #define NUM_OF_COMPONENTS 4
 
-static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
+static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	if(alu->is_op3)
 		return 3;
@@ -88,6 +88,7 @@
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE:
 			return 1;
 		default: R600_ERR(
 			"Need instruction operand number for 0x%x.\n", alu->inst);
@@ -140,6 +141,7 @@
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE:
 			return 1;
 		default: R600_ERR(
 			"Need instruction operand number for 0x%x.\n", alu->inst);
@@ -150,11 +152,11 @@
 	return 3;
 }
 
-int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
 
-static struct r600_bc_cf *r600_bc_cf(void)
+static struct r600_bytecode_cf *r600_bytecode_cf(void)
 {
-	struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf);
+	struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
 
 	if (cf == NULL)
 		return NULL;
@@ -165,9 +167,9 @@
 	return cf;
 }
 
-static struct r600_bc_alu *r600_bc_alu(void)
+static struct r600_bytecode_alu *r600_bytecode_alu(void)
 {
-	struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu);
+	struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu);
 
 	if (alu == NULL)
 		return NULL;
@@ -175,9 +177,9 @@
 	return alu;
 }
 
-static struct r600_bc_vtx *r600_bc_vtx(void)
+static struct r600_bytecode_vtx *r600_bytecode_vtx(void)
 {
-	struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx);
+	struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx);
 
 	if (vtx == NULL)
 		return NULL;
@@ -185,9 +187,9 @@
 	return vtx;
 }
 
-static struct r600_bc_tex *r600_bc_tex(void)
+static struct r600_bytecode_tex *r600_bytecode_tex(void)
 {
-	struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex);
+	struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex);
 
 	if (tex == NULL)
 		return NULL;
@@ -195,15 +197,15 @@
 	return tex;
 }
 
-void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class)
+void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class)
 {
 	LIST_INITHEAD(&bc->cf);
 	bc->chip_class = chip_class;
 }
 
-static int r600_bc_add_cf(struct r600_bc *bc)
+static int r600_bytecode_add_cf(struct r600_bytecode *bc)
 {
-	struct r600_bc_cf *cf = r600_bc_cf();
+	struct r600_bytecode_cf *cf = r600_bytecode_cf();
 
 	if (cf == NULL)
 		return -ENOMEM;
@@ -217,7 +219,7 @@
 	return 0;
 }
 
-int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
+int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output)
 {
 	int r;
 
@@ -252,16 +254,16 @@
 		}
 	}
 
-	r = r600_bc_add_cf(bc);
+	r = r600_bytecode_add_cf(bc);
 	if (r)
 		return r;
 	bc->cf_last->inst = output->inst;
-	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
+	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
 	return 0;
 }
 
 /* alu instructions that can ony exits once per group */
-static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -342,7 +344,7 @@
 	}
 }
 
-static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -363,7 +365,7 @@
 	}
 }
 
-static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_cube_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -378,7 +380,7 @@
 	}
 }
 
-static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -396,7 +398,7 @@
 }
 
 /* alu instructions that can only execute on the vector unit */
-static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	return is_alu_reduction_inst(bc, alu) ||
 		is_alu_mova_inst(bc, alu) ||
@@ -405,7 +407,7 @@
 }
 
 /* alu instructions that can only execute on the trans unit */
-static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -476,23 +478,23 @@
 }
 
 /* alu instructions that can execute on any unit */
-static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	return !is_alu_vec_unit_inst(bc, alu) &&
 		!is_alu_trans_unit_inst(bc, alu);
 }
 
-static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
-			    struct r600_bc_alu *assignment[5])
+static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
+			    struct r600_bytecode_alu *assignment[5])
 {
-	struct r600_bc_alu *alu;
+	struct r600_bytecode_alu *alu;
 	unsigned i, chan, trans;
 	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
 
 	for (i = 0; i < max_slots; i++)
 		assignment[i] = NULL;
 
-	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) {
 		chan = alu->dst.chan;
 		if (max_slots == 4)
 			trans = 0;
@@ -571,7 +573,7 @@
 	return 0;
 }
 
-static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
 {
 	int res, num_res = 4;
 	if (bc->chip_class >= R700) {
@@ -613,12 +615,12 @@
 		sel <= V_SQ_ALU_SRC_LITERAL);
 }
 
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
+static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
 			struct alu_bank_swizzle *bs, int bank_swizzle)
 {
 	int r, src, num_src, sel, elem, cycle;
 
-	num_src = r600_bc_get_num_operands(bc, alu);
+	num_src = r600_bytecode_get_num_operands(bc, alu);
 	for (src = 0; src < num_src; src++) {
 		sel = alu->src[src].sel;
 		elem = alu->src[src].chan;
@@ -643,12 +645,12 @@
 	return 0;
 }
 
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
+static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
 			struct alu_bank_swizzle *bs, int bank_swizzle)
 {
 	int r, src, num_src, const_count, sel, elem, cycle;
 
-	num_src = r600_bc_get_num_operands(bc, alu);
+	num_src = r600_bytecode_get_num_operands(bc, alu);
 	for (const_count = 0, src = 0; src < num_src; ++src) {
 		sel = alu->src[src].sel;
 		elem = alu->src[src].chan;
@@ -689,20 +691,24 @@
 	return 0;
 }
 
-static int check_and_set_bank_swizzle(struct r600_bc *bc,
-				      struct r600_bc_alu *slots[5])
+static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
+				      struct r600_bytecode_alu *slots[5])
 {
 	struct alu_bank_swizzle bs;
 	int bank_swizzle[5];
-	int i, r = 0, forced = 0;
+	int i, r = 0, forced = 1;
 	boolean scalar_only = bc->chip_class == CAYMAN ? false : true;
 	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
 
 	for (i = 0; i < max_slots; i++) {
-		if (slots[i] && slots[i]->bank_swizzle_force) {
-			slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
-			forced = 1;
+		if (slots[i]) {
+			if (slots[i]->bank_swizzle_force) {
+				slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+			} else {
+				forced = 0;
+			}
 		}
+
 		if (i < 4 && slots[i])
 			scalar_only = false;
 	}
@@ -712,7 +718,11 @@
 	/* Just check every possible combination of bank swizzle.
 	 * Not very efficent, but works on the first try in most of the cases. */
 	for (i = 0; i < 4; i++)
-		bank_swizzle[i] = SQ_ALU_VEC_012;
+		if (!slots[i] || !slots[i]->bank_swizzle_force)
+			bank_swizzle[i] = SQ_ALU_VEC_012;
+		else
+			bank_swizzle[i] = slots[i]->bank_swizzle;
+
 	bank_swizzle[4] = SQ_ALU_SCL_210;
 	while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
 
@@ -749,11 +759,13 @@
 			bank_swizzle[4]++;
 		} else {
 			for (i = 0; i < max_slots; i++) {
-				bank_swizzle[i]++;
-				if (bank_swizzle[i] <= SQ_ALU_VEC_210)
-					break;
-				else
-					bank_swizzle[i] = SQ_ALU_VEC_012;
+				if (!slots[i] || !slots[i]->bank_swizzle_force) {
+					bank_swizzle[i]++;
+					if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+						break;
+					else
+						bank_swizzle[i] = SQ_ALU_VEC_012;
+				}
 			}
 		}
 	}
@@ -762,10 +774,10 @@
 	return -1;
 }
 
-static int replace_gpr_with_pv_ps(struct r600_bc *bc,
-				  struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
+static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
+				  struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev)
 {
-	struct r600_bc_alu *prev[5];
+	struct r600_bytecode_alu *prev[5];
 	int gpr[5], chan[5];
 	int i, j, r, src, num_src;
 	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
@@ -775,7 +787,7 @@
 		return r;
 
 	for (i = 0; i < max_slots; ++i) {
-		if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+		if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) {
 			gpr[i] = prev[i]->dst.sel;
 			/* cube writes more than PV.X */
 			if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i]))
@@ -787,11 +799,11 @@
 	}
 
 	for (i = 0; i < max_slots; ++i) {
-		struct r600_bc_alu *alu = slots[i];
+		struct r600_bytecode_alu *alu = slots[i];
 		if(!alu)
 			continue;
 
-		num_src = r600_bc_get_num_operands(bc, alu);
+		num_src = r600_bytecode_get_num_operands(bc, alu);
 		for (src = 0; src < num_src; ++src) {
 			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
 				continue;
@@ -819,7 +831,7 @@
 	return 0;
 }
 
-void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
+void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg)
 {
 	switch(value) {
 	case 0:
@@ -852,10 +864,10 @@
 }
 
 /* compute how many literal are needed */
-static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
+static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
 				 uint32_t literal[4], unsigned *nliteral)
 {
-	unsigned num_src = r600_bc_get_num_operands(bc, alu);
+	unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
 	unsigned i, j;
 
 	for (i = 0; i < num_src; ++i) {
@@ -878,11 +890,11 @@
 	return 0;
 }
 
-static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
-					struct r600_bc_alu *alu,
+static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc,
+					struct r600_bytecode_alu *alu,
 					uint32_t literal[4], unsigned nliteral)
 {
-	unsigned num_src = r600_bc_get_num_operands(bc, alu);
+	unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
 	unsigned i, j;
 
 	for (i = 0; i < num_src; ++i) {
@@ -898,11 +910,11 @@
 	}
 }
 
-static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
-			     struct r600_bc_alu *alu_prev)
+static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5],
+			     struct r600_bytecode_alu *alu_prev)
 {
-	struct r600_bc_alu *prev[5];
-	struct r600_bc_alu *result[5] = { NULL };
+	struct r600_bytecode_alu *prev[5];
+	struct r600_bytecode_alu *result[5] = { NULL };
 
 	uint32_t literal[4], prev_literal[4];
 	unsigned nliteral = 0, prev_nliteral = 0;
@@ -917,13 +929,13 @@
 		return r;
 
 	for (i = 0; i < max_slots; ++i) {
-		struct r600_bc_alu *alu;
+		struct r600_bytecode_alu *alu;
 
 		/* check number of literals */
 		if (prev[i]) {
-			if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral))
+			if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral))
 				return 0;
-			if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
+			if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
 				return 0;
 			if (is_alu_mova_inst(bc, prev[i])) {
 				if (have_rel)
@@ -932,7 +944,7 @@
 			}
 			num_once_inst += is_alu_once_inst(bc, prev[i]);
 		}
-		if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral))
+		if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral))
 			return 0;
 
 		/* Let's check used slots. */
@@ -968,7 +980,7 @@
 		}
 
 		/* Let's check source gprs */
-		num_src = r600_bc_get_num_operands(bc, alu);
+		num_src = r600_bytecode_get_num_operands(bc, alu);
 		for (src = 0; src < num_src; ++src) {
 			if (alu->src[src].rel) {
 				if (have_mova)
@@ -1018,7 +1030,7 @@
 	}
 
 	/* determine new last instruction */
-	LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
+	LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1;
 
 	/* determine new first instruction */
 	for (i = 0; i < max_slots; ++i) {
@@ -1038,9 +1050,9 @@
  * probably do slightly better by recognizing that we actually have two
  * consecutive lines of 16 constants, but the resulting code would also be
  * somewhat more complicated. */
-static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type)
+static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, int type)
 {
-	struct r600_bc_kcache *kcache = bc->cf_last->kcache;
+	struct r600_bytecode_kcache *kcache = bc->cf_last->kcache;
 	unsigned int required_lines;
 	unsigned int free_lines = 0;
 	unsigned int cache_line[3];
@@ -1093,7 +1105,7 @@
 
 	/* Start a new ALU clause if needed. */
 	if (required_lines > free_lines) {
-		if ((r = r600_bc_add_cf(bc))) {
+		if ((r = r600_bytecode_add_cf(bc))) {
 			return r;
 		}
 		bc->cf_last->inst = (type << 3);
@@ -1148,15 +1160,15 @@
 	return 0;
 }
 
-int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
+int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type)
 {
-	struct r600_bc_alu *nalu = r600_bc_alu();
-	struct r600_bc_alu *lalu;
+	struct r600_bytecode_alu *nalu = r600_bytecode_alu();
+	struct r600_bytecode_alu *lalu;
 	int i, r;
 
 	if (nalu == NULL)
 		return -ENOMEM;
-	memcpy(nalu, alu, sizeof(struct r600_bc_alu));
+	memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
 
 	if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
 		/* check if we could add it anyway */
@@ -1174,7 +1186,7 @@
 
 	/* cf can contains only alu or only vtx or only tex */
 	if (bc->cf_last == NULL || bc->force_add_cf) {
-		r = r600_bc_add_cf(bc);
+		r = r600_bytecode_add_cf(bc);
 		if (r) {
 			free(nalu);
 			return r;
@@ -1184,7 +1196,7 @@
 
 	/* Setup the kcache for this ALU instruction. This will start a new
 	 * ALU clause if needed. */
-	if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
+	if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
 		free(nalu);
 		return r;
 	}
@@ -1198,7 +1210,7 @@
 			bc->ngpr = nalu->src[i].sel + 1;
 		}
 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
-			r600_bc_special_constants(nalu->src[i].value,
+			r600_bytecode_special_constants(nalu->src[i].value,
 				&nalu->src[i].sel, &nalu->src[i].neg);
 	}
 	if (nalu->dst.sel >= bc->ngpr) {
@@ -1213,7 +1225,7 @@
 	if (nalu->last) {
 		uint32_t literal[4];
 		unsigned nliteral;
-		struct r600_bc_alu *slots[5];
+		struct r600_bytecode_alu *slots[5];
 		int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
 		r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
 		if (r)
@@ -1237,7 +1249,7 @@
 
 		for (i = 0, nliteral = 0; i < max_slots; i++) {
 			if (slots[i]) {
-				r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral);
+				r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral);
 				if (r)
 					return r;
 			}
@@ -1257,12 +1269,12 @@
 	return 0;
 }
 
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu)
 {
-	return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+	return r600_bytecode_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
 }
 
-static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
+static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -1281,7 +1293,7 @@
 	}
 }
 
-static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc)
+static inline boolean last_inst_was_vtx_fetch(struct r600_bytecode *bc)
 {
 	if (bc->chip_class == CAYMAN) {
 		if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC)
@@ -1294,20 +1306,20 @@
 	return FALSE;
 }
 
-int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
+int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
 {
-	struct r600_bc_vtx *nvtx = r600_bc_vtx();
+	struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
 	int r;
 
 	if (nvtx == NULL)
 		return -ENOMEM;
-	memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx));
+	memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx));
 
 	/* cf can contains only alu or only vtx or only tex */
 	if (bc->cf_last == NULL ||
 	    last_inst_was_vtx_fetch(bc) ||
 	    bc->force_add_cf) {
-		r = r600_bc_add_cf(bc);
+		r = r600_bytecode_add_cf(bc);
 		if (r) {
 			free(nvtx);
 			return r;
@@ -1321,24 +1333,24 @@
 	/* each fetch use 4 dwords */
 	bc->cf_last->ndw += 4;
 	bc->ndw += 4;
-	if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
+	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
 		bc->force_add_cf = 1;
 	return 0;
 }
 
-int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
+int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
 {
-	struct r600_bc_tex *ntex = r600_bc_tex();
+	struct r600_bytecode_tex *ntex = r600_bytecode_tex();
 	int r;
 
 	if (ntex == NULL)
 		return -ENOMEM;
-	memcpy(ntex, tex, sizeof(struct r600_bc_tex));
+	memcpy(ntex, tex, sizeof(struct r600_bytecode_tex));
 
 	/* we can't fetch data und use it as texture lookup address in the same TEX clause */
 	if (bc->cf_last != NULL &&
 		bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
-		struct r600_bc_tex *ttex;
+		struct r600_bytecode_tex *ttex;
 		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
 			if (ttex->dst_gpr == ntex->src_gpr) {
 				bc->force_add_cf = 1;
@@ -1354,7 +1366,7 @@
 	if (bc->cf_last == NULL ||
 		bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX ||
 	        bc->force_add_cf) {
-		r = r600_bc_add_cf(bc);
+		r = r600_bytecode_add_cf(bc);
 		if (r) {
 			free(ntex);
 			return r;
@@ -1371,15 +1383,15 @@
 	/* each texture fetch use 4 dwords */
 	bc->cf_last->ndw += 4;
 	bc->ndw += 4;
-	if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
+	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
 		bc->force_add_cf = 1;
 	return 0;
 }
 
-int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
+int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst)
 {
 	int r;
-	r = r600_bc_add_cf(bc);
+	r = r600_bytecode_add_cf(bc);
 	if (r)
 		return r;
 
@@ -1388,13 +1400,13 @@
 	return 0;
 }
 
-int cm_bc_add_cf_end(struct r600_bc *bc)
+int cm_bytecode_add_cf_end(struct r600_bytecode *bc)
 {
-	return r600_bc_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END);
+	return r600_bytecode_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END);
 }
 
 /* common to all 3 families */
-static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
+static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
 {
 	bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
 			S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
@@ -1423,7 +1435,7 @@
 }
 
 /* common to all 3 families */
-static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id)
+static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
 {
 	bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
 				S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
@@ -1453,7 +1465,7 @@
 }
 
 /* r600 only, r700/eg bits in r700_asm.c */
-static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
 {
 	/* don't replace gpr by pv or ps for destination register */
 	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
@@ -1494,7 +1506,7 @@
 	return 0;
 }
 
-static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
+static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
 {
 	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 	*bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) |
@@ -1503,7 +1515,7 @@
 }
 
 /* common for r600/r700 - eg in eg_asm.c */
-static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 {
 	unsigned id = cf->id;
 
@@ -1529,9 +1541,9 @@
 	case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
 	case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 		if (bc->chip_class == R700)
-			r700_bc_cf_vtx_build(&bc->bytecode[id], cf);
+			r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
 		else
-			r600_bc_cf_vtx_build(&bc->bytecode[id], cf);
+			r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
 		break;
 	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
 	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
@@ -1571,12 +1583,12 @@
 	return 0;
 }
 
-int r600_bc_build(struct r600_bc *bc)
+int r600_bytecode_build(struct r600_bytecode *bc)
 {
-	struct r600_bc_cf *cf;
-	struct r600_bc_alu *alu;
-	struct r600_bc_vtx *vtx;
-	struct r600_bc_tex *tex;
+	struct r600_bytecode_cf *cf;
+	struct r600_bytecode_alu *alu;
+	struct r600_bytecode_vtx *vtx;
+	struct r600_bytecode_tex *tex;
 	uint32_t literal[4];
 	unsigned nliteral;
 	unsigned addr;
@@ -1636,9 +1648,9 @@
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		addr = cf->addr;
 		if (bc->chip_class >= EVERGREEN)
-			r = eg_bc_cf_build(bc, cf);
+			r = eg_bytecode_cf_build(bc, cf);
 		else
-			r = r600_bc_cf_build(bc, cf);
+			r = r600_bytecode_cf_build(bc, cf);
 		if (r)
 			return r;
 		switch (cf->inst) {
@@ -1649,18 +1661,18 @@
 			nliteral = 0;
 			memset(literal, 0, sizeof(literal));
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
-				r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+				r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
 				if (r)
 					return r;
-				r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
+				r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
 				switch(bc->chip_class) {
 				case R600:
-					r = r600_bc_alu_build(bc, alu, addr);
+					r = r600_bytecode_alu_build(bc, alu, addr);
 					break;
 				case R700:
 				case EVERGREEN: /* eg alu is same encoding as r700 */
 				case CAYMAN: /* eg alu is same encoding as r700 */
-					r = r700_bc_alu_build(bc, alu, addr);
+					r = r700_bytecode_alu_build(bc, alu, addr);
 					break;
 				default:
 					R600_ERR("unknown chip class %d.\n", bc->chip_class);
@@ -1681,7 +1693,7 @@
 		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
 		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-				r = r600_bc_vtx_build(bc, vtx, addr);
+				r = r600_bytecode_vtx_build(bc, vtx, addr);
 				if (r)
 					return r;
 				addr += 4;
@@ -1690,14 +1702,14 @@
 		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 			if (bc->chip_class == CAYMAN) {
 				LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-					r = r600_bc_vtx_build(bc, vtx, addr);
+					r = r600_bytecode_vtx_build(bc, vtx, addr);
 					if (r)
 						return r;
 					addr += 4;
 				}
 			}
 			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
-				r = r600_bc_tex_build(bc, tex, addr);
+				r = r600_bytecode_tex_build(bc, tex, addr);
 				if (r)
 					return r;
 				addr += 4;
@@ -1726,17 +1738,17 @@
 	return 0;
 }
 
-void r600_bc_clear(struct r600_bc *bc)
+void r600_bytecode_clear(struct r600_bytecode *bc)
 {
-	struct r600_bc_cf *cf = NULL, *next_cf;
+	struct r600_bytecode_cf *cf = NULL, *next_cf;
 
 	free(bc->bytecode);
 	bc->bytecode = NULL;
 
 	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
-		struct r600_bc_alu *alu = NULL, *next_alu;
-		struct r600_bc_tex *tex = NULL, *next_tex;
-		struct r600_bc_tex *vtx = NULL, *next_vtx;
+		struct r600_bytecode_alu *alu = NULL, *next_alu;
+		struct r600_bytecode_tex *tex = NULL, *next_tex;
+		struct r600_bytecode_tex *vtx = NULL, *next_vtx;
 
 		LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
 			free(alu);
@@ -1762,12 +1774,12 @@
 	LIST_INITHEAD(&cf->list);
 }
 
-void r600_bc_dump(struct r600_bc *bc)
+void r600_bytecode_dump(struct r600_bytecode *bc)
 {
-	struct r600_bc_cf *cf = NULL;
-	struct r600_bc_alu *alu = NULL;
-	struct r600_bc_vtx *vtx = NULL;
-	struct r600_bc_tex *tex = NULL;
+	struct r600_bytecode_cf *cf = NULL;
+	struct r600_bytecode_alu *alu = NULL;
+	struct r600_bytecode_vtx *vtx = NULL;
+	struct r600_bytecode_tex *tex = NULL;
 
 	unsigned i, id;
 	uint32_t literal[4];
@@ -1866,7 +1878,7 @@
 		id = cf->addr;
 		nliteral = 0;
 		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
-			r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+			r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
 
 			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
 			fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
@@ -2120,8 +2132,8 @@
 {
 	static int dump_shaders = -1;
 
-	struct r600_bc bc;
-	struct r600_bc_vtx vtx;
+	struct r600_bytecode bc;
+	struct r600_bytecode_vtx vtx;
 	struct pipe_vertex_element *elements = ve->elements;
 	const struct util_format_description *desc;
 	unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160;
@@ -2142,11 +2154,11 @@
 	}
 
 	memset(&bc, 0, sizeof(bc));
-	r600_bc_init(&bc, rctx->chip_class);
+	r600_bytecode_init(&bc, rctx->chip_class);
 
 	for (i = 0; i < ve->count; i++) {
 		if (elements[i].instance_divisor > 1) {
-			struct r600_bc_alu alu;
+			struct r600_bytecode_alu alu;
 
 			memset(&alu, 0, sizeof(alu));
 			alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
@@ -2161,8 +2173,8 @@
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			if ((r = r600_bc_add_alu(&bc, &alu))) {
-				r600_bc_clear(&bc);
+			if ((r = r600_bytecode_add_alu(&bc, &alu))) {
+				r600_bytecode_clear(&bc);
 				return r;
 			}
 		}
@@ -2173,7 +2185,7 @@
 		r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp, &endian);
 		desc = util_format_description(ve->elements[i].src_format);
 		if (desc == NULL) {
-			r600_bc_clear(&bc);
+			r600_bytecode_clear(&bc);
 			R600_ERR("unknown format %d\n", ve->elements[i].src_format);
 			return -EINVAL;
 		}
@@ -2198,16 +2210,16 @@
 		vtx.offset = elements[i].src_offset;
 		vtx.endian = endian;
 
-		if ((r = r600_bc_add_vtx(&bc, &vtx))) {
-			r600_bc_clear(&bc);
+		if ((r = r600_bytecode_add_vtx(&bc, &vtx))) {
+			r600_bytecode_clear(&bc);
 			return r;
 		}
 	}
 
-	r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
+	r600_bytecode_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
 
-	if ((r = r600_bc_build(&bc))) {
-		r600_bc_clear(&bc);
+	if ((r = r600_bytecode_build(&bc))) {
+		r600_bytecode_clear(&bc);
 		return r;
 	}
 
@@ -2216,7 +2228,7 @@
 
 	if (dump_shaders) {
 		fprintf(stderr, "--------------------------------------------------------------\n");
-		r600_bc_dump(&bc);
+		r600_bytecode_dump(&bc);
 		fprintf(stderr, "______________________________________________________________\n");
 	}
 
@@ -2225,14 +2237,14 @@
 	/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
 	ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
 	if (ve->fetch_shader == NULL) {
-		r600_bc_clear(&bc);
+		r600_bytecode_clear(&bc);
 		return -ENOMEM;
 	}
 
-	bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+	bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
 	if (bytecode == NULL) {
-		r600_bc_clear(&bc);
-		r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+		r600_bytecode_clear(&bc);
+		r600_bo_reference(&ve->fetch_shader, NULL);
 		return -ENOMEM;
 	}
 
@@ -2245,7 +2257,7 @@
 	}
 
 	r600_bo_unmap(rctx->radeon, ve->fetch_shader);
-	r600_bc_clear(&bc);
+	r600_bytecode_clear(&bc);
 
 	if (rctx->chip_class >= EVERGREEN)
 		evergreen_fetch_shader(&rctx->context, ve);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index cbdaacf..61caa4b 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -23,12 +23,10 @@
 #ifndef R600_ASM_H
 #define R600_ASM_H
 
-#include "util/u_double_list.h"
-
 struct r600_vertex_element;
 struct r600_pipe_context;
 
-struct r600_bc_alu_src {
+struct r600_bytecode_alu_src {
 	unsigned			sel;
 	unsigned			chan;
 	unsigned			neg;
@@ -37,7 +35,7 @@
 	uint32_t			value;
 };
 
-struct r600_bc_alu_dst {
+struct r600_bytecode_alu_dst {
 	unsigned			sel;
 	unsigned			chan;
 	unsigned			clamp;
@@ -45,10 +43,10 @@
 	unsigned			rel;
 };
 
-struct r600_bc_alu {
+struct r600_bytecode_alu {
 	struct list_head		list;
-	struct r600_bc_alu_src		src[3];
-	struct r600_bc_alu_dst		dst;
+	struct r600_bytecode_alu_src		src[3];
+	struct r600_bytecode_alu_dst		dst;
 	unsigned			inst;
 	unsigned			last;
 	unsigned			is_op3;
@@ -58,7 +56,7 @@
 	unsigned			omod;
 };
 
-struct r600_bc_tex {
+struct r600_bytecode_tex {
 	struct list_head		list;
 	unsigned			inst;
 	unsigned			resource_id;
@@ -85,7 +83,7 @@
 	unsigned			src_sel_w;
 };
 
-struct r600_bc_vtx {
+struct r600_bytecode_vtx {
 	struct list_head		list;
 	unsigned			inst;
 	unsigned			fetch_type;
@@ -107,7 +105,7 @@
 	unsigned			endian;
 };
 
-struct r600_bc_output {
+struct r600_bytecode_output {
 	unsigned			array_base;
 	unsigned			type;
 	unsigned			end_of_program;
@@ -122,13 +120,13 @@
 	unsigned			barrier;
 };
 
-struct r600_bc_kcache {
+struct r600_bytecode_kcache {
 	unsigned			bank;
 	unsigned			mode;
 	unsigned			addr;
 };
 
-struct r600_bc_cf {
+struct r600_bytecode_cf {
 	struct list_head		list;
 	unsigned			inst;
 	unsigned			addr;
@@ -137,15 +135,15 @@
 	unsigned			cond;
 	unsigned			pop_count;
 	unsigned			cf_addr; /* control flow addr */
-	struct r600_bc_kcache		kcache[2];
+	struct r600_bytecode_kcache		kcache[2];
 	unsigned			r6xx_uses_waterfall;
 	struct list_head		alu;
 	struct list_head		tex;
 	struct list_head		vtx;
-	struct r600_bc_output		output;
-	struct r600_bc_alu		*curr_bs_head;
-	struct r600_bc_alu		*prev_bs_head;
-	struct r600_bc_alu		*prev2_bs_head;
+	struct r600_bytecode_output		output;
+	struct r600_bytecode_alu		*curr_bs_head;
+	struct r600_bytecode_alu		*prev_bs_head;
+	struct r600_bytecode_alu		*prev2_bs_head;
 };
 
 #define FC_NONE				0
@@ -157,8 +155,8 @@
 
 struct r600_cf_stack_entry {
 	int				type;
-	struct r600_bc_cf		*start;
-	struct r600_bc_cf		**mid; /* used to store the else point */
+	struct r600_bytecode_cf		*start;
+	struct r600_bytecode_cf		**mid; /* used to store the else point */
 	int				num_mid;
 };
 
@@ -170,11 +168,11 @@
 	int				max;
 };
 
-struct r600_bc {
+struct r600_bytecode {
 	enum chip_class			chip_class;
 	int				type;
 	struct list_head		cf;
-	struct r600_bc_cf		*cf_last;
+	struct r600_bytecode_cf		*cf_last;
 	unsigned			ndw;
 	unsigned			ncf;
 	unsigned			ngpr;
@@ -189,27 +187,27 @@
 };
 
 /* eg_asm.c */
-int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
+int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
 
 /* r600_asm.c */
-void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class);
-void r600_bc_clear(struct r600_bc *bc);
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
-int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
-int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
-int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
-int r600_bc_build(struct r600_bc *bc);
-int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
-int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
-void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
-void r600_bc_dump(struct r600_bc *bc);
+void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class);
+void r600_bytecode_clear(struct r600_bytecode *bc);
+int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
+int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
+int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex);
+int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output);
+int r600_bytecode_build(struct r600_bytecode *bc);
+int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst);
+int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type);
+void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg);
+void r600_bytecode_dump(struct r600_bytecode *bc);
 
-int cm_bc_add_cf_end(struct r600_bc *bc);
+int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
 
 int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
 
 /* r700_asm.c */
-void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf);
-int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
+int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
 
 #endif
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 35e68b6..2f7e871 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -111,7 +111,7 @@
 	if (!texture->dirty_db)
 		return;
 
-	surf_tmpl.format = texture->resource.b.b.b.format;
+	surf_tmpl.format = texture->real_format;
 	surf_tmpl.u.tex.level = level;
 	surf_tmpl.u.tex.first_layer = 0;
 	surf_tmpl.u.tex.last_layer = 0;
@@ -119,7 +119,7 @@
 
 	zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl);
 
-	surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format;
+	surf_tmpl.format = texture->flushed_depth_texture->real_format;
 	surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
 	cbsurf = ctx->create_surface(ctx,
 			(struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl);
@@ -233,8 +233,8 @@
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 
 	r600_blitter_begin(ctx, R600_COPY);
-	util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
-				 src, src_level, src_box, TRUE);
+	util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
+				  src, src_level, src_box, TRUE);
 	r600_blitter_end(ctx);
 }
 
@@ -249,7 +249,7 @@
 				   struct texture_orig_info *orig)
 {
 	struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex;
-	unsigned pixsize = util_format_get_blocksize(tex->format);
+	unsigned pixsize = util_format_get_blocksize(rtex->real_format);
 	int new_format;
 	int new_height, new_width;
 
@@ -269,7 +269,6 @@
 	tex->width0 = new_width;
 	tex->height0 = new_height;
 	tex->format = new_format;
-
 }
 
 static void r600_reset_blittable_to_compressed(struct pipe_resource *tex,
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 72f352d..ca2415a 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -33,11 +33,6 @@
 #include <util/u_memory.h>
 #include "util/u_upload_mgr.h"
 
-#include "state_tracker/drm_driver.h"
-
-#include <xf86drm.h>
-#include "radeon_drm.h"
-
 #include "r600.h"
 #include "r600_pipe.h"
 
@@ -48,7 +43,7 @@
 	struct r600_resource_buffer *rbuffer = r600_buffer(buf);
 
 	if (rbuffer->r.bo) {
-		r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL);
+		r600_bo_reference(&rbuffer->r.bo, NULL);
 	}
 	rbuffer->r.bo = NULL;
 	util_slab_free(&rscreen->pool_buffers, rbuffer);
@@ -81,12 +76,13 @@
 				      struct pipe_transfer *transfer)
 {
 	struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
+	struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe;
 	uint8_t *data;
 
 	if (rbuffer->r.b.user_ptr)
 		return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x;
 
-	data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe);
+	data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, rctx->ctx.cs, transfer->usage);
 	if (!data)
 		return NULL;
 
@@ -97,12 +93,13 @@
 					struct pipe_transfer *transfer)
 {
 	struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
+	struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe;
 
 	if (rbuffer->r.b.user_ptr)
 		return;
 
 	if (rbuffer->r.bo)
-		r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo);
+		r600_bo_unmap(rctx->screen->radeon, rbuffer->r.bo);
 }
 
 static void r600_buffer_transfer_flush_region(struct pipe_context *pipe,
@@ -127,20 +124,20 @@
 						unsigned stride,
 						unsigned layer_stride)
 {
-	struct radeon *ws = (struct radeon*)pipe->winsys;
+	struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe;
+	struct radeon *radeon = rctx->screen->radeon;
 	struct r600_resource_buffer *rbuffer = r600_buffer(resource);
 	uint8_t *map = NULL;
 
 	assert(rbuffer->r.b.user_ptr == NULL);
 
-	map = r600_bo_map(ws, rbuffer->r.bo,
-			  PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage,
-			  pipe);
+	map = r600_bo_map(radeon, rbuffer->r.bo, rctx->ctx.cs,
+			  PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage);
 
 	memcpy(map + box->x, data, box->width);
 
 	if (rbuffer->r.bo)
-		r600_bo_unmap(ws, rbuffer->r.bo);
+		r600_bo_unmap(radeon, rbuffer->r.bo);
 }
 
 static const struct u_resource_vtbl r600_buffer_vtbl =
@@ -175,7 +172,7 @@
 	rbuffer->r.size = rbuffer->r.b.b.b.width0;
 	rbuffer->r.bo_size = rbuffer->r.size;
 
-	bo = r600_bo((struct radeon*)screen->winsys,
+	bo = r600_bo(rscreen->radeon,
 		     rbuffer->r.b.b.b.width0,
 		     alignment, rbuffer->r.b.b.b.bind,
 		     rbuffer->r.b.b.b.usage);
@@ -219,18 +216,18 @@
 struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
 					      struct winsys_handle *whandle)
 {
-	struct radeon *rw = (struct radeon*)screen->winsys;
+	struct radeon *rw = ((struct r600_screen*)screen)->radeon;
 	struct r600_resource *rbuffer;
 	struct r600_bo *bo = NULL;
 
-	bo = r600_bo_handle(rw, whandle->handle, NULL);
+	bo = r600_bo_handle(rw, whandle, NULL, NULL);
 	if (bo == NULL) {
 		return NULL;
 	}
 
 	rbuffer = CALLOC_STRUCT(r600_resource);
 	if (rbuffer == NULL) {
-		r600_bo_reference(rw, &bo, NULL);
+		r600_bo_reference(&bo, NULL);
 		return NULL;
 	}
 
diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h
index 1c1089d..b822cba 100644
--- a/src/gallium/drivers/r600/r600_formats.h
+++ b/src/gallium/drivers/r600/r600_formats.h
@@ -99,7 +99,6 @@
 
 	/* No fixed, no double. */
 	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
-	    desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED ||
 	    (desc->channel[i].size == 64 &&
 	     desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))
 		return false;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 615f068..ceaebbb 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -47,12 +47,14 @@
 #include "r600_resource.h"
 #include "r600_shader.h"
 #include "r600_pipe.h"
+#include "../../winsys/r600/drm/r600_drm_public.h"
 
 /*
  * pipe_context
  */
 static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx)
 {
+	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_fence *fence = NULL;
 
 	if (!ctx->fences.bo) {
@@ -62,7 +64,8 @@
 			R600_ERR("r600: failed to create bo for fence objects\n");
 			return NULL;
 		}
-		ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL);
+		ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, rctx->ctx.cs,
+					       PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_WRITE);
 	}
 
 	if (!LIST_IS_EMPTY(&ctx->fences.pool)) {
@@ -113,29 +116,28 @@
 	return fence;
 }
 
-static void r600_flush(struct pipe_context *ctx,
-			struct pipe_fence_handle **fence)
+
+void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
+		unsigned flags)
 {
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_fence **rfence = (struct r600_fence**)fence;
 
-#if 0
-	static int dc = 0;
-	char dname[256];
-#endif
-
 	if (rfence)
 		*rfence = r600_create_fence(rctx);
 
-#if 0
-	sprintf(dname, "gallium-%08d.bof", dc);
-	if (dc < 20) {
-		r600_context_dump_bof(&rctx->ctx, dname);
-		R600_ERR("dumped %s\n", dname);
-	}
-	dc++;
-#endif
-	r600_context_flush(&rctx->ctx);
+	r600_context_flush(&rctx->ctx, flags);
+}
+
+static void r600_flush_from_st(struct pipe_context *ctx,
+			       struct pipe_fence_handle **fence)
+{
+	r600_flush(ctx, fence, 0);
+}
+
+static void r600_flush_from_winsys(void *ctx, unsigned flags)
+{
+	r600_flush((struct pipe_context*)ctx, NULL, flags);
 }
 
 static void r600_update_num_contexts(struct r600_screen *rscreen, int diff)
@@ -184,7 +186,7 @@
 		}
 
 		r600_bo_unmap(rctx->radeon, rctx->fences.bo);
-		r600_bo_reference(rctx->radeon, &rctx->fences.bo, NULL);
+		r600_bo_reference(&rctx->fences.bo, NULL);
 	}
 
 	r600_update_num_contexts(rctx->screen, -1);
@@ -206,7 +208,7 @@
 	rctx->context.screen = screen;
 	rctx->context.priv = priv;
 	rctx->context.destroy = r600_destroy_context;
-	rctx->context.flush = r600_flush;
+	rctx->context.flush = r600_flush_from_st;
 
 	/* Easy accessing of screen/winsys. */
 	rctx->screen = rscreen;
@@ -256,6 +258,8 @@
 		return NULL;
 	}
 
+	rctx->screen->ws->cs_set_flush_callback(rctx->ctx.cs, r600_flush_from_winsys, rctx);
+
 	util_slab_create(&rctx->pool_transfers,
 			 sizeof(struct pipe_transfer), 64,
 			 UTIL_SLAB_SINGLETHREADED);
@@ -269,6 +273,7 @@
 		r600_destroy_context(&rctx->context);
 		return NULL;
 	}
+	rctx->vbuf_mgr->caps.format_fixed32 = 0;
 
 	rctx->blitter = util_blitter_create(&rctx->context);
 	if (rctx->blitter == NULL) {
@@ -355,6 +360,7 @@
 	case PIPE_CAP_SM3:
 	case PIPE_CAP_SEAMLESS_CUBE_MAP:
 	case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+	case PIPE_CAP_PRIMITIVE_RESTART:
 		return 1;
 
 	/* Supported except the original R600. */
@@ -369,7 +375,6 @@
 
 	/* Unsupported features. */
 	case PIPE_CAP_STREAM_OUTPUT:
-	case PIPE_CAP_PRIMITIVE_RESTART:
 	case PIPE_CAP_TGSI_INSTANCEID:
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
@@ -481,6 +486,8 @@
 		return 1;
 	case PIPE_SHADER_CAP_SUBROUTINES:
 		return 0;
+	case PIPE_SHADER_CAP_INTEGERS:
+		return 0;
 	default:
 		return 0;
 	}
@@ -498,6 +505,8 @@
 	case PIPE_VIDEO_CAP_MAX_WIDTH:
 	case PIPE_VIDEO_CAP_MAX_HEIGHT:
 		return vl_video_buffer_max_size(screen);
+	case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+		return vl_num_buffers_desired(screen, profile);
 	default:
 		return 0;
 	}
@@ -510,7 +519,8 @@
 	if (rscreen == NULL)
 		return;
 
-	radeon_decref(rscreen->radeon);
+	radeon_destroy(rscreen->radeon);
+	rscreen->ws->destroy(rscreen->ws);
 
 	util_slab_destroy(&rscreen->pool_buffers);
 	pipe_mutex_destroy(rscreen->mutex_num_contexts);
@@ -574,17 +584,19 @@
 	return TRUE;
 }
 
-struct pipe_screen *r600_screen_create(struct radeon *radeon)
+struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 {
 	struct r600_screen *rscreen;
+	struct radeon *radeon = radeon_create(ws);
 
 	rscreen = CALLOC_STRUCT(r600_screen);
 	if (rscreen == NULL) {
 		return NULL;
 	}
 
+	rscreen->ws = ws;
 	rscreen->radeon = radeon;
-	rscreen->screen.winsys = (struct pipe_winsys*)radeon;
+	rscreen->screen.winsys = (struct pipe_winsys*)ws;
 	rscreen->screen.destroy = r600_destroy_screen;
 	rscreen->screen.get_name = r600_get_name;
 	rscreen->screen.get_vendor = r600_get_vendor;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 6f399ed..2747f54 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -26,6 +26,8 @@
 #ifndef R600_PIPE_H
 #define R600_PIPE_H
 
+#include "../../winsys/radeon/drm/radeon_winsys.h"
+
 #include <pipe/p_state.h>
 #include <pipe/p_screen.h>
 #include <pipe/p_context.h>
@@ -72,6 +74,7 @@
 
 struct r600_screen {
 	struct pipe_screen		screen;
+	struct radeon_winsys		*ws;
 	struct radeon			*radeon;
 	struct r600_tiling_info		*tiling_info;
 	struct util_slab_mempool	pool_buffers;
@@ -183,7 +186,7 @@
 	struct r600_pipe_state		*states[R600_PIPE_NSTATES];
 	struct r600_context		ctx;
 	struct r600_vertex_element	*vertex_elements;
-	struct r600_pipe_resource_state		fs_resource[PIPE_MAX_ATTRIBS];
+	struct r600_pipe_resource_state	fs_resource[PIPE_MAX_ATTRIBS];
 	struct pipe_framebuffer_state	framebuffer;
 	struct pipe_index_buffer	index_buffer;
 	unsigned			cb_target_mask;
@@ -247,7 +250,8 @@
 					 struct r600_pipe_resource_state *rstate);
 void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
 					struct r600_resource *rbuffer,
-					unsigned offset, unsigned stride);
+					unsigned offset, unsigned stride,
+					enum radeon_bo_usage usage);
 boolean evergreen_is_format_supported(struct pipe_screen *screen,
 				      enum pipe_format format,
 				      enum pipe_texture_target target,
@@ -270,6 +274,11 @@
 					      struct winsys_handle *whandle);
 void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
 
+
+/* r600_pipe.c */
+void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
+		unsigned flags);
+
 /* r600_query.c */
 void r600_init_query_functions(struct r600_pipe_context *rctx);
 
@@ -294,7 +303,8 @@
 				    struct r600_pipe_resource_state *rstate);
 void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
 				   struct r600_resource *rbuffer,
-				   unsigned offset, unsigned stride);
+				   unsigned offset, unsigned stride,
+				   enum radeon_bo_usage usage);
 void r600_adjust_gprs(struct r600_pipe_context *rctx);
 boolean r600_is_format_supported(struct pipe_screen *screen,
 				 enum pipe_format format,
diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h
index f197020..e4fe23a 100644
--- a/src/gallium/drivers/r600/r600_public.h
+++ b/src/gallium/drivers/r600/r600_public.h
@@ -23,6 +23,8 @@
 #ifndef R600_PUBLIC_H
 #define R600_PUBLIC_H
 
-struct pipe_screen *r600_screen_create(struct radeon *radeon);
+struct radeon_winsys;
+
+struct pipe_screen *r600_screen_create(struct radeon_winsys *ws);
 
 #endif
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 836e749..d9d29db 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -52,6 +52,12 @@
 
 struct r600_resource_texture {
 	struct r600_resource		resource;
+
+	/* If this resource is a depth-stencil buffer on evergreen, this contains
+	 * the depth part of the format. There is a separate stencil resource
+	 * for the stencil buffer below. */
+	enum pipe_format		real_format;
+
 	unsigned			offset[PIPE_MAX_TEXTURE_LEVELS];
 	unsigned			pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS];  /* transfer */
 	unsigned			pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */
@@ -62,6 +68,7 @@
 	unsigned			tile_type;
 	unsigned			depth;
 	unsigned			dirty_db;
+	struct r600_resource_texture    *stencil; /* Stencil is in a separate buffer on Evergreen. */
 	struct r600_resource_texture	*flushed_depth_texture;
 	boolean				is_flushing_texture;
 
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 3e21ad1..c37bb72 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -86,7 +86,7 @@
 		if (shader->bo == NULL) {
 			return -ENOMEM;
 		}
-		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
+		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
 		if (R600_BIG_ENDIAN) {
 			for (i = 0; i < rshader->bc.ndw; ++i) {
 				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
@@ -140,13 +140,13 @@
 		R600_ERR("translation from TGSI failed !\n");
 		return r;
 	}
-	r = r600_bc_build(&shader->shader.bc);
+	r = r600_bytecode_build(&shader->shader.bc);
 	if (r) {
 		R600_ERR("building bytecode failed !\n");
 		return r;
 	}
 	if (dump_shaders) {
-		r600_bc_dump(&shader->shader.bc);
+		r600_bytecode_dump(&shader->shader.bc);
 		fprintf(stderr, "______________________________________________________________\n");
 	}
 	return r600_pipe_shader(ctx, shader);
@@ -154,10 +154,8 @@
 
 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
-	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
-	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
-	r600_bc_clear(&shader->shader.bc);
+	r600_bo_reference(&shader->bo, NULL);
+	r600_bytecode_clear(&shader->shader.bc);
 
 	memset(&shader->shader,0,sizeof(struct r600_shader));
 }
@@ -185,7 +183,7 @@
 	unsigned				temp_reg;
 	unsigned				ar_reg;
 	struct r600_shader_tgsi_instruction	*inst_info;
-	struct r600_bc				*bc;
+	struct r600_bytecode				*bc;
 	struct r600_shader			*shader;
 	struct r600_shader_src			src[4];
 	u32					*literals;
@@ -246,7 +244,7 @@
 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
 {
 	int i, r;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int gpr = 0, base_chan = 0;
 	int ij_index = 0;
 
@@ -272,7 +270,7 @@
 	base_chan = (2 * (ij_index % 2)) + 1;
 
 	for (i = 0; i < 8; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		if (i < 4)
 			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
@@ -294,7 +292,7 @@
 		alu.bank_swizzle_force = SQ_ALU_VEC_210;
 		if ((i % 4) == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -332,6 +330,12 @@
 		ctx->shader->output[i].sid = d->Semantic.Index;
 		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
 		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
+		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+			/* these don't count as vertex param exports */
+			if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) ||
+			    (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE))
+				ctx->shader->npos++;
+		}
 		break;
 	case TGSI_FILE_CONSTANT:
 	case TGSI_FILE_TEMPORARY:
@@ -341,8 +345,8 @@
 
 	case TGSI_FILE_SYSTEM_VALUE:
 		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
-			struct r600_bc_alu alu;
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			struct r600_bytecode_alu alu;
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
 			alu.src[0].sel = 0;
@@ -353,7 +357,7 @@
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 				return r;
 			break;
 		}
@@ -436,7 +440,7 @@
 			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
 
 			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
-			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
 			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
 				return;
 		}
@@ -460,12 +464,12 @@
 
 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
 {
-	struct r600_bc_vtx vtx;
+	struct r600_bytecode_vtx vtx;
 	unsigned int ar_reg;
 	int r;
 
 	if (offset) {
-		struct r600_bc_alu alu;
+		struct r600_bytecode_alu alu;
 
 		memset(&alu, 0, sizeof(alu));
 
@@ -479,7 +483,7 @@
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 			return r;
 
 		ar_reg = dst_reg;
@@ -502,7 +506,7 @@
 	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
 	vtx.endian = r600_endian_swap(32);
 
-	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
 		return r;
 
 	return 0;
@@ -511,7 +515,7 @@
 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, k, nconst, r;
 
 	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
@@ -536,7 +540,7 @@
 		} else if (j > 0) {
 			int treg = r600_get_temp(ctx);
 			for (k = 0; k < 4; k++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 				alu.src[0].sel = ctx->src[i].sel;
 				alu.src[0].chan = k;
@@ -546,7 +550,7 @@
 				alu.dst.write = 1;
 				if (k == 3)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
@@ -562,7 +566,7 @@
 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, k, nliteral, r;
 
 	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
@@ -574,7 +578,7 @@
 		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
 			int treg = r600_get_temp(ctx);
 			for (k = 0; k < 4; k++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 				alu.src[0].sel = ctx->src[i].sel;
 				alu.src[0].chan = k;
@@ -584,7 +588,7 @@
 				alu.dst.write = 1;
 				if (k == 3)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
@@ -602,14 +606,14 @@
 	struct tgsi_full_immediate *immediate;
 	struct tgsi_full_property *property;
 	struct r600_shader_ctx ctx;
-	struct r600_bc_output output[32];
+	struct r600_bytecode_output output[32];
 	unsigned output_done, noutput;
 	unsigned opcode;
 	int i, j, r = 0, pos0;
 
 	ctx.bc = &shader->bc;
 	ctx.shader = shader;
-	r600_bc_init(ctx.bc, rctx->chip_class);
+	r600_bytecode_init(ctx.bc, rctx->chip_class);
 	ctx.tokens = tokens;
 	tgsi_scan_shader(tokens, &ctx.info);
 	tgsi_parse_init(&ctx.parse, tokens);
@@ -649,18 +653,18 @@
 	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
 		ctx.file_offset[TGSI_FILE_INPUT] = 1;
 		if (ctx.bc->chip_class >= EVERGREEN) {
-			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
+			r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
 		} else {
-			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
+			r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
 		}
 	}
 	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
 		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
 	}
 	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
-						ctx.info.file_count[TGSI_FILE_INPUT];
+						ctx.info.file_max[TGSI_FILE_INPUT] + 1;
 	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
-						ctx.info.file_count[TGSI_FILE_OUTPUT];
+						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
 
 	/* Outside the GPR range. This will be translated to one of the
 	 * kcache banks later. */
@@ -668,7 +672,7 @@
 
 	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
 	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
-			ctx.info.file_count[TGSI_FILE_TEMPORARY];
+			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
 	ctx.temp_reg = ctx.ar_reg + 1;
 
 	ctx.nliterals = 0;
@@ -742,8 +746,8 @@
 
 				int j;
 				for (j = 0; j < 4; j++) {
-					struct r600_bc_alu alu;
-					memset(&alu, 0, sizeof(struct r600_bc_alu));
+					struct r600_bytecode_alu alu;
+					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 					/* MOV_SAT R, R */
 					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
@@ -757,7 +761,7 @@
 					if (j == 3) {
 						alu.last = 1;
 					}
-					r = r600_bc_add_alu(ctx.bc, &alu);
+					r = r600_bytecode_add_alu(ctx.bc, &alu);
 					if (r)
 						return r;
 				}
@@ -768,7 +772,7 @@
 	/* export output */
 	j = 0;
 	for (i = 0, pos0 = 0; i < noutput; i++) {
-		memset(&output[i], 0, sizeof(struct r600_bc_output));
+		memset(&output[i], 0, sizeof(struct r600_bytecode_output));
 		output[i + j].gpr = shader->output[i].gpr;
 		output[i + j].elem_size = 3;
 		output[i + j].swizzle_x = 0;
@@ -801,7 +805,7 @@
 				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
 				if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
 					for (j = 1; j < shader->nr_cbufs; j++) {
-						memset(&output[i + j], 0, sizeof(struct r600_bc_output));
+						memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
 						output[i + j].gpr = shader->output[i].gpr;
 						output[i + j].elem_size = 3;
 						output[i + j].swizzle_x = 0;
@@ -850,7 +854,7 @@
 			}
 		}
 		if (!pos0) {
-			memset(&output[i], 0, sizeof(struct r600_bc_output));
+			memset(&output[i], 0, sizeof(struct r600_bytecode_output));
 			output[i].gpr = 0;
 			output[i].elem_size = 3;
 			output[i].swizzle_x = 0;
@@ -867,7 +871,7 @@
 	}
 	/* add fake pixel export */
 	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
-		memset(&output[0], 0, sizeof(struct r600_bc_output));
+		memset(&output[0], 0, sizeof(struct r600_bytecode_output));
 		output[0].gpr = 0;
 		output[0].elem_size = 3;
 		output[0].swizzle_x = 7;
@@ -895,13 +899,13 @@
 	}
 	/* add output to bytecode */
 	for (i = 0; i < noutput; i++) {
-		r = r600_bc_add_output(ctx.bc, &output[i]);
+		r = r600_bytecode_add_output(ctx.bc, &output[i]);
 		if (r)
 			goto out_err;
 	}
 	/* add program end */
 	if (ctx.bc->chip_class == CAYMAN)
-		cm_bc_add_cf_end(ctx.bc);
+		cm_bytecode_add_cf_end(ctx.bc);
 
 	free(ctx.literals);
 	tgsi_parse_free(&ctx.parse);
@@ -924,7 +928,7 @@
 	return 0;
 }
 
-static void r600_bc_src(struct r600_bc_alu_src *bc_src,
+static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
 			const struct r600_shader_src *shader_src,
 			unsigned chan)
 {
@@ -936,13 +940,13 @@
 	bc_src->value = shader_src->value[bc_src->chan];
 }
 
-static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src)
+static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
 {
 	bc_src->abs = 1;
 	bc_src->neg = 0;
 }
 
-static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src)
+static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
 {
 	bc_src->neg = !bc_src->neg;
 }
@@ -950,7 +954,7 @@
 static void tgsi_dst(struct r600_shader_ctx *ctx,
 		     const struct tgsi_full_dst_register *tgsi_dst,
 		     unsigned swizzle,
-		     struct r600_bc_alu_dst *r600_dst)
+		     struct r600_bytecode_alu_dst *r600_dst)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 
@@ -980,7 +984,7 @@
 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
@@ -988,25 +992,25 @@
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 
 		alu.inst = ctx->inst_info->r600_opcode;
 		if (!swap) {
 			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-				r600_bc_src(&alu.src[j], &ctx->src[j], i);
+				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
 			}
 		} else {
-			r600_bc_src(&alu.src[0], &ctx->src[1], i);
-			r600_bc_src(&alu.src[1], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
 		}
 		/* handle some special cases */
 		switch (ctx->inst_info->tgsi_opcode) {
 		case TGSI_OPCODE_SUB:
-			r600_bc_src_toggle_neg(&alu.src[1]);
+			r600_bytecode_src_toggle_neg(&alu.src[1]);
 			break;
 		case TGSI_OPCODE_ABS:
-			r600_bc_src_set_abs(&alu.src[0]);
+			r600_bytecode_src_set_abs(&alu.src[0]);
 			break;
 		default:
 			break;
@@ -1014,7 +1018,7 @@
 		if (i == lasti) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1035,21 +1039,21 @@
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	int i, j, r;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
 	
 	for (i = 0 ; i < last_slot; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-			r600_bc_src(&alu.src[j], &ctx->src[j], 0);
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
 		}
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
 
 		if (i == last_slot - 1)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1068,9 +1072,9 @@
 	static float neg_pi = -3.1415926535;
 
 	int r;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 	alu.is_op3 = 1;
 
@@ -1078,7 +1082,7 @@
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
 	alu.src[1].chan = 0;
@@ -1086,11 +1090,11 @@
 	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
 	alu.src[2].chan = 0;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
 
 	alu.dst.chan = 0;
@@ -1100,11 +1104,11 @@
 	alu.src[0].sel = ctx->temp_reg;
 	alu.src[0].chan = 0;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 	alu.is_op3 = 1;
 
@@ -1130,7 +1134,7 @@
 	}
 
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	return 0;
@@ -1139,7 +1143,7 @@
 static int cayman_trig(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
 	int i, r;
 
@@ -1149,7 +1153,7 @@
 
 
 	for (i = 0; i < last_slot; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		alu.dst.chan = i;
 
@@ -1160,7 +1164,7 @@
 		alu.src[0].chan = 0;
 		if (i == last_slot - 1)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1170,7 +1174,7 @@
 static int tgsi_trig(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
@@ -1178,7 +1182,7 @@
 	if (r)
 		return r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = ctx->inst_info->r600_opcode;
 	alu.dst.chan = 0;
 	alu.dst.sel = ctx->temp_reg;
@@ -1187,7 +1191,7 @@
 	alu.src[0].sel = ctx->temp_reg;
 	alu.src[0].chan = 0;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
@@ -1196,14 +1200,14 @@
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 
 		alu.src[0].sel = ctx->temp_reg;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 		if (i == lasti)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1213,7 +1217,7 @@
 static int tgsi_scs(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	/* We'll only need the trig stuff if we are going to write to the
@@ -1229,7 +1233,7 @@
 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0 ; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 
@@ -1241,19 +1245,19 @@
 				alu.src[0].chan = 0;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
 			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
 
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 0;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -1263,7 +1267,7 @@
 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0 ; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 				if (i == 1)
@@ -1274,19 +1278,19 @@
 				alu.src[0].chan = 0;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
 			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
 
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 0;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -1294,7 +1298,7 @@
 
 	/* dst.z = 0.0; */
 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 
@@ -1305,14 +1309,14 @@
 
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 
 	/* dst.w = 1.0; */
 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 
@@ -1323,7 +1327,7 @@
 
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1333,11 +1337,11 @@
 
 static int tgsi_kill(struct r600_shader_ctx *ctx)
 {
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 
 		alu.dst.chan = i;
@@ -1348,12 +1352,12 @@
 			alu.src[1].sel = V_SQ_ALU_SRC_1;
 			alu.src[1].neg = 1;
 		} else {
-			r600_bc_src(&alu.src[1], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
 		}
 		if (i == 3) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1367,13 +1371,13 @@
 static int tgsi_lit(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
 	/* tmp.x = max(src.y, 0.0) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
-	r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
 	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
 	alu.src[1].chan = 1;
 
@@ -1382,7 +1386,7 @@
 	alu.dst.write = 1;
 
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
@@ -1395,7 +1399,7 @@
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
 				/* tmp.z = log(tmp.x) */
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 0;
@@ -1407,13 +1411,13 @@
 				} else
 					alu.dst.write = 0;
 				
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
 			/* tmp.z = log(tmp.x) */
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 0;
@@ -1421,7 +1425,7 @@
 			alu.dst.chan = 2;
 			alu.dst.write = 1;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -1430,25 +1434,25 @@
 		sel = alu.dst.sel;
 
 		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
 		alu.src[0].sel  = sel;
 		alu.src[0].chan = chan;
-		r600_bc_src(&alu.src[1], &ctx->src[0], 3);
-		r600_bc_src(&alu.src[2], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
+		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = 0;
 		alu.dst.write = 1;
 		alu.is_op3 = 1;
 		alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
 				/* dst.z = exp(tmp.x) */
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 0;
@@ -1458,56 +1462,56 @@
 					alu.last = 1;
 				} else
 					alu.dst.write = 0;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
 			/* dst.z = exp(tmp.x) */
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 0;
 			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 	}
 
 	/* dst.x, <- 1.0  */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
 	alu.src[0].chan = 0;
 	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
 	/* dst.y = max(src.x, 0.0) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
 	alu.src[1].chan = 0;
 	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
 	/* dst.w, <- 1.0  */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 	alu.src[0].sel  = V_SQ_ALU_SRC_1;
 	alu.src[0].chan = 0;
 	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
@@ -1517,10 +1521,10 @@
 static int tgsi_rsq(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 	/* FIXME:
 	 * For state trackers other than OpenGL, we'll want to use
@@ -1529,13 +1533,13 @@
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
 
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
-		r600_bc_src_set_abs(&alu.src[i]);
+		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
+		r600_bytecode_src_set_abs(&alu.src[i]);
 	}
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	/* replicate result */
@@ -1545,11 +1549,11 @@
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.src[0].sel = ctx->temp_reg;
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 		alu.dst.chan = i;
@@ -1557,7 +1561,7 @@
 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1567,18 +1571,18 @@
 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = ctx->inst_info->r600_opcode;
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
+		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
 	}
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	/* replicate result */
@@ -1589,38 +1593,38 @@
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	int i, r;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
 
 	for (i = 0; i < 3; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
 		alu.dst.write = 1;
 		if (i == 2)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 
 	/* b * LOG2(a) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
-	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
 	alu.src[1].sel = ctx->temp_reg;
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
 	for (i = 0; i < last_slot; i++) {
 		/* POW(a,b) = EXP2(b * LOG2(a))*/
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 		alu.src[0].sel = ctx->temp_reg;
 
@@ -1628,7 +1632,7 @@
 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
 		if (i == last_slot - 1)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1637,38 +1641,38 @@
 
 static int tgsi_pow(struct r600_shader_ctx *ctx)
 {
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
 	/* LOG2(a) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	/* b * LOG2(a) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
-	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
 	alu.src[1].sel = ctx->temp_reg;
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	/* POW(a,b) = EXP2(b * LOG2(a))*/
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 	alu.src[0].sel = ctx->temp_reg;
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	return tgsi_helper_tempx_replicate(ctx);
@@ -1677,32 +1681,32 @@
 static int tgsi_ssg(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	/* tmp = (src > 0 ? 1 : src) */
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
 		alu.is_op3 = 1;
 
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
 
-		r600_bc_src(&alu.src[0], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
 		alu.src[1].sel = V_SQ_ALU_SRC_1;
-		r600_bc_src(&alu.src[2], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
 
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 
 	/* dst = (-tmp > 0 ? -1 : tmp) */
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
 		alu.is_op3 = 1;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -1719,7 +1723,7 @@
 
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1728,11 +1732,11 @@
 
 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
 {
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
 			alu.dst.chan = i;
@@ -1745,7 +1749,7 @@
 		if (i == 3) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1755,7 +1759,7 @@
 static int tgsi_op3(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
@@ -1763,10 +1767,10 @@
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-			r600_bc_src(&alu.src[j], &ctx->src[j], i);
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
 		}
 
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -1776,7 +1780,7 @@
 		if (i == lasti) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1786,14 +1790,14 @@
 static int tgsi_dp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, r;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-			r600_bc_src(&alu.src[j], &ctx->src[j], i);
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
 		}
 
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -1826,7 +1830,7 @@
 		if (i == 3) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1853,8 +1857,8 @@
 {
 	static float one_point_five = 1.5f;
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_tex tex;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_tex tex;
+	struct r600_bytecode_alu alu;
 	unsigned src_gpr;
 	int r, i, j;
 	int opcode;
@@ -1872,7 +1876,7 @@
 
 		for (i = 1; i < 3; i++) {
 			/* set gradients h/v */
-			memset(&tex, 0, sizeof(struct r600_bc_tex));
+			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
 			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
 				SQ_TEX_INST_SET_GRADIENTS_V;
 			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
@@ -1886,15 +1890,15 @@
 				tex.src_sel_w = 3;
 
 				for (j = 0; j < 4; j++) {
-					memset(&alu, 0, sizeof(struct r600_bc_alu));
+					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
-                                        r600_bc_src(&alu.src[0], &ctx->src[i], j);
+                                        r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
                                         alu.dst.sel = tex.src_gpr;
                                         alu.dst.chan = j;
                                         if (j == 3)
                                                 alu.last = 1;
                                         alu.dst.write = 1;
-                                        r = r600_bc_add_alu(ctx->bc, &alu);
+                                        r = r600_bytecode_add_alu(ctx->bc, &alu);
                                         if (r)
                                                 return r;
 				}
@@ -1915,7 +1919,7 @@
 				tex.coord_type_z = 1;
 				tex.coord_type_w = 1;
 			}
-			r = r600_bc_add_tex(ctx->bc, &tex);
+			r = r600_bytecode_add_tex(ctx->bc, &tex);
 			if (r)
 				return r;
 		}
@@ -1925,9 +1929,9 @@
 		if (ctx->bc->chip_class == CAYMAN) {
 			out_chan = 2;
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 3);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
 
 				alu.dst.sel = ctx->temp_reg;
 				alu.dst.chan = i;
@@ -1935,40 +1939,40 @@
 					alu.last = 1;
 				if (out_chan == i)
 					alu.dst.write = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 
 		} else {
 			out_chan = 3;
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 3);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
 
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = out_chan;
 			alu.last = 1;
 			alu.dst.write = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 
 		for (i = 0; i < 3; i++) {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = out_chan;
-			r600_bc_src(&alu.src[1], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = i;
 			alu.dst.write = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
 		alu.src[0].chan = 0;
@@ -1976,7 +1980,7 @@
 		alu.dst.chan = 3;
 		alu.last = 1;
 		alu.dst.write = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 		src_loaded = TRUE;
@@ -1989,16 +1993,16 @@
 
 		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
 		for (i = 0; i < 4; i++) {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
-			r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = i;
 			if (i == 3)
 				alu.last = 1;
 			alu.dst.write = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2006,7 +2010,7 @@
 		/* tmp1.z = RCP_e(|tmp1.z|) */
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 2;
@@ -2017,12 +2021,12 @@
 					alu.dst.write = 1;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 2;
@@ -2031,7 +2035,7 @@
 			alu.dst.chan = 2;
 			alu.dst.write = 1;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2040,7 +2044,7 @@
 		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
 		 * muladd has no writemask, have to use another temp
 		 */
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 		alu.is_op3 = 1;
 
@@ -2057,11 +2061,11 @@
 		alu.dst.chan = 0;
 		alu.dst.write = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 		alu.is_op3 = 1;
 
@@ -2079,7 +2083,7 @@
 		alu.dst.write = 1;
 
 		alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
@@ -2089,15 +2093,15 @@
 
 	if (src_requires_loading && !src_loaded) {
 		for (i = 0; i < 4; i++) {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
-			r600_bc_src(&alu.src[0], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = i;
 			if (i == 3)
 				alu.last = 1;
 			alu.dst.write = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2120,7 +2124,7 @@
 		}
 	}
 
-	memset(&tex, 0, sizeof(struct r600_bc_tex));
+	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
 	tex.inst = opcode;
 
 	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
@@ -2167,7 +2171,7 @@
 	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
 		tex.src_sel_w = tex.src_sel_z;
 
-	r = r600_bc_add_tex(ctx->bc, &tex);
+	r = r600_bytecode_add_tex(ctx->bc, &tex);
 	if (r)
 		return r;
 
@@ -2178,7 +2182,7 @@
 static int tgsi_lrp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 	unsigned i;
 	int r;
@@ -2189,17 +2193,17 @@
 			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 				continue;
 
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
-			r600_bc_src(&alu.src[0], &ctx->src[1], i);
-			r600_bc_src(&alu.src[1], &ctx->src[2], i);
+			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
 			alu.omod = 3;
 			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 			alu.dst.chan = i;
 			if (i == lasti) {
 				alu.last = 1;
 			}
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2211,19 +2215,19 @@
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
 		alu.src[0].chan = 0;
-		r600_bc_src(&alu.src[1], &ctx->src[0], i);
-		r600_bc_src_toggle_neg(&alu.src[1]);
+		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
+		r600_bytecode_src_toggle_neg(&alu.src[1]);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
 		if (i == lasti) {
 			alu.last = 1;
 		}
 		alu.dst.write = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2233,18 +2237,18 @@
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 		alu.src[0].sel = ctx->temp_reg;
 		alu.src[0].chan = i;
-		r600_bc_src(&alu.src[1], &ctx->src[2], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
 		if (i == lasti) {
 			alu.last = 1;
 		}
 		alu.dst.write = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2254,11 +2258,11 @@
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 		alu.is_op3 = 1;
-		r600_bc_src(&alu.src[0], &ctx->src[0], i);
-		r600_bc_src(&alu.src[1], &ctx->src[1], i);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
 		alu.src[2].sel = ctx->temp_reg;
 		alu.src[2].chan = i;
 
@@ -2267,7 +2271,7 @@
 		if (i == lasti) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2277,7 +2281,7 @@
 static int tgsi_cmp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
@@ -2285,18 +2289,18 @@
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
-		r600_bc_src(&alu.src[0], &ctx->src[0], i);
-		r600_bc_src(&alu.src[1], &ctx->src[2], i);
-		r600_bc_src(&alu.src[2], &ctx->src[1], i);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
+		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 		alu.dst.chan = i;
 		alu.dst.write = 1;
 		alu.is_op3 = 1;
 		if (i == lasti)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2308,7 +2312,7 @@
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	static const unsigned int src0_swizzle[] = {2, 0, 1};
 	static const unsigned int src1_swizzle[] = {1, 2, 0};
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	uint32_t use_temp = 0;
 	int i, r;
 
@@ -2316,11 +2320,11 @@
 		use_temp = 1;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 		if (i < 3) {
-			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
-			r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
 		} else {
 			alu.src[0].sel = V_SQ_ALU_SRC_0;
 			alu.src[0].chan = i;
@@ -2334,18 +2338,18 @@
 
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 
 		if (i < 3) {
-			r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
-			r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
+			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
 		} else {
 			alu.src[0].sel = V_SQ_ALU_SRC_0;
 			alu.src[0].chan = i;
@@ -2366,7 +2370,7 @@
 		alu.is_op3 = 1;
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2378,22 +2382,22 @@
 static int tgsi_exp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 	int i;
 
 	/* result.x = 2^floor(src); */
 	if (inst->Dst[0].Register.WriteMask & 1) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = 0;
 		alu.dst.write = 1;
 		alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
@@ -2409,7 +2413,7 @@
 					alu.dst.write = 1;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
@@ -2422,7 +2426,7 @@
 			alu.dst.chan = 0;
 			alu.dst.write = 1;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2430,10 +2434,10 @@
 
 	/* result.y = tmp - floor(tmp); */
 	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 		alu.dst.sel = ctx->temp_reg;
 #if 0
@@ -2446,7 +2450,7 @@
 
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2455,9 +2459,9 @@
 	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 				alu.dst.sel = ctx->temp_reg;
 				alu.dst.chan = i;
@@ -2466,14 +2470,14 @@
 					alu.last = 1;
 				}
 
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.write = 1;
@@ -2481,7 +2485,7 @@
 
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2489,7 +2493,7 @@
 
 	/* result.w = 1.0;*/
 	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
@@ -2499,7 +2503,7 @@
 		alu.dst.chan = 3;
 		alu.dst.write = 1;
 		alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2509,7 +2513,7 @@
 static int tgsi_log(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 	int i;
 
@@ -2517,11 +2521,11 @@
 	if (inst->Dst[0].Register.WriteMask & 1) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-				r600_bc_src_set_abs(&alu.src[0]);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+				r600_bytecode_src_set_abs(&alu.src[0]);
 			
 				alu.dst.sel = ctx->temp_reg;
 				alu.dst.chan = i;
@@ -2529,23 +2533,23 @@
 					alu.dst.write = 1;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-			r600_bc_src_set_abs(&alu.src[0]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+			r600_bytecode_src_set_abs(&alu.src[0]);
 			
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = 0;
 			alu.dst.write = 1;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2559,7 +2563,7 @@
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2569,11 +2573,11 @@
 
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-				r600_bc_src_set_abs(&alu.src[0]);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+				r600_bytecode_src_set_abs(&alu.src[0]);
 
 				alu.dst.sel = ctx->temp_reg;
 				alu.dst.chan = i;
@@ -2582,28 +2586,28 @@
 				if (i == 2)
 					alu.last = 1;
 				
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;	
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-			r600_bc_src_set_abs(&alu.src[0]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+			r600_bytecode_src_set_abs(&alu.src[0]);
 
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = 1;
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
 		alu.src[0].sel = ctx->temp_reg;
@@ -2614,13 +2618,13 @@
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 1;
@@ -2632,12 +2636,12 @@
 				if (i == 2)
 					alu.last = 1;
 
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 1;
@@ -2647,14 +2651,14 @@
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 1;
@@ -2666,12 +2670,12 @@
 				if (i == 2)
 					alu.last = 1;
 				
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 1;
@@ -2681,17 +2685,17 @@
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-		r600_bc_src_set_abs(&alu.src[0]);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src_set_abs(&alu.src[0]);
 
 		alu.src[1].sel = ctx->temp_reg;
 		alu.src[1].chan = 1;
@@ -2701,7 +2705,7 @@
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2710,11 +2714,11 @@
 	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-				r600_bc_src_set_abs(&alu.src[0]);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+				r600_bytecode_src_set_abs(&alu.src[0]);
 
 				alu.dst.sel = ctx->temp_reg;
 				if (i == 2)
@@ -2723,23 +2727,23 @@
 				if (i == 2)
 					alu.last = 1;
 
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-			r600_bc_src_set_abs(&alu.src[0]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+			r600_bytecode_src_set_abs(&alu.src[0]);
 
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.write = 1;
 			alu.dst.chan = 2;
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2747,7 +2751,7 @@
 
 	/* result.w = 1.0; */
 	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
@@ -2758,7 +2762,7 @@
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2769,10 +2773,10 @@
 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 	switch (inst->Instruction.Opcode) {
 	case TGSI_OPCODE_ARL:
@@ -2786,11 +2790,11 @@
 		return -1;
 	}
 
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 	alu.last = 1;
 	alu.dst.sel = ctx->ar_reg;
 	alu.dst.write = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
@@ -2800,12 +2804,12 @@
 	 * between ARL and AR usage. The easy way to do that is to remove
 	 * the MOVA here, and load it for the first AR access after ar_reg
 	 * has been modified in each clause. */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
 	alu.src[0].sel = ctx->ar_reg;
 	alu.src[0].chan = 0;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	return 0;
@@ -2814,19 +2818,19 @@
 {
 	/* TODO from r600c, ar values don't persist between clauses */
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
 	switch (inst->Instruction.Opcode) {
 	case TGSI_OPCODE_ARL:
 		memset(&alu, 0, sizeof(alu));
 		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 		alu.dst.sel = ctx->ar_reg;
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 			return r;
 
 		memset(&alu, 0, sizeof(alu));
@@ -2836,18 +2840,18 @@
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 			return r;
 		break;
 	case TGSI_OPCODE_ARR:
 		memset(&alu, 0, sizeof(alu));
 		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 		alu.dst.sel = ctx->ar_reg;
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 			return r;
 		break;
 	default:
@@ -2860,7 +2864,7 @@
 	alu.src[0].sel = ctx->ar_reg;
 	alu.last = 1;
 
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
@@ -2870,11 +2874,11 @@
 static int tgsi_opdst(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r = 0;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -2882,17 +2886,17 @@
 		if (i == 0 || i == 3) {
 			alu.src[0].sel = V_SQ_ALU_SRC_1;
 		} else {
-			r600_bc_src(&alu.src[0], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
 		}
 
 		if (i == 0 || i == 2) {
 			alu.src[1].sel = V_SQ_ALU_SRC_1;
 		} else {
-			r600_bc_src(&alu.src[1], &ctx->src[1], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
 		}
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2901,10 +2905,10 @@
 
 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 {
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = opcode;
 	alu.predicate = 1;
 
@@ -2912,13 +2916,13 @@
 	alu.dst.write = 1;
 	alu.dst.chan = 0;
 
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 	alu.src[1].sel = V_SQ_ALU_SRC_0;
 	alu.src[1].chan = 0;
 
 	alu.last = 1;
 
-	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
+	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
 	if (r)
 		return r;
 	return 0;
@@ -2926,25 +2930,34 @@
 
 static int pops(struct r600_shader_ctx *ctx, int pops)
 {
-	int alu_pop = 3;
-	if (ctx->bc->cf_last) {
-		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
-			alu_pop = 0;
-		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
-			alu_pop = 1;
+	unsigned force_pop = ctx->bc->force_add_cf;
+
+	if (!force_pop) {
+		int alu_pop = 3;
+		if (ctx->bc->cf_last) {
+			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+				alu_pop = 0;
+			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+				alu_pop = 1;
+		}
+		alu_pop += pops;
+		if (alu_pop == 1) {
+			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+			ctx->bc->force_add_cf = 1;
+		} else if (alu_pop == 2) {
+			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+			ctx->bc->force_add_cf = 1;
+		} else {
+			force_pop = 1;
+		}
 	}
-	alu_pop += pops;
-	if (alu_pop == 1) {
-		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
-		ctx->bc->force_add_cf = 1;
-	} else if (alu_pop == 2) {
-		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
-		ctx->bc->force_add_cf = 1;
-	} else {
-		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
+
+	if (force_pop) {
+		r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
 		ctx->bc->cf_last->pop_count = pops;
 		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
 	}
+
 	return 0;
 }
 
@@ -3011,8 +3024,8 @@
 {
 	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
 
-	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
-						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
+	sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
+						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
 	sp->mid[sp->num_mid] = ctx->bc->cf_last;
 	sp->num_mid++;
 }
@@ -3040,14 +3053,14 @@
 #if 0
 static int emit_return(struct r600_shader_ctx *ctx)
 {
-	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
+	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
 	return 0;
 }
 
 static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
 {
 
-	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
 	ctx->bc->cf_last->pop_count = pops;
 	/* TODO work out offset */
 	return 0;
@@ -3076,7 +3089,7 @@
 {
 	emit_testflag(ctx);
 
-	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
 	ctx->bc->cf_last->pop_count = 1;
 
 	fc_set_mid(ctx, fc_sp);
@@ -3089,7 +3102,7 @@
 {
 	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
 
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
 
 	fc_pushlevel(ctx, FC_IF);
 
@@ -3099,7 +3112,7 @@
 
 static int tgsi_else(struct r600_shader_ctx *ctx)
 {
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
 	ctx->bc->cf_last->pop_count = 1;
 
 	fc_set_mid(ctx, ctx->bc->fc_sp);
@@ -3129,7 +3142,7 @@
 
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
 {
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
 
 	fc_pushlevel(ctx, FC_LOOP);
 
@@ -3142,7 +3155,7 @@
 {
 	int i;
 
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
 
 	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
 		R600_ERR("loop/endloop in shader code are not paired.\n");
@@ -3182,7 +3195,7 @@
 		return -EINVAL;
 	}
 
-	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
 	ctx->bc->cf_last->pop_count = 1;
 
 	fc_set_mid(ctx, fscp);
@@ -3228,7 +3241,7 @@
 	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
-	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
@@ -3353,6 +3366,18 @@
 	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
 	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 };
 
@@ -3386,7 +3411,7 @@
 	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
-	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
@@ -3511,6 +3536,18 @@
 	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
 	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 };
 
@@ -3544,7 +3581,7 @@
 	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
-	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
 	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
 	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},
@@ -3669,5 +3706,17 @@
 	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
 	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 };
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 3ba84bd..ada369a 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -37,9 +37,10 @@
 
 struct r600_shader {
 	unsigned		processor_type;
-	struct r600_bc		bc;
+	struct r600_bytecode		bc;
 	unsigned		ninput;
 	unsigned		noutput;
+	unsigned		npos;
 	unsigned		nlds;
 	struct r600_shader_io	input[32];
 	struct r600_shader_io	output[32];
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 01406f2..fba2af8 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -662,19 +662,19 @@
 		offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
 		r600_pipe_state_add_reg(&state,
 				R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE,
-				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET,
-				fui(offset_units), 0xFFFFFFFF, NULL);
+				fui(offset_units), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE,
-				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET,
-				fui(offset_units), 0xFFFFFFFF, NULL);
+				fui(offset_units), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
-				offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+				offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0);
 		r600_context_pipe_state_set(&rctx->ctx, &state);
 	}
 }
@@ -689,10 +689,10 @@
 		return;
 
 	rstate->id = R600_PIPE_STATE_BLEND_COLOR;
-	r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0);
 	free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]);
 	rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate;
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
@@ -742,7 +742,7 @@
 	blend->cb_target_mask = target_mask;
 	/* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */
 	r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
-				color_control, 0xFFFFFFFD, NULL);
+				color_control, 0xFFFFFFFD, NULL, 0);
 
 	for (int i = 0; i < 8; i++) {
 		/* state->rt entries > 0 only written if independent blending */
@@ -773,9 +773,9 @@
 
 		/* R600 does not support per-MRT blends */
 		if (rctx->family > CHIP_R600)
-			r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
+			r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL, 0);
 		if (i == 0)
-			r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL);
+			r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL, 0);
 	}
 	return rstate;
 }
@@ -842,28 +842,28 @@
 		S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
 		S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
 	/* TODO db_render_override depends on query */
-	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028430_DB_STENCILREFMASK, stencil_ref_mask,
-				0xFFFFFFFF & C_028430_STENCILREF, NULL);
+				0xFFFFFFFF & C_028430_STENCILREF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf,
-				0xFFFFFFFF & C_028434_STENCILREF_BF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0);
 	/* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
 	 * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
 	 * r600_pipe_shader_ps().*/
-	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0);
 
 	return rstate;
 }
@@ -907,7 +907,7 @@
 			tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
 		}
 	}
-	r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0);
 
 	polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
 				state->fill_back != PIPE_POLYGON_MODE_FILL);
@@ -921,33 +921,33 @@
 		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
 		S_028814_POLY_MODE(polygon_dual_mode) |
 		S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
-		S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL);
+		S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL,
 			S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
-			S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+			S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	/* point size 12.4 fixed point */
 	tmp = (unsigned)(state->point_size * 8.0);
-	r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0);
 
 	tmp = (unsigned)state->line_width * 8;
-	r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0);
 
 	r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL,
 				S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0);
 
 	return rstate;
 }
@@ -977,17 +977,17 @@
 					S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
 					S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
 					S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
-					S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
+					S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
 					S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
 					S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
-					S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
+					S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL, 0);
 	if (uc.ui) {
-		r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0);
 	}
 	return rstate;
 }
@@ -996,10 +996,9 @@
 							struct pipe_resource *texture,
 							const struct pipe_sampler_view *state)
 {
-	struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
+	struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view);
 	struct r600_pipe_resource_state *rstate;
-	const struct util_format_description *desc;
-	struct r600_resource_texture *tmp;
+	struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture;
 	struct r600_resource *rbuffer;
 	unsigned format, endian;
 	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
@@ -1007,43 +1006,42 @@
 	struct r600_bo *bo[2];
 	unsigned width, height, depth, offset_level, last_level;
 
-	if (resource == NULL)
+	if (view == NULL)
 		return NULL;
-	rstate = &resource->state;
+	rstate = &view->state;
 
 	/* initialize base object */
-	resource->base = *state;
-	resource->base.texture = NULL;
+	view->base = *state;
+	view->base.texture = NULL;
 	pipe_reference(NULL, &texture->reference);
-	resource->base.texture = texture;
-	resource->base.reference.count = 1;
-	resource->base.context = ctx;
+	view->base.texture = texture;
+	view->base.reference.count = 1;
+	view->base.context = ctx;
 
 	swizzle[0] = state->swizzle_r;
 	swizzle[1] = state->swizzle_g;
 	swizzle[2] = state->swizzle_b;
 	swizzle[3] = state->swizzle_a;
+
 	format = r600_translate_texformat(ctx->screen, state->format,
 					  swizzle,
 					  &word4, &yuv_format);
 	if (format == ~0) {
 		format = 0;
 	}
-	desc = util_format_description(state->format);
-	if (desc == NULL) {
-		R600_ERR("unknown format %d\n", state->format);
-	}
-	tmp = (struct r600_resource_texture *)texture;
+
 	if (tmp->depth && !tmp->is_flushing_texture) {
 	        r600_texture_depth_flush(ctx, texture, TRUE);
 		tmp = tmp->flushed_depth_texture;
 	}
+
 	endian = r600_colorformat_endian_swap(format);
 
 	if (tmp->force_int_type) {
 		word4 &= C_038010_NUM_FORMAT_ALL;
 		word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
 	}
+
 	rbuffer = &tmp->resource;
 	bo[0] = rbuffer->bo;
 	bo[1] = rbuffer->bo;
@@ -1068,6 +1066,8 @@
 
 	rstate->bo[0] = bo[0];
 	rstate->bo[1] = bo[1];
+	rstate->bo_usage[0] = RADEON_USAGE_READ;
+	rstate->bo_usage[1] = RADEON_USAGE_READ;
 
 	rstate->val[0] = (S_038000_DIM(r600_tex_dim(texture->target)) |
 			  S_038000_TILE_MODE(array_mode) |
@@ -1077,8 +1077,8 @@
 	rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) |
 			  S_038004_TEX_DEPTH(depth - 1) |
 			  S_038004_DATA_FORMAT(format));
-	rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8;
-	rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8;
+	rstate->val[2] = tmp->offset[offset_level] >> 8;
+	rstate->val[3] = tmp->offset[offset_level+1] >> 8;
 	rstate->val[4] = (word4 |
 			  S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 			  S_038010_REQUEST_SIZE(1) |
@@ -1090,7 +1090,7 @@
 	rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) |
 			  S_038018_MAX_ANISO(4 /* max 16 samples */));
 
-	return &resource->base;
+	return &view->base;
 }
 
 static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
@@ -1157,7 +1157,7 @@
 	rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP;
 	r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
 				(enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)),
-				1, NULL);
+				1, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]);
 	rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate;
@@ -1215,21 +1215,21 @@
 	for (int i = 0; i < state->nr; i++) {
 		r600_pipe_state_add_reg(rstate,
 					R_028E20_PA_CL_UCP0_X + i * 16,
-					fui(state->ucp[i][0]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_028E24_PA_CL_UCP0_Y + i * 16,
-					fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_028E28_PA_CL_UCP0_Z + i * 16,
-					fui(state->ucp[i][2]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_028E2C_PA_CL_UCP0_W + i * 16,
-					fui(state->ucp[i][3]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0);
 	}
 	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
 			S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) |
 			S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) |
-			S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL);
+			S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_CLIP]);
 	rctx->states[R600_PIPE_STATE_CLIP] = rstate;
@@ -1260,28 +1260,28 @@
 	br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
 	r600_pipe_state_add_reg(rstate,
 				R_028210_PA_SC_CLIPRECT_0_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028214_PA_SC_CLIPRECT_0_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028218_PA_SC_CLIPRECT_1_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02821C_PA_SC_CLIPRECT_1_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028220_PA_SC_CLIPRECT_2_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028224_PA_SC_CLIPRECT_2_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028228_PA_SC_CLIPRECT_3_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02822C_PA_SC_CLIPRECT_3_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_SCISSOR]);
 	rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
@@ -1303,11 +1303,11 @@
 	tmp = S_028430_STENCILREF(state->ref_value[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_028430_DB_STENCILREFMASK, tmp,
-				~C_028430_STENCILREF, NULL);
+				~C_028430_STENCILREF, NULL, 0);
 	tmp = S_028434_STENCILREF_BF(state->ref_value[1]);
 	r600_pipe_state_add_reg(rstate,
 				R_028434_DB_STENCILREFMASK_BF, tmp,
-				~C_028434_STENCILREF_BF, NULL);
+				~C_028434_STENCILREF_BF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_STENCIL_REF]);
 	rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate;
@@ -1325,15 +1325,15 @@
 
 	rctx->viewport = *state;
 	rstate->id = R600_PIPE_STATE_VIEWPORT;
-	r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_VIEWPORT]);
 	rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate;
@@ -1441,27 +1441,27 @@
 
 	r600_pipe_state_add_reg(rstate,
 				R_028040_CB_COLOR0_BASE + cb * 4,
-				(offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+				offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_0280A0_CB_COLOR0_INFO + cb * 4,
-				color_info, 0xFFFFFFFF, bo[0]);
+				color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028060_CB_COLOR0_SIZE + cb * 4,
 				S_028060_PITCH_TILE_MAX(pitch) |
 				S_028060_SLICE_TILE_MAX(slice),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028080_CB_COLOR0_VIEW + cb * 4,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_0280E0_CB_COLOR0_FRAG + cb * 4,
-				r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]);
+				0, 0xFFFFFFFF, bo[1], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_0280C0_CB_COLOR0_TILE + cb * 4,
-				r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]);
+				0, 0xFFFFFFFF, bo[2], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028100_CB_COLOR0_MASK + cb * 4,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 }
 
 static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
@@ -1492,16 +1492,16 @@
 	format = r600_translate_dbformat(state->zsbuf->texture->format);
 
 	r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
-				(offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE,
 				S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice),
-				0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO,
 				S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format),
-				0xFFFFFFFF, rbuffer->bo);
+				0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT,
-				(surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL);
+				(surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL, 0);
 }
 
 static void r600_set_framebuffer_state(struct pipe_context *ctx,
@@ -1546,59 +1546,59 @@
 
 	r600_pipe_state_add_reg(rstate,
 				R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028244_PA_SC_GENERIC_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	if (rctx->chip_class >= R700) {
 		r600_pipe_state_add_reg(rstate,
 					R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 	}
 
 	r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL,
-				shader_control, 0xFFFFFFFF, NULL);
+				shader_control, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK,
-				0x00000000, target_mask, NULL);
+				0x00000000, target_mask, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK,
-				shader_mask, 0xFFFFFFFF, NULL);
+				shader_mask, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL,
-				0x01000000, 0xFFFFFFFF, NULL);
+				0x01000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST,
-				0x000000FF, 0xFFFFFFFF, NULL);
+				0x000000FF, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK,
-				0xFFFFFFFF, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK,
-				0xFFFFFFFF, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
 	rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
@@ -1674,7 +1674,7 @@
 	if (rctx->chip_class >= EVERGREEN)
 		return;
 
-	if (!rctx->ps_shader && !rctx->vs_shader)
+	if (!rctx->ps_shader || !rctx->vs_shader)
 		return;
 
 	if (rctx->ps_shader->shader.bc.ngpr > rctx->default_ps_gprs)
@@ -1695,7 +1695,7 @@
 	tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
 	tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
 	rstate.nregs = 0;
-	r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL);
+	r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL, 0);
 
 	r600_context_pipe_state_set(&rctx->ctx, &rstate);
 }
@@ -1866,20 +1866,20 @@
 	tmp |= S_008C00_VS_PRIO(vs_prio);
 	tmp |= S_008C00_GS_PRIO(gs_prio);
 	tmp |= S_008C00_ES_PRIO(es_prio);
-	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_GPR_RESOURCE_MGMT_1 */
 	tmp = 0;
 	tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
 	tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
 	tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-	r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_GPR_RESOURCE_MGMT_2 */
 	tmp = 0;
 	tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
 	tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
-	r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_THREAD_RESOURCE_MGMT */
 	tmp = 0;
@@ -1887,78 +1887,78 @@
 	tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
 	tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
 	tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads);
-	r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_STACK_RESOURCE_MGMT_1 */
 	tmp = 0;
 	tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
 	tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_STACK_RESOURCE_MGMT_2 */
 	tmp = 0;
 	tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
 	tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL, 0);
 
 	if (rctx->chip_class >= R700) {
-		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
 					S_009508_DISABLE_CUBE_ANISO(1) |
 					S_009508_SYNC_GRADIENT(1) |
 					S_009508_SYNC_WALKER(1) |
-					S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL);
+					S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL, 0);
 	} else {
-		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
 					S_009508_DISABLE_CUBE_ANISO(1) |
 					S_009508_SYNC_GRADIENT(1) |
 					S_009508_SYNC_WALKER(1) |
-					S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL);
+					S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL, 0);
 	}
-	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
 }
 
@@ -2022,38 +2022,38 @@
 			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
 	}
 
-	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028840_SQ_PGM_START_PS,
-				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+				0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 	r600_pipe_state_add_reg(rstate,
 				R_028850_SQ_PGM_RESOURCES_PS,
 				S_028868_NUM_GPRS(rshader->bc.ngpr) |
 				S_028868_STACK_SIZE(rshader->bc.nstack),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028854_SQ_PGM_EXPORTS_PS,
-				exports_ps, 0xFFFFFFFF, NULL);
+				exports_ps, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_0288CC_SQ_PGM_CF_OFFSET_PS,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
 				S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all),
 				S_028808_MULTIWRITE_ENABLE(1),
-				NULL);
+				NULL, 0);
 	/* only set some bits here, the other bits are set in the dsa state */
 	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL,
 				db_shader_control,
 				S_02880C_Z_EXPORT_ENABLE(1) |
 				S_02880C_STENCIL_REF_EXPORT_ENABLE(1) |
 				S_02880C_KILL_ENABLE(1),
-				NULL);
+				NULL, 0);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
@@ -2062,7 +2062,7 @@
 	struct r600_pipe_state *rstate = &shader->rstate;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned spi_vs_out_id[10];
-	unsigned i, tmp;
+	unsigned i, tmp, nparams;
 
 	/* clear previous register */
 	rstate->nregs = 0;
@@ -2081,28 +2081,36 @@
 	for (i = 0; i < 10; i++) {
 		r600_pipe_state_add_reg(rstate,
 					R_028614_SPI_VS_OUT_ID_0 + i * 4,
-					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
+					spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0);
 	}
 
+	/* Certain attributes (position, psize, etc.) don't count as params.
+	 * VS is required to export at least one param and r600_shader_from_tgsi()
+	 * takes care of adding a dummy export.
+	 */
+	nparams = rshader->noutput - rshader->npos;
+	if (nparams < 1)
+		nparams = 1;
+
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
-			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
-			0xFFFFFFFF, NULL);
+			S_0286C4_VS_EXPORT_COUNT(nparams - 1),
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_028868_SQ_PGM_RESOURCES_VS,
 			S_028868_NUM_GPRS(rshader->bc.ngpr) |
 			S_028868_STACK_SIZE(rshader->bc.nstack),
-			0xFFFFFFFF, NULL);
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_0288D0_SQ_PGM_CF_OFFSET_VS,
-			0x00000000, 0xFFFFFFFF, NULL);
+			0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_028858_SQ_PGM_START_VS,
-			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+			0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 void r600_fetch_shader(struct pipe_context *ctx,
@@ -2115,12 +2123,12 @@
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
 	rstate->nregs = 0;
 	r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
-				r600_bo_offset(ve->fetch_shader) >> 8,
-				0xFFFFFFFF, ve->fetch_shader);
+				0,
+				0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ);
 }
 
 void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
@@ -2149,7 +2157,7 @@
 	r600_pipe_state_add_reg(rstate,
 				R_02880C_DB_SHADER_CONTROL,
 				0x0,
-				S_02880C_DUAL_EXPORT_ENABLE(1), NULL);
+				S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028D0C_DB_RENDER_CONTROL,
 				S_028D0C_DEPTH_COPY_ENABLE(1) |
@@ -2157,7 +2165,7 @@
 				S_028D0C_COPY_CENTROID(1),
 				S_028D0C_DEPTH_COPY_ENABLE(1) |
 				S_028D0C_STENCIL_COPY_ENABLE(1) |
-				S_028D0C_COPY_CENTROID(1), NULL);
+				S_028D0C_COPY_CENTROID(1), NULL, 0);
 	return rstate;
 }
 
@@ -2178,10 +2186,12 @@
 
 void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
 				   struct r600_resource *rbuffer,
-				   unsigned offset, unsigned stride)
+				   unsigned offset, unsigned stride,
+				   enum radeon_bo_usage usage)
 {
 	rstate->val[0] = offset;
 	rstate->bo[0] = rbuffer->bo;
+	rstate->bo_usage[0] = usage;
 	rstate->val[1] = rbuffer->bo_size - offset - 1;
 	rstate->val[2] = S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
 	                 S_038008_STRIDE(stride);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 408eaed..853458f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -150,7 +150,7 @@
 		rctx->states[rstate->id] = NULL;
 	}
 	for (int i = 0; i < rstate->nregs; i++) {
-		r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
+		r600_bo_reference(&rstate->regs[i].bo, NULL);
 	}
 	free(rstate);
 }
@@ -181,7 +181,7 @@
 	if (rctx->vertex_elements == state)
 		rctx->vertex_elements = NULL;
 
-	r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
+	r600_bo_reference(&v->fetch_shader, NULL);
 	u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements);
 	FREE(state);
 }
@@ -336,7 +336,7 @@
 	rstate.nregs = 0;
 	if (rctx->export_16bpc)
 		alpha_ref &= ~0x1FFF;
-	r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL, 0);
 
 	r600_context_pipe_state_set(&rctx->ctx, &rstate);
 	rctx->alpha_ref_dirty = false;
@@ -349,7 +349,7 @@
 	rstate->nregs = 0;
 	rstate->id = R600_PIPE_STATE_SPI;
 	for (i = 0; i < 32; i++) {
-		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL, 0);
 	}
 }
 
@@ -418,7 +418,6 @@
 	}
 
 	r600_upload_const_buffer(rctx, &rbuffer, &offset);
-	offset += r600_bo_offset(rbuffer->r.bo);
 
 	switch (shader) {
 	case PIPE_SHADER_VERTEX:
@@ -426,10 +425,10 @@
 		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
 					R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
 					ALIGN_DIVUP(buffer->width0 >> 4, 16),
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
 					R_028980_ALU_CONST_CACHE_VS_0,
-					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
 
 		rstate = &rctx->vs_const_buffer_resource[index];
@@ -442,10 +441,10 @@
 		}
 
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
+			evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
 		} else {
-			r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
+			r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ);
 			r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
 		}
 		break;
@@ -454,10 +453,10 @@
 		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
 					R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
 					ALIGN_DIVUP(buffer->width0 >> 4, 16),
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
 					R_028940_ALU_CONST_CACHE_PS_0,
-					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
 
 		rstate = &rctx->ps_const_buffer_resource[index];
@@ -469,10 +468,10 @@
 			}
 		}
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
+			evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
 		} else {
-			r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
+			r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ);
 			r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
 		}
 		break;
@@ -518,7 +517,7 @@
 		}
 		if (vertex_buffer == NULL || rbuffer == NULL)
 			continue;
-		offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+		offset += vertex_buffer->buffer_offset;
 
 		if (!rstate->id) {
 			if (rctx->chip_class >= EVERGREEN) {
@@ -529,10 +528,10 @@
 		}
 
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride);
+			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
 		} else {
-			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride);
+			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
 			r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
 		}
 	}
@@ -615,16 +614,18 @@
 	if (rctx->vgt.id != R600_PIPE_STATE_VGT) {
 		rctx->vgt.id = R600_PIPE_STATE_VGT;
 		rctx->vgt.nregs = 0;
-		r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, draw.info.restart_index, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, draw.info.primitive_restart, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vgt, R_028814_PA_SU_SC_MODE_CNTL,
 					0,
-					S_028814_PROVOKING_VTX_LAST(1), NULL);
+					S_028814_PROVOKING_VTX_LAST(1), NULL, 0);
 
 	}
 
@@ -634,6 +635,8 @@
 	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.max_index);
 	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.min_index);
 	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.index_bias);
+	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.restart_index);
+	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.primitive_restart);
 	r600_pipe_state_mod_reg(&rctx->vgt, 0);
 	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.start_instance);
 	if (draw.info.mode == PIPE_PRIM_QUADS || draw.info.mode == PIPE_PRIM_QUAD_STRIP || draw.info.mode == PIPE_PRIM_POLYGON) {
@@ -676,11 +679,14 @@
 			      struct r600_pipe_state *state,
 			      u32 offset, u32 value, u32 mask,
 			      u32 range_id, u32 block_id,
-			      struct r600_bo *bo)
+			      struct r600_bo *bo,
+			      enum radeon_bo_usage usage)
 {
 	struct r600_range *range;
 	struct r600_block *block;
 
+	if (bo) assert(usage);
+
 	range = &ctx->range[range_id];
 	block = range->blocks[block_id];
 	state->regs[state->nregs].block = block;
@@ -689,6 +695,7 @@
 	state->regs[state->nregs].value = value;
 	state->regs[state->nregs].mask = mask;
 	state->regs[state->nregs].bo = bo;
+	state->regs[state->nregs].bo_usage = usage;
 
 	state->nregs++;
 	assert(state->nregs < R600_BLOCK_MAX_REG);
@@ -696,13 +703,17 @@
 
 void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state,
 				     u32 offset, u32 value, u32 mask,
-				     struct r600_bo *bo)
+				     struct r600_bo *bo,
+				     enum radeon_bo_usage usage)
 {
+	if (bo) assert(usage);
+
 	state->regs[state->nregs].id = offset;
 	state->regs[state->nregs].block = NULL;
 	state->regs[state->nregs].value = value;
 	state->regs[state->nregs].mask = mask;
 	state->regs[state->nregs].bo = bo;
+	state->regs[state->nregs].bo_usage = usage;
 
 	state->nregs++;
 	assert(state->nregs < R600_BLOCK_MAX_REG);
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index e9e8b27..7c1bd9d 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -31,7 +31,6 @@
 #include <util/u_math.h>
 #include <util/u_inlines.h>
 #include <util/u_memory.h>
-#include "state_tracker/drm_driver.h"
 #include "pipebuffer/pb_buffer.h"
 #include "r600_pipe.h"
 #include "r600_resource.h"
@@ -67,7 +66,7 @@
 				  rtransfer->staging_texture,
 				  0, &sbox);
 
-        ctx->flush(ctx, NULL);
+	r600_flush(ctx, NULL, RADEON_FLUSH_ASYNC);
 }
 
 unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
@@ -174,15 +173,15 @@
 {
 	struct pipe_resource *ptex = &rtex->resource.b.b.b;
 	unsigned nblocksx, block_align, width;
-	unsigned blocksize = util_format_get_blocksize(ptex->format);
+	unsigned blocksize = util_format_get_blocksize(rtex->real_format);
 
 	if (rtex->pitch_override)
 		return rtex->pitch_override / blocksize;
 
 	width = mip_minify(ptex->width0, level);
-	nblocksx = util_format_get_nblocksx(ptex->format, width);
+	nblocksx = util_format_get_nblocksx(rtex->real_format, width);
 
-	block_align = r600_get_block_alignment(screen, ptex->format,
+	block_align = r600_get_block_alignment(screen, rtex->real_format,
 					      rtex->array_mode[level]);
 	nblocksx = align(nblocksx, block_align);
 	return nblocksx;
@@ -196,9 +195,19 @@
 	unsigned height, tile_height;
 
 	height = mip_minify(ptex->height0, level);
-	height = util_format_get_nblocksy(ptex->format, height);
+	height = util_format_get_nblocksy(rtex->real_format, height);
 	tile_height = r600_get_height_alignment(screen,
 						rtex->array_mode[level]);
+
+	/* XXX Hack around an alignment issue. Less tests fail with this.
+	 *
+	 * The thing is depth-stencil buffers should be tiled, i.e.
+	 * the alignment should be >=8. If I make them tiled, stencil starts
+	 * working because it no longer overlaps with the depth buffer
+	 * in memory, but texturing like drawpix-stencil breaks. */
+	if (util_format_is_depth_or_stencil(rtex->real_format) && tile_height < 8)
+		tile_height = 8;
+
 	height = align(height, tile_height);
 	return height;
 }
@@ -221,7 +230,7 @@
 		unsigned w, h, tile_height, tile_width;
 
 		tile_height = r600_get_height_alignment(screen, array_mode);
-		tile_width = r600_get_block_alignment(screen, ptex->format, array_mode);
+		tile_width = r600_get_block_alignment(screen, rtex->real_format, array_mode);
 
 		w = mip_minify(ptex->width0, level);
 		h = mip_minify(ptex->height0, level);
@@ -239,14 +248,14 @@
 			       unsigned array_mode)
 {
 	struct pipe_resource *ptex = &rtex->resource.b.b.b;
-	struct radeon *radeon = (struct radeon *)screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 	enum chip_class chipc = r600_get_family_class(radeon);
 	unsigned size, layer_size, i, offset;
-	unsigned nblocksx, nblocksy, extra_size = 0;
+	unsigned nblocksx, nblocksy;
 
 	for (i = 0, offset = 0; i <= ptex->last_level; i++) {
-		unsigned blocksize = util_format_get_blocksize(ptex->format);
-		unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
+		unsigned blocksize = util_format_get_blocksize(rtex->real_format);
+		unsigned base_align = r600_get_base_alignment(screen, rtex->real_format, array_mode);
 
 		r600_texture_set_array_mode(screen, rtex, i, array_mode);
 
@@ -265,10 +274,6 @@
 		else
 			size = layer_size * ptex->array_size;
 
-		/* evergreen stores depth and stencil separately */
-		if ((chipc >= EVERGREEN) && util_format_is_depth_or_stencil(ptex->format))
-			extra_size = align(extra_size + (nblocksx * nblocksy * 1), base_align);
-
 		/* align base image and start of miptree */
 		if ((i == 0) || (i == 1))
 			offset = align(offset, base_align);
@@ -279,7 +284,7 @@
 
 		offset += size;
 	}
-	rtex->size = offset + extra_size;
+	rtex->size = offset;
 }
 
 /* Figure out whether u_blitter will fallback to a transfer operation.
@@ -329,7 +334,7 @@
 {
 	struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
 	struct r600_resource *resource = &rtex->resource;
-	struct radeon *radeon = (struct radeon *)screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 
 	return r600_bo_get_winsys_handle(radeon, resource->bo,
 			rtex->pitch_in_bytes[0], whandle);
@@ -340,13 +345,12 @@
 {
 	struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
 	struct r600_resource *resource = &rtex->resource;
-	struct radeon *radeon = (struct radeon *)screen->winsys;
 
 	if (rtex->flushed_depth_texture)
 		pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
 
 	if (resource->bo) {
-		r600_bo_reference(radeon, &resource->bo, NULL);
+		r600_bo_reference(&resource->bo, NULL);
 	}
 	FREE(rtex);
 }
@@ -369,11 +373,12 @@
 			   unsigned array_mode,
 			   unsigned pitch_in_bytes_override,
 			   unsigned max_buffer_size,
-			   struct r600_bo *bo)
+			   struct r600_bo *bo,
+			   boolean alloc_bo)
 {
 	struct r600_resource_texture *rtex;
 	struct r600_resource *resource;
-	struct radeon *radeon = (struct radeon *)screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 
 	rtex = CALLOC_STRUCT(r600_resource_texture);
 	if (rtex == NULL)
@@ -386,59 +391,112 @@
 	resource->b.b.b.screen = screen;
 	resource->bo = bo;
 	rtex->pitch_override = pitch_in_bytes_override;
+	rtex->real_format = base->format;
+
+	/* We must split depth and stencil into two separate buffers on Evergreen. */
+	if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+	    r600_get_family_class(((struct r600_screen*)screen)->radeon) >= EVERGREEN &&
+	    util_format_is_depth_and_stencil(base->format)) {
+		struct pipe_resource stencil;
+		unsigned stencil_pitch_override = 0;
+
+		switch (base->format) {
+		case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+			rtex->real_format = PIPE_FORMAT_Z24X8_UNORM;
+			break;
+		case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+			rtex->real_format = PIPE_FORMAT_X8Z24_UNORM;
+			break;
+		case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+			rtex->real_format = PIPE_FORMAT_Z32_FLOAT;
+			break;
+		default:
+			assert(0);
+			FREE(rtex);
+			return NULL;
+		}
+
+		/* Divide the pitch in bytes by 4 for stencil, because it has a smaller pixel size. */
+		if (pitch_in_bytes_override) {
+			assert(base->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
+			       base->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
+			stencil_pitch_override = pitch_in_bytes_override / 4;
+		}
+
+		/* Allocate the stencil buffer. */
+		stencil = *base;
+		stencil.format = PIPE_FORMAT_S8_USCALED;
+		rtex->stencil = r600_texture_create_object(screen, &stencil, array_mode,
+							   stencil_pitch_override,
+							   max_buffer_size, NULL, FALSE);
+		if (!rtex->stencil) {
+			FREE(rtex);
+			return NULL;
+		}
+		/* Proceed in creating the depth buffer. */
+	}
+
 	/* only mark depth textures the HW can hit as depth textures */
-	if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base))
+	if (util_format_is_depth_or_stencil(rtex->real_format) && permit_hardware_blit(screen, base))
 		rtex->depth = 1;
 
 	r600_setup_miptree(screen, rtex, array_mode);
 
+	/* If we initialized separate stencil for Evergreen. place it after depth. */
+	if (rtex->stencil) {
+		unsigned stencil_align, stencil_offset;
+
+		stencil_align = r600_get_base_alignment(screen, rtex->stencil->real_format, array_mode);
+		stencil_offset = align(rtex->size, stencil_align);
+
+		for (unsigned i = 0; i <= rtex->stencil->resource.b.b.b.last_level; i++)
+			rtex->stencil->offset[i] += stencil_offset;
+
+		rtex->size = stencil_offset + rtex->stencil->size;
+	}
+
 	resource->size = rtex->size;
 
-	if (!resource->bo) {
+	/* Now create the backing buffer. */
+	if (!resource->bo && alloc_bo) {
 		struct pipe_resource *ptex = &rtex->resource.b.b.b;
-		int base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
+		unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
 
 		resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage);
 		if (!resource->bo) {
+			pipe_resource_reference((struct pipe_resource**)&rtex->stencil, NULL);
 			FREE(rtex);
 			return NULL;
 		}
 	}
+
+	if (rtex->stencil)
+		rtex->stencil->resource.bo = rtex->resource.bo;
 	return rtex;
 }
 
+DEBUG_GET_ONCE_BOOL_OPTION(tiling_enabled, "R600_TILING", FALSE);
+
 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 						const struct pipe_resource *templ)
 {
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 	unsigned array_mode = 0;
-	static int force_tiling = -1;
 
-	/* Would like some magic "get_bool_option_once" routine.
-	 */
-	if (force_tiling == -1) {
-#if 0
-		/* reenable when 2D tiling is fixed better */
-		struct r600_screen *rscreen = (struct r600_screen *)screen;
-		if (r600_get_minor_version(rscreen->radeon) >= 9)
-			force_tiling = debug_get_bool_option("R600_TILING", TRUE);
-#endif
-		force_tiling = debug_get_bool_option("R600_TILING", FALSE);
-	}
-
-	if (force_tiling && permit_hardware_blit(screen, templ)) {
-		if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
-		    !(templ->bind & PIPE_BIND_SCANOUT)) {
+	if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+	    !(templ->bind & PIPE_BIND_SCANOUT)) {
+		if (util_format_is_compressed(templ->format)) {
+			array_mode = V_038000_ARRAY_1D_TILED_THIN1;
+		}
+		else if (debug_get_option_tiling_enabled() &&
+			 r600_get_minor_version(radeon) >= 9 &&
+			 permit_hardware_blit(screen, templ)) {
 			array_mode = V_038000_ARRAY_2D_TILED_THIN1;
 		}
 	}
 
-	if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
-	    util_format_is_compressed(templ->format))
-		array_mode = V_038000_ARRAY_1D_TILED_THIN1;
-
 	return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
-								  0, 0, NULL);
-
+								  0, 0, NULL, TRUE);
 }
 
 static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
@@ -483,8 +541,9 @@
 					       const struct pipe_resource *templ,
 					       struct winsys_handle *whandle)
 {
-	struct radeon *rw = (struct radeon*)screen->winsys;
+	struct radeon *rw = ((struct r600_screen*)screen)->radeon;
 	struct r600_bo *bo = NULL;
+	unsigned stride = 0;
 	unsigned array_mode = 0;
 
 	/* Support only 2D textures without mipmaps */
@@ -492,15 +551,13 @@
 	      templ->depth0 != 1 || templ->last_level != 0)
 		return NULL;
 
-	bo = r600_bo_handle(rw, whandle->handle, &array_mode);
+	bo = r600_bo_handle(rw, whandle, &stride, &array_mode);
 	if (bo == NULL) {
 		return NULL;
 	}
 
 	return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
-								  whandle->stride,
-								  0,
-								  bo);
+								  stride, 0, bo, FALSE);
 }
 
 int r600_texture_depth_flush(struct pipe_context *ctx,
@@ -590,6 +647,9 @@
 		(texture->flags & R600_RESOURCE_FLAG_TRANSFER))
 		use_staging_texture = FALSE;
 
+	if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY))
+		return NULL;
+
 	trans = CALLOC_STRUCT(r600_transfer);
 	if (trans == NULL)
 		return NULL;
@@ -648,7 +708,7 @@
 		if (usage & PIPE_TRANSFER_READ) {
 			r600_copy_to_staging_texture(ctx, trans);
 			/* Always referenced in the blit. */
-                        ctx->flush(ctx, NULL);
+			r600_flush(ctx, NULL, 0);
 		}
 		return &trans->transfer;
 	}
@@ -684,10 +744,11 @@
 void* r600_texture_transfer_map(struct pipe_context *ctx,
 				struct pipe_transfer* transfer)
 {
+	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
 	struct r600_bo *bo;
 	enum pipe_format format = transfer->resource->format;
-	struct radeon *radeon = (struct radeon *)ctx->screen->winsys;
+	struct radeon *radeon = rctx->screen->radeon;
 	unsigned offset = 0;
 	char *map;
 
@@ -706,7 +767,7 @@
 			transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
 	}
 
-	if (!(map = r600_bo_map(radeon, bo, transfer->usage, ctx))) {
+	if (!(map = r600_bo_map(radeon, bo, rctx->ctx.cs, transfer->usage))) {
 		return NULL;
 	}
 
@@ -717,7 +778,7 @@
 				 struct pipe_transfer* transfer)
 {
 	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
-	struct radeon *radeon = (struct radeon *)ctx->screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon;
 	struct r600_bo *bo;
 
 	if (rtransfer->staging_texture) {
@@ -754,11 +815,7 @@
 	};
 
 	if (swizzle_view) {
-		/* Combine two sets of swizzles. */
-		for (i = 0; i < 4; i++) {
-			swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
-				swizzle_format[swizzle_view[i]] : swizzle_view[i];
-		}
+		util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
 	} else {
 		memcpy(swizzle, swizzle_format, 4);
 	}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index f6eec24..de458cf 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -28,6 +28,32 @@
 
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK        0x7
 
+/* evergreen values */
+#define EG_RESOURCE_OFFSET                 0x00030000
+#define EG_RESOURCE_END                    0x00034000
+#define EG_LOOP_CONST_OFFSET               0x0003A200
+#define EG_LOOP_CONST_END                  0x0003A26C
+#define EG_BOOL_CONST_OFFSET               0x0003A500
+#define EG_BOOL_CONST_END                  0x0003A506
+
+#define R600_CONFIG_REG_OFFSET                 0X00008000
+#define R600_CONFIG_REG_END                    0X0000AC00
+#define R600_CONTEXT_REG_OFFSET                0X00028000
+#define R600_CONTEXT_REG_END                   0X00029000
+#define R600_ALU_CONST_OFFSET                  0X00030000
+#define R600_ALU_CONST_END                     0X00032000
+#define R600_RESOURCE_OFFSET                   0X00038000
+#define R600_RESOURCE_END                      0X0003C000
+#define R600_SAMPLER_OFFSET                    0X0003C000
+#define R600_SAMPLER_END                       0X0003CFF0
+#define R600_CTL_CONST_OFFSET                  0X0003CFF0
+#define R600_CTL_CONST_END                     0X0003E200
+#define R600_LOOP_CONST_OFFSET                 0X0003E200
+#define R600_LOOP_CONST_END                    0X0003E380
+#define R600_BOOL_CONST_OFFSET                 0X0003E380
+#define R600_BOOL_CONST_END                    0X00040000
+
+
 #define PKT3_NOP                               0x10
 #define PKT3_INDIRECT_BUFFER_END               0x17
 #define PKT3_SET_PREDICATION                   0x20
@@ -66,11 +92,38 @@
 #define PKT3_SET_SAMPLER                       0x6E
 #define PKT3_SET_CTL_CONST                     0x6F
 #define PKT3_SURFACE_BASE_UPDATE               0x73
+#define		SURFACE_BASE_UPDATE_DEPTH      (1 << 0)
+#define		SURFACE_BASE_UPDATE_COLOR(x)   (2 << (x))
+#define		SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
+
+#define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
+#define EVENT_TYPE_ZPASS_DONE                  0x15
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
+#define		EVENT_TYPE(x)                           ((x) << 0)
+#define		EVENT_INDEX(x)                          ((x) << 8)
+                /* 0 - any non-TS event
+		 * 1 - ZPASS_DONE
+		 * 2 - SAMPLE_PIPELINESTAT
+		 * 3 - SAMPLE_STREAMOUTSTAT*
+		 * 4 - *S_PARTIAL_FLUSH
+		 * 5 - TS events
+		 */
 
 #define PREDICATION_OP_CLEAR 0x0
 #define PREDICATION_OP_ZPASS 0x1
 #define PREDICATION_OP_PRIMCOUNT 0x2
 
+#define PRED_OP(x) ((x) << 16)
+
+#define PREDICATION_CONTINUE (1 << 31)
+
+#define PREDICATION_HINT_WAIT (0 << 12)
+#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
+
+#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
+#define PREDICATION_DRAW_VISIBLE (1 << 8)
+
 #define PKT_TYPE_S(x)                   (((x) & 0x3) << 30)
 #define PKT_TYPE_G(x)                   (((x) >> 30) & 0x3)
 #define PKT_TYPE_C                      0x3FFFFFFF
@@ -83,8 +136,9 @@
 #define PKT3_IT_OPCODE_S(x)             (((x) & 0xFF) << 8)
 #define PKT3_IT_OPCODE_G(x)             (((x) >> 8) & 0xFF)
 #define PKT3_IT_OPCODE_C                0xFFFF00FF
+#define PKT3_PRED_S(x)               (((x) >> 0) & 0x1)
 #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate))
 
 /* Registers */
 #define R_008C00_SQ_CONFIG                           0x00008C00
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index b3c7d14..74efe22 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -26,7 +26,7 @@
 #include "r600_asm.h"
 #include "r700_sq.h"
 
-void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
+void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
 {
 	unsigned count = (cf->ndw / 4) - 1;
 	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
@@ -36,7 +36,7 @@
 			S_SQ_CF_WORD1_COUNT_3(count >> 3);
 }
 
-int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
 {
 	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
 		S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
diff --git a/src/gallium/drivers/softpipe/Android.mk b/src/gallium/drivers/softpipe/Android.mk
new file mode 100644
index 0000000..d198fa5
--- /dev/null
+++ b/src/gallium/drivers/softpipe/Android.mk
@@ -0,0 +1,67 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+C_SOURCES = \
+	sp_fs_exec.c \
+	sp_fs_sse.c \
+	sp_clear.c \
+	sp_fence.c \
+	sp_flush.c \
+	sp_query.c \
+	sp_context.c \
+	sp_draw_arrays.c \
+	sp_prim_vbuf.c \
+	sp_quad_pipe.c \
+	sp_quad_stipple.c \
+	sp_quad_depth_test.c \
+	sp_quad_fs.c \
+	sp_quad_blend.c \
+	sp_screen.c \
+        sp_setup.c \
+	sp_state_blend.c \
+	sp_state_clip.c \
+	sp_state_derived.c \
+	sp_state_sampler.c \
+	sp_state_shader.c \
+	sp_state_so.c \
+	sp_state_rasterizer.c \
+	sp_state_surface.c \
+	sp_state_vertex.c \
+	sp_texture.c \
+	sp_tex_sample.c \
+	sp_tex_tile_cache.c \
+	sp_tile_cache.c \
+	sp_surface.c
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES)
+
+LOCAL_MODULE := libmesa_pipe_softpipe
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index ae3f00f..22e8a2e 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -60,7 +60,7 @@
       return;
 
 #if 0
-   softpipe_update_derived(softpipe); /* not needed?? */
+   softpipe_update_derived(softpipe, PIPE_PRIM_TRIANGLES); /* not needed?? */
 #endif
 
    if (buffers & PIPE_CLEAR_COLOR) {
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 2c43602..c97b033 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -35,6 +35,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_pstipple.h"
 #include "util/u_inlines.h"
 #include "tgsi/tgsi_exec.h"
 #include "vl/vl_decoder.h"
@@ -90,6 +91,14 @@
    struct softpipe_context *softpipe = softpipe_context( pipe );
    uint i;
 
+#if DO_PSTIPPLE_IN_HELPER_MODULE
+   if (softpipe->pstipple.sampler)
+      pipe->delete_sampler_state(pipe, softpipe->pstipple.sampler);
+
+   pipe_resource_reference(&softpipe->pstipple.texture, NULL);
+   pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, NULL);
+#endif
+
    if (softpipe->draw)
       draw_destroy( softpipe->draw );
 
@@ -346,6 +355,11 @@
 
    sp_init_surface_functions(softpipe);
 
+#if DO_PSTIPPLE_IN_HELPER_MODULE
+   /* create the polgon stipple sampler */
+   softpipe->pstipple.sampler = util_pstipple_create_sampler(&softpipe->pipe);
+#endif
+
    return &softpipe->pipe;
 
  fail:
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index a572ee8..410b0a6 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -38,8 +38,11 @@
 #include "sp_quad_pipe.h"
 
 
-/** Do polygon stipple in the driver here, or in the draw module? */
-#define DO_PSTIPPLE_IN_DRAW_MODULE 1
+/** Do polygon stipple in the draw module? */
+#define DO_PSTIPPLE_IN_DRAW_MODULE 0
+
+/** Do polygon stipple with the util module? */
+#define DO_PSTIPPLE_IN_HELPER_MODULE 1
 
 
 struct softpipe_vbuf_render;
@@ -64,6 +67,7 @@
    struct pipe_depth_stencil_alpha_state *depth_stencil;
    struct pipe_rasterizer_state *rasterizer;
    struct sp_fragment_shader *fs;
+   struct sp_fragment_shader_variant *fs_variant;
    struct sp_vertex_shader *vs;
    struct sp_geometry_shader *gs;
    struct sp_velems_state *velems;
@@ -143,6 +147,13 @@
    struct pipe_query *render_cond_query;
    uint render_cond_mode;
 
+   /** Polygon stipple items */
+   struct {
+      struct pipe_resource *texture;
+      struct pipe_sampler_state *sampler;
+      struct pipe_sampler_view *sampler_view;
+   } pstipple;
+
    /** Software quad rendering pipeline */
    struct {
       struct quad_stage *shade;
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 01b4ca9..69b5b96 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -64,7 +64,7 @@
    sp->reduced_api_prim = u_reduced_prim(mode);
 
    if (sp->dirty) {
-      softpipe_update_derived(sp);
+      softpipe_update_derived(sp, sp->reduced_api_prim);
    }
 
    softpipe_map_transfers(sp);
@@ -122,7 +122,7 @@
    sp->reduced_api_prim = u_reduced_prim(info->mode);
 
    if (sp->dirty) {
-      softpipe_update_derived(sp);
+      softpipe_update_derived(sp, sp->reduced_api_prim);
    }
 
    softpipe_map_transfers(sp);
diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h
index 4792ace..d46d7d5 100644
--- a/src/gallium/drivers/softpipe/sp_fs.h
+++ b/src/gallium/drivers/softpipe/sp_fs.h
@@ -31,17 +31,15 @@
 #ifndef SP_FS_H
 #define SP_FS_H
 
-struct sp_fragment_shader *
-softpipe_create_fs_exec(struct softpipe_context *softpipe,
-		       const struct pipe_shader_state *templ);
 
-struct sp_fragment_shader *
-softpipe_create_fs_sse(struct softpipe_context *softpipe,
-		       const struct pipe_shader_state *templ);
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_exec(struct softpipe_context *softpipe,
+                                const struct pipe_shader_state *templ);
 
-struct sp_fragment_shader *
-softpipe_create_fs_llvm(struct softpipe_context *softpipe,
-			const struct pipe_shader_state *templ);
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
+                               const struct pipe_shader_state *templ);
+
 
 struct tgsi_interp_coef;
 struct tgsi_exec_vector;
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 346e1b4..779b8c4 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -42,25 +42,25 @@
 
 
 /**
- * Subclass of sp_fragment_shader
+ * Subclass of sp_fragment_shader_variant
  */
 struct sp_exec_fragment_shader
 {
-   struct sp_fragment_shader base;
+   struct sp_fragment_shader_variant base;
    /* No other members for now */
 };
 
 
 /** cast wrapper */
 static INLINE struct sp_exec_fragment_shader *
-sp_exec_fragment_shader(const struct sp_fragment_shader *base)
+sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var)
 {
-   return (struct sp_exec_fragment_shader *) base;
+   return (struct sp_exec_fragment_shader *) var;
 }
 
 
 static void
-exec_prepare( const struct sp_fragment_shader *base,
+exec_prepare( const struct sp_fragment_shader_variant *var,
 	      struct tgsi_exec_machine *machine,
 	      struct tgsi_sampler **samplers )
 {
@@ -68,9 +68,9 @@
     * Bind tokens/shader to the interpreter's machine state.
     * Avoid redundant binding.
     */
-   if (machine->Tokens != base->shader.tokens) {
+   if (machine->Tokens != var->tokens) {
       tgsi_exec_machine_bind_shader( machine,
-                                     base->shader.tokens,
+                                     var->tokens,
                                      PIPE_MAX_SAMPLERS,
                                      samplers );
    }
@@ -118,7 +118,7 @@
  * interface:
  */
 static unsigned 
-exec_run( const struct sp_fragment_shader *base,
+exec_run( const struct sp_fragment_shader_variant *var,
 	  struct tgsi_exec_machine *machine,
 	  struct quad_header *quad )
 {
@@ -136,9 +136,9 @@
 
    /* store outputs */
    {
-      const ubyte *sem_name = base->info.output_semantic_name;
-      const ubyte *sem_index = base->info.output_semantic_index;
-      const uint n = base->info.num_outputs;
+      const ubyte *sem_name = var->info.output_semantic_name;
+      const ubyte *sem_index = var->info.output_semantic_index;
+      const uint n = var->info.num_outputs;
       uint i;
       for (i = 0; i < n; i++) {
          switch (sem_name[i]) {
@@ -180,29 +180,23 @@
 
 
 static void 
-exec_delete( struct sp_fragment_shader *base )
+exec_delete( struct sp_fragment_shader_variant *var )
 {
-   FREE((void *) base->shader.tokens);
-   FREE(base);
+   FREE( (void *) var->tokens );
+   FREE(var);
 }
 
 
-struct sp_fragment_shader *
-softpipe_create_fs_exec(struct softpipe_context *softpipe,
-			const struct pipe_shader_state *templ)
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_exec(struct softpipe_context *softpipe,
+                                const struct pipe_shader_state *templ)
 {
    struct sp_exec_fragment_shader *shader;
 
-   /* Decide whether we'll be codegenerating this shader and if so do
-    * that now.
-    */
-
    shader = CALLOC_STRUCT(sp_exec_fragment_shader);
    if (!shader)
       return NULL;
 
-   /* we need to keep a local copy of the tokens */
-   shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens);
    shader->base.prepare = exec_prepare;
    shader->base.run = exec_run;
    shader->base.delete = exec_delete;
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 5b18cd0..c873af1 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -48,11 +48,11 @@
 
 
 /**
- * Subclass of sp_fragment_shader
+ * Subclass of sp_fragment_shader_variant
  */
 struct sp_sse_fragment_shader
 {
-   struct sp_fragment_shader base;
+   struct sp_fragment_shader_variant base;
    struct x86_function sse2_program;
    tgsi_sse2_fs_function func;
    float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
@@ -61,14 +61,14 @@
 
 /** cast wrapper */
 static INLINE struct sp_sse_fragment_shader *
-sp_sse_fragment_shader(const struct sp_fragment_shader *base)
+sp_sse_fragment_shader(const struct sp_fragment_shader_variant *base)
 {
    return (struct sp_sse_fragment_shader *) base;
 }
 
 
 static void
-fs_sse_prepare( const struct sp_fragment_shader *base,
+fs_sse_prepare( const struct sp_fragment_shader_variant *base,
 		struct tgsi_exec_machine *machine,
 		struct tgsi_sampler **samplers )
 {
@@ -119,7 +119,7 @@
  * TODO: process >1 quad at a time
  */
 static unsigned 
-fs_sse_run( const struct sp_fragment_shader *base,
+fs_sse_run( const struct sp_fragment_shader_variant *base,
 	    struct tgsi_exec_machine *machine,
 	    struct quad_header *quad )
 {
@@ -189,7 +189,7 @@
 
 
 static void 
-fs_sse_delete( struct sp_fragment_shader *base )
+fs_sse_delete( struct sp_fragment_shader_variant *base )
 {
    struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base);
 
@@ -198,9 +198,9 @@
 }
 
 
-struct sp_fragment_shader *
-softpipe_create_fs_sse(struct softpipe_context *softpipe,
-		       const struct pipe_shader_state *templ)
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
+                               const struct pipe_shader_state *templ)
 {
    struct sp_sse_fragment_shader *shader;
 
@@ -226,7 +226,6 @@
       return NULL;
    }
 
-   shader->base.shader.tokens = NULL; /* don't hold reference to templ->tokens */
    shader->base.prepare = fs_sse_prepare;
    shader->base.run = fs_sse_run;
    shader->base.delete = fs_sse_delete;
@@ -239,9 +238,9 @@
 
 /* Maybe put this variant in the header file.
  */
-struct sp_fragment_shader *
-softpipe_create_fs_sse(struct softpipe_context *softpipe,
-		       const struct pipe_shader_state *templ)
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
+                               const struct pipe_shader_state *templ)
 {
    return NULL;
 }
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 76cfc0b..c881194 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -240,6 +240,7 @@
    static const float one[4] = { 1, 1, 1, 1 };
    struct softpipe_context *softpipe = qs->softpipe;
    float source[4][QUAD_SIZE] = { { 0 } };
+   float blend_dest[4][QUAD_SIZE];
 
    /*
     * Compute src/first term RGB
@@ -480,79 +481,85 @@
       assert(0 && "invalid alpha src factor");
    }
 
+   /* Save the original dest for use in masking */
+   VEC4_COPY(blend_dest[0], dest[0]);
+   VEC4_COPY(blend_dest[1], dest[1]);
+   VEC4_COPY(blend_dest[2], dest[2]);
+   VEC4_COPY(blend_dest[3], dest[3]);
+
 
    /*
-    * Compute dest/second term RGB
+    * Compute blend_dest/second term RGB
     */
    switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
    case PIPE_BLENDFACTOR_ONE:
-      /* dest = dest * 1   NO-OP, leave dest as-is */
+      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       break;
    case PIPE_BLENDFACTOR_SRC_COLOR:
-      VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
-      VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
-      VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
+      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA:
-      VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
-      VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
-      VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
+      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
+      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
+      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
       break;
    case PIPE_BLENDFACTOR_DST_ALPHA:
       if (has_dst_alpha) {
-         VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
-         VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
-         VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
+         VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
+         VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
+         VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
       }
       else {
-         /* dest = dest * 1   NO-OP, leave dest as-is */
+         /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       }
       break;
    case PIPE_BLENDFACTOR_DST_COLOR:
-      VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
-      VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
-      VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
+      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
       if (has_dst_alpha) {
          const float *alpha = quadColor[3];
          float diff[4], temp[4];
-         VEC4_SUB(diff, one, dest[3]);
+         VEC4_SUB(diff, one, blend_dest[3]);
          VEC4_MIN(temp, alpha, diff);
-         VEC4_MUL(dest[0], quadColor[0], temp); /* R */
-         VEC4_MUL(dest[1], quadColor[1], temp); /* G */
-         VEC4_MUL(dest[2], quadColor[2], temp); /* B */
+         VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */
+         VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */
+         VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */
       }
       else {
-         VEC4_COPY(dest[0], zero); /* R */
-         VEC4_COPY(dest[1], zero); /* G */
-         VEC4_COPY(dest[2], zero); /* B */
+         VEC4_COPY(blend_dest[0], zero); /* R */
+         VEC4_COPY(blend_dest[1], zero); /* G */
+         VEC4_COPY(blend_dest[2], zero); /* B */
       }
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
    {
       float comp[4];
       VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
-      VEC4_MUL(dest[0], dest[0], comp); /* R */
+      VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
       VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
-      VEC4_MUL(dest[1], dest[1], comp); /* G */
+      VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
       VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
-      VEC4_MUL(dest[2], dest[2], comp); /* B */
+      VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_CONST_ALPHA:
    {
       float comp[4];
       VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
-      VEC4_MUL(dest[0], dest[0], comp); /* R */
-      VEC4_MUL(dest[1], dest[1], comp); /* G */
-      VEC4_MUL(dest[2], dest[2], comp); /* B */
+      VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_ZERO:
-      VEC4_COPY(dest[0], zero); /* R */
-      VEC4_COPY(dest[1], zero); /* G */
-      VEC4_COPY(dest[2], zero); /* B */
+      VEC4_COPY(blend_dest[0], zero); /* R */
+      VEC4_COPY(blend_dest[1], zero); /* G */
+      VEC4_COPY(blend_dest[2], zero); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC1_COLOR:
    case PIPE_BLENDFACTOR_SRC1_ALPHA:
@@ -563,45 +570,45 @@
    {
       float inv_comp[4];
       VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
-      VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+      VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
       VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
-      VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+      VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
       VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
-      VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+      VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    {
       float one_minus_alpha[QUAD_SIZE];
       VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-      VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
-      VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
-      VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
+      VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
       if (has_dst_alpha) {
          float inv_comp[4];
-         VEC4_SUB(inv_comp, one, dest[3]); /* A */
-         VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
-         VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
-         VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
+         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
+         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
+         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
       }
       else {
-         VEC4_COPY(dest[0], zero); /* R */
-         VEC4_COPY(dest[1], zero); /* G */
-         VEC4_COPY(dest[2], zero); /* B */
+         VEC4_COPY(blend_dest[0], zero); /* R */
+         VEC4_COPY(blend_dest[1], zero); /* G */
+         VEC4_COPY(blend_dest[2], zero); /* B */
       }
    break;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
    {
       float inv_comp[4];
-      VEC4_SUB(inv_comp, one, dest[0]); /* R */
-      VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
-      VEC4_SUB(inv_comp, one, dest[1]); /* G */
-      VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
-      VEC4_SUB(inv_comp, one, dest[2]); /* B */
-      VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
+      VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
+      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
+      VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
+      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
+      VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
+      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
@@ -609,22 +616,22 @@
       float inv_comp[4];
       /* R */
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
-      VEC4_MUL(dest[0], dest[0], inv_comp);
+      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
       /* G */
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
-      VEC4_MUL(dest[1], dest[1], inv_comp);
+      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
       /* B */
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
-      VEC4_MUL(dest[2], dest[2], inv_comp);
+      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
    }
    break;
    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    {
       float inv_comp[4];
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
-      VEC4_MUL(dest[0], dest[0], inv_comp);
-      VEC4_MUL(dest[1], dest[1], inv_comp);
-      VEC4_MUL(dest[2], dest[2], inv_comp);
+      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
+      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
+      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
    }
    break;
    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
@@ -637,29 +644,29 @@
    }
 
    /*
-    * Compute dest/second term A
+    * Compute blend_dest/second term A
     */
    switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
    case PIPE_BLENDFACTOR_ONE:
-      /* dest = dest * 1   NO-OP, leave dest as-is */
+      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       break;
    case PIPE_BLENDFACTOR_SRC_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_SRC_ALPHA:
-      VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
+      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
       break;
    case PIPE_BLENDFACTOR_DST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_DST_ALPHA:
       if (has_dst_alpha) {
-         VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
+         VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
       }
       else {
-         /* dest = dest * 1   NO-OP, leave dest as-is */
+         /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       }
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      /* dest = dest * 1   NO-OP, leave dest as-is */
+      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
       /* fall-through */
@@ -667,11 +674,11 @@
    {
       float comp[4];
       VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
-      VEC4_MUL(dest[3], dest[3], comp); /* A */
+      VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
    }
    break;
    case PIPE_BLENDFACTOR_ZERO:
-      VEC4_COPY(dest[3], zero); /* A */
+      VEC4_COPY(blend_dest[3], zero); /* A */
       break;
    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
       /* fall-through */
@@ -679,7 +686,7 @@
    {
       float one_minus_alpha[QUAD_SIZE];
       VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
+      VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
    }
    break;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
@@ -687,11 +694,11 @@
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
       if (has_dst_alpha) {
          float inv_comp[4];
-         VEC4_SUB(inv_comp, one, dest[3]); /* A */
-         VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
+         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
+         VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
       }
       else {
-         VEC4_COPY(dest[3], zero); /* A */
+         VEC4_COPY(blend_dest[3], zero); /* A */
       }
       break;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
@@ -700,7 +707,7 @@
    {
       float inv_comp[4];
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
-      VEC4_MUL(dest[3], dest[3], inv_comp);
+      VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
    }
    break;
    default:
@@ -712,29 +719,29 @@
     */
    switch (softpipe->blend->rt[blend_index].rgb_func) {
    case PIPE_BLEND_ADD:
-      VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLEND_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_SUB_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLEND_REVERSE_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
-      VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
-      VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
+      VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */
+      VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */
+      VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */
       break;
    case PIPE_BLEND_MIN:
-      VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLEND_MAX:
-      VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    default:
       assert(0 && "invalid rgb blend func");
@@ -745,19 +752,19 @@
     */
    switch (softpipe->blend->rt[blend_index].alpha_func) {
    case PIPE_BLEND_ADD:
-      VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    case PIPE_BLEND_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    case PIPE_BLEND_REVERSE_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
+      VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */
       break;
    case PIPE_BLEND_MIN:
-      VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    case PIPE_BLEND_MAX:
-      VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    default:
       assert(0 && "invalid alpha blend func");
@@ -797,7 +804,7 @@
    unsigned cbuf;
    boolean write_all;
 
-   write_all = softpipe->fs->color0_writes_all_cbufs;
+   write_all = softpipe->fs_variant->info.color0_writes_all_cbufs;
 
    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) 
    {
@@ -810,17 +817,25 @@
                               quads[0]->input.y0);
       boolean has_dst_alpha
          = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format);
-      uint q, i, j, qbuf;
-
-      qbuf = write_all ? 0 : cbuf;
+      uint q, i, j;
 
       for (q = 0; q < nr; q++) {
          struct quad_header *quad = quads[q];
          float (*quadColor)[4];
+         float temp_quad_color[QUAD_SIZE][4];
          const int itx = (quad->input.x0 & (TILE_SIZE-1));
          const int ity = (quad->input.y0 & (TILE_SIZE-1));
 
-         quadColor = quad->output.color[qbuf];
+         if (write_all) {
+            for (j = 0; j < QUAD_SIZE; j++) {
+               for (i = 0; i < 4; i++) {
+                  temp_quad_color[i][j] = quad->output.color[0][i][j];
+               }
+            }
+            quadColor = temp_quad_color;
+         } else {
+            quadColor = quad->output.color[cbuf];
+         }
 
          /* get/swizzle dest colors
           */
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index 15f3a8f..a349f0d 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -727,9 +727,9 @@
                           unsigned nr)
 {
    unsigned i, pass = 0;
-   const struct sp_fragment_shader *fs = qs->softpipe->fs;
-   boolean interp_depth = !fs->info.writes_z;
-   boolean shader_stencil_ref = fs->info.writes_stencil;
+   const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
+   boolean interp_depth = !fsInfo->writes_z;
+   boolean shader_stencil_ref = fsInfo->writes_stencil;
    struct depth_data data;
 
    data.use_shader_stencil_refs = FALSE;
@@ -838,7 +838,9 @@
                   struct quad_header *quads[],
                   unsigned nr)
 {
-   boolean interp_depth = !qs->softpipe->fs->info.writes_z;
+   const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
+
+   boolean interp_depth = !fsInfo->writes_z;
 
    boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
 
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 90f4787..d74d6d4 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -74,7 +74,7 @@
    struct tgsi_exec_machine *machine = softpipe->fs_machine;
 
    /* run shader */
-   return softpipe->fs->run( softpipe->fs, machine, quad );
+   return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad );
 }
 
 
@@ -140,10 +140,10 @@
 {
    struct softpipe_context *softpipe = qs->softpipe;
 
-   softpipe->fs->prepare( softpipe->fs, 
-			  softpipe->fs_machine,
-			  (struct tgsi_sampler **)
-                             softpipe->tgsi.frag_samplers_list );
+   softpipe->fs_variant->prepare( softpipe->fs_variant, 
+                                  softpipe->fs_machine,
+                                  (struct tgsi_sampler **)
+                                  softpipe->tgsi.frag_samplers_list );
 
    qs->next->begin(qs->next);
 }
diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c
index 2cfd02a..0c4506a 100644
--- a/src/gallium/drivers/softpipe/sp_quad_pipe.c
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c
@@ -30,9 +30,9 @@
 #include "sp_state.h"
 #include "pipe/p_shader_tokens.h"
 
+
 static void
-sp_push_quad_first( struct softpipe_context *sp,
-                    struct quad_stage *quad )
+insert_stage_at_head(struct softpipe_context *sp, struct quad_stage *quad)
 {
    quad->next = sp->quad.first;
    sp->quad.first = quad;
@@ -46,24 +46,24 @@
       sp->depth_stencil->depth.enabled &&
       sp->framebuffer.zsbuf &&
       !sp->depth_stencil->alpha.enabled &&
-      !sp->fs->info.uses_kill &&
-      !sp->fs->info.writes_z &&
-      !sp->fs->info.writes_stencil;
+      !sp->fs_variant->info.uses_kill &&
+      !sp->fs_variant->info.writes_z &&
+      !sp->fs_variant->info.writes_stencil;
 
    sp->quad.first = sp->quad.blend;
 
    if (early_depth_test) {
-      sp_push_quad_first( sp, sp->quad.shade );
-      sp_push_quad_first( sp, sp->quad.depth_test );
+      insert_stage_at_head( sp, sp->quad.shade );
+      insert_stage_at_head( sp, sp->quad.depth_test );
    }
    else {
-      sp_push_quad_first( sp, sp->quad.depth_test );
-      sp_push_quad_first( sp, sp->quad.shade );
+      insert_stage_at_head( sp, sp->quad.depth_test );
+      insert_stage_at_head( sp, sp->quad.shade );
    }
 
-#if !DO_PSTIPPLE_IN_DRAW_MODULE
+#if !DO_PSTIPPLE_IN_DRAW_MODULE && !DO_PSTIPPLE_IN_HELPER_MODULE
    if (sp->rasterizer->poly_stipple_enable)
-      sp_push_quad_first( sp, sp->quad.pstipple );
+      insert_stage_at_head( sp, sp->quad.pstipple );
 #endif
 }
 
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 4ae69c1..88f4257 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -157,7 +157,7 @@
       /*os_get_time is in microseconds*/
       td.frequency = 1000000;
       td.disjoint = FALSE;
-      memcpy(vresult, &sq->so,
+      memcpy(vresult, &td,
              sizeof(struct pipe_query_data_timestamp_disjoint));
    }
       break;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 1e58d27..960ab8c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -185,6 +185,8 @@
    case PIPE_VIDEO_CAP_MAX_WIDTH:
    case PIPE_VIDEO_CAP_MAX_HEIGHT:
       return vl_video_buffer_max_size(screen);
+   case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+      return vl_num_buffers_desired(screen, profile);
    default:
       return 0;
    }
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 0ce28f4..656d001 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -568,17 +568,18 @@
 static void
 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
 {
-   struct sp_fragment_shader* spfs = setup->softpipe->fs;
+   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
+
    /*X*/
-   setup->coef[slot].a0[0] = spfs->pixel_center_integer ? 0.0 : 0.5;
+   setup->coef[slot].a0[0] = fsInfo->pixel_center_integer ? 0.0 : 0.5;
    setup->coef[slot].dadx[0] = 1.0;
    setup->coef[slot].dady[0] = 0.0;
    /*Y*/
    setup->coef[slot].a0[1] =
-		   (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
-		   + (spfs->pixel_center_integer ? 0.0 : 0.5);
+		   (fsInfo->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
+		   + (fsInfo->pixel_center_integer ? 0.0 : 0.5);
    setup->coef[slot].dadx[1] = 0.0;
-   setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0;
+   setup->coef[slot].dady[1] = fsInfo->origin_lower_left ? -1.0 : 1.0;
    /*Z*/
    setup->coef[slot].a0[2] = setup->posCoef.a0[2];
    setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
@@ -599,7 +600,7 @@
 setup_tri_coefficients(struct setup_context *setup)
 {
    struct softpipe_context *softpipe = setup->softpipe;
-   const struct sp_fragment_shader *spfs = softpipe->fs;
+   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
    uint fragSlot;
    float v[3];
@@ -618,7 +619,7 @@
 
    /* setup interpolation for all the remaining attributes:
     */
-   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
@@ -632,7 +633,7 @@
             tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
                                        setup->vmid[vertSlot][j],
                                        setup->vmax[vertSlot][j],
-                                       spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j),
+                                       fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
                                        v);
             tri_linear_coeff(setup, &setup->coef[fragSlot], j, v);
          }
@@ -642,7 +643,7 @@
             tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
                                        setup->vmid[vertSlot][j],
                                        setup->vmax[vertSlot][j],
-                                       spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j),
+                                       fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
                                        v);
             tri_persp_coeff(setup, &setup->coef[fragSlot], j, v);
          }
@@ -654,7 +655,7 @@
          assert(0);
       }
 
-      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
+      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
          /* convert 0 to 1.0 and 1 to -1.0 */
          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
          setup->coef[fragSlot].dadx[0] = 0.0;
@@ -939,7 +940,7 @@
                         const float (*v1)[4])
 {
    struct softpipe_context *softpipe = setup->softpipe;
-   const struct sp_fragment_shader *spfs = softpipe->fs;
+   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
    uint fragSlot;
    float area;
@@ -974,7 +975,7 @@
 
    /* setup interpolation for all the remaining attributes:
     */
-   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
@@ -987,7 +988,7 @@
          for (j = 0; j < NUM_CHANNELS; j++) {
             line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
                                         setup->vmax[vertSlot][j],
-                                        spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j),
+                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
                                         v);
             line_linear_coeff(setup, &setup->coef[fragSlot], j, v);
          }
@@ -996,7 +997,7 @@
          for (j = 0; j < NUM_CHANNELS; j++) {
             line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
                                         setup->vmax[vertSlot][j],
-                                        spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j),
+                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
                                         v);
             line_persp_coeff(setup, &setup->coef[fragSlot], j, v);
          }
@@ -1008,7 +1009,7 @@
          assert(0);
       }
 
-      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
+      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
          /* convert 0 to 1.0 and 1 to -1.0 */
          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
          setup->coef[fragSlot].dadx[0] = 0.0;
@@ -1188,7 +1189,7 @@
                const float (*v0)[4])
 {
    struct softpipe_context *softpipe = setup->softpipe;
-   const struct sp_fragment_shader *spfs = softpipe->fs;
+   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
    const int sizeAttr = setup->softpipe->psize_slot;
    const float size
       = sizeAttr > 0 ? v0[sizeAttr][0]
@@ -1232,7 +1233,7 @@
    const_coeff(setup, &setup->posCoef, 0, 2);
    const_coeff(setup, &setup->posCoef, 0, 3);
 
-   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
@@ -1255,7 +1256,7 @@
          assert(0);
       }
 
-      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
+      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
          /* convert 0 to 1.0 and 1 to -1.0 */
          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
          setup->coef[fragSlot].dadx[0] = 0.0;
@@ -1396,7 +1397,7 @@
    struct softpipe_context *sp = setup->softpipe;
 
    if (sp->dirty) {
-      softpipe_update_derived(sp);
+      softpipe_update_derived(sp, sp->reduced_api_prim);
    }
 
    /* Note: nr_attrs is only used for debugging (vertex printing) */
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index bb19f8c..ec4c8cf 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -60,34 +60,45 @@
 struct vertex_info;
 
 
-/**
- * Subclass of pipe_shader_state (though it doesn't really need to be).
- *
- * This is starting to look an awful lot like a quad pipeline stage...
- */
-struct sp_fragment_shader {
-   struct pipe_shader_state shader;
+struct sp_fragment_shader_variant_key
+{
+   boolean polygon_stipple;
+};
 
+
+struct sp_fragment_shader_variant
+{
+   const struct tgsi_token *tokens;
+   struct sp_fragment_shader_variant_key key;
    struct tgsi_shader_info info;
 
+   unsigned stipple_sampler_unit;
+
+   /* See comments about this elsewhere */
+#if 0
    struct draw_fragment_shader *draw_shader;
+#endif
 
-   boolean origin_lower_left; /**< fragment shader uses lower left position origin? */
-   boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */
-   boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */
-   void (*prepare)( const struct sp_fragment_shader *shader,
-		    struct tgsi_exec_machine *machine,
-		    struct tgsi_sampler **samplers);
+   void (*prepare)(const struct sp_fragment_shader_variant *shader,
+		   struct tgsi_exec_machine *machine,
+		   struct tgsi_sampler **samplers);
 
-   /* Run the shader - this interface will get cleaned up in the
-    * future:
-    */
-   unsigned (*run)( const struct sp_fragment_shader *shader,
-		    struct tgsi_exec_machine *machine,
-		    struct quad_header *quad );
+   unsigned (*run)(const struct sp_fragment_shader_variant *shader,
+		   struct tgsi_exec_machine *machine,
+		   struct quad_header *quad);
+
+   /* Deletes this instance of the object */
+   void (*delete)(struct sp_fragment_shader_variant *shader);
+
+   struct sp_fragment_shader_variant *next;
+};
 
 
-   void (*delete)( struct sp_fragment_shader * );
+/** Subclass of pipe_shader_state */
+struct sp_fragment_shader {
+   struct pipe_shader_state shader;
+   struct sp_fragment_shader_variant *variants;
+   struct draw_fragment_shader *draw_shader;
 };
 
 
@@ -141,7 +152,7 @@
                                const struct pipe_framebuffer_state *);
 
 void
-softpipe_update_derived( struct softpipe_context *softpipe );
+softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim);
 
 void
 softpipe_draw_vbo(struct pipe_context *pipe,
@@ -170,4 +181,16 @@
 softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe);
 
 
+struct sp_fragment_shader_variant *
+softpipe_find_fs_variant(struct softpipe_context *softpipe,
+                         struct sp_fragment_shader *fs,
+                         const struct sp_fragment_shader_variant_key *key);
+
+
+struct sp_fragment_shader_variant *
+softpipe_find_fs_variant(struct softpipe_context *softpipe,
+                         struct sp_fragment_shader *fs,
+                         const struct sp_fragment_shader_variant_key *key);
+
+
 #endif
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index f9590eb..fd68808 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -25,8 +25,10 @@
  * 
  **************************************************************************/
 
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_pstipple.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vertex.h"
@@ -64,7 +66,7 @@
 
    if (vinfo->num_attribs == 0) {
       /* compute vertex layout now */
-      const struct sp_fragment_shader *spfs = softpipe->fs;
+      const struct tgsi_shader_info *fsInfo = &softpipe->fs_variant->info;
       struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
       const uint num = draw_num_shader_outputs(softpipe->draw);
       uint i;
@@ -84,11 +86,11 @@
        * from the vertex shader.
        */
       vinfo->num_attribs = 0;
-      for (i = 0; i < spfs->info.num_inputs; i++) {
+      for (i = 0; i < fsInfo->num_inputs; i++) {
          int src;
          enum interp_mode interp;
 
-         switch (spfs->info.input_interpolate[i]) {
+         switch (fsInfo->input_interpolate[i]) {
          case TGSI_INTERPOLATE_CONSTANT:
             interp = INTERP_CONSTANT;
             break;
@@ -103,7 +105,7 @@
             interp = INTERP_LINEAR;
          }
 
-         switch (spfs->info.input_semantic_name[i]) {
+         switch (fsInfo->input_semantic_name[i]) {
          case TGSI_SEMANTIC_POSITION:
             interp = INTERP_POS;
             break;
@@ -117,8 +119,8 @@
 
          /* this includes texcoords and varying vars */
          src = draw_find_shader_output(softpipe->draw,
-                                       spfs->info.input_semantic_name[i],
-                                       spfs->info.input_semantic_index[i]);
+                                       fsInfo->input_semantic_name[i],
+                                       fsInfo->input_semantic_index[i]);
          draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
       }
 
@@ -241,10 +243,101 @@
 }
 
 
+static void
+update_fragment_shader(struct softpipe_context *softpipe, unsigned prim)
+{
+   struct sp_fragment_shader_variant_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   if (prim == PIPE_PRIM_TRIANGLES)
+      key.polygon_stipple = softpipe->rasterizer->poly_stipple_enable;
+
+   if (softpipe->fs) {
+      softpipe->fs_variant = softpipe_find_fs_variant(softpipe,
+                                                      softpipe->fs, &key);
+   }
+   else {
+      softpipe->fs_variant = NULL;
+   }
+
+   /* This would be the logical place to pass the fragment shader
+    * to the draw module.  However, doing this here, during state
+    * validation, causes problems with the 'draw' module helpers for
+    * wide/AA/stippled lines.
+    * In principle, the draw's fragment shader should be per-variant
+    * but that doesn't work.  So we use a single draw fragment shader
+    * per fragment shader, not per variant.
+    */
+#if 0
+   if (softpipe->fs_variant) {
+      draw_bind_fragment_shader(softpipe->draw,
+                                softpipe->fs_variant->draw_shader);
+   }
+   else {
+      draw_bind_fragment_shader(softpipe->draw, NULL);
+   }
+#endif
+}
+
+
+/**
+ * This should be called when the polygon stipple pattern changes.
+ * We create a new texture from the stipple pattern and create a new
+ * sampler view.
+ */
+static void
+update_polygon_stipple_pattern(struct softpipe_context *softpipe)
+{
+   struct pipe_resource *tex;
+   struct pipe_sampler_view *view;
+
+   tex = util_pstipple_create_stipple_texture(&softpipe->pipe,
+                                              softpipe->poly_stipple.stipple);
+   pipe_resource_reference(&softpipe->pstipple.texture, tex);
+
+   view = util_pstipple_create_sampler_view(&softpipe->pipe, tex);
+   pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, view);
+}
+
+
+/**
+ * Should be called when polygon stipple is enabled/disabled or when
+ * the fragment shader changes.
+ * We add/update the fragment sampler and sampler views to sample from
+ * the polygon stipple texture.  The texture unit that we use depends on
+ * the fragment shader (we need to use a unit not otherwise used by the
+ * shader).
+ */
+static void
+update_polygon_stipple_enable(struct softpipe_context *softpipe, unsigned prim)
+{
+   if (prim == PIPE_PRIM_TRIANGLES &&
+       softpipe->fs_variant->key.polygon_stipple) {
+      const unsigned unit = softpipe->fs_variant->stipple_sampler_unit;
+
+      assert(unit >= softpipe->num_fragment_samplers);
+
+      /* sampler state */
+      softpipe->fragment_samplers[unit] = softpipe->pstipple.sampler;
+
+      /* sampler view */
+      pipe_sampler_view_reference(&softpipe->fragment_sampler_views[unit],
+                                  softpipe->pstipple.sampler_view);
+
+      sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[unit],
+                                         softpipe->pstipple.sampler_view);
+
+      softpipe->dirty |= SP_NEW_SAMPLER;
+   }
+}
+
+
 /* Hopefully this will remain quite simple, otherwise need to pull in
  * something like the state tracker mechanism.
  */
-void softpipe_update_derived( struct softpipe_context *softpipe )
+void
+softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim)
 {
    struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen);
 
@@ -254,7 +347,24 @@
       softpipe->tex_timestamp = sp_screen->timestamp;
       softpipe->dirty |= SP_NEW_TEXTURE;
    }
-      
+
+#if DO_PSTIPPLE_IN_HELPER_MODULE
+   if (softpipe->dirty & SP_NEW_STIPPLE)
+      /* before updating samplers! */
+      update_polygon_stipple_pattern(softpipe);
+#endif
+
+   if (softpipe->dirty & (SP_NEW_RASTERIZER |
+                          SP_NEW_FS))
+      update_fragment_shader(softpipe, prim);
+
+#if DO_PSTIPPLE_IN_HELPER_MODULE
+   if (softpipe->dirty & (SP_NEW_RASTERIZER |
+                          SP_NEW_STIPPLE |
+                          SP_NEW_FS))
+      update_polygon_stipple_enable(softpipe, prim);
+#endif
+
    if (softpipe->dirty & (SP_NEW_SAMPLER |
                           SP_NEW_TEXTURE |
                           SP_NEW_FS | 
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index 60331bc..16023c9 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -373,8 +373,9 @@
       }
    }
 
-   for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) {
+   for (i = 0; i <= softpipe->fs_variant->info.file_max[TGSI_FILE_SAMPLER]; i++) {
       if (softpipe->fragment_samplers[i]) {
+         assert(softpipe->fragment_sampler_views[i]->texture);
          softpipe->tgsi.frag_samplers_list[i] =
             get_sampler_variant( i,
                                  sp_sampler(softpipe->fragment_samplers[i]),
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index 3dec5de..da89527 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -33,6 +33,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
+#include "util/u_pstipple.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vs.h"
 #include "draw/draw_gs.h"
@@ -42,46 +43,105 @@
 #include "tgsi/tgsi_parse.h"
 
 
+/**
+ * Create a new fragment shader variant.
+ */
+static struct sp_fragment_shader_variant *
+create_fs_variant(struct softpipe_context *softpipe,
+                  struct sp_fragment_shader *fs,
+                  const struct sp_fragment_shader_variant_key *key)
+{
+   struct sp_fragment_shader_variant *var;
+   struct pipe_shader_state *stipple_fs = NULL, *curfs = &fs->shader;
+   unsigned unit = 0;
+
+   if (key->polygon_stipple) {
+      /* get new shader that implements polygon stippling */
+      stipple_fs = util_pstipple_create_fragment_shader(&softpipe->pipe,
+                                                        curfs, &unit);
+      curfs = stipple_fs;
+   }
+
+   /* codegen, create variant object */
+   var = softpipe_create_fs_variant_sse(softpipe, curfs);
+   if (!var) {
+      var = softpipe_create_fs_variant_exec(softpipe, curfs);
+   }
+
+   if (var) {
+      var->key = *key;
+      var->tokens = tgsi_dup_tokens(curfs->tokens);
+      var->stipple_sampler_unit = unit;
+
+      tgsi_scan_shader(var->tokens, &var->info);
+
+      /* See comments elsewhere about draw fragment shaders */
+#if 0
+      /* draw's fs state */
+      var->draw_shader = draw_create_fragment_shader(softpipe->draw,
+                                                     &fs->shader);
+      if (!var->draw_shader) {
+         var->delete(var);
+         FREE((void *) var->tokens);
+         return NULL;
+      }
+#endif
+
+      /* insert variant into linked list */
+      var->next = fs->variants;
+      fs->variants = var;
+   }
+
+   if (stipple_fs) {
+      free((void *) stipple_fs->tokens);
+      free(stipple_fs);
+   }
+
+   return var;
+}
+
+
+struct sp_fragment_shader_variant *
+softpipe_find_fs_variant(struct softpipe_context *sp,
+                         struct sp_fragment_shader *fs,
+                         const struct sp_fragment_shader_variant_key *key)
+{
+   struct sp_fragment_shader_variant *var;
+
+   for (var = fs->variants; var; var = var->next) {
+      if (memcmp(&var->key, key, sizeof(*key)) == 0) {
+         /* found it */
+         return var;
+      }
+   }
+
+   return create_fs_variant(sp, fs, key);
+}
+
+
 static void *
 softpipe_create_fs_state(struct pipe_context *pipe,
                          const struct pipe_shader_state *templ)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
-   struct sp_fragment_shader *state;
-   unsigned i;
+   struct sp_fragment_shader *state = CALLOC_STRUCT(sp_fragment_shader);
 
    /* debug */
    if (softpipe->dump_fs) 
       tgsi_dump(templ->tokens, 0);
 
-   /* codegen */
-   state = softpipe_create_fs_sse( softpipe, templ );
-   if (!state) {
-      state = softpipe_create_fs_exec( softpipe, templ );
-   }
-
-   if (!state)
-      return NULL;
+   /* we need to keep a local copy of the tokens */
+   state->shader.tokens = tgsi_dup_tokens(templ->tokens);
 
    /* draw's fs state */
-   state->draw_shader = draw_create_fragment_shader(softpipe->draw, templ);
+   state->draw_shader = draw_create_fragment_shader(softpipe->draw,
+                                                    &state->shader);
    if (!state->draw_shader) {
-      state->delete( state );
+      FREE((void *) state->shader.tokens);
+      FREE(state);
       return NULL;
    }
 
-   /* get/save the summary info for this shader */
-   tgsi_scan_shader(templ->tokens, &state->info);
-
-   for (i = 0; i < state->info.num_properties; ++i) {
-      if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN)
-         state->origin_lower_left = state->info.properties[i].data[0];
-      else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER)
-	 state->pixel_center_integer = state->info.properties[i].data[0];
-      else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
-	 state->color0_writes_all_cbufs = state->info.properties[i].data[0];
-   }
-
    return state;
 }
 
@@ -90,6 +150,7 @@
 softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_fragment_shader *state = (struct sp_fragment_shader *) fs;
 
    if (softpipe->fs == fs)
       return;
@@ -98,8 +159,14 @@
 
    softpipe->fs = fs;
 
-   draw_bind_fragment_shader(softpipe->draw,
-                             (softpipe->fs ? softpipe->fs->draw_shader : NULL));
+   if (fs == NULL)
+      softpipe->fs_variant = NULL;
+
+   if (state)
+      draw_bind_fragment_shader(softpipe->draw,
+                                state->draw_shader);
+   else
+      draw_bind_fragment_shader(softpipe->draw, NULL);
 
    softpipe->dirty |= SP_NEW_FS;
 }
@@ -110,8 +177,9 @@
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
    struct sp_fragment_shader *state = fs;
+   struct sp_fragment_shader_variant *var, *next_var;
 
-   assert(fs != softpipe_context(pipe)->fs);
+   assert(fs != softpipe->fs);
 
    if (softpipe->fs_machine->Tokens == state->shader.tokens) {
       /* unbind the shader from the tgsi executor if we're
@@ -120,9 +188,23 @@
       tgsi_exec_machine_bind_shader(softpipe->fs_machine, NULL, 0, NULL);
    }
 
+   /* delete variants */
+   for (var = state->variants; var; var = next_var) {
+      next_var = var->next;
+
+      assert(var != softpipe->fs_variant);
+
+      /* See comments elsewhere about draw fragment shaders */
+#if 0
+      draw_delete_fragment_shader(softpipe->draw, var->draw_shader);
+#endif
+
+      var->delete(var);
+   }
+
    draw_delete_fragment_shader(softpipe->draw, state->draw_shader);
 
-   state->delete( state );
+   FREE((void *) state->shader.tokens);
 }
 
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index f730948..89c6536 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2566,7 +2566,114 @@
    FREE(samp);
 }
 
+static void
+sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
+		int dims[4])
+{
+    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+    const struct pipe_sampler_view *view = samp->view;
+    const struct pipe_resource *texture = view->texture;
 
+    /* undefined according to EXT_gpu_program */
+    level += view->u.tex.first_level;
+    if (level > view->u.tex.last_level)
+	return;
+
+    dims[0] = u_minify(texture->width0, level);
+
+    switch(texture->target) {
+    case PIPE_TEXTURE_1D_ARRAY:
+       dims[1] = texture->array_size;
+       /* fallthrough */
+    case PIPE_TEXTURE_1D:
+    case PIPE_BUFFER:
+       return;
+    case PIPE_TEXTURE_2D_ARRAY:
+       dims[2] = texture->array_size;
+       /* fallthrough */
+    case PIPE_TEXTURE_2D:
+    case PIPE_TEXTURE_CUBE:
+    case PIPE_TEXTURE_RECT:
+       dims[1] = u_minify(texture->height0, level);
+       return;
+    case PIPE_TEXTURE_3D:
+       dims[1] = u_minify(texture->height0, level);
+       dims[2] = u_minify(texture->depth0, level);
+       return;
+    default:
+       assert(!"unexpected texture target in sample_get_dims()");
+       return;
+    }
+}
+
+/* this function is only used for unfiltered texel gets
+   via the TGSI TXF opcode. */
+static void
+sample_get_texels(struct tgsi_sampler *tgsi_sampler,
+	   const int v_i[QUAD_SIZE],
+	   const int v_j[QUAD_SIZE],
+	   const int v_k[QUAD_SIZE],
+	   const int lod[QUAD_SIZE],
+	   float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+   union tex_tile_address addr;
+   const struct pipe_resource *texture = samp->view->texture;
+   int j, c;
+   const float *tx;
+
+   addr.value = 0;
+   /* TODO write a better test for LOD */
+   addr.bits.level = lod[0];
+
+   switch(texture->target) {
+   case PIPE_TEXTURE_1D:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_2d(samp, addr, v_i[j], 0);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_1D_ARRAY:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_1d_array(samp, addr, v_i[j], v_j[j]);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_2d(samp, addr, v_i[j], v_j[j]);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_2D_ARRAY:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_2d_array(samp, addr, v_i[j], v_j[j], v_k[j]);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_3D:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_3d(samp, addr, v_i[j], v_j[j], v_k[j]);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
+   default:
+      assert(!"Unknown or CUBE texture type in TXF processing\n");
+      break;
+   }
+}
 /**
  * Create a sampler variant for a given set of non-orthogonal state.
  */
@@ -2692,5 +2799,7 @@
       samp->base.get_samples = samp->sample_target;
    }
 
+   samp->base.get_dims = sample_get_dims;
+   samp->base.get_texel = sample_get_texels;
    return samp;
 }
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index b847cf3..4a60f63 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -71,13 +71,22 @@
 static const char *
 svga_get_name( struct pipe_screen *pscreen )
 {
+   const char *build = "", *llvm = "", *mutex = "";
+   static char name[100];
 #ifdef DEBUG
    /* Only return internal details in the DEBUG version:
     */
-   return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC;
-#else
-   return "SVGA3D; build: RELEASE; ";
+   build = "build: DEBUG;";
+   mutex = "mutex: " PIPE_ATOMIC ";";
+#ifdef HAVE_LLVM
+   llvm = "LLVM;";
 #endif
+#else
+   build = "build: RELEASE;";
+#endif
+
+   util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm);
+   return name;
 }
 
 
@@ -245,6 +254,8 @@
          return 0;
       case PIPE_SHADER_CAP_SUBROUTINES:
          return 0;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return 0;
       }
       break;
    case PIPE_SHADER_VERTEX:
@@ -286,6 +297,8 @@
          return 1;
       case PIPE_SHADER_CAP_SUBROUTINES:
          return 0;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return 0;
       default:
          break;
       }
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 8c788f4..05de9ff 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -67,7 +67,9 @@
 
 
 #if !defined(__HAIKU__) && !defined(__USE_MISC)
+#if !defined(PIPE_OS_ANDROID)
 typedef unsigned int       uint;
+#endif
 typedef unsigned short     ushort;
 #endif
 typedef unsigned char      ubyte;
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index eea3d79..b3a7b33 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -99,9 +99,9 @@
 #endif
 #endif
 
-#if defined(__PPC__)
+#if defined(__ppc__) || defined(__ppc64__) || defined(__PPC__)
 #define PIPE_ARCH_PPC
-#if defined(__PPC64__)
+#if defined(__ppc64__) || defined(__PPC64__)
 #define PIPE_ARCH_PPC_64
 #endif
 #endif
@@ -120,6 +120,15 @@
 # define PIPE_ARCH_BIG_ENDIAN
 #endif
 
+#elif defined(__APPLE__)
+#include <machine/endian.h>
+
+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+# define PIPE_ARCH_LITTLE_ENDIAN
+#elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN
+# define PIPE_ARCH_BIG_ENDIAN
+#endif
+
 #else
 
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -145,6 +154,14 @@
 #define PIPE_OS_UNIX
 #endif
 
+/*
+ * Android defines __linux__ so PIPE_OS_LINUX and PIPE_OS_UNIX will also be
+ * defined.
+ */
+#if defined(ANDROID)
+#define PIPE_OS_ANDROID
+#endif
+
 #if defined(__FreeBSD__)
 #define PIPE_OS_FREEBSD
 #define PIPE_OS_BSD
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 3f6d90d..da3ee87 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -49,6 +49,7 @@
 struct pipe_query;
 struct pipe_poly_stipple;
 struct pipe_rasterizer_state;
+struct pipe_resolve_info;
 struct pipe_resource;
 struct pipe_sampler_state;
 struct pipe_sampler_view;
@@ -268,13 +269,10 @@
 
    /**
     * Resolve a multisampled resource into a non-multisampled one.
-    * Source and destination must have the same size and same format.
+    * Source and destination must be of the same format.
     */
    void (*resource_resolve)(struct pipe_context *pipe,
-                            struct pipe_resource *dst,
-                            unsigned dst_layer,
-                            struct pipe_resource *src,
-                            unsigned src_layer);
+                            const struct pipe_resolve_info *info);
 
    /*@}*/
 
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 79b8969..795de1f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -99,6 +99,9 @@
 #define PIPE_MASK_B  0x4
 #define PIPE_MASK_A  0x8
 #define PIPE_MASK_RGBA 0xf
+#define PIPE_MASK_Z  0x10
+#define PIPE_MASK_S  0x20
+#define PIPE_MASK_ZS 0x30
 
 
 /**
@@ -468,6 +471,7 @@
    PIPE_CAP_MIXED_COLORBUFFER_FORMATS = 46,
    PIPE_CAP_SEAMLESS_CUBE_MAP = 47,
    PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE = 48,
+   PIPE_CAP_SCALED_RESOLVE = 49
 };
 
 /* Shader caps not specific to any single stage */
@@ -491,6 +495,7 @@
    PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14,
    PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15,
    PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */
+   PIPE_SHADER_CAP_INTEGERS = 17
 };
 
 
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index d442c15..840b3ee 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -483,6 +483,34 @@
 };
 
 
+/**
+ * Information to describe a resource_resolve call.
+ */
+struct pipe_resolve_info
+{
+   struct {
+      struct pipe_resource *res;
+      unsigned level;
+      unsigned layer;
+      int x0; /**< always left */
+      int y0; /**< always top */
+      int x1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */
+      int y1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */
+   } dst;
+
+   struct {
+      struct pipe_resource *res;
+      unsigned layer;
+      int x0;
+      int y0;
+      int x1; /**< may be < x0 only if PIPE_CAP_SCALED_RESOLVE is supported */
+      int y1; /**< may be < y1 even if PIPE_CAP_SCALED_RESOLVE not supported */
+   } src;
+
+   unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */
+};
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/include/pipe/p_video_decoder.h b/src/gallium/include/pipe/p_video_decoder.h
index f063d8f..2aa4001 100644
--- a/src/gallium/include/pipe/p_video_decoder.h
+++ b/src/gallium/include/pipe/p_video_decoder.h
@@ -59,75 +59,74 @@
    void (*destroy)(struct pipe_video_decoder *decoder);
 
    /**
-    * Creates a buffer as decoding input
+    * Creates a decoder buffer
     */
-   struct pipe_video_decode_buffer *(*create_buffer)(struct pipe_video_decoder *decoder);
+   void *(*create_buffer)(struct pipe_video_decoder *decoder);
 
    /**
-    * flush decoder buffer to video hardware
+    * Destroys a decoder buffer
     */
-   void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf,
-                        unsigned num_ycbcr_blocks[3],
-                        struct pipe_video_buffer *ref_frames[2],
-                        struct pipe_video_buffer *dst);
-};
-
-/**
- * input buffer for a decoder
- */
-struct pipe_video_decode_buffer
-{
-   struct pipe_video_decoder *decoder;
+   void (*destroy_buffer)(struct pipe_video_decoder *decoder, void *buffer);
 
    /**
-    * destroy this decode buffer
+    * set the current decoder buffer
     */
-   void (*destroy)(struct pipe_video_decode_buffer *decbuf);
+   void (*set_decode_buffer)(struct pipe_video_decoder *decoder, void *buffer);
 
    /**
-    * map the input buffer into memory before starting decoding
+    * set the picture parameters for the next frame
+    * only used for bitstream decoding
     */
-   void (*begin_frame)(struct pipe_video_decode_buffer *decbuf);
+   void (*set_picture_parameters)(struct pipe_video_decoder *decoder,
+                                  struct pipe_picture_desc *picture);
 
    /**
     * set the quantification matrixes
     */
-   void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf,
-                            const uint8_t intra_matrix[64],
-                            const uint8_t non_intra_matrix[64]);
+   void (*set_quant_matrix)(struct pipe_video_decoder *decoder,
+                            const struct pipe_quant_matrix *matrix);
 
    /**
-    * get the pointer where to put the ycbcr blocks of a component
+    * set target where video data is decoded to
     */
-   struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decode_buffer *, int component);
+   void (*set_decode_target)(struct pipe_video_decoder *decoder,
+                             struct pipe_video_buffer *target);
 
    /**
-    * get the pointer where to put the ycbcr dct block data of a component
+    * set reference frames for motion compensation
     */
-   short *(*get_ycbcr_buffer)(struct pipe_video_decode_buffer *, int component);
+   void (*set_reference_frames)(struct pipe_video_decoder *decoder,
+                                struct pipe_video_buffer **ref_frames,
+                                unsigned num_ref_frames);
 
    /**
-    * get the stride of the mv buffer
+    * start decoding of a new frame
     */
-   unsigned (*get_mv_stream_stride)(struct pipe_video_decode_buffer *decbuf);
+   void (*begin_frame)(struct pipe_video_decoder *decoder);
 
    /**
-    * get the pointer where to put the motion vectors of a ref frame
+    * decode a macroblock
     */
-   struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decode_buffer *decbuf, int ref_frame);
+   void (*decode_macroblock)(struct pipe_video_decoder *decoder,
+                             const struct pipe_macroblock *macroblocks,
+                             unsigned num_macroblocks);
 
    /**
     * decode a bitstream
     */
-   void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf,
-                            unsigned num_bytes, const void *data,
-                            struct pipe_picture_desc *picture,
-                            unsigned num_ycbcr_blocks[3]);
+   void (*decode_bitstream)(struct pipe_video_decoder *decoder,
+                            unsigned num_bytes, const void *data);
 
    /**
-    * unmap decoder buffer before flushing
+    * end decoding of the current frame
     */
-   void (*end_frame)(struct pipe_video_decode_buffer *decbuf);
+   void (*end_frame)(struct pipe_video_decoder *decoder);
+
+   /**
+    * flush any outstanding command buffers to the hardware
+    * should be called before a video_buffer is acessed by the state tracker again
+    */
+   void (*flush)(struct pipe_video_decoder *decoder);
 };
 
 /**
diff --git a/src/gallium/include/pipe/p_video_enums.h b/src/gallium/include/pipe/p_video_enums.h
index 492ab84..ea25a25 100644
--- a/src/gallium/include/pipe/p_video_enums.h
+++ b/src/gallium/include/pipe/p_video_enums.h
@@ -51,6 +51,7 @@
    PIPE_VIDEO_CAP_NPOT_TEXTURES = 1,
    PIPE_VIDEO_CAP_MAX_WIDTH = 2,
    PIPE_VIDEO_CAP_MAX_HEIGHT = 3,
+   PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED = 4
 };
 
 enum pipe_video_codec
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 2a64ffb..f655ed4 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -43,60 +43,67 @@
    unsigned x, y, w, h;
 };
 
-enum pipe_mpeg12_picture_type
+/*
+ * see table 6-12 in the spec
+ */
+enum pipe_mpeg12_picture_coding_type
 {
-   PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP,
-   PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM,
-   PIPE_MPEG12_PICTURE_TYPE_FRAME
+   PIPE_MPEG12_PICTURE_CODING_TYPE_I = 0x01,
+   PIPE_MPEG12_PICTURE_CODING_TYPE_P = 0x02,
+   PIPE_MPEG12_PICTURE_CODING_TYPE_B = 0x03,
+   PIPE_MPEG12_PICTURE_CODING_TYPE_D = 0x04
 };
 
-enum pipe_mpeg12_dct_intra
+/*
+ * see table 6-14 in the spec
+ */
+enum pipe_mpeg12_picture_structure
 {
-   PIPE_MPEG12_DCT_DELTA = 0,
-   PIPE_MPEG12_DCT_INTRA = 1
+   PIPE_MPEG12_PICTURE_STRUCTURE_RESERVED = 0x00,
+   PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP = 0x01,
+   PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_BOTTOM = 0x02,
+   PIPE_MPEG12_PICTURE_STRUCTURE_FRAME = 0x03
 };
 
+/*
+ * flags for macroblock_type, see section 6.3.17.1 in the spec
+ */
+enum pipe_mpeg12_macroblock_type
+{
+   PIPE_MPEG12_MB_TYPE_QUANT = 0x01,
+   PIPE_MPEG12_MB_TYPE_MOTION_FORWARD = 0x02,
+   PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD = 0x04,
+   PIPE_MPEG12_MB_TYPE_PATTERN = 0x08,
+   PIPE_MPEG12_MB_TYPE_INTRA = 0x10
+};
+
+/*
+ * flags for motion_type, see table 6-17 and 6-18 in the spec
+ */
+enum pipe_mpeg12_motion_type
+{
+   PIPE_MPEG12_MO_TYPE_RESERVED = 0x00,
+   PIPE_MPEG12_MO_TYPE_FIELD = 0x01,
+   PIPE_MPEG12_MO_TYPE_FRAME = 0x02,
+   PIPE_MPEG12_MO_TYPE_16x8 = 0x02,
+   PIPE_MPEG12_MO_TYPE_DUAL_PRIME = 0x03
+};
+
+/*
+ * see section 6.3.17.1 and table 6-19 in the spec
+ */
 enum pipe_mpeg12_dct_type
 {
    PIPE_MPEG12_DCT_TYPE_FRAME = 0,
    PIPE_MPEG12_DCT_TYPE_FIELD = 1
 };
 
-enum pipe_video_field_select
+enum pipe_mpeg12_field_select
 {
-   PIPE_VIDEO_FRAME = 0,
-   PIPE_VIDEO_TOP_FIELD = 1,
-   PIPE_VIDEO_BOTTOM_FIELD = 3,
-
-   /* TODO
-   PIPE_VIDEO_DUALPRIME
-   PIPE_VIDEO_16x8
-   */
-};
-
-enum pipe_video_mv_weight
-{
-   PIPE_VIDEO_MV_WEIGHT_MIN = 0,
-   PIPE_VIDEO_MV_WEIGHT_HALF = 128,
-   PIPE_VIDEO_MV_WEIGHT_MAX = 256
-};
-
-/* bitfields because this is used as a vertex buffer element */
-struct pipe_motionvector
-{
-   struct {
-      short x, y;
-      ushort field_select; /**< enum pipe_video_field_select */
-      ushort weight;  /**< enum pipe_video_mv_weight  */
-   } top, bottom;
-};
-
-/* bitfields because this is used as a vertex buffer element */
-struct pipe_ycbcr_block
-{
-   ubyte x, y;
-   ubyte intra;  /**< enum pipe_mpeg12_dct_intra */
-   ubyte coding; /**< enum pipe_mpeg12_dct_type */
+   PIPE_MPEG12_FS_FIRST_FORWARD = 0x01,
+   PIPE_MPEG12_FS_FIRST_BACKWARD = 0x02,
+   PIPE_MPEG12_FS_SECOND_FORWARD = 0x04,
+   PIPE_MPEG12_FS_SECOND_BACKWARD = 0x08
 };
 
 struct pipe_picture_desc
@@ -104,6 +111,16 @@
    enum pipe_video_profile profile;
 };
 
+struct pipe_quant_matrix
+{
+   enum pipe_video_codec codec;
+};
+
+struct pipe_macroblock
+{
+   enum pipe_video_codec codec;
+};
+
 struct pipe_mpeg12_picture_desc
 {
    struct pipe_picture_desc base;
@@ -115,9 +132,58 @@
    unsigned alternate_scan;
    unsigned intra_vlc_format;
    unsigned concealment_motion_vectors;
+   unsigned intra_dc_precision;
    unsigned f_code[2][2];
 };
 
+struct pipe_mpeg12_quant_matrix
+{
+   struct pipe_quant_matrix base;
+
+   const uint8_t *intra_matrix;
+   const uint8_t *non_intra_matrix;
+};
+
+struct pipe_mpeg12_macroblock
+{
+   struct pipe_macroblock base;
+
+   /* see section 6.3.17 in the spec */
+   unsigned short x, y;
+
+   /* see section 6.3.17.1 in the spec */
+   unsigned char macroblock_type;
+
+   union {
+      struct {
+         /* see table 6-17 in the spec */
+         unsigned int frame_motion_type:2;
+
+         /* see table 6-18 in the spec */
+         unsigned int field_motion_type:2;
+
+         /* see table 6-19 in the spec */
+         unsigned int dct_type:1;
+      } bits;
+      unsigned int value;
+   } macroblock_modes;
+
+    /* see section 6.3.17.2 in the spec */
+   unsigned char motion_vertical_field_select;
+
+   /* see Table 7-7 in the spec */
+   short PMV[2][2][2];
+
+   /* see figure 6.10-12 in the spec */
+   unsigned short coded_block_pattern;
+
+   /* see figure 6.10-12 in the spec */
+   short *blocks;
+
+   /* Number of skipped macroblocks after this macroblock */
+   unsigned short num_skipped_macroblocks;
+};
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
index 12f2aad..0a31cf1 100644
--- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
+++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
@@ -352,9 +352,9 @@
 	{
 		for(unsigned i = 0; i < count; ++i)
 		{
-			if(constbufs[i] != constant_buffers[s][i].p)
+			if(constbufs[i] != constant_buffers[s][start + i].p)
 			{
-				constant_buffers[s][i] = constbufs[i];
+				constant_buffers[s][start + i] = constbufs[i];
 				if(s < caps.stages && start + i < caps.constant_buffers[s])
 					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
 			}
@@ -391,11 +391,12 @@
 			{
 				samplers[s][start + i] = samps[i];
 				sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
+				last_different = i;
 			}
 			if(last_different >= 0)
 			{
 				num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
-				update_flags |= (UPDATE_SAMPLERS_SHIFT + s);
+				update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s);
 			}
 		}
 	}
@@ -1726,9 +1727,26 @@
 		SYNCHRONIZED;
 		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
 		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
-		unsigned dst_layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
-		unsigned src_layer = d3d11_subresource_to_face(src->resource, src_subresource);
-		pipe->resource_resolve(pipe, dst->resource, dst_layer, src->resource, src_layer);
+		struct pipe_resolve_info info;
+
+		info.dst.res = dst->resource;
+		info.src.res = src->resource;
+		info.dst.level = 0;
+		info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
+		info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource);
+
+		info.src.x0 = 0;
+		info.src.x1 = info.src.res->width0;
+		info.src.y0 = 0;
+		info.src.y1 = info.src.res->height0;
+		info.dst.x0 = 0;
+		info.dst.x1 = info.dst.res->width0;
+		info.dst.y0 = 0;
+		info.dst.y1 = info.dst.res->height0;
+
+		info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS;
+
+		pipe->resource_resolve(pipe, &info);
 	}
 
 #if API >= 11
diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c
index e6612b1..bc8dacb 100644
--- a/src/gallium/state_trackers/dri/common/dri_context.c
+++ b/src/gallium/state_trackers/dri/common/dri_context.c
@@ -48,6 +48,16 @@
    driInitExtensions(st->ctx, NULL, GL_FALSE);
 }
 
+static void
+dri_pp_query(struct dri_context *ctx)
+{
+   unsigned int i;
+
+   for (i = 0; i < PP_FILTERS; i++) {
+      ctx->pp_enabled[i] = driQueryOptioni(&ctx->optionCache, pp_filters[i].name);
+   }
+}
+
 GLboolean
 dri_create_context(gl_api api, const struct gl_config * visual,
 		   __DRIcontext * cPriv, void *sharedContextPrivate)
@@ -105,6 +115,11 @@
    if (api == API_OPENGL)
       dri_init_extensions(ctx);
 
+   // Context successfully created. See if post-processing is requested.
+   dri_pp_query(ctx);
+
+   ctx->pp = pp_init(screen->base.screen, ctx->pp_enabled);
+
    return GL_TRUE;
 
  fail:
@@ -134,6 +149,8 @@
    ctx->st->flush(ctx->st, 0, NULL);
    ctx->st->destroy(ctx->st);
 
+   if (ctx->pp) pp_free(ctx->pp);
+
    FREE(ctx);
 }
 
@@ -187,6 +204,13 @@
 
    ctx->stapi->make_current(ctx->stapi, ctx->st, &draw->base, &read->base);
 
+   // This is ok to call here. If they are already init, it's a no-op.
+   if (draw->textures[ST_ATTACHMENT_BACK_LEFT] && draw->textures[ST_ATTACHMENT_DEPTH_STENCIL]
+      && ctx->pp)
+         pp_init_fbos(ctx->pp, draw->textures[ST_ATTACHMENT_BACK_LEFT]->width0,
+            draw->textures[ST_ATTACHMENT_BACK_LEFT]->height0,
+            draw->textures[ST_ATTACHMENT_DEPTH_STENCIL]);
+
    return GL_TRUE;
 }
 
diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h
index 35105e8..cfc8e334 100644
--- a/src/gallium/state_trackers/dri/common/dri_context.h
+++ b/src/gallium/state_trackers/dri/common/dri_context.h
@@ -34,6 +34,7 @@
 
 #include "pipe/p_compiler.h"
 #include "dri_wrapper.h"
+#include "postprocess/filters.h"
 
 struct pipe_context;
 struct pipe_fence;
@@ -61,6 +62,8 @@
    /* gallium */
    struct st_api *stapi;
    struct st_context_iface *st;
+   struct pp_queue_t *pp;
+   unsigned int pp_enabled[PP_FILTERS];
 };
 
 static INLINE struct dri_context *
diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c
index 5931df9..dcb6fdf 100644
--- a/src/gallium/state_trackers/dri/common/dri_screen.c
+++ b/src/gallium/state_trackers/dri/common/dri_screen.c
@@ -42,15 +42,25 @@
 #include "util/u_debug.h"
 
 PUBLIC const char __driConfigOptions[] =
-   DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE
-   DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
-   DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-   DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY
-/* DRI_CONF_FORCE_S3TC_ENABLE(false) */
-   DRI_CONF_ALLOW_LARGE_TEXTURES(1)
-   DRI_CONF_SECTION_END DRI_CONF_END;
+   DRI_CONF_BEGIN
+      DRI_CONF_SECTION_PERFORMANCE
+         DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
+         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+      DRI_CONF_SECTION_END
 
-static const uint __driNConfigOptions = 3;
+      DRI_CONF_SECTION_QUALITY
+/*       DRI_CONF_FORCE_S3TC_ENABLE(false) */
+         DRI_CONF_ALLOW_LARGE_TEXTURES(1)
+         DRI_CONF_PP_CELSHADE(0)
+         DRI_CONF_PP_NORED(0)
+         DRI_CONF_PP_NOGREEN(0)
+         DRI_CONF_PP_NOBLUE(0)
+         DRI_CONF_PP_JIMENEZMLAA(0, 0, 32)
+         DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32)
+      DRI_CONF_SECTION_END
+   DRI_CONF_END;
+
+static const uint __driNConfigOptions = 9;
 
 static const __DRIconfig **
 dri_fill_in_modes(struct dri_screen *screen,
diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index fe4ddb3..6cf2375 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -44,8 +44,19 @@
  * DRI2 flush extension.
  */
 static void
-dri2_flush_drawable(__DRIdrawable *draw)
+dri2_flush_drawable(__DRIdrawable *dPriv)
 {
+   struct dri_context *ctx = dri_get_current(dPriv->driScreenPriv);
+   struct dri_drawable *drawable = dri_drawable(dPriv);
+
+   struct pipe_resource *ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT];
+
+   if (ctx) {
+      if (ptex && ctx->pp && drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL])
+         pp_run(ctx->pp, ptex, ptex, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]);
+
+      ctx->st->flush(ctx->st, 0, NULL);
+   }
 }
 
 static void
@@ -266,7 +277,6 @@
    struct dri_screen *screen = dri_screen(sPriv);
    struct dri2_buffer *buffer;
    struct pipe_resource templ;
-   enum st_attachment_type statt;
    enum pipe_format pf;
    unsigned bind = 0;
    struct winsys_handle whandle;
@@ -274,22 +284,16 @@
    switch (attachment) {
       case __DRI_BUFFER_FRONT_LEFT:
       case __DRI_BUFFER_FAKE_FRONT_LEFT:
-         statt = ST_ATTACHMENT_FRONT_LEFT;
          bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
          break;
       case __DRI_BUFFER_BACK_LEFT:
-         statt = ST_ATTACHMENT_BACK_LEFT;
          bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
          break;
       case __DRI_BUFFER_DEPTH:
       case __DRI_BUFFER_DEPTH_STENCIL:
       case __DRI_BUFFER_STENCIL:
-            statt = ST_ATTACHMENT_DEPTH_STENCIL;
             bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */
          break;
-      default:
-         statt = ST_ATTACHMENT_INVALID;
-         break;
    }
 
    switch (format) {
@@ -662,20 +666,6 @@
 }
 
 static boolean
-dri2_create_context(gl_api api, const struct gl_config * visual,
-                    __DRIcontext * cPriv, void *sharedContextPrivate)
-{
-   struct dri_context *ctx = NULL;
-
-   if (!dri_create_context(api, visual, cPriv, sharedContextPrivate))
-      return FALSE;
-
-   ctx = cPriv->driverPrivate;
-
-   return TRUE;
-}
-
-static boolean
 dri2_create_buffer(__DRIscreen * sPriv,
                    __DRIdrawable * dPriv,
                    const struct gl_config * visual, boolean isPixmap)
@@ -702,7 +692,7 @@
    .InitScreen = NULL,
    .InitScreen2 = dri2_init_screen,
    .DestroyScreen = dri_destroy_screen,
-   .CreateContext = dri2_create_context,
+   .CreateContext = dri_create_context,
    .DestroyContext = dri_destroy_context,
    .CreateBuffer = dri2_create_buffer,
    .DestroyBuffer = dri_destroy_buffer,
diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c
index a1879a8..082df55 100644
--- a/src/gallium/state_trackers/dri/sw/drisw.c
+++ b/src/gallium/state_trackers/dri/sw/drisw.c
@@ -136,6 +136,9 @@
    ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT];
 
    if (ptex) {
+      if (ctx->pp && drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL])
+         pp_run(ctx->pp, ptex, ptex, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]);
+
       ctx->st->flush(ctx->st, ST_FLUSH_FRONT, NULL);
 
       drisw_copy_to_front(dPriv, ptex);
diff --git a/src/gallium/state_trackers/egl/Android.mk b/src/gallium/state_trackers/egl/Android.mk
new file mode 100644
index 0000000..e459bd4
--- /dev/null
+++ b/src/gallium/state_trackers/egl/Android.mk
@@ -0,0 +1,54 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+common_SOURCES := \
+	common/egl_g3d.c \
+	common/egl_g3d_api.c \
+	common/egl_g3d_image.c \
+	common/egl_g3d_st.c \
+	common/egl_g3d_sync.c \
+	common/native_helper.c
+
+android_SOURCES := \
+	android/native_android.cpp
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(common_SOURCES) \
+	$(android_SOURCES)
+
+LOCAL_CFLAGS := -DHAVE_ANDROID_BACKEND
+
+LOCAL_C_INCLUDES := \
+	$(GALLIUM_TOP)/state_trackers/egl \
+	$(GALLIUM_TOP)/winsys/sw \
+	$(MESA_TOP)/src/egl/main \
+	$(DRM_GRALLOC_TOP)
+
+LOCAL_MODULE := libmesa_st_egl
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
new file mode 100644
index 0000000..211d6a2
--- /dev/null
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -0,0 +1,835 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.12
+ *
+ * Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+ * Copyright (C) 2010-2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#define LOG_TAG "EGL-GALLIUM"
+#include <cutils/log.h>
+#include <cutils/properties.h>
+#include <hardware/gralloc.h>
+#include <utils/Errors.h>
+#include <ui/android_native_buffer.h>
+
+extern "C" {
+#include "egllog.h"
+}
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_box.h"
+#include "common/native.h"
+#include "common/native_helper.h"
+#include "android/android_sw_winsys.h"
+#include "state_tracker/drm_driver.h"
+
+struct android_config;
+
+struct android_display {
+   struct native_display base;
+
+   boolean use_drm;
+   const struct native_event_handler *event_handler;
+   struct android_config *configs;
+   int num_configs;
+};
+
+struct android_surface {
+   struct native_surface base;
+
+   struct android_display *adpy;
+   android_native_window_t *win;
+
+   /* staging color buffer for when buffer preserving is enabled */
+   struct pipe_resource *color_res;
+
+   uint stamp;
+   android_native_buffer_t *buf;
+   struct pipe_resource *buf_res;
+
+   /* cache the current back buffers */
+   struct {
+      int width;
+      int height;
+      int format;
+   } cache_key;
+   void *cache_handles[2];
+   struct pipe_resource *cache_resources[2];
+};
+
+struct android_config {
+   struct native_config base;
+};
+
+static INLINE struct android_display *
+android_display(const struct native_display *ndpy)
+{
+   return (struct android_display *) ndpy;
+}
+
+static INLINE struct android_surface *
+android_surface(const struct native_surface *nsurf)
+{
+   return (struct android_surface *) nsurf;
+}
+
+static INLINE struct android_config *
+android_config(const struct native_config *nconf)
+{
+   return (struct android_config *) nconf;
+}
+
+namespace android {
+
+static enum pipe_format
+get_pipe_format(int native)
+{
+   enum pipe_format fmt;
+
+   switch (native) {
+   case HAL_PIXEL_FORMAT_RGBA_8888:
+      fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
+      break;
+   case HAL_PIXEL_FORMAT_RGBX_8888:
+      fmt = PIPE_FORMAT_R8G8B8X8_UNORM;
+      break;
+   case HAL_PIXEL_FORMAT_RGB_888:
+      fmt = PIPE_FORMAT_R8G8B8_UNORM;
+      break;
+   case HAL_PIXEL_FORMAT_RGB_565:
+      fmt = PIPE_FORMAT_B5G6R5_UNORM;
+      break;
+   case HAL_PIXEL_FORMAT_BGRA_8888:
+      fmt = PIPE_FORMAT_B8G8R8A8_UNORM;
+      break;
+   case HAL_PIXEL_FORMAT_RGBA_5551:
+      /* fmt = PIPE_FORMAT_A1B5G5R5_UNORM; */
+   case HAL_PIXEL_FORMAT_RGBA_4444:
+      /* fmt = PIPE_FORMAT_A4B4G4R4_UNORM; */
+   default:
+      LOGE("unsupported native format 0x%x", native);
+      fmt = PIPE_FORMAT_NONE;
+      break;
+   }
+
+   return fmt;
+}
+
+#include <gralloc_drm_handle.h>
+static int
+get_handle_name(buffer_handle_t handle)
+{
+   struct gralloc_drm_handle_t *dh;
+
+   /* check that the buffer is allocated by drm_gralloc and cast */
+   dh = gralloc_drm_handle(handle);
+
+   return (dh) ? dh->name : 0;
+}
+
+/**
+ * Import an android_native_buffer_t allocated by the server.
+ */
+static struct pipe_resource *
+import_buffer(struct android_display *adpy, const struct pipe_resource *templ,
+              struct android_native_buffer_t *abuf)
+{
+   struct pipe_screen *screen = adpy->base.screen;
+   struct pipe_resource *res;
+
+   if (templ->bind & PIPE_BIND_RENDER_TARGET) {
+      if (!screen->is_format_supported(screen, templ->format,
+               templ->target, 0, PIPE_BIND_RENDER_TARGET))
+         LOGW("importing unsupported buffer as render target");
+   }
+   if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
+      if (!screen->is_format_supported(screen, templ->format,
+               templ->target, 0, PIPE_BIND_SAMPLER_VIEW))
+         LOGW("importing unsupported buffer as sampler view");
+   }
+
+   if (adpy->use_drm) {
+      struct winsys_handle handle;
+
+      memset(&handle, 0, sizeof(handle));
+      handle.type = DRM_API_HANDLE_TYPE_SHARED;
+      /* for DRM, we need the GEM name */
+      handle.handle = get_handle_name(abuf->handle);
+      if (!handle.handle) {
+         LOGE("unable to import invalid buffer %p", abuf);
+         return NULL;
+      }
+
+      handle.stride =
+         abuf->stride * util_format_get_blocksize(templ->format);
+
+      res = screen->resource_from_handle(screen, templ, &handle);
+   }
+   else {
+      struct android_winsys_handle handle;
+
+      memset(&handle, 0, sizeof(handle));
+      handle.handle = abuf->handle;
+      handle.stride =
+         abuf->stride * util_format_get_blocksize(templ->format);
+
+      res = screen->resource_from_handle(screen,
+            templ, (struct winsys_handle *) &handle);
+   }
+
+   if (!res)
+      LOGE("failed to import buffer %p", abuf);
+
+   return res;
+}
+
+static void
+android_surface_clear_cache(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   int i;
+
+   for (i = 0; i < Elements(asurf->cache_handles); i++) {
+      asurf->cache_handles[i] = NULL;
+      pipe_resource_reference(&asurf->cache_resources[i], NULL);
+   }
+
+   memset(&asurf->cache_key, 0, sizeof(asurf->cache_key));
+}
+
+static struct pipe_resource *
+android_surface_add_cache(struct native_surface *nsurf,
+                          struct android_native_buffer_t *abuf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   void *handle;
+   int idx;
+
+   /* how about abuf->usage? */
+   if (asurf->cache_key.width != abuf->width ||
+       asurf->cache_key.height != abuf->height ||
+       asurf->cache_key.format != abuf->format)
+      android_surface_clear_cache(&asurf->base);
+
+   if (asurf->adpy->use_drm)
+      handle = (void *) get_handle_name(abuf->handle);
+   else
+      handle = (void *) abuf->handle;
+   /* NULL is invalid */
+   if (!handle) {
+      LOGE("invalid buffer native buffer %p", abuf);
+      return NULL;
+   }
+
+   /* find the slot to use */
+   for (idx = 0; idx < Elements(asurf->cache_handles); idx++) {
+      if (asurf->cache_handles[idx] == handle || !asurf->cache_handles[idx])
+         break;
+   }
+   if (idx == Elements(asurf->cache_handles)) {
+      LOGW("cache full: buf %p, width %d, height %d, format %d, usage 0x%x",
+            abuf, abuf->width, abuf->height, abuf->format, abuf->usage);
+      android_surface_clear_cache(&asurf->base);
+      idx = 0;
+   }
+
+   if (idx == 0) {
+      asurf->cache_key.width = abuf->width;
+      asurf->cache_key.height = abuf->height;
+      asurf->cache_key.format = abuf->format;
+   }
+
+   if (!asurf->cache_handles[idx]) {
+      struct pipe_resource templ;
+
+      assert(!asurf->cache_resources[idx]);
+
+      memset(&templ, 0, sizeof(templ));
+      templ.target = PIPE_TEXTURE_2D;
+      templ.format = get_pipe_format(asurf->buf->format);
+      templ.bind = PIPE_BIND_RENDER_TARGET;
+      if (!asurf->adpy->use_drm) {
+         templ.bind |= PIPE_BIND_TRANSFER_WRITE |
+                       PIPE_BIND_TRANSFER_READ;
+      }
+
+      templ.width0 = asurf->buf->width;
+      templ.height0 = asurf->buf->height;
+      templ.depth0 = 1;
+      templ.array_size = 1;
+
+      if (templ.format != PIPE_FORMAT_NONE) {
+         asurf->cache_resources[idx] =
+            import_buffer(asurf->adpy, &templ, asurf->buf);
+      }
+      else {
+         asurf->cache_resources[idx] = NULL;
+      }
+
+      asurf->cache_handles[idx] = handle;
+   }
+
+   return asurf->cache_resources[idx];
+}
+
+/**
+ * Dequeue the next back buffer for rendering.
+ */
+static boolean
+android_surface_dequeue_buffer(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   struct pipe_resource *res;
+
+   if (asurf->win->dequeueBuffer(asurf->win, &asurf->buf) != NO_ERROR) {
+      LOGE("failed to dequeue window %p", asurf->win);
+      return FALSE;
+   }
+
+   asurf->buf->common.incRef(&asurf->buf->common);
+   asurf->win->lockBuffer(asurf->win, asurf->buf);
+
+   res = android_surface_add_cache(&asurf->base, asurf->buf);
+   if (!res)
+      return FALSE;
+
+   pipe_resource_reference(&asurf->buf_res, res);
+
+   return TRUE;
+}
+
+/**
+ * Enqueue the back buffer.  This will make it the next front buffer.
+ */
+static boolean
+android_surface_enqueue_buffer(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+
+   pipe_resource_reference(&asurf->buf_res, NULL);
+
+   asurf->win->queueBuffer(asurf->win, asurf->buf);
+
+   asurf->buf->common.decRef(&asurf->buf->common);
+   asurf->buf = NULL;
+
+   return TRUE;
+}
+
+static boolean
+android_surface_swap_buffers(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   struct android_display *adpy = asurf->adpy;
+
+   if (!asurf->buf)
+      return TRUE;
+
+   android_surface_enqueue_buffer(&asurf->base);
+
+   asurf->stamp++;
+   adpy->event_handler->invalid_surface(&adpy->base,
+         &asurf->base, asurf->stamp);
+
+   return TRUE;
+}
+
+static void
+copy_resources(struct native_display *ndpy,
+               struct pipe_resource *src,
+               struct pipe_resource *dst)
+{
+   struct pipe_context *pipe;
+   struct pipe_box box;
+
+   pipe = ndpy_get_copy_context(ndpy);
+   if (!pipe)
+      return;
+
+   u_box_origin_2d(src->width0, src->height0, &box);
+   pipe->resource_copy_region(pipe, dst, 0, 0, 0, 0, src, 0, &box);
+   pipe->flush(pipe, NULL);
+}
+
+static boolean
+android_surface_present(struct native_surface *nsurf,
+                        enum native_attachment natt,
+                        boolean preserve,
+                        uint swap_interval)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   struct android_display *adpy = asurf->adpy;
+   boolean ret;
+
+   if (swap_interval || natt != NATIVE_ATTACHMENT_BACK_LEFT)
+      return FALSE;
+
+   /* we always render to color_res first when it exists */
+   if (asurf->color_res) {
+      copy_resources(&adpy->base, asurf->color_res, asurf->buf_res);
+      if (!preserve)
+         pipe_resource_reference(&asurf->color_res, NULL);
+   }
+   else if (preserve) {
+      struct pipe_resource templ;
+
+      memset(&templ, 0, sizeof(templ));
+      templ.target = asurf->buf_res->target;
+      templ.format = asurf->buf_res->format;
+      templ.bind = PIPE_BIND_RENDER_TARGET;
+      templ.width0 = asurf->buf_res->width0;
+      templ.height0 = asurf->buf_res->height0;
+      templ.depth0 = asurf->buf_res->depth0;
+      templ.array_size = asurf->buf_res->array_size;
+
+      asurf->color_res =
+         adpy->base.screen->resource_create(adpy->base.screen, &templ);
+      if (!asurf->color_res)
+         return FALSE;
+
+      /* preserve the contents */
+      copy_resources(&adpy->base, asurf->buf_res, asurf->color_res);
+   }
+
+   return android_surface_swap_buffers(nsurf);
+}
+
+static boolean
+android_surface_validate(struct native_surface *nsurf, uint attachment_mask,
+                         unsigned int *seq_num, struct pipe_resource **textures,
+                         int *width, int *height)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   struct winsys_handle handle;
+
+   if (!asurf->buf) {
+      if (!android_surface_dequeue_buffer(&asurf->base))
+         return FALSE;
+
+      /* color_res must be compatible with buf_res */
+      if (asurf->color_res &&
+          (asurf->color_res->format != asurf->buf_res->format ||
+           asurf->color_res->width0 != asurf->buf_res->width0 ||
+           asurf->color_res->height0 != asurf->buf_res->height0))
+         pipe_resource_reference(&asurf->color_res, NULL);
+   }
+
+   if (textures) {
+      /* we have access to only the back buffer */
+      const enum native_attachment att = NATIVE_ATTACHMENT_BACK_LEFT;
+
+      if (native_attachment_mask_test(attachment_mask, att)) {
+         textures[att] = NULL;
+         pipe_resource_reference(&textures[att],
+               (asurf->color_res) ? asurf->color_res : asurf->buf_res);
+      }
+   }
+
+   if (seq_num)
+      *seq_num = asurf->stamp;
+   if (width)
+      *width = asurf->buf->width;
+   if (height)
+      *height = asurf->buf->height;
+
+   return TRUE;
+}
+
+static void
+android_surface_wait(struct native_surface *nsurf)
+{
+}
+
+static void
+android_surface_destroy(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   int i;
+
+   pipe_resource_reference(&asurf->color_res, NULL);
+
+   if (asurf->buf)
+      android_surface_enqueue_buffer(&asurf->base);
+
+   android_surface_clear_cache(&asurf->base);
+
+   asurf->win->common.decRef(&asurf->win->common);
+
+   FREE(asurf);
+}
+
+static struct native_surface *
+android_display_create_window_surface(struct native_display *ndpy,
+                                      EGLNativeWindowType win,
+                                      const struct native_config *nconf)
+{
+   struct android_display *adpy = android_display(ndpy);
+   struct android_config *aconf = android_config(nconf);
+   struct android_surface *asurf;
+   enum pipe_format format;
+   int val;
+
+   if (win->common.magic != ANDROID_NATIVE_WINDOW_MAGIC) {
+      LOGE("invalid native window with magic 0x%x", win->common.magic);
+      return NULL;
+   }
+   if (win->query(win, NATIVE_WINDOW_FORMAT, &val)) {
+      LOGE("failed to query native window format");
+      return NULL;
+   }
+   format = get_pipe_format(val);
+   if (format != nconf->color_format) {
+      LOGW("native window format 0x%x != config format 0x%x",
+            format, nconf->color_format);
+      if (!adpy->base.screen->is_format_supported(adpy->base.screen,
+               format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) {
+         LOGE("and the native window cannot be used as a render target");
+         return NULL;
+      }
+   }
+
+   asurf = CALLOC_STRUCT(android_surface);
+   if (!asurf)
+      return NULL;
+
+   asurf->adpy = adpy;
+   asurf->win = win;
+   asurf->win->common.incRef(&asurf->win->common);
+
+   /* request buffers that are for CPU access */
+   if (!adpy->use_drm) {
+      native_window_set_usage(asurf->win,
+            GRALLOC_USAGE_SW_READ_OFTEN | GRALLOC_USAGE_SW_WRITE_OFTEN);
+   }
+
+   asurf->base.destroy = android_surface_destroy;
+   asurf->base.present = android_surface_present;
+   asurf->base.validate = android_surface_validate;
+   asurf->base.wait = android_surface_wait;
+
+   return &asurf->base;
+}
+
+static boolean
+android_display_init_configs(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+   const int native_formats[] = {
+      HAL_PIXEL_FORMAT_RGBA_8888,
+      HAL_PIXEL_FORMAT_RGBX_8888,
+      HAL_PIXEL_FORMAT_RGB_888,
+      HAL_PIXEL_FORMAT_RGB_565,
+      HAL_PIXEL_FORMAT_BGRA_8888,
+   };
+   int i;
+
+   adpy->configs = (struct android_config *)
+      CALLOC(Elements(native_formats), sizeof(*adpy->configs));
+   if (!adpy->configs)
+      return FALSE;
+
+   for (i = 0; i < Elements(native_formats); i++) {
+      enum pipe_format color_format;
+      struct android_config *aconf;
+
+      color_format = get_pipe_format(native_formats[i]);
+      if (color_format == PIPE_FORMAT_NONE ||
+          !adpy->base.screen->is_format_supported(adpy->base.screen,
+               color_format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) {
+         LOGI("skip unsupported native format 0x%x", native_formats[i]);
+         continue;
+      }
+
+      aconf = &adpy->configs[adpy->num_configs++];
+      /* only the back buffer */
+      aconf->base.buffer_mask = 1 << NATIVE_ATTACHMENT_BACK_LEFT;
+      aconf->base.color_format = color_format;
+      aconf->base.window_bit = TRUE;
+
+      aconf->base.native_visual_id = native_formats[i];
+      aconf->base.native_visual_type = native_formats[i];
+   }
+
+   return TRUE;
+}
+
+static boolean
+android_display_init_drm(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+   const hw_module_t *mod;
+   int fd, err;
+
+   /* get the authorized fd from gralloc */
+   err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod);
+   if (!err) {
+      const gralloc_module_t *gr = (gralloc_module_t *) mod;
+
+      err = -EINVAL;
+      if (gr->perform)
+         err = gr->perform(gr, GRALLOC_MODULE_PERFORM_GET_DRM_FD, &fd);
+   }
+   if (!err && fd >= 0) {
+      adpy->base.screen =
+         adpy->event_handler->new_drm_screen(&adpy->base, NULL, fd);
+   }
+
+   if (adpy->base.screen) {
+      LOGI("using DRM screen");
+      return TRUE;
+   }
+   else {
+      LOGW("failed to create DRM screen");
+      LOGW("will fall back to other EGL drivers if any");
+      return FALSE;
+   }
+}
+
+static boolean
+android_display_init_sw(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+   struct sw_winsys *ws;
+
+   ws = android_create_sw_winsys();
+   if (ws) {
+      adpy->base.screen =
+         adpy->event_handler->new_sw_screen(&adpy->base, ws);
+   }
+
+   if (adpy->base.screen) {
+      LOGI("using SW screen");
+      return TRUE;
+   }
+   else {
+      LOGE("failed to create SW screen");
+      return FALSE;
+   }
+}
+
+static boolean
+android_display_init_screen(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+
+   if (adpy->use_drm)
+      android_display_init_drm(&adpy->base);
+   else
+      android_display_init_sw(&adpy->base);
+
+   if (!adpy->base.screen)
+      return FALSE;
+
+   if (!android_display_init_configs(&adpy->base)) {
+      adpy->base.screen->destroy(adpy->base.screen);
+      adpy->base.screen = NULL;
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+static void
+android_display_destroy(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+
+   FREE(adpy->configs);
+   if (adpy->base.screen)
+      adpy->base.screen->destroy(adpy->base.screen);
+   FREE(adpy);
+}
+
+static const struct native_config **
+android_display_get_configs(struct native_display *ndpy, int *num_configs)
+{
+   struct android_display *adpy = android_display(ndpy);
+   const struct native_config **configs;
+   int i;
+
+   configs = (const struct native_config **)
+      MALLOC(adpy->num_configs * sizeof(*configs));
+   if (configs) {
+      for (i = 0; i < adpy->num_configs; i++)
+         configs[i] = (const struct native_config *) &adpy->configs[i];
+      if (num_configs)
+         *num_configs = adpy->num_configs;
+   }
+
+   return configs;
+}
+
+static int
+android_display_get_param(struct native_display *ndpy,
+                          enum native_param_type param)
+{
+   int val;
+
+   switch (param) {
+   case NATIVE_PARAM_PRESERVE_BUFFER:
+      val = 1;
+      break;
+   default:
+      val = 0;
+      break;
+   }
+
+   return val;
+}
+
+static struct pipe_resource *
+android_display_import_buffer(struct native_display *ndpy,
+                              struct native_buffer *nbuf)
+{
+   struct android_display *adpy = android_display(ndpy);
+   struct android_native_buffer_t *abuf;
+   enum pipe_format format;
+   struct pipe_resource templ;
+
+   if (nbuf->type != NATIVE_BUFFER_ANDROID)
+      return NULL;
+
+   abuf = nbuf->u.android;
+
+   if (!abuf || abuf->common.magic != ANDROID_NATIVE_BUFFER_MAGIC ||
+       abuf->common.version != sizeof(*abuf)) {
+      LOGE("invalid android native buffer");
+      return NULL;
+   }
+
+   format = get_pipe_format(abuf->format);
+   if (format == PIPE_FORMAT_NONE)
+      return NULL;
+
+   memset(&templ, 0, sizeof(templ));
+   templ.target = PIPE_TEXTURE_2D;
+   templ.format = format;
+   /* assume for texturing only */
+   templ.bind = PIPE_BIND_SAMPLER_VIEW;
+   templ.width0 = abuf->width;
+   templ.height0 = abuf->height;
+   templ.depth0 = 1;
+   templ.array_size = 1;
+
+   return import_buffer(adpy, &templ, abuf);
+}
+
+static boolean
+android_display_export_buffer(struct native_display *ndpy,
+                              struct pipe_resource *res,
+                              struct native_buffer *nbuf)
+{
+   return FALSE;
+}
+
+static struct native_display_buffer android_display_buffer = {
+   android_display_import_buffer,
+   android_display_export_buffer
+};
+
+static struct android_display *
+android_display_create(const struct native_event_handler *event_handler,
+                       boolean use_sw)
+{
+   struct android_display *adpy;
+   char value[PROPERTY_VALUE_MAX];
+   boolean force_sw;
+
+   /* check if SW renderer is forced */
+   if (property_get("debug.mesa.software", value, NULL))
+      force_sw = (atoi(value) != 0);
+   else
+      force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE);
+   if (force_sw)
+      use_sw = TRUE;
+
+   adpy = CALLOC_STRUCT(android_display);
+   if (!adpy)
+      return NULL;
+
+   adpy->event_handler = event_handler;
+   adpy->use_drm = !use_sw;
+
+   adpy->base.init_screen = android_display_init_screen;
+   adpy->base.destroy = android_display_destroy;
+   adpy->base.get_param = android_display_get_param;
+   adpy->base.get_configs = android_display_get_configs;
+   adpy->base.create_window_surface = android_display_create_window_surface;
+
+   adpy->base.buffer = &android_display_buffer;
+
+   return adpy;
+}
+
+static const struct native_event_handler *android_event_handler;
+
+static struct native_display *
+native_create_display(void *dpy, boolean use_sw)
+{
+   struct android_display *adpy;
+
+   adpy = android_display_create(android_event_handler, use_sw);
+
+   return (adpy) ? &adpy->base : NULL;
+}
+
+static const struct native_platform android_platform = {
+   "Android", /* name */
+   native_create_display
+};
+
+}; /* namespace android */
+
+using namespace android;
+
+static void
+android_log(EGLint level, const char *msg)
+{
+   switch (level) {
+   case _EGL_DEBUG:
+      LOGD("%s", msg);
+      break;
+   case _EGL_INFO:
+      LOGI("%s", msg);
+      break;
+   case _EGL_WARNING:
+      LOGW("%s", msg);
+      break;
+   case _EGL_FATAL:
+      LOG_FATAL("%s", msg);
+      break;
+   default:
+      break;
+   }
+}
+
+const struct native_platform *
+native_get_android_platform(const struct native_event_handler *event_handler)
+{
+   android_event_handler = event_handler;
+   /* use Android logger */
+   _eglSetLogProc(android_log);
+
+   return &android_platform;
+}
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c
index 6649f02..b5e3d99 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -132,6 +132,12 @@
          nplat = native_get_fbdev_platform(&egl_g3d_native_event_handler);
 #endif
          break;
+      case _EGL_PLATFORM_ANDROID:
+         plat_name = "Android";
+#ifdef HAVE_ANDROID_BACKEND
+         nplat = native_get_android_platform(&egl_g3d_native_event_handler);
+#endif
+         break;
       default:
          break;
       }
@@ -572,6 +578,11 @@
    if (dpy->Platform == _EGL_PLATFORM_WAYLAND && gdpy->native->buffer)
       dpy->Extensions.MESA_drm_image = EGL_TRUE;
 
+#ifdef EGL_ANDROID_image_native_buffer
+   if (dpy->Platform == _EGL_PLATFORM_ANDROID && gdpy->native->buffer)
+      dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
+#endif
+
 #ifdef EGL_WL_bind_wayland_display
    if (gdpy->native->wayland_bufmgr)
       dpy->Extensions.WL_bind_wayland_display = EGL_TRUE;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
index 7e9a29b..4d90c40 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
@@ -202,6 +202,24 @@
 
 #endif /* EGL_WL_bind_wayland_display */
 
+#ifdef EGL_ANDROID_image_native_buffer
+
+static struct pipe_resource *
+egl_g3d_reference_android_native_buffer(_EGLDisplay *dpy,
+                                        struct android_native_buffer_t *buf)
+{
+   struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+   struct native_buffer nbuf;
+
+   memset(&nbuf, 0, sizeof(nbuf));
+   nbuf.type = NATIVE_BUFFER_ANDROID;
+   nbuf.u.android = buf;
+    
+   return gdpy->native->buffer->import_buffer(gdpy->native, &nbuf);
+}
+
+#endif /* EGL_ANDROID_image_native_buffer */
+
 _EGLImage *
 egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx,
                      EGLenum target, EGLClientBuffer buffer,
@@ -239,6 +257,12 @@
             (struct wl_buffer *) buffer, &gimg->base, attribs);
       break;
 #endif
+#ifdef EGL_ANDROID_image_native_buffer
+   case EGL_NATIVE_BUFFER_ANDROID:
+      ptex = egl_g3d_reference_android_native_buffer(dpy,
+            (struct android_native_buffer_t *) buffer);
+      break;
+#endif
    default:
       ptex = NULL;
       break;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c
index 60c3e33..b839f84 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c
@@ -126,7 +126,7 @@
 }
 
 static void
-pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf)
+pbuffer_allocate_pbuffer_texture(struct egl_g3d_surface *gsurf)
 {
    struct egl_g3d_display *gdpy =
       egl_g3d_display(gsurf->base.Resource.Display);
@@ -141,7 +141,8 @@
    templ.depth0 = 1;
    templ.array_size = 1;
    templ.format = gsurf->stvis.color_format;
-   templ.bind = PIPE_BIND_RENDER_TARGET;
+   /* for rendering and binding to texture */
+   templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
 
    ptex = screen->resource_create(screen, &templ);
    gsurf->render_texture = ptex;
@@ -166,7 +167,7 @@
       if (!gsurf->render_texture) {
          switch (gsurf->client_buffer_type) {
          case EGL_NONE:
-            pbuffer_allocate_render_texture(gsurf);
+            pbuffer_allocate_pbuffer_texture(gsurf);
             break;
          case EGL_OPENVG_IMAGE:
             pbuffer_reference_openvg_image(gsurf);
diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h
index fc50ee4..58593a4 100644
--- a/src/gallium/state_trackers/egl/common/native.h
+++ b/src/gallium/state_trackers/egl/common/native.h
@@ -293,6 +293,9 @@
 const struct native_platform *
 native_get_fbdev_platform(const struct native_event_handler *event_handler);
 
+const struct native_platform *
+native_get_android_platform(const struct native_event_handler *event_handler);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/state_trackers/egl/common/native_buffer.h b/src/gallium/state_trackers/egl/common/native_buffer.h
index b8a66d1..503ed58 100644
--- a/src/gallium/state_trackers/egl/common/native_buffer.h
+++ b/src/gallium/state_trackers/egl/common/native_buffer.h
@@ -33,9 +33,11 @@
 #include "pipe/p_state.h"
 
 struct native_display;
+struct android_native_buffer_t;
 
 enum native_buffer_type {
    NATIVE_BUFFER_DRM,
+   NATIVE_BUFFER_ANDROID,
 
    NUM_NATIVE_BUFFERS
 };
@@ -50,6 +52,8 @@
          unsigned handle; /**< the handle of the GEM object */
          unsigned stride;
       } drm;
+
+      struct android_native_buffer_t *android; /**< opaque native buffer */
    } u;
 };
 
diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c
index 47910de..c013769 100644
--- a/src/gallium/state_trackers/egl/drm/native_drm.c
+++ b/src/gallium/state_trackers/egl/drm/native_drm.c
@@ -134,8 +134,11 @@
    if (drmdpy->device_name)
       FREE(drmdpy->device_name);
 
-   if (drmdpy->fd >= 0)
-      close(drmdpy->fd);
+   if (drmdpy->own_gbm) {
+      gbm_device_destroy(&drmdpy->gbmdrm->base.base);
+      if (drmdpy->fd >= 0)
+         close(drmdpy->fd);
+   }
 
    FREE(drmdpy);
 }
@@ -258,7 +261,7 @@
 }
 
 static struct native_display *
-drm_create_display(struct gbm_gallium_drm_device *gbmdrm,
+drm_create_display(struct gbm_gallium_drm_device *gbmdrm, int own_gbm,
                    const struct native_event_handler *event_handler)
 {
    struct drm_display *drmdpy;
@@ -267,6 +270,8 @@
    if (!drmdpy)
       return NULL;
 
+   drmdpy->gbmdrm = gbmdrm;
+   drmdpy->own_gbm = own_gbm;
    drmdpy->fd = gbmdrm->base.base.fd;
    drmdpy->device_name = drm_get_device_name(drmdpy->fd);
 
@@ -302,22 +307,30 @@
 {
    struct gbm_gallium_drm_device *gbm;
    int fd;
+   int own_gbm = 0;
 
    gbm = dpy;
 
    if (gbm == NULL) {
       fd = open("/dev/dri/card0", O_RDWR);
+      /* FIXME: Use an internal constructor to create a gbm
+       * device with gallium backend directly, without setenv */
+      setenv("GBM_BACKEND", "gbm_gallium_drm.so", 1);
       gbm = gbm_gallium_drm_device(gbm_create_device(fd));
+      own_gbm = 1;
    }
 
    if (gbm == NULL)
       return NULL;
    
    if (strcmp(gbm_device_get_backend_name(&gbm->base.base), "drm") != 0 ||
-       gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM)
+       gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM) {
+      if (own_gbm)
+         gbm_device_destroy(&gbm->base.base);
       return NULL;
+   }
 
-   return drm_create_display(gbm, drm_event_handler);
+   return drm_create_display(gbm, own_gbm, drm_event_handler);
 }
 
 static const struct native_platform drm_platform = {
diff --git a/src/gallium/state_trackers/egl/drm/native_drm.h b/src/gallium/state_trackers/egl/drm/native_drm.h
index 675a58a..18cebf4 100644
--- a/src/gallium/state_trackers/egl/drm/native_drm.h
+++ b/src/gallium/state_trackers/egl/drm/native_drm.h
@@ -41,6 +41,8 @@
 #include "common/native_wayland_drm_bufmgr_helper.h"
 #endif
 
+#include "gbm_gallium_drmint.h"
+
 struct drm_config;
 struct drm_crtc;
 struct drm_connector;
@@ -52,6 +54,8 @@
 
    const struct native_event_handler *event_handler;
 
+   struct gbm_gallium_drm_device *gbmdrm;
+   int own_gbm;
    int fd;
    char *device_name;
    struct drm_config *config;
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 269c7a4..50d63ea 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -82,13 +82,22 @@
       goto error_decoder;
    }
 
+   vldecoder->num_buffers = pipe->screen->get_video_param
+   (
+      pipe->screen, p_profile,
+      PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED
+   );
    vldecoder->cur_buffer = 0;
 
-   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) {
-      vldecoder->buffer[i] = vldecoder->decoder->create_buffer(vldecoder->decoder);
-      if (!vldecoder->buffer[i]) {
+   vldecoder->buffers = CALLOC(vldecoder->num_buffers, sizeof(void*));
+   if (!vldecoder->buffers)
+         goto error_alloc_buffers;
+
+   for (i = 0; i < vldecoder->num_buffers; ++i) {
+      vldecoder->buffers[i] = vldecoder->decoder->create_buffer(vldecoder->decoder);
+      if (!vldecoder->buffers[i]) {
          ret = VDP_STATUS_ERROR;
-         goto error_buffer;
+         goto error_create_buffers;
       }
    }
 
@@ -103,11 +112,15 @@
    return VDP_STATUS_OK;
 
 error_handle:
-error_buffer:
+error_create_buffers:
 
-   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i)
-      if (vldecoder->buffer[i])
-         vldecoder->buffer[i]->destroy(vldecoder->buffer[i]);
+   for (i = 0; i < vldecoder->num_buffers; ++i)
+      if (vldecoder->buffers[i])
+         vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffers[i]);
+
+   FREE(vldecoder->buffers);
+
+error_alloc_buffers:
 
    vldecoder->decoder->destroy(vldecoder->decoder);
 
@@ -128,9 +141,11 @@
    if (!vldecoder)
       return VDP_STATUS_INVALID_HANDLE;
 
-   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i)
-      if (vldecoder->buffer[i])
-         vldecoder->buffer[i]->destroy(vldecoder->buffer[i]);
+   for (i = 0; i < vldecoder->num_buffers; ++i)
+      if (vldecoder->buffers[i])
+         vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffers[i]);
+
+   FREE(vldecoder->buffers);
 
    vldecoder->decoder->destroy(vldecoder->decoder);
 
@@ -161,38 +176,37 @@
 }
 
 static VdpStatus
-vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
-                        struct pipe_video_decode_buffer *buffer,
-                        struct pipe_video_buffer *target,
-                        VdpPictureInfoMPEG1Or2 *picture_info,
-                        uint32_t bitstream_buffer_count,
-                        VdpBitstreamBuffer const *bitstream_buffers)
+vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
+                         VdpPictureInfoMPEG1Or2 *picture_info,
+                         uint32_t bitstream_buffer_count,
+                         VdpBitstreamBuffer const *bitstream_buffers)
 {
    struct pipe_mpeg12_picture_desc picture;
+   struct pipe_mpeg12_quant_matrix quant;
    struct pipe_video_buffer *ref_frames[2];
-   uint8_t intra_quantizer_matrix[64];
-   unsigned num_ycbcr_blocks[3] = { 0, 0, 0 };
    unsigned i;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoding MPEG2\n");
 
+   i = 0;
+
    /* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
-   if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
-      ref_frames[0] = NULL;
-   else {
-      ref_frames[0] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer;
-      if (!ref_frames[0])
+   if (picture_info->forward_reference !=  VDP_INVALID_HANDLE) {
+      ref_frames[i] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer;
+      if (!ref_frames[i])
          return VDP_STATUS_INVALID_HANDLE;
+      ++i;
    }
 
-   if (picture_info->backward_reference ==  VDP_INVALID_HANDLE)
-      ref_frames[1] = NULL;
-   else {
-      ref_frames[1] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer;
-      if (!ref_frames[1])
+   if (picture_info->backward_reference !=  VDP_INVALID_HANDLE) {
+      ref_frames[i] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer;
+      if (!ref_frames[i])
          return VDP_STATUS_INVALID_HANDLE;
+      ++i;
    }
 
+   decoder->set_reference_frames(decoder, ref_frames, i);
+
    memset(&picture, 0, sizeof(picture));
    picture.base.profile = decoder->profile;
    picture.picture_coding_type = picture_info->picture_coding_type;
@@ -202,24 +216,28 @@
    picture.alternate_scan = picture_info->alternate_scan;
    picture.intra_vlc_format = picture_info->intra_vlc_format;
    picture.concealment_motion_vectors = picture_info->concealment_motion_vectors;
+   picture.intra_dc_precision = picture_info->intra_dc_precision;
    picture.f_code[0][0] = picture_info->f_code[0][0] - 1;
    picture.f_code[0][1] = picture_info->f_code[0][1] - 1;
    picture.f_code[1][0] = picture_info->f_code[1][0] - 1;
    picture.f_code[1][1] = picture_info->f_code[1][1] - 1;
 
-   buffer->begin_frame(buffer);
+   decoder->set_picture_parameters(decoder, &picture.base);
 
-   memcpy(intra_quantizer_matrix, picture_info->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
-   intra_quantizer_matrix[0] = 1 << (7 - picture_info->intra_dc_precision);
-   buffer->set_quant_matrix(buffer, intra_quantizer_matrix, picture_info->non_intra_quantizer_matrix);
+   memset(&quant, 0, sizeof(quant));
+   quant.base.codec = PIPE_VIDEO_CODEC_MPEG12;
+   quant.intra_matrix = picture_info->intra_quantizer_matrix;
+   quant.non_intra_matrix = picture_info->non_intra_quantizer_matrix;
+
+   decoder->set_quant_matrix(decoder, &quant.base);
+
+   decoder->begin_frame(decoder);
 
    for (i = 0; i < bitstream_buffer_count; ++i)
-      buffer->decode_bitstream(buffer, bitstream_buffers[i].bitstream_bytes,
-                               bitstream_buffers[i].bitstream, &picture.base, num_ycbcr_blocks);
+      decoder->decode_bitstream(decoder, bitstream_buffers[i].bitstream_bytes,
+                                bitstream_buffers[i].bitstream);
 
-   buffer->end_frame(buffer);
-
-   decoder->flush_buffer(buffer, num_ycbcr_blocks, ref_frames, target);
+   decoder->end_frame(decoder);
 
    return VDP_STATUS_OK;
 }
@@ -254,17 +272,19 @@
       // TODO: Recreate decoder with correct chroma
       return VDP_STATUS_INVALID_CHROMA_TYPE;
 
-   // TODO: Right now only mpeg2 is supported.
+   // TODO: Right now only mpeg 1 & 2 is supported.
    switch (vldecoder->decoder->profile)   {
+   case PIPE_VIDEO_PROFILE_MPEG1:
    case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
    case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
       ++vldecoder->cur_buffer;
-      vldecoder->cur_buffer %= VL_NUM_DECODE_BUFFERS;
-      return vlVdpDecoderRenderMpeg2(vldecoder->decoder,
-                                     vldecoder->buffer[vldecoder->cur_buffer],
-                                     vlsurf->video_buffer,
-                                     (VdpPictureInfoMPEG1Or2 *)picture_info,
-                                     bitstream_buffer_count,bitstream_buffers);
+      vldecoder->cur_buffer %= vldecoder->num_buffers;
+
+      vldecoder->decoder->set_decode_buffer(vldecoder->decoder, vldecoder->buffers[vldecoder->cur_buffer]);
+      vldecoder->decoder->set_decode_target(vldecoder->decoder, vlsurf->video_buffer);
+
+      return vlVdpDecoderRenderMpeg12(vldecoder->decoder, (VdpPictureInfoMPEG1Or2 *)picture_info,
+                                      bitstream_buffer_count, bitstream_buffers);
       break;
 
    default:
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index d518700..fbd24a2 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -157,8 +157,7 @@
 
    vl_compositor_clear_layers(&vmixer->compositor);
    vl_compositor_set_buffer_layer(&vmixer->compositor, 0, surf->video_buffer, NULL, NULL);
-   vl_compositor_render(&vmixer->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
-                        dst->surface, NULL, NULL);
+   vl_compositor_render(&vmixer->compositor, dst->surface, NULL, NULL);
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 1176c7a..7e324db 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -169,8 +169,7 @@
 
    vl_compositor_clear_layers(&pq->compositor);
    vl_compositor_set_rgba_layer(&pq->compositor, 0, surf->sampler_view, NULL, NULL);
-   vl_compositor_render(&pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
-                        drawable_surface, NULL, NULL);
+   vl_compositor_render(&pq->compositor, drawable_surface, NULL, NULL);
 
    pq->device->context->pipe->screen->flush_frontbuffer
    (
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index e5d9456..5482eff 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -46,7 +46,6 @@
 #define TOSTRING(x) QUOTEME(x)
 #define INFORMATION_STRING TOSTRING(INFORMATION)
 #define VL_HANDLES
-#define VL_NUM_DECODE_BUFFERS 4
 
 static inline enum pipe_video_chroma_format
 ChromaToPipe(VdpChromaType vdpau_type)
@@ -256,7 +255,8 @@
 {
    vlVdpDevice *device;
    struct pipe_video_decoder *decoder;
-   struct pipe_video_decode_buffer *buffer[VL_NUM_DECODE_BUFFERS];
+   unsigned num_buffers;
+   void **buffers;
    unsigned cur_buffer;
 } vlVdpDecoder;
 
diff --git a/src/gallium/state_trackers/xorg/SConscript b/src/gallium/state_trackers/xorg/SConscript
index 4ea4ec4..1768f70 100644
--- a/src/gallium/state_trackers/xorg/SConscript
+++ b/src/gallium/state_trackers/xorg/SConscript
@@ -32,6 +32,7 @@
     'xorg_output.c',
     'xorg_renderer.c',
     'xorg_xv.c',
+    'xorg_xvmc.c',
 ]
 
 st_xorg = env.ConvenienceLibrary(
diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c
index f696b72..61ba6bd 100644
--- a/src/gallium/state_trackers/xorg/xorg_composite.c
+++ b/src/gallium/state_trackers/xorg/xorg_composite.c
@@ -4,6 +4,7 @@
 #include "xorg_exa_tgsi.h"
 
 #include "cso_cache/cso_context.h"
+#include "util/u_format.h"
 #include "util/u_sampler.h"
 
 
@@ -52,18 +53,17 @@
 
 
 static INLINE void
-pixel_to_float4(Pixel pixel, float *color)
+pixel_to_float4(Pixel pixel, float *color, enum pipe_format format)
 {
-   CARD32	    r, g, b, a;
+   const struct util_format_description *format_desc;
+   uint8_t packed[4];
 
-   a = (pixel >> 24) & 0xff;
-   r = (pixel >> 16) & 0xff;
-   g = (pixel >>  8) & 0xff;
-   b = (pixel >>  0) & 0xff;
-   color[0] = ((float)r) / 255.;
-   color[1] = ((float)g) / 255.;
-   color[2] = ((float)b) / 255.;
-   color[3] = ((float)a) / 255.;
+   format_desc = util_format_description(format);
+   packed[0] = pixel;
+   packed[1] = pixel >> 8;
+   packed[2] = pixel >> 16;
+   packed[3] = pixel >> 24;
+   format_desc->unpack_rgba_float(color, 0, packed, 0, 1, 1);
 }
 
 static boolean
@@ -311,7 +311,7 @@
             vs_traits |= VS_SOLID_FILL;
             debug_assert(pSrcPicture->format == PICT_a8r8g8b8);
             pixel_to_float4(pSrcPicture->pSourcePict->solidFill.color,
-                            exa->solid_color);
+                            exa->solid_color, PIPE_FORMAT_B8G8R8A8_UNORM);
             exa->has_solid_color = TRUE;
          } else {
             debug_assert("!gradients not supported");
@@ -533,7 +533,7 @@
    unsigned vs_traits, fs_traits;
    struct xorg_shader shader;
 
-   pixel_to_float4(fg, exa->solid_color);
+   pixel_to_float4(fg, exa->solid_color, pixmap->tex->format);
    exa->has_solid_color = TRUE;
 
 #if 0
diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index 6f2c52e..3350ac7 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -372,13 +372,15 @@
     save_accel = ms->exa->accel;
     ms->exa->accel = TRUE;
 
-    /* In case it won't be though, make sure the GPU copy contents of the
-     * source pixmap will be used for the software fallback - presumably the
-     * client modified them before calling in here.
-     */
-    exaMoveInPixmap(src_priv->pPixmap);
-    DamageRegionAppend(src_draw, pRegion);
-    DamageRegionProcessPending(src_draw);
+    if (pSrcBuffer->attachment != DRI2BufferFrontLeft) {
+	/* In case it won't be though, make sure the GPU copy contents of the
+	 * source pixmap will be used for the software fallback - presumably the
+	 * client modified them before calling in here.
+	 */
+	exaMoveInPixmap(src_priv->pPixmap);
+	DamageRegionAppend(src_draw, pRegion);
+	DamageRegionProcessPending(src_draw);
+    }
 
    if (cust && cust->winsys_context_throttle)
        cust->winsys_context_throttle(cust, ms->ctx, THROTTLE_SWAP);
diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c
index 063ae92..0ade319 100644
--- a/src/gallium/state_trackers/xorg/xorg_driver.c
+++ b/src/gallium/state_trackers/xorg/xorg_driver.c
@@ -817,7 +817,7 @@
 				&ms->swapThrottling) ?
 	X_CONFIG : X_DEFAULT;
 
-    ms->dirtyThrottling = cust ?  cust->dirty_throttling : TRUE;
+    ms->dirtyThrottling = cust ?  cust->dirty_throttling : FALSE;
     from_dt = xf86GetOptValBool(ms->Options, OPTION_THROTTLE_DIRTY,
 				&ms->dirtyThrottling) ?
 	X_CONFIG : X_DEFAULT;
diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h
index 664e8c7..84a3a2f 100644
--- a/src/gallium/state_trackers/xorg/xorg_tracker.h
+++ b/src/gallium/state_trackers/xorg/xorg_tracker.h
@@ -222,4 +222,11 @@
 xorg_xv_init(ScreenPtr pScreen);
 
 
+/***********************************************************************
+ * xorg_xvmc.c
+ */
+void
+xorg_xvmc_init(ScreenPtr pScreen, char *name);
+
+
 #endif /* _XORG_TRACKER_H_ */
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index af4992f..67fd6df 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -750,6 +750,8 @@
 
    if (num_adaptors) {
       xf86XVScreenInit(pScreen, adaptors, num_adaptors);
+      if (textured_adapter)
+         xorg_xvmc_init(pScreen, textured_adapter->name);
    } else {
       xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
                  "Disabling Xv because no adaptors could be initialized.\n");
diff --git a/src/gallium/state_trackers/xorg/xorg_xvmc.c b/src/gallium/state_trackers/xorg/xorg_xvmc.c
new file mode 100644
index 0000000..0f3f3f0
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xorg_xvmc.c
@@ -0,0 +1,119 @@
+#include "xorg_tracker.h"
+
+#include <xf86.h>
+#include <xf86xv.h>
+#include <xf86xvmc.h>
+#include <X11/extensions/Xv.h>
+#include <X11/extensions/XvMC.h>
+#include <fourcc.h>
+
+#define FOURCC_RGB 0x0000003
+#define XVIMAGE_RGB								\
+{										\
+	FOURCC_RGB,								\
+	XvRGB,									\
+	LSBFirst,								\
+	{									\
+		'R', 'G', 'B', 0x00,						\
+		0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71	\
+	},									\
+	32,									\
+	XvPacked,								\
+	1,									\
+	24, 0x00FF0000, 0x0000FF00, 0x000000FF,					\
+	0, 0, 0,								\
+	0, 0, 0,								\
+	0, 0, 0,								\
+	{									\
+		'B','G','R','X',						\
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0		\
+	},									\
+	XvTopToBottom								\
+}
+
+static int subpicture_index_list[] = {
+   FOURCC_RGB,
+   FOURCC_IA44,
+   FOURCC_AI44
+};
+
+static XF86MCImageIDList subpicture_list =
+{
+   sizeof(subpicture_index_list)/sizeof(*subpicture_index_list),
+   subpicture_index_list
+};
+
+static XF86MCSurfaceInfoRec yv12_mpeg2_surface =
+{
+   FOURCC_I420,
+   XVMC_CHROMA_FORMAT_420,
+   0,
+   2048, 2048, 2048, 2048,
+   XVMC_IDCT | XVMC_MPEG_2,
+   XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE,
+   &subpicture_list
+};
+
+static const XF86MCSurfaceInfoRec uyvy_mpeg2_surface =
+{
+   FOURCC_UYVY,
+   XVMC_CHROMA_FORMAT_422,
+   0,
+   2048, 2048, 2048, 2048,
+   XVMC_IDCT | XVMC_MPEG_2,
+   XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE,
+   &subpicture_list
+};
+
+static XF86MCSurfaceInfoPtr surfaces[] =
+{
+   (XF86MCSurfaceInfoPtr)&yv12_mpeg2_surface,
+   (XF86MCSurfaceInfoPtr)&uyvy_mpeg2_surface
+};
+
+static const XF86ImageRec rgb_subpicture = XVIMAGE_RGB;
+static const XF86ImageRec ia44_subpicture = XVIMAGE_IA44;
+static const XF86ImageRec ai44_subpicture = XVIMAGE_AI44;
+
+static XF86ImagePtr subpictures[] =
+{
+   (XF86ImagePtr)&rgb_subpicture,
+   (XF86ImagePtr)&ia44_subpicture,
+   (XF86ImagePtr)&ai44_subpicture
+};
+
+static const XF86MCAdaptorRec adaptor_template =
+{
+   "",
+   sizeof(surfaces)/sizeof(*surfaces),
+   surfaces,
+   sizeof(subpictures)/sizeof(*subpictures),
+   subpictures,
+   (xf86XvMCCreateContextProcPtr)NULL,
+   (xf86XvMCDestroyContextProcPtr)NULL,
+   (xf86XvMCCreateSurfaceProcPtr)NULL,
+   (xf86XvMCDestroySurfaceProcPtr)NULL,
+   (xf86XvMCCreateSubpictureProcPtr)NULL,
+   (xf86XvMCDestroySubpictureProcPtr)NULL
+};
+
+void
+xorg_xvmc_init(ScreenPtr pScreen, char *name)
+{
+   ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+   XF86MCAdaptorPtr adaptorXvMC = xf86XvMCCreateAdaptorRec();
+   if (!adaptorXvMC)
+      return;
+
+   *adaptorXvMC = adaptor_template;
+   adaptorXvMC->name = name;
+   xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+              "[XvMC] Associated with %s.\n", name);
+   if (!xf86XvMCScreenInit(pScreen, 1, &adaptorXvMC))
+      xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+                 "[XvMC] Failed to initialize extension.\n");
+   else
+      xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+                 "[XvMC] Extension initialized.\n");
+   xf86XvMCDestroyAdaptorRec(adaptorXvMC);
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 0c53b73..79bd9c6 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -42,266 +42,125 @@
 
 #include "xvmc_private.h"
 
-static const unsigned const_empty_block_mask_420[3][2][2] = {
-   { { 0x20, 0x10 },  { 0x08, 0x04 } },
-   { { 0x02, 0x02 },  { 0x02, 0x02 } },
-   { { 0x01, 0x01 },  { 0x01, 0x01 } }
-};
-
-static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
-{
-   switch (xvmc_pic) {
-      case XVMC_TOP_FIELD:
-         return PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP;
-      case XVMC_BOTTOM_FIELD:
-         return PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM;
-      case XVMC_FRAME_PICTURE:
-         return PIPE_MPEG12_PICTURE_TYPE_FRAME;
-      default:
-         assert(0);
-   }
-
-   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized picture type 0x%08X.\n", xvmc_pic);
-
-   return -1;
-}
-
-static inline void
-MacroBlockTypeToPipeWeights(const XvMCMacroBlock *xvmc_mb, unsigned weights[2])
-{
-   assert(xvmc_mb);
-
-   switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) {
-   case XVMC_MB_TYPE_MOTION_FORWARD:
-      weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
-      weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
-      break;
-
-   case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD):
-      weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF;
-      weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF;
-      break;
-
-   case XVMC_MB_TYPE_MOTION_BACKWARD:
-      weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
-      weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX;
-      break;
-
-   default:
-      /* workaround for xines xxmc video out plugin */
-      if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) {
-         weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
-         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
-      } else {
-         weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
-         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
-      }
-      break;
-   }
-}
-
-static inline struct pipe_motionvector
-MotionVectorToPipe(const XvMCMacroBlock *xvmc_mb, unsigned vector,
-                   unsigned field_select_mask, unsigned weight)
-{
-   struct pipe_motionvector mv;
-
-   assert(xvmc_mb);
-
-   switch (xvmc_mb->motion_type) {
-   case XVMC_PREDICTION_FRAME:
-      mv.top.x = xvmc_mb->PMV[0][vector][0];
-      mv.top.y = xvmc_mb->PMV[0][vector][1];
-      mv.top.field_select = PIPE_VIDEO_FRAME;
-      mv.top.weight = weight;
-
-      mv.bottom.x = xvmc_mb->PMV[0][vector][0];
-      mv.bottom.y = xvmc_mb->PMV[0][vector][1];
-      mv.bottom.weight = weight;
-      mv.bottom.field_select = PIPE_VIDEO_FRAME;
-      break;
-
-   case XVMC_PREDICTION_FIELD:
-      mv.top.x = xvmc_mb->PMV[0][vector][0];
-      mv.top.y = xvmc_mb->PMV[0][vector][1];
-      mv.top.field_select = (xvmc_mb->motion_vertical_field_select & field_select_mask) ?
-         PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
-      mv.top.weight = weight;
-
-      mv.bottom.x = xvmc_mb->PMV[1][vector][0];
-      mv.bottom.y = xvmc_mb->PMV[1][vector][1];
-      mv.bottom.field_select = (xvmc_mb->motion_vertical_field_select & (field_select_mask << 2)) ?
-         PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
-      mv.bottom.weight = weight;
-      break;
-
-   default: // TODO: Support DUALPRIME and 16x8
-      break;
-   }
-
-   return mv;
-}
-
-static inline void
-UploadYcbcrBlocks(XvMCSurfacePrivate *surface,
-                  const XvMCMacroBlock *xvmc_mb,
-                  const XvMCBlockArray *xvmc_blocks)
-{
-   enum pipe_mpeg12_dct_intra intra;
-   enum pipe_mpeg12_dct_type coding;
-
-   unsigned tb, x, y, luma_blocks;
-   short *blocks;
-
-   assert(surface);
-   assert(xvmc_mb);
-
-   if (!xvmc_mb->coded_block_pattern)
-      return;
-
-   intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA ?
-           PIPE_MPEG12_DCT_INTRA : PIPE_MPEG12_DCT_DELTA;
-
-   coding = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
-            PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
-
-   blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
-
-   for (y = 0, luma_blocks = 0; y < 2; ++y) {
-      for (x = 0; x < 2; ++x, ++tb) {
-         if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) {
-
-            struct pipe_ycbcr_block *stream = surface->ycbcr[0].stream;
-            stream->x = xvmc_mb->x * 2 + x;
-            stream->y = xvmc_mb->y * 2 + y;
-            stream->intra = intra;
-            stream->coding = coding;
-
-            surface->ycbcr[0].num_blocks_added++;
-            surface->ycbcr[0].stream++;
-
-            luma_blocks++;
-         }
-      }
-   }
-
-   if (luma_blocks > 0) {
-      memcpy(surface->ycbcr[0].buffer, blocks, BLOCK_SIZE_BYTES * luma_blocks);
-      surface->ycbcr[0].buffer += BLOCK_SIZE_SAMPLES * luma_blocks;
-      blocks += BLOCK_SIZE_SAMPLES * luma_blocks;
-   }
-
-   /* TODO: Implement 422, 444 */
-   //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-
-   for (tb = 1; tb < 3; ++tb) {
-      if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) {
-
-         struct pipe_ycbcr_block *stream = surface->ycbcr[tb].stream;
-         stream->x = xvmc_mb->x;
-         stream->y = xvmc_mb->y;
-         stream->intra = intra;
-         stream->coding = PIPE_MPEG12_DCT_TYPE_FRAME;
-
-         memcpy(surface->ycbcr[tb].buffer, blocks, BLOCK_SIZE_BYTES);
-
-         surface->ycbcr[tb].num_blocks_added++;
-         surface->ycbcr[tb].stream++;
-         surface->ycbcr[tb].buffer += BLOCK_SIZE_SAMPLES;
-         blocks += BLOCK_SIZE_SAMPLES;
-      }
-   }
-
-}
-
 static void
-MacroBlocksToPipe(XvMCSurfacePrivate *surface,
+MacroBlocksToPipe(XvMCContextPrivate *context,
+                  XvMCSurfacePrivate *surface,
                   unsigned int xvmc_picture_structure,
                   const XvMCMacroBlock *xvmc_mb,
                   const XvMCBlockArray *xvmc_blocks,
+                  struct pipe_mpeg12_macroblock *mb,
                   unsigned int num_macroblocks)
 {
-   unsigned int i, j;
+   unsigned int i, j, k;
 
    assert(xvmc_mb);
    assert(xvmc_blocks);
    assert(num_macroblocks);
 
-   for (i = 0; i < num_macroblocks; ++i) {
-      unsigned mv_pos = xvmc_mb->x + surface->mv_stride * xvmc_mb->y;
-      unsigned mv_weights[2];
+   for (; num_macroblocks > 0; --num_macroblocks) {
+      mb->base.codec = PIPE_VIDEO_CODEC_MPEG12;
+      mb->x = xvmc_mb->x;
+      mb->y = xvmc_mb->y;
+      mb->macroblock_type = xvmc_mb->macroblock_type;
 
-      if (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_PATTERN | XVMC_MB_TYPE_INTRA))
-         UploadYcbcrBlocks(surface, xvmc_mb, xvmc_blocks);
+      switch (xvmc_picture_structure) {
+      case XVMC_FRAME_PICTURE:
+         mb->macroblock_modes.bits.frame_motion_type = xvmc_mb->motion_type;
+         mb->macroblock_modes.bits.field_motion_type = 0;
+         break;
 
-      MacroBlockTypeToPipeWeights(xvmc_mb, mv_weights);
+      case XVMC_TOP_FIELD:
+      case XVMC_BOTTOM_FIELD:
+         mb->macroblock_modes.bits.frame_motion_type = 0;
+         mb->macroblock_modes.bits.field_motion_type = xvmc_mb->motion_type;
+         break;
 
-      for (j = 0; j < 2; ++j) {
-         if (!surface->ref[j].mv) continue;
-
-         surface->ref[j].mv[mv_pos] = MotionVectorToPipe
-         (
-            xvmc_mb, j,
-            j ? XVMC_SELECT_FIRST_BACKWARD : XVMC_SELECT_FIRST_FORWARD,
-            mv_weights[j]
-         );
+      default:
+         assert(0);
       }
 
+      mb->macroblock_modes.bits.dct_type = xvmc_mb->dct_type;
+      mb->motion_vertical_field_select = xvmc_mb->motion_vertical_field_select;
+
+      for (i = 0; i < 2; ++i)
+         for (j = 0; j < 2; ++j)
+            for (k = 0; k < 2; ++k)
+               mb->PMV[i][j][k] = xvmc_mb->PMV[i][j][k];
+
+      mb->coded_block_pattern = xvmc_mb->coded_block_pattern;
+      mb->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
+      mb->num_skipped_macroblocks = 0;
+
       ++xvmc_mb;
+      ++mb;
    }
 }
 
 static void
-unmap_and_flush_surface(XvMCSurfacePrivate *surface)
+SetDecoderStatus(XvMCSurfacePrivate *surface)
 {
+   struct pipe_video_decoder *decoder;
    struct pipe_video_buffer *ref_frames[2];
+
    XvMCContextPrivate *context_priv;
-   unsigned i, num_ycbcr_blocks[3];
+
+   unsigned i, num_refs = 0;
+
+   assert(surface);
+
+   context_priv = surface->context->privData;
+   decoder = context_priv->decoder;
+
+   decoder->set_decode_buffer(decoder, surface->decode_buffer);
+   decoder->set_decode_target(decoder, surface->video_buffer);
+
+   for (i = 0; i < 2; ++i) {
+      if (surface->ref[i]) {
+         XvMCSurfacePrivate *ref = surface->ref[i]->privData;
+
+         if (ref)
+            ref_frames[num_refs++] = ref->video_buffer;
+      }
+   }
+   decoder->set_reference_frames(decoder, ref_frames, num_refs);
+}
+
+static void
+RecursiveEndFrame(XvMCSurfacePrivate *surface)
+{
+   XvMCContextPrivate *context_priv;
+   unsigned i;
 
    assert(surface);
 
    context_priv = surface->context->privData;
 
    for ( i = 0; i < 2; ++i ) {
-      if (surface->ref[i].surface) {
-         XvMCSurfacePrivate *ref = surface->ref[i].surface->privData;
+      if (surface->ref[i]) {
+         XvMCSurface *ref = surface->ref[i];
 
          assert(ref);
 
-         unmap_and_flush_surface(ref);
-         surface->ref[i].surface = NULL;
-         ref_frames[i] = ref->video_buffer;
-      } else {
-         ref_frames[i] = NULL;
+         surface->ref[i] = NULL;
+         RecursiveEndFrame(ref->privData);
+         surface->ref[i] = ref;
       }
    }
 
-   if (surface->mapped) {
-      surface->decode_buffer->end_frame(surface->decode_buffer);
-      for (i = 0; i < 3; ++i)
-         num_ycbcr_blocks[i] = surface->ycbcr[i].num_blocks_added;
-      context_priv->decoder->flush_buffer(surface->decode_buffer,
-                                          num_ycbcr_blocks,
-                                          ref_frames,
-                                          surface->video_buffer);
-      surface->mapped = 0;
+   if (surface->frame_started) {
+      surface->frame_started = 0;
+      SetDecoderStatus(surface);
+
+      for (i = 0; i < 2; ++i)
+         surface->ref[i] = NULL;
+
+      context_priv->decoder->end_frame(context_priv->decoder);
    }
 }
 
 PUBLIC
 Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface)
 {
-   static const uint8_t dummy_quant[64] = {
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
-   };
-
    XvMCContextPrivate *context_priv;
    struct pipe_context *pipe;
    XvMCSurfacePrivate *surface_priv;
@@ -323,9 +182,6 @@
       return BadAlloc;
 
    surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
-   surface_priv->decode_buffer->set_quant_matrix(surface_priv->decode_buffer, dummy_quant, dummy_quant);
-
-   surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer);
    surface_priv->video_buffer = pipe->create_video_buffer
    (
       pipe, PIPE_FORMAT_NV12, context_priv->decoder->chroma_format,
@@ -355,15 +211,15 @@
                          XvMCMacroBlockArray *macroblocks, XvMCBlockArray *blocks
 )
 {
-   struct pipe_video_decode_buffer *t_buffer;
+   struct pipe_mpeg12_macroblock mb[num_macroblocks];
+   struct pipe_video_decoder *decoder;
 
+   XvMCContextPrivate *context_priv;
    XvMCSurfacePrivate *target_surface_priv;
    XvMCSurfacePrivate *past_surface_priv;
    XvMCSurfacePrivate *future_surface_priv;
    XvMCMacroBlock *xvmc_mb;
 
-   unsigned i;
-
    XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p, with past %p and future %p\n",
             target_surface, past_surface, future_surface);
 
@@ -394,6 +250,9 @@
 
    assert(flags == 0 || flags == XVMC_SECOND_FIELD);
 
+   context_priv = context->privData;
+   decoder = context_priv->decoder;
+
    target_surface_priv = target_surface->privData;
    past_surface_priv = past_surface ? past_surface->privData : NULL;
    future_surface_priv = future_surface ? future_surface->privData : NULL;
@@ -402,50 +261,39 @@
    assert(!past_surface || past_surface_priv->context == context);
    assert(!future_surface || future_surface_priv->context == context);
 
-   t_buffer = target_surface_priv->decode_buffer;
-
-   // enshure that all reference frames are flushed
-   // not really nessasary, but speeds ups rendering
+   // call end frame on all referenced frames
    if (past_surface)
-      unmap_and_flush_surface(past_surface->privData);
+      RecursiveEndFrame(past_surface->privData);
 
    if (future_surface)
-      unmap_and_flush_surface(future_surface->privData);
+      RecursiveEndFrame(future_surface->privData);
 
    xvmc_mb = macroblocks->macro_blocks + first_macroblock;
 
    /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
-   if (target_surface_priv->mapped && (
-       target_surface_priv->ref[0].surface != past_surface ||
-       target_surface_priv->ref[1].surface != future_surface ||
+   if (target_surface_priv->frame_started && (
+       target_surface_priv->ref[0] != past_surface ||
+       target_surface_priv->ref[1] != future_surface ||
        (xvmc_mb->x == 0 && xvmc_mb->y == 0))) {
 
-      // If they change anyway we need to clear our surface
-      unmap_and_flush_surface(target_surface_priv);
+      // If they change anyway we must assume that the current frame is ended
+      RecursiveEndFrame(target_surface_priv);
    }
 
-   if (!target_surface_priv->mapped) {
-      t_buffer->begin_frame(t_buffer);
+   target_surface_priv->ref[0] = past_surface;
+   target_surface_priv->ref[1] = future_surface;
 
-      for (i = 0; i < 3; ++i) {
-         target_surface_priv->ycbcr[i].num_blocks_added = 0;
-         target_surface_priv->ycbcr[i].stream = t_buffer->get_ycbcr_stream(t_buffer, i);
-         target_surface_priv->ycbcr[i].buffer = t_buffer->get_ycbcr_buffer(t_buffer, i);
-      }
+   SetDecoderStatus(target_surface_priv);
 
-      for (i = 0; i < 2; ++i) {
-         target_surface_priv->ref[i].surface = i == 0 ? past_surface : future_surface;
-
-         if (target_surface_priv->ref[i].surface)
-            target_surface_priv->ref[i].mv = t_buffer->get_mv_stream(t_buffer, i);
-         else
-            target_surface_priv->ref[i].mv = NULL;
-      }
-
-      target_surface_priv->mapped = 1;
+   if (!target_surface_priv->frame_started) {
+      target_surface_priv->frame_started = 1;
+      decoder->begin_frame(decoder);
    }
 
-   MacroBlocksToPipe(target_surface_priv, picture_structure, xvmc_mb, blocks, num_macroblocks);
+   MacroBlocksToPipe(context_priv, target_surface_priv, picture_structure,
+                     xvmc_mb, blocks, mb, num_macroblocks);
+
+   context_priv->decoder->decode_macroblock(context_priv->decoder, &mb[0].base, num_macroblocks);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
 
@@ -543,7 +391,9 @@
    assert(desty + desth - 1 < drawable_surface->height);
     */
 
-   unmap_and_flush_surface(surface_priv);
+   RecursiveEndFrame(surface_priv);
+
+   context_priv->decoder->flush(context_priv->decoder);
 
    vl_compositor_clear_layers(compositor);
    vl_compositor_set_buffer_layer(compositor, 0, surface_priv->video_buffer, &src_rect, NULL);
@@ -567,7 +417,7 @@
    // Workaround for r600g, there seems to be a bug in the fence refcounting code
    pipe->screen->fence_reference(pipe->screen, &surface_priv->fence, NULL);
 
-   vl_compositor_render(compositor, PictureToPipe(flags), context_priv->drawable_surface, &dst_rect, NULL);
+   vl_compositor_render(compositor, context_priv->drawable_surface, &dst_rect, NULL);
                         
    pipe->flush(pipe, &surface_priv->fence);
 
@@ -630,6 +480,7 @@
 Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
 {
    XvMCSurfacePrivate *surface_priv;
+   XvMCContextPrivate *context_priv;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying surface %p.\n", surface);
 
@@ -639,10 +490,13 @@
       return XvMCBadSurface;
 
    surface_priv = surface->privData;
+   context_priv = surface_priv->context->privData;
    
-   if (surface_priv->mapped)
-      surface_priv->decode_buffer->end_frame(surface_priv->decode_buffer);
-   surface_priv->decode_buffer->destroy(surface_priv->decode_buffer);
+   if (surface_priv->frame_started) {
+      SetDecoderStatus(surface_priv);
+      context_priv->decoder->end_frame(context_priv->decoder);
+   }
+   context_priv->decoder->destroy_buffer(context_priv->decoder, surface_priv->decode_buffer);
    surface_priv->video_buffer->destroy(surface_priv->video_buffer);
    FREE(surface_priv);
    surface->privData = NULL;
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 5f8d9d1..fd14ac9 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -45,7 +45,6 @@
 struct vl_context;
 
 struct pipe_video_decoder;
-struct pipe_video_decode_buffer;
 struct pipe_video_buffer;
 
 struct pipe_sampler_view;
@@ -70,22 +69,13 @@
 
 typedef struct
 {
-   struct pipe_video_decode_buffer *decode_buffer;
+   void *decode_buffer;
    struct pipe_video_buffer *video_buffer;
 
-   bool mapped; // are we still mapped to memory?
+   // have we allready told the decoder to start a frame
+   bool frame_started;
 
-   struct {
-      unsigned num_blocks_added;
-      struct pipe_ycbcr_block *stream;
-      short *buffer;
-   } ycbcr[3];
-
-   unsigned mv_stride;
-   struct {
-      XvMCSurface *surface;
-      struct pipe_motionvector *mv;
-   } ref[2];
+   XvMCSurface *ref[2];
 
    struct pipe_fence_handle *fence;
 
diff --git a/src/gallium/targets/dri-r300/target.c b/src/gallium/targets/dri-r300/target.c
index b48bcad..9b6d816 100644
--- a/src/gallium/targets/dri-r300/target.c
+++ b/src/gallium/targets/dri-r300/target.c
@@ -1,4 +1,3 @@
-
 #include "target-helpers/inline_debug_helper.h"
 #include "state_tracker/drm_driver.h"
 #include "radeon/drm/radeon_drm_public.h"
diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile
index 0c4de20..2d74630 100644
--- a/src/gallium/targets/dri-r600/Makefile
+++ b/src/gallium/targets/dri-r600/Makefile
@@ -7,6 +7,7 @@
 	$(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \
 	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/drivers/rbug/librbug.a \
 	$(TOP)/src/gallium/drivers/noop/libnoop.a
diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript
index 1df11a8..c10d65b 100644
--- a/src/gallium/targets/dri-r600/SConscript
+++ b/src/gallium/targets/dri-r600/SConscript
@@ -6,6 +6,7 @@
 
 env.Prepend(LIBS = [
     st_dri,
+    radeonwinsys,
     r600winsys,
     r600,
     trace,
diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c
index 8753e2b..1b8b681 100644
--- a/src/gallium/targets/dri-r600/target.c
+++ b/src/gallium/targets/dri-r600/target.c
@@ -1,14 +1,14 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *create_screen(int fd)
 {
-   struct radeon *radeon;
+   struct radeon_winsys *radeon;
    struct pipe_screen *screen;
 
-   radeon = r600_drm_winsys_create(fd);
+   radeon = radeon_drm_winsys_create(fd);
    if (!radeon)
       return NULL;
 
diff --git a/src/gallium/targets/egl-static/Android.mk b/src/gallium/targets/egl-static/Android.mk
new file mode 100644
index 0000000..ebc89ea
--- /dev/null
+++ b/src/gallium/targets/egl-static/Android.mk
@@ -0,0 +1,56 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	egl.c \
+	egl_pipe.c \
+	egl_st.c
+
+LOCAL_CFLAGS := \
+	-DFEATURE_ES1=1 \
+	-DFEATURE_ES2=1 \
+	-D_EGL_MAIN=_eglBuiltInDriverGALLIUM
+
+LOCAL_C_INCLUDES := \
+	$(GALLIUM_TOP)/state_trackers/vega \
+	$(GALLIUM_TOP)/state_trackers/egl \
+	$(MESA_TOP)/src/egl/main \
+	$(MESA_TOP)/src/mesa \
+	$(DRM_TOP)/include/drm \
+	$(DRM_TOP)
+
+# swrast
+LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE
+
+ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -D_EGL_PIPE_R600=1
+endif
+
+LOCAL_MODULE := libmesa_egl_gallium
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile
index 69e7eec..1583ab1 100644
--- a/src/gallium/targets/egl-static/Makefile
+++ b/src/gallium/targets/egl-static/Makefile
@@ -117,11 +117,12 @@
 
 # r300
 ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
+ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),)
 egl_CPPFLAGS += -D_EGL_PIPE_R300=1
 egl_LIBS += \
 	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r300/libr300.a
-egl_SYS += -ldrm_radeon
+endif
 endif
 
 # r600
@@ -129,8 +130,8 @@
 egl_CPPFLAGS += -D_EGL_PIPE_R600=1
 egl_LIBS += \
 	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r600/libr600.a
-egl_SYS += -ldrm_radeon
 endif
 
 # vmwgfx
@@ -141,10 +142,18 @@
 	$(TOP)/src/gallium/drivers/svga/libsvga.a
 endif
 
-# swrast
+# softpipe
+ifneq ($(findstring softpipe,$(GALLIUM_DRIVERS_DIRS)),)
 egl_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE
 egl_LIBS += $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
 egl_SYS += -lm
+endif
+
+# llvmpipe
+ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),)
+egl_CPPFLAGS += -DGALLIUM_LLVMPIPE
+egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
+endif
 
 # sort to remove duplicates
 egl_CPPFLAGS := $(sort $(egl_CPPFLAGS))
@@ -158,8 +167,6 @@
 
 # LLVM
 ifeq ($(MESA_LLVM),1)
-egl_CPPFLAGS += -DGALLIUM_LLVMPIPE
-egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
 egl_SYS += $(LLVM_LIBS)
 LDFLAGS += $(LLVM_LDFLAGS)
 
diff --git a/src/gallium/targets/egl-static/egl.c b/src/gallium/targets/egl-static/egl.c
index 568f549..a7aee27 100644
--- a/src/gallium/targets/egl-static/egl.c
+++ b/src/gallium/targets/egl-static/egl.c
@@ -109,6 +109,70 @@
    return (*chip_id >= 0);
 }
 
+#elif defined(PIPE_OS_ANDROID)
+
+#include <xf86drm.h>
+/* for i915 */
+#include <i915_drm.h>
+/* for radeon */
+#include <radeon_drm.h>
+/* for util_strcmp */
+#include "util/u_string.h"
+
+static boolean
+drm_fd_get_pci_id(int fd, int *vendor_id, int *chip_id)
+{
+   drmVersionPtr version;
+
+   *chip_id = -1;
+
+   version = drmGetVersion(fd);
+   if (!version) {
+      _eglLog(_EGL_WARNING, "invalid drm fd");
+      return FALSE;
+   }
+   if (!version->name) {
+      _eglLog(_EGL_WARNING, "unable to determine the driver name");
+      drmFreeVersion(version);
+      return FALSE;
+   }
+
+   if (util_strcmp(version->name, "i915") == 0) {
+      struct drm_i915_getparam gp;
+      int ret;
+
+      *vendor_id = 0x8086;
+
+      memset(&gp, 0, sizeof(gp));
+      gp.param = I915_PARAM_CHIPSET_ID;
+      gp.value = chip_id;
+      ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+      if (ret) {
+         _eglLog(_EGL_WARNING, "failed to get param for i915");
+	 *chip_id = -1;
+      }
+   }
+   else if (util_strcmp(version->name, "radeon") == 0) {
+      struct drm_radeon_info info;
+      int ret;
+
+      *vendor_id = 0x1002;
+
+      memset(&info, 0, sizeof(info));
+      info.request = RADEON_INFO_DEVICE_ID;
+      info.value = (unsigned long) chip_id;
+      ret = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
+      if (ret) {
+         _eglLog(_EGL_WARNING, "failed to get info for radeon");
+	 *chip_id = -1;
+      }
+   }
+
+   drmFreeVersion(version);
+
+   return (*chip_id >= 0);
+}
+
 #else
 
 static boolean
@@ -157,13 +221,21 @@
 static struct pipe_screen *
 create_drm_screen(const char *name, int fd)
 {
+   struct pipe_screen *screen;
+
    if (!name) {
       name = drm_fd_get_screen_name(fd);
       if (!name)
          return NULL;
    }
 
-   return egl_pipe_create_drm_screen(name, fd);
+   screen = egl_pipe_create_drm_screen(name, fd);
+   if (screen)
+      _eglLog(_EGL_INFO, "created a pipe screen for %s", name);
+   else
+      _eglLog(_EGL_WARNING, "failed to create a pipe screen for %s", name);
+
+   return screen;
 }
 
 static struct pipe_screen *
diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c
index 658c532..f2b50bd 100644
--- a/src/gallium/targets/egl-static/egl_pipe.c
+++ b/src/gallium/targets/egl-static/egl_pipe.c
@@ -42,7 +42,6 @@
 #include "radeon/drm/radeon_drm_public.h"
 #include "r300/r300_public.h"
 /* for r600 */
-#include "r600/drm/r600_drm_public.h"
 #include "r600/r600_public.h"
 /* for vmwgfx */
 #include "svga/drm/svga_drm_public.h"
@@ -141,10 +140,10 @@
 pipe_r600_create_screen(int fd)
 {
 #if _EGL_PIPE_R600
-   struct radeon *rw;
+   struct radeon_winsys *rw;
    struct pipe_screen *screen;
 
-   rw = r600_drm_winsys_create(fd);
+   rw = radeon_drm_winsys_create(fd);
    if (!rw)
       return NULL;
 
diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile
index 5310425..033a1ac 100644
--- a/src/gallium/targets/gbm/Makefile
+++ b/src/gallium/targets/gbm/Makefile
@@ -15,7 +15,7 @@
 	       -I$(TOP)/src/gallium/auxiliary \
 	       -I$(TOP)/src/gallium/include \
 
-GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) \
+GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) -lm \
 	   $(TOP)/src/gallium/state_trackers/gbm/libgbm.a \
 	   $(TOP)/src/gallium/drivers/identity/libidentity.a \
 	   $(TOP)/src/gallium/drivers/galahad/libgalahad.a \
@@ -79,26 +79,30 @@
 r300_LIBS = \
 	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r300/libr300.a
-r300_SYS = -ldrm_radeon
 
 # r600 pipe driver
 r600_LIBS = \
 	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r600/libr600.a
-r600_SYS = -ldrm_radeon
 
 # vmwgfx pipe driver
 vmwgfx_LIBS = \
 	$(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \
 	$(TOP)/src/gallium/drivers/svga/libsvga.a
 
+
+
 # LLVM
 ifeq ($(MESA_LLVM),1)
-pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
 pipe_SYS += $(LLVM_LIBS)
 pipe_LDFLAGS += $(LLVM_LDFLAGS)
 endif
 
+ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),)
+pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
+endif
+
 # determine the targets/sources
 pipe_TARGETS =
 pipe_SOURCES =
@@ -119,9 +123,11 @@
 endif
 
 ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
+ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),)
 pipe_TARGETS += $(PIPE_PREFIX)r300.so
 pipe_SOURCES += pipe_r300.c
 endif
+endif
 
 ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),)
 pipe_TARGETS += $(PIPE_PREFIX)r600.so
@@ -148,7 +154,7 @@
 	@$(INSTALL) -d $(dir $@)
 	$(INSTALL) $< $(dir $@)
 
-$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o
+$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS)
 	$(MKLIB) -o $@ -noprefix -linker '$(CC)' \
 		-ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \
 		$(MKLIB_OPTIONS) $< \
diff --git a/src/gallium/targets/gbm/pipe_r600.c b/src/gallium/targets/gbm/pipe_r600.c
index 486a659..9f61a51 100644
--- a/src/gallium/targets/gbm/pipe_r600.c
+++ b/src/gallium/targets/gbm/pipe_r600.c
@@ -1,16 +1,15 @@
-
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *
 create_screen(int fd)
 {
-   struct radeon *rw;
+   struct radeon_winsys *rw;
    struct pipe_screen *screen;
 
-   rw = r600_drm_winsys_create(fd);
+   rw = radeon_drm_winsys_create(fd);
    if (!rw)
       return NULL;
 
diff --git a/src/gallium/targets/va-r600/Makefile b/src/gallium/targets/va-r600/Makefile
index 28797ad..d09a3aa 100644
--- a/src/gallium/targets/va-r600/Makefile
+++ b/src/gallium/targets/va-r600/Makefile
@@ -10,6 +10,7 @@
 	$(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
         $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
         $(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/auxiliary/libgallium.a
diff --git a/src/gallium/targets/va-r600/target.c b/src/gallium/targets/va-r600/target.c
index 8753e2b..1b8b681 100644
--- a/src/gallium/targets/va-r600/target.c
+++ b/src/gallium/targets/va-r600/target.c
@@ -1,14 +1,14 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *create_screen(int fd)
 {
-   struct radeon *radeon;
+   struct radeon_winsys *radeon;
    struct pipe_screen *screen;
 
-   radeon = r600_drm_winsys_create(fd);
+   radeon = radeon_drm_winsys_create(fd);
    if (!radeon)
       return NULL;
 
diff --git a/src/gallium/targets/vdpau-r600/Makefile b/src/gallium/targets/vdpau-r600/Makefile
index 0fd817b..c2d95af 100644
--- a/src/gallium/targets/vdpau-r600/Makefile
+++ b/src/gallium/targets/vdpau-r600/Makefile
@@ -7,6 +7,7 @@
         $(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
         $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
         $(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/auxiliary/libgallium.a
 
diff --git a/src/gallium/targets/vdpau-r600/target.c b/src/gallium/targets/vdpau-r600/target.c
index 8753e2b..1b8b681 100644
--- a/src/gallium/targets/vdpau-r600/target.c
+++ b/src/gallium/targets/vdpau-r600/target.c
@@ -1,14 +1,14 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *create_screen(int fd)
 {
-   struct radeon *radeon;
+   struct radeon_winsys *radeon;
    struct pipe_screen *screen;
 
-   radeon = r600_drm_winsys_create(fd);
+   radeon = radeon_drm_winsys_create(fd);
    if (!radeon)
       return NULL;
 
diff --git a/src/gallium/targets/xorg-r600/Makefile b/src/gallium/targets/xorg-r600/Makefile
new file mode 100644
index 0000000..4577ba6
--- /dev/null
+++ b/src/gallium/targets/xorg-r600/Makefile
@@ -0,0 +1,25 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r600g_drv.so
+
+C_SOURCES = \
+	target.c \
+	xorg.c
+
+DRIVER_DEFINES = \
+	-DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD
+
+DRIVER_PIPES = \
+	$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
+	$(TOP)/src/gallium/drivers/r600/libr600.a \
+	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
+	$(TOP)/src/gallium/drivers/galahad/libgalahad.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/drivers/rbug/librbug.a
+
+DRIVER_LINKS = \
+	$(shell pkg-config --libs libdrm)
+
+include ../Makefile.xorg
diff --git a/src/gallium/targets/xorg-r600/target.c b/src/gallium/targets/xorg-r600/target.c
new file mode 100644
index 0000000..6042435
--- /dev/null
+++ b/src/gallium/targets/xorg-r600/target.c
@@ -0,0 +1,26 @@
+
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r600/r600_public.h"
+
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct radeon_winsys *sws;
+   struct pipe_screen *screen;
+
+   sws = radeon_drm_winsys_create(fd);
+   if (!sws)
+      return NULL;
+
+   screen = r600_screen_create(sws);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen)
diff --git a/src/gallium/targets/xorg-r600/xorg.c b/src/gallium/targets/xorg-r600/xorg.c
new file mode 100644
index 0000000..120cf6d
--- /dev/null
+++ b/src/gallium/targets/xorg-r600/xorg.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ * Author: Alan Hourihane <alanh@tungstengraphics.com>
+ * Author: Jakob Bornecrantz <wallbraker@gmail.com>
+ * Author: Corbin Simpson <MostAwesomedude@gmail.com>
+ *
+ */
+
+#include "../../state_trackers/xorg/xorg_winsys.h"
+
+static void r600_xorg_identify(int flags);
+static Bool r600_xorg_pci_probe(DriverPtr driver,
+				 int entity_num,
+				 struct pci_device *device,
+				 intptr_t match_data);
+
+static const struct pci_id_match r600_xorg_device_match[] = {
+    {0x1002, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0},
+    {0, 0, 0},
+};
+
+static SymTabRec r600_xorg_chipsets[] = {
+    {PCI_MATCH_ANY, "AMD R6xx Graphics Chipset"},
+    {-1, NULL}
+};
+
+static PciChipsets r600_xorg_pci_devices[] = {
+    {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL},
+    {-1, -1, NULL}
+};
+
+static XF86ModuleVersionInfo r600_xorg_version = {
+    "r600g",
+    MODULEVENDORSTRING,
+    MODINFOSTRING1,
+    MODINFOSTRING2,
+    XORG_VERSION_CURRENT,
+    0, 1, 0, /* major, minor, patch */
+    ABI_CLASS_VIDEODRV,
+    ABI_VIDEODRV_VERSION,
+    MOD_CLASS_VIDEODRV,
+    {0, 0, 0, 0}
+};
+
+/*
+ * Xorg driver exported structures
+ */
+
+_X_EXPORT DriverRec r600_driver = {
+    1,
+    "r600g",
+    r600_xorg_identify,
+    NULL,
+    xorg_tracker_available_options,
+    NULL,
+    0,
+    NULL,
+    r600_xorg_device_match,
+    r600_xorg_pci_probe
+};
+
+static MODULESETUPPROTO(r600_xorg_setup);
+
+_X_EXPORT XF86ModuleData r600gModuleData = {
+    &r600_xorg_version,
+    r600_xorg_setup,
+    NULL
+};
+
+/*
+ * Xorg driver functions
+ */
+
+static pointer
+r600_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin)
+{
+    static Bool setupDone = 0;
+
+    /* This module should be loaded only once, but check to be sure.
+     */
+    if (!setupDone) {
+	setupDone = 1;
+	xf86AddDriver(&r600_driver, module, HaveDriverFuncs);
+
+	/*
+	 * The return value must be non-NULL on success even though there
+	 * is no TearDownProc.
+	 */
+	return (pointer) 1;
+    } else {
+	if (errmaj)
+	    *errmaj = LDR_ONCEONLY;
+	return NULL;
+    }
+}
+
+static void
+r600_xorg_identify(int flags)
+{
+    xf86PrintChipsets("r600", "Driver for R6xx Gallium with KMS",
+		      r600_xorg_chipsets);
+}
+
+static Bool
+r600_xorg_pci_probe(DriverPtr driver,
+	  int entity_num, struct pci_device *device, intptr_t match_data)
+{
+    ScrnInfoPtr scrn = NULL;
+    EntityInfoPtr entity;
+
+    scrn = xf86ConfigPciEntity(scrn, 0, entity_num, r600_xorg_pci_devices,
+			       NULL, NULL, NULL, NULL, NULL);
+    if (scrn != NULL) {
+	scrn->driverVersion = 1;
+	scrn->driverName = "r600";
+	scrn->name = "R600G";
+	scrn->Probe = NULL;
+
+	entity = xf86GetEntityInfo(entity_num);
+
+	/* Use all the functions from the xorg tracker */
+	xorg_tracker_set_functions(scrn);
+    }
+    return scrn != NULL;
+}
diff --git a/src/gallium/targets/xvmc-r600/Makefile b/src/gallium/targets/xvmc-r600/Makefile
index 0bb72f1..a10a42d 100644
--- a/src/gallium/targets/xvmc-r600/Makefile
+++ b/src/gallium/targets/xvmc-r600/Makefile
@@ -7,6 +7,7 @@
         $(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
         $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
         $(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/auxiliary/libgallium.a
 
diff --git a/src/gallium/targets/xvmc-r600/target.c b/src/gallium/targets/xvmc-r600/target.c
index 8753e2b..1b8b681 100644
--- a/src/gallium/targets/xvmc-r600/target.c
+++ b/src/gallium/targets/xvmc-r600/target.c
@@ -1,14 +1,14 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *create_screen(int fd)
 {
-   struct radeon *radeon;
+   struct radeon_winsys *radeon;
    struct pipe_screen *screen;
 
-   radeon = r600_drm_winsys_create(fd);
+   radeon = radeon_drm_winsys_create(fd);
    if (!radeon)
       return NULL;
 
diff --git a/src/gallium/winsys/r600/drm/Android.mk b/src/gallium/winsys/r600/drm/Android.mk
new file mode 100644
index 0000000..eb79caa
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/Android.mk
@@ -0,0 +1,43 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_CFLAGS := -std=c99
+
+LOCAL_C_INCLUDES := \
+	$(GALLIUM_TOP)/drivers/r600 \
+	$(DRM_TOP) \
+	$(DRM_TOP)/include/drm
+
+LOCAL_MODULE := libmesa_winsys_r600
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile
index fb7b09b..c23286c 100644
--- a/src/gallium/winsys/r600/drm/Makefile
+++ b/src/gallium/winsys/r600/drm/Makefile
@@ -4,15 +4,8 @@
 
 LIBNAME = r600winsys
 
-C_SOURCES = \
-	bof.c \
-	evergreen_hw_context.c \
-	radeon_bo.c \
-	radeon_pciid.c \
-	r600_bo.c \
-	r600_drm.c \
-	r600_hw_context.c \
-	r600_bomgr.c
+# get C_SOURCES
+include Makefile.sources
 
 LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \
 		   -I$(TOP)/include \
diff --git a/src/gallium/winsys/r600/drm/Makefile.sources b/src/gallium/winsys/r600/drm/Makefile.sources
new file mode 100644
index 0000000..9798017
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/Makefile.sources
@@ -0,0 +1,7 @@
+C_SOURCES := \
+	evergreen_hw_context.c \
+	radeon_pciid.c \
+	r600_bo.c \
+	r600_drm.c \
+	r600_hw_context.c
+
diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript
index f55bb26..2d0d80e 100644
--- a/src/gallium/winsys/r600/drm/SConscript
+++ b/src/gallium/winsys/r600/drm/SConscript
@@ -2,16 +2,7 @@
 
 env = env.Clone()
 
-r600_sources = [
-    'bof.c',
-    'evergreen_hw_context.c',
-    'radeon_bo.c',
-    'radeon_pciid.c',
-    'r600_bo.c',
-    'r600_drm.c',
-    'r600_hw_context.c',
-    'r600_bomgr.c',
-]
+r600_sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
 
 env.PkgUseModules('DRM_RADEON')
 
diff --git a/src/gallium/winsys/r600/drm/bof.c b/src/gallium/winsys/r600/drm/bof.c
deleted file mode 100644
index 5c923ad..0000000
--- a/src/gallium/winsys/r600/drm/bof.c
+++ /dev/null
@@ -1,477 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jerome Glisse
- */
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include "bof.h"
-
-/*
- * helpers
- */
-static int bof_entry_grow(bof_t *bof)
-{
-	bof_t **array;
-
-	if (bof->array_size < bof->nentry)
-		return 0;
-	array = realloc(bof->array, (bof->nentry + 16) * sizeof(void*));
-	if (array == NULL)
-		return -ENOMEM;
-	bof->array = array;
-	bof->nentry += 16;
-	return 0;
-}
-
-/*
- * object
- */
-bof_t *bof_object(void)
-{
-	bof_t *object;
-
-	object = calloc(1, sizeof(bof_t));
-	if (object == NULL)
-		return NULL;
-	object->refcount = 1;
-	object->type = BOF_TYPE_OBJECT;
-	object->size = 12;
-	return object;
-}
-
-bof_t *bof_object_get(bof_t *object, const char *keyname)
-{
-	unsigned i;
-
-	for (i = 0; i < object->array_size; i += 2) {
-		if (!strcmp(object->array[i]->value, keyname)) {
-			return object->array[i + 1];
-		}
-	}
-	return NULL;
-}
-
-int bof_object_set(bof_t *object, const char *keyname, bof_t *value)
-{
-	bof_t *key;
-	int r;
-
-	if (object->type != BOF_TYPE_OBJECT)
-		return -EINVAL;
-	r = bof_entry_grow(object);
-	if (r)
-		return r;
-	key = bof_string(keyname);
-	if (key == NULL)
-		return -ENOMEM;
-	object->array[object->array_size++] = key;
-	object->array[object->array_size++] = value;
-	object->size += value->size;
-	object->size += key->size;
-	bof_incref(value);
-	return 0;
-}
-
-/*
- * array
- */
-bof_t *bof_array(void)
-{
-	bof_t *array = bof_object();
-
-	if (array == NULL)
-		return NULL;
-	array->type = BOF_TYPE_ARRAY;
-	array->size = 12;
-	return array;
-}
-
-int bof_array_append(bof_t *array, bof_t *value)
-{
-	int r;
-	if (array->type != BOF_TYPE_ARRAY)
-		return -EINVAL;
-	r = bof_entry_grow(array);
-	if (r)
-		return r;
-	array->array[array->array_size++] = value;
-	array->size += value->size;
-	bof_incref(value);
-	return 0;
-}
-
-bof_t *bof_array_get(bof_t *bof, unsigned i)
-{
-	if (!bof_is_array(bof) || i >= bof->array_size)
-		return NULL;
-	return bof->array[i];
-}
-
-unsigned bof_array_size(bof_t *bof)
-{
-	if (!bof_is_array(bof))
-		return 0;
-	return bof->array_size;
-}
-
-/*
- * blob
- */
-bof_t *bof_blob(unsigned size, void *value)
-{
-	bof_t *blob = bof_object();
-
-	if (blob == NULL)
-		return NULL;
-	blob->type = BOF_TYPE_BLOB;
-	blob->value = calloc(1, size);
-	if (blob->value == NULL) {
-		bof_decref(blob);
-		return NULL;
-	}
-	blob->size = size;
-	memcpy(blob->value, value, size);
-	blob->size += 12;
-	return blob;
-}
-
-unsigned bof_blob_size(bof_t *bof)
-{
-	if (!bof_is_blob(bof))
-		return 0;
-	return bof->size - 12;
-}
-
-void *bof_blob_value(bof_t *bof)
-{
-	if (!bof_is_blob(bof))
-		return NULL;
-	return bof->value;
-}
-
-/*
- * string
- */
-bof_t *bof_string(const char *value)
-{
-	bof_t *string = bof_object();
-
-	if (string == NULL)
-		return NULL;
-	string->type = BOF_TYPE_STRING;
-	string->size = strlen(value) + 1;
-	string->value = calloc(1, string->size);
-	if (string->value == NULL) {
-		bof_decref(string);
-		return NULL;
-	}
-	strcpy(string->value, value);
-	string->size += 12;
-	return string;
-}
-
-/*
- *  int32
- */
-bof_t *bof_int32(int32_t value)
-{
-	bof_t *int32 = bof_object();
-
-	if (int32 == NULL)
-		return NULL;
-	int32->type = BOF_TYPE_INT32;
-	int32->size = 4;
-	int32->value = calloc(1, int32->size);
-	if (int32->value == NULL) {
-		bof_decref(int32);
-		return NULL;
-	}
-	memcpy(int32->value, &value, 4);
-	int32->size += 12;
-	return int32;
-}
-
-int32_t bof_int32_value(bof_t *bof)
-{
-	return *((uint32_t*)bof->value);
-}
-
-/*
- *  common
- */
-static void bof_indent(int level)
-{
-	int i;
-
-	for (i = 0; i < level; i++)
-		fprintf(stderr, " ");
-}
-
-static void bof_print_bof(bof_t *bof, int level, int entry)
-{
-	bof_indent(level);
-	if (bof == NULL) {
-		fprintf(stderr, "--NULL-- for entry %d\n", entry);
-		return;
-	}
-	switch (bof->type) {
-	case BOF_TYPE_STRING:
-		fprintf(stderr, "%p string [%s %d]\n", bof, (char*)bof->value, bof->size);
-		break;
-	case BOF_TYPE_INT32:
-		fprintf(stderr, "%p int32 [%d %d]\n", bof, *(int*)bof->value, bof->size);
-		break;
-	case BOF_TYPE_BLOB:
-		fprintf(stderr, "%p blob [%d]\n", bof, bof->size);
-		break;
-	case BOF_TYPE_NULL:
-		fprintf(stderr, "%p null [%d]\n", bof, bof->size);
-		break;
-	case BOF_TYPE_OBJECT:
-		fprintf(stderr, "%p object [%d %d]\n", bof, bof->array_size / 2, bof->size);
-		break;
-	case BOF_TYPE_ARRAY:
-		fprintf(stderr, "%p array [%d %d]\n", bof, bof->array_size, bof->size);
-		break;
-	default:
-		fprintf(stderr, "%p unknown [%d]\n", bof, bof->type);
-		return;
-	}
-}
-
-static void bof_print_rec(bof_t *bof, int level, int entry)
-{
-	unsigned i;
-
-	bof_print_bof(bof, level, entry);
-	for (i = 0; i < bof->array_size; i++) {
-		bof_print_rec(bof->array[i], level + 2, i);
-	}
-}
-
-void bof_print(bof_t *bof)
-{
-	bof_print_rec(bof, 0, 0);
-}
-
-static int bof_read(bof_t *root, FILE *file, long end, int level)
-{
-	bof_t *bof = NULL;
-	int r;
-
-	if (ftell(file) >= end) {
-		return 0;
-	}
-	r = bof_entry_grow(root);
-	if (r)
-		return r;
-	bof = bof_object();
-	if (bof == NULL)
-		return -ENOMEM;
-	bof->offset = ftell(file);
-	r = fread(&bof->type, 4, 1, file);
-	if (r != 1)
-		goto out_err;
-	r = fread(&bof->size, 4, 1, file);
-	if (r != 1)
-		goto out_err;
-	r = fread(&bof->array_size, 4, 1, file);
-	if (r != 1)
-		goto out_err;
-	switch (bof->type) {
-	case BOF_TYPE_STRING:
-	case BOF_TYPE_INT32:
-	case BOF_TYPE_BLOB:
-		bof->value = calloc(1, bof->size - 12);
-		if (bof->value == NULL) {
-			goto out_err;
-		}
-		r = fread(bof->value, bof->size - 12, 1, file);
-		if (r != 1) {
-			fprintf(stderr, "error reading %d\n", bof->size - 12);
-			goto out_err;
-		}
-		break;
-	case BOF_TYPE_NULL:
-		return 0;
-	case BOF_TYPE_OBJECT:
-	case BOF_TYPE_ARRAY:
-		r = bof_read(bof, file, bof->offset + bof->size, level + 2);
-		if (r)
-			goto out_err;
-		break;
-	default:
-		fprintf(stderr, "invalid type %d\n", bof->type);
-		goto out_err;
-	}
-	root->array[root->centry++] = bof;
-	return bof_read(root, file, end, level);
-out_err:
-	bof_decref(bof);
-	return -EINVAL;
-}
-
-bof_t *bof_load_file(const char *filename)
-{
-	bof_t *root = bof_object();
-	int r;
-
-	if (root == NULL) {
-		fprintf(stderr, "%s failed to create root object\n", __func__);
-		return NULL;
-	}
-	root->file = fopen(filename, "r");
-	if (root->file == NULL)
-		goto out_err;
-	r = fseek(root->file, 0L, SEEK_SET);
-	if (r) {
-		fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename);
-		goto out_err;
-	}
-	root->offset = ftell(root->file);
-	r = fread(&root->type, 4, 1, root->file);
-	if (r != 1)
-		goto out_err;
-	r = fread(&root->size, 4, 1, root->file);
-	if (r != 1)
-		goto out_err;
-	r = fread(&root->array_size, 4, 1, root->file);
-	if (r != 1)
-		goto out_err;
-	r = bof_read(root, root->file, root->offset + root->size, 2);
-	if (r)
-		goto out_err;
-	return root;
-out_err:
-	bof_decref(root);
-	return NULL;
-}
-
-void bof_incref(bof_t *bof)
-{
-	bof->refcount++;
-}
-
-void bof_decref(bof_t *bof)
-{
-	unsigned i;
-
-	if (bof == NULL)
-		return;
-	if (--bof->refcount > 0)
-		return;
-	for (i = 0; i < bof->array_size; i++) {
-		bof_decref(bof->array[i]);
-		bof->array[i] = NULL;
-	}
-	bof->array_size = 0;
-	if (bof->file) {
-		fclose(bof->file);
-		bof->file = NULL;
-	}
-	free(bof->array);
-	free(bof->value);
-	free(bof);
-}
-
-static int bof_file_write(bof_t *bof, FILE *file)
-{
-	unsigned i;
-	int r;
-
-	r = fwrite(&bof->type, 4, 1, file);
-	if (r != 1)
-		return -EINVAL;
-	r = fwrite(&bof->size, 4, 1, file);
-	if (r != 1)
-		return -EINVAL;
-	r = fwrite(&bof->array_size, 4, 1, file);
-	if (r != 1)
-		return -EINVAL;
-	switch (bof->type) {
-	case BOF_TYPE_NULL:
-		if (bof->size)
-			return -EINVAL;
-		break;
-	case BOF_TYPE_STRING:
-	case BOF_TYPE_INT32:
-	case BOF_TYPE_BLOB:
-		r = fwrite(bof->value, bof->size - 12, 1, file);
-		if (r != 1)
-			return -EINVAL;
-		break;
-	case BOF_TYPE_OBJECT:
-	case BOF_TYPE_ARRAY:
-		for (i = 0; i < bof->array_size; i++) {
-			r = bof_file_write(bof->array[i], file);
-			if (r)
-				return r;
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int bof_dump_file(bof_t *bof, const char *filename)
-{
-	unsigned i;
-	int r = 0;
-
-	if (bof->file) {
-		fclose(bof->file);
-		bof->file = NULL;
-	}
-	bof->file = fopen(filename, "w");
-	if (bof->file == NULL) {
-		fprintf(stderr, "%s failed to open file %s\n", __func__, filename);
-		r = -EINVAL;
-		goto out_err;
-	}
-	r = fseek(bof->file, 0L, SEEK_SET);
-	if (r) {
-		fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename);
-		goto out_err;
-	}
-	r = fwrite(&bof->type, 4, 1, bof->file);
-	if (r != 1)
-		goto out_err;
-	r = fwrite(&bof->size, 4, 1, bof->file);
-	if (r != 1)
-		goto out_err;
-	r = fwrite(&bof->array_size, 4, 1, bof->file);
-	if (r != 1)
-		goto out_err;
-	for (i = 0; i < bof->array_size; i++) {
-		r = bof_file_write(bof->array[i], bof->file);
-		if (r)
-			return r;
-	}
-out_err:
-	fclose(bof->file);
-	bof->file = NULL;
-	return r;
-}
diff --git a/src/gallium/winsys/r600/drm/bof.h b/src/gallium/winsys/r600/drm/bof.h
deleted file mode 100644
index 014affb..0000000
--- a/src/gallium/winsys/r600/drm/bof.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jerome Glisse
- */
-#ifndef BOF_H
-#define BOF_H
-
-#include <stdio.h>
-#include <stdint.h>
-
-#define BOF_TYPE_STRING		0
-#define BOF_TYPE_NULL		1
-#define BOF_TYPE_BLOB		2
-#define BOF_TYPE_OBJECT		3
-#define BOF_TYPE_ARRAY		4
-#define BOF_TYPE_INT32		5
-
-struct bof;
-
-typedef struct bof {
-	struct bof	**array;
-	unsigned	centry;
-	unsigned	nentry;
-	unsigned	refcount;
-	FILE		*file;
-	uint32_t	type;
-	uint32_t	size;
-	uint32_t	array_size;
-	void		*value;
-	long		offset;
-} bof_t;
-
-extern int bof_file_flush(bof_t *root);
-extern bof_t *bof_file_new(const char *filename);
-extern int bof_object_dump(bof_t *object, const char *filename);
-
-/* object */
-extern bof_t *bof_object(void);
-extern bof_t *bof_object_get(bof_t *object, const char *keyname);
-extern int bof_object_set(bof_t *object, const char *keyname, bof_t *value);
-/* array */
-extern bof_t *bof_array(void);
-extern int bof_array_append(bof_t *array, bof_t *value);
-extern bof_t *bof_array_get(bof_t *bof, unsigned i);
-extern unsigned bof_array_size(bof_t *bof);
-/* blob */
-extern bof_t *bof_blob(unsigned size, void *value);
-extern unsigned bof_blob_size(bof_t *bof);
-extern void *bof_blob_value(bof_t *bof);
-/* string */
-extern bof_t *bof_string(const char *value);
-/* int32 */
-extern bof_t *bof_int32(int32_t value);
-extern int32_t bof_int32_value(bof_t *bof);
-/* common functions */
-extern void bof_decref(bof_t *bof);
-extern void bof_incref(bof_t *bof);
-extern bof_t *bof_load_file(const char *filename);
-extern int bof_dump_file(bof_t *bof, const char *filename);
-extern void bof_print(bof_t *bof);
-
-static inline int bof_is_object(bof_t *bof){return (bof->type == BOF_TYPE_OBJECT);}
-static inline int bof_is_blob(bof_t *bof){return (bof->type == BOF_TYPE_BLOB);}
-static inline int bof_is_null(bof_t *bof){return (bof->type == BOF_TYPE_NULL);}
-static inline int bof_is_int32(bof_t *bof){return (bof->type == BOF_TYPE_INT32);}
-static inline int bof_is_array(bof_t *bof){return (bof->type == BOF_TYPE_ARRAY);}
-static inline int bof_is_string(bof_t *bof){return (bof->type == BOF_TYPE_STRING);}
-
-#endif
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 60d2e28..3417eb3 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -23,20 +23,11 @@
  * Authors:
  *      Jerome Glisse
  */
-#include <errno.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include "xf86drm.h"
 #include "r600.h"
-#include "evergreend.h"
-#include "radeon_drm.h"
-#include "bof.h"
-#include "pipe/p_compiler.h"
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
 #include "r600_priv.h"
+#include "evergreend.h"
+#include "util/u_memory.h"
+#include <errno.h>
 
 #define GROUP_FORCE_NEW_BLOCK	0
 
@@ -168,6 +159,7 @@
 	{R_028404_VGT_MIN_VTX_INDX, 0, 0, 0},
 	{R_028408_VGT_INDX_OFFSET, 0, 0, 0},
 	{R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0},
+	{R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
 	{R_028414_CB_BLEND_RED, 0, 0, 0},
@@ -532,6 +524,7 @@
 	{R_028404_VGT_MIN_VTX_INDX, 0, 0, 0},
 	{R_028408_VGT_INDX_OFFSET, 0, 0, 0},
 	{R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0},
+	{R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
 	{R_028414_CB_BLEND_RED, 0, 0, 0},
@@ -909,6 +902,7 @@
 
 	memset(ctx, 0, sizeof(struct r600_context));
 	ctx->radeon = radeon;
+
 	LIST_INITHEAD(&ctx->query_list);
 
 	/* init dirty list */
@@ -992,33 +986,23 @@
 	if (r)
 		goto out_err;
 
+	ctx->cs = radeon->ws->cs_create(radeon->ws);
+
 	/* allocate cs variables */
-	ctx->nreloc = RADEON_CTX_MAX_PM4;
-	ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
-	if (ctx->reloc == NULL) {
-		r = -ENOMEM;
-		goto out_err;
-	}
-	ctx->bo = calloc(ctx->nreloc, sizeof(void *));
+	ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *));
 	if (ctx->bo == NULL) {
 		r = -ENOMEM;
 		goto out_err;
 	}
-	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
-	ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
-	if (ctx->pm4 == NULL) {
-		r = -ENOMEM;
-		goto out_err;
-	}
+	ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS;
+	ctx->pm4 = ctx->cs->buf;
 
 	r600_init_cs(ctx);
 	/* save 16dwords space for fence mecanism */
 	ctx->pm4_ndwords -= 16;
-
 	ctx->max_db = 8;
 
-	LIST_INITHEAD(&ctx->fenced_bo);
-
+	r600_get_backend_mask(ctx);
 	return 0;
 out_err:
 	r600_context_fini(ctx);
@@ -1154,10 +1138,6 @@
 
 	if (draw->indices) {
 		ndwords = 11;
-		/* make sure there is enough relocation space before scheduling draw */
-		if (ctx->creloc >= (ctx->nreloc - 1)) {
-			r600_context_flush(ctx);
-		}
 	}
 
 	/* queries need some special values */
@@ -1174,11 +1154,11 @@
 
 	/* update the max dword count to make sure we have enough space
 	 * reserved for flushing the destination caches */
-	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16;
+	ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16;
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 	/* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
 	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
@@ -1203,13 +1183,12 @@
 	pm4[3] = draw->vgt_num_instances;
 	if (draw->indices) {
 	        pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
-		pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
+		pm4[5] = draw->indices_bo_offset;
 		pm4[6] = 0;
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		pm4[10] = 0;
-		r600_context_bo_reloc(ctx, &pm4[10], draw->indices);
+		pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ);
 	} else {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
 		pm4[5] = draw->vgt_num_indices;
@@ -1270,4 +1249,3 @@
 
 	ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
 }
-
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 0f5b063..4beedad 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -23,176 +23,109 @@
  * Authors:
  *      Dave Airlie
  */
-#include <pipe/p_compiler.h>
-#include <pipe/p_screen.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "state_tracker/drm_driver.h"
 #include "r600_priv.h"
 #include "r600d.h"
-#include "drm.h"
-#include "radeon_drm.h"
+#include "state_tracker/drm_driver.h"
 
 struct r600_bo *r600_bo(struct radeon *radeon,
 			unsigned size, unsigned alignment,
 			unsigned binding, unsigned usage)
 {
 	struct r600_bo *bo;
-	struct radeon_bo *rbo;
+	struct pb_buffer *pb;
 	uint32_t initial_domain, domains;
 	  
 	/* Staging resources particpate in transfers and blits only
 	 * and are used for uploads and downloads from regular
 	 * resources.  We generate them internally for some transfers.
 	 */
-	if (usage == PIPE_USAGE_STAGING)
-		domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT;
-	else
-		domains = (RADEON_GEM_DOMAIN_CPU |
-				RADEON_GEM_DOMAIN_GTT |
-				RADEON_GEM_DOMAIN_VRAM);
+	if (usage == PIPE_USAGE_STAGING) {
+		domains = RADEON_DOMAIN_GTT;
+		initial_domain = RADEON_DOMAIN_GTT;
+	} else {
+		domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 
-	if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
-		bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence);
-		if (bo) {
-			bo->domains = domains;
-			return bo;
+		switch(usage) {
+		case PIPE_USAGE_DYNAMIC:
+		case PIPE_USAGE_STREAM:
+		case PIPE_USAGE_STAGING:
+			initial_domain = RADEON_DOMAIN_GTT;
+			break;
+		case PIPE_USAGE_DEFAULT:
+		case PIPE_USAGE_STATIC:
+		case PIPE_USAGE_IMMUTABLE:
+		default:
+			initial_domain = RADEON_DOMAIN_VRAM;
+			break;
 		}
 	}
 
-	switch(usage) {
-	case PIPE_USAGE_DYNAMIC:
-	case PIPE_USAGE_STREAM:
-	case PIPE_USAGE_STAGING:
-		initial_domain = RADEON_GEM_DOMAIN_GTT;
-		break;
-	case PIPE_USAGE_DEFAULT:
-	case PIPE_USAGE_STATIC:
-	case PIPE_USAGE_IMMUTABLE:
-	default:
-		initial_domain = RADEON_GEM_DOMAIN_VRAM;
-		break;
-	}
-	rbo = radeon_bo(radeon, 0, size, alignment, initial_domain);
-	if (rbo == NULL) {
+	pb = radeon->ws->buffer_create(radeon->ws, size, alignment, binding, initial_domain);
+	if (!pb) {
 		return NULL;
 	}
 
 	bo = calloc(1, sizeof(struct r600_bo));
-	bo->size = size;
-	bo->alignment = alignment;
 	bo->domains = domains;
-	bo->bo = rbo;
-	if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
-		r600_bomgr_bo_init(radeon->bomgr, bo);
-	}
+	bo->buf = pb;
+	bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb);
 
 	pipe_reference_init(&bo->reference, 1);
 	return bo;
 }
 
-struct r600_bo *r600_bo_handle(struct radeon *radeon,
-			       unsigned handle, unsigned *array_mode)
+struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle,
+			       unsigned *stride, unsigned *array_mode)
 {
+	struct pb_buffer *pb;
 	struct r600_bo *bo = calloc(1, sizeof(struct r600_bo));
-	struct radeon_bo *rbo;
 
-	rbo = bo->bo = radeon_bo(radeon, handle, 0, 0, 0);
-	if (rbo == NULL) {
+	pb = bo->buf = radeon->ws->buffer_from_handle(radeon->ws, whandle, stride, NULL);
+	if (!pb) {
 		free(bo);
 		return NULL;
 	}
-	bo->size = rbo->size;
-	bo->domains = (RADEON_GEM_DOMAIN_CPU |
-			RADEON_GEM_DOMAIN_GTT |
-			RADEON_GEM_DOMAIN_VRAM);
 
 	pipe_reference_init(&bo->reference, 1);
+	bo->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
+	bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb);
 
-	radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch);
+	if (stride)
+		*stride = whandle->stride;
+
 	if (array_mode) {
-		if (bo->tiling_flags) {
-			if (bo->tiling_flags & RADEON_TILING_MACRO)
-				*array_mode = V_0280A0_ARRAY_2D_TILED_THIN1;
-			else if (bo->tiling_flags & RADEON_TILING_MICRO)
-				*array_mode = V_0280A0_ARRAY_1D_TILED_THIN1;
-		} else {
+		enum radeon_bo_layout micro, macro;
+
+		radeon->ws->buffer_get_tiling(bo->buf, &micro, &macro);
+
+		if (macro == RADEON_LAYOUT_TILED)
+			*array_mode = V_0280A0_ARRAY_2D_TILED_THIN1;
+		else if (micro == RADEON_LAYOUT_TILED)
+			*array_mode = V_0280A0_ARRAY_1D_TILED_THIN1;
+		else
 			*array_mode = 0;
-		}
 	}
 	return bo;
 }
 
-void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx)
+void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage)
 {
-	struct pipe_context *pctx = ctx;
-
-	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
-		radeon_bo_map(radeon, bo->bo);
-		return (uint8_t *) bo->bo->data + bo->offset;
-	}
-
-	if (p_atomic_read(&bo->bo->reference.count) > 1) {
-		if (usage & PIPE_TRANSFER_DONTBLOCK) {
-			return NULL;
-		}
-		if (ctx) {
-                        pctx->flush(pctx, NULL);
-		}
-	}
-
-	if (usage & PIPE_TRANSFER_DONTBLOCK) {
-		uint32_t domain;
-
-		if (radeon_bo_busy(radeon, bo->bo, &domain))
-			return NULL;
-		if (radeon_bo_map(radeon, bo->bo)) {
-			return NULL;
-		}
-		goto out;
-	}
-
-	radeon_bo_map(radeon, bo->bo);
-	if (radeon_bo_wait(radeon, bo->bo)) {
-		radeon_bo_unmap(radeon, bo->bo);
-		return NULL;
-	}
-
-out:
-	return (uint8_t *) bo->bo->data + bo->offset;
+	return radeon->ws->buffer_map(bo->buf, cs, usage);
 }
 
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo)
 {
-	radeon_bo_unmap(radeon, bo->bo);
+	radeon->ws->buffer_unmap(bo->buf);
 }
 
-void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
+void r600_bo_destroy(struct r600_bo *bo)
 {
-	if (bo->manager_id) {
-		if (!r600_bomgr_bo_destroy(radeon->bomgr, bo)) {
-			/* destroy is delayed by buffer manager */
-			return;
-		}
-	}
-	radeon_bo_reference(radeon, &bo->bo, NULL);
+	pb_reference(&bo->buf, NULL);
 	free(bo);
 }
 
 boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo,
-				unsigned stride, struct winsys_handle *whandle)
+				  unsigned stride, struct winsys_handle *whandle)
 {
-	whandle->stride = stride;
-	switch(whandle->type) {
-	case DRM_API_HANDLE_TYPE_KMS:
-		whandle->handle = bo->bo->handle;
-		break;
-	case DRM_API_HANDLE_TYPE_SHARED:
-		if (radeon_bo_get_name(radeon, bo->bo, &whandle->handle))
-			return FALSE;
-		break;
-	default:
-		return FALSE;
-	}
-
-	return TRUE;
+	return radeon->ws->buffer_get_handle(bo->buf, stride, whandle);
 }
diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c
deleted file mode 100644
index 4918d5e..0000000
--- a/src/gallium/winsys/r600/drm/r600_bomgr.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2010 VMWare.
- * Copyright 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jose Fonseca <jrfonseca-at-vmware-dot-com>
- *      Thomas Hellström <thomas-at-vmware-dot-com>
- *      Jerome Glisse <jglisse@redhat.com>
- */
-#include <util/u_memory.h>
-#include <util/u_double_list.h>
-#include <util/u_time.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "r600_priv.h"
-
-static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr)
-{
-	struct r600_bo *bo, *tmp;
-	int64_t now;
-
-	now = os_time_get();
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
-		if(!os_time_timeout(bo->start, bo->end, now))
-			break;
-
-		mgr->num_delayed--;
-		bo->manager_id = 0;
-		LIST_DEL(&bo->list);
-		r600_bo_destroy(mgr->radeon, bo);
-	}
-}
-
-static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr,
-					struct r600_bo *bo,
-					unsigned size,
-					unsigned alignment,
-					unsigned cfence)
-{
-	if(bo->size < size) {
-		return 0;
-	}
-
-	/* be lenient with size */
-	if(bo->size >= 2*size) {
-		return 0;
-	}
-
-	if(!pb_check_alignment(alignment, bo->alignment)) {
-		return 0;
-	}
-
-	if (!fence_is_after(cfence, bo->fence)) {
-		return 0;
-	}
-
-	return 1;
-}
-
-struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
-					unsigned size,
-					unsigned alignment,
-					unsigned cfence)
-{
-	struct r600_bo *bo, *tmp;
-	int64_t now;
-
-
-	pipe_mutex_lock(mgr->mutex);
-
-	now = os_time_get();
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
-		if(r600_bo_is_compat(mgr, bo, size, alignment, cfence)) {
-			LIST_DEL(&bo->list);
-			--mgr->num_delayed;
-			r600_bomgr_timeout_flush(mgr);
-			pipe_mutex_unlock(mgr->mutex);
-			LIST_INITHEAD(&bo->list);
-			pipe_reference_init(&bo->reference, 1);
-			return bo;
-		}
-
-		if(os_time_timeout(bo->start, bo->end, now)) {
-			mgr->num_delayed--;
-			bo->manager_id = 0;
-			LIST_DEL(&bo->list);
-			r600_bo_destroy(mgr->radeon, bo);
-		}
-	}
-
-	pipe_mutex_unlock(mgr->mutex);
-	return NULL;
-}
-
-void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo)
-{
-	LIST_INITHEAD(&bo->list);
-	bo->manager_id = 1;
-}
-
-boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo)
-{
-	bo->start = os_time_get();
-	bo->end = bo->start + mgr->usecs;
-	pipe_mutex_lock(mgr->mutex);
-	LIST_ADDTAIL(&bo->list, &mgr->delayed);
-	++mgr->num_delayed;
-	pipe_mutex_unlock(mgr->mutex);
-	return FALSE;
-}
-
-void r600_bomgr_destroy(struct r600_bomgr *mgr)
-{
-	struct r600_bo *bo, *tmp;
-
-	pipe_mutex_lock(mgr->mutex);
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
-		mgr->num_delayed--;
-		bo->manager_id = 0;
-		LIST_DEL(&bo->list);
-		r600_bo_destroy(mgr->radeon, bo);
-	}
-	pipe_mutex_unlock(mgr->mutex);
-
-	FREE(mgr);
-}
-
-struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs)
-{
-	struct r600_bomgr *mgr;
-
-	mgr = CALLOC_STRUCT(r600_bomgr);
-	if (mgr == NULL)
-		return NULL;
-
-	mgr->radeon = radeon;
-	mgr->usecs = usecs;
-	LIST_INITHEAD(&mgr->delayed);
-	mgr->num_delayed = 0;
-	pipe_mutex_init(mgr->mutex);
-
-	return mgr;
-}
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index ab0afea..7d5583f 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -25,29 +25,18 @@
  *      Corbin Simpson <MostAwesomeDude@gmail.com>
  *      Joakim Sindholt <opensource@zhasha.com>
  */
-#include <stdio.h>
-#include <errno.h>
-#include <sys/ioctl.h>
-#include "util/u_inlines.h"
-#include "util/u_debug.h"
-#include "util/u_hash_table.h"
-#include <pipebuffer/pb_bufmgr.h>
-#include "r600.h"
+
 #include "r600_priv.h"
 #include "r600_drm_public.h"
-#include "xf86drm.h"
-#include "radeon_drm.h"
+#include "util/u_memory.h"
+#include <errno.h>
 
-#ifndef RADEON_INFO_TILING_CONFIG
-#define RADEON_INFO_TILING_CONFIG 0x6
+#ifndef RADEON_INFO_NUM_TILE_PIPES
+#define RADEON_INFO_NUM_TILE_PIPES 0xb
 #endif
 
-#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ
-#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9
-#endif
-
-#ifndef RADEON_INFO_NUM_BACKENDS
-#define RADEON_INFO_NUM_BACKENDS 0xa
+#ifndef RADEON_INFO_BACKEND_MAP
+#define RADEON_INFO_BACKEND_MAP 0xd
 #endif
 
 enum radeon_family r600_get_family(struct radeon *r600)
@@ -67,31 +56,27 @@
 
 unsigned r600_get_clock_crystal_freq(struct radeon *radeon)
 {
-	return radeon->clock_crystal_freq;
+	return radeon->info.r600_clock_crystal_freq;
 }
 
 unsigned r600_get_num_backends(struct radeon *radeon)
 {
-	return radeon->num_backends;
+	return radeon->info.r600_num_backends;
+}
+
+unsigned r600_get_num_tile_pipes(struct radeon *radeon)
+{
+	return radeon->info.r600_num_tile_pipes;
+}
+
+unsigned r600_get_backend_map(struct radeon *radeon)
+{
+	return radeon->info.r600_backend_map;
 }
 
 unsigned r600_get_minor_version(struct radeon *radeon)
 {
-	return radeon->minor_version;
-}
-
-
-static int radeon_get_device(struct radeon *radeon)
-{
-	struct drm_radeon_info info = {};
-	int r;
-
-	radeon->device = 0;
-	info.request = RADEON_INFO_DEVICE_ID;
-	info.value = (uintptr_t)&radeon->device;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
-			sizeof(struct drm_radeon_info));
-	return r;
+	return radeon->info.drm_minor;
 }
 
 static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config)
@@ -186,124 +171,32 @@
 
 static int radeon_drm_get_tiling(struct radeon *radeon)
 {
-	struct drm_radeon_info info = {};
-	int r;
-	uint32_t tiling_config = 0;
+	uint32_t tiling_config = radeon->info.r600_tiling_config;
 
-	info.request = RADEON_INFO_TILING_CONFIG;
-	info.value = (uintptr_t)&tiling_config;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
-				sizeof(struct drm_radeon_info));
-
-	if (r)
+	if (!tiling_config)
 		return 0;
 
 	if (radeon->chip_class == R600 || radeon->chip_class == R700) {
-		r = r600_interpret_tiling(radeon, tiling_config);
+		return r600_interpret_tiling(radeon, tiling_config);
 	} else {
-		r = eg_interpret_tiling(radeon, tiling_config);
+		return eg_interpret_tiling(radeon, tiling_config);
 	}
-	return r;
 }
 
-static int radeon_get_clock_crystal_freq(struct radeon *radeon)
+struct radeon *radeon_create(struct radeon_winsys *ws)
 {
-	struct drm_radeon_info info = {};
-	uint32_t clock_crystal_freq = 0;
-	int r;
-
-	info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ;
-	info.value = (uintptr_t)&clock_crystal_freq;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
-			sizeof(struct drm_radeon_info));
-	if (r)
-		return r;
-
-	radeon->clock_crystal_freq = clock_crystal_freq;
-	return 0;
-}
-
-
-static int radeon_get_num_backends(struct radeon *radeon)
-{
-	struct drm_radeon_info info = {};
-	uint32_t num_backends = 0;
-	int r;
-
-	info.request = RADEON_INFO_NUM_BACKENDS;
-	info.value = (uintptr_t)&num_backends;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
-			sizeof(struct drm_radeon_info));
-	if (r)
-		return r;
-
-	radeon->num_backends = num_backends;
-	return 0;
-}
-
-
-static int radeon_init_fence(struct radeon *radeon)
-{
-	radeon->fence = 1;
-	radeon->fence_bo = r600_bo(radeon, 4096, 0, 0, 0);
-	if (radeon->fence_bo == NULL) {
-		return -ENOMEM;
-	}
-	radeon->cfence = r600_bo_map(radeon, radeon->fence_bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL);
-	*radeon->cfence = 0;
-	return 0;
-}
-
-#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
-
-static unsigned handle_hash(void *key)
-{
-    return PTR_TO_UINT(key);
-}
-
-static int handle_compare(void *key1, void *key2)
-{
-    return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
-}
-
-static struct radeon *radeon_new(int fd, unsigned device)
-{
-	struct radeon *radeon;
-	int r;
-	drmVersionPtr version;
-
-	radeon = calloc(1, sizeof(*radeon));
+	struct radeon *radeon = CALLOC_STRUCT(radeon);
 	if (radeon == NULL) {
 		return NULL;
 	}
-	radeon->fd = fd;
-	radeon->device = device;
-	radeon->refcount = 1;
 
-	version = drmGetVersion(radeon->fd);
-	if (version->version_major != 2) {
-		fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
-			"only compatible with 2.x.x\n", __FUNCTION__,
-			version->version_major, version->version_minor,
-			version->version_patchlevel);
-		drmFreeVersion(version);
-		exit(1);
-	}
+	radeon->ws = ws;
+	ws->query_info(ws, &radeon->info);
 
-	radeon->minor_version = version->version_minor;
-
-	drmFreeVersion(version);
-
-	r = radeon_get_device(radeon);
-	if (r) {
-		fprintf(stderr, "Failed to get device id\n");
-		return radeon_decref(radeon);
-	}
-
-	radeon->family = radeon_family_from_device(radeon->device);
+	radeon->family = radeon_family_from_device(radeon->info.pci_id);
 	if (radeon->family == CHIP_UNKNOWN) {
-		fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device);
-		return radeon_decref(radeon);
+		fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->info.pci_id);
+		return radeon_destroy(radeon);
 	}
 	/* setup class */
 	switch (radeon->family) {
@@ -349,56 +242,21 @@
 		break;
 	default:
 		fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
-			__func__, radeon->device);
+			__func__, radeon->info.pci_id);
 		break;
 	}
 
 	if (radeon_drm_get_tiling(radeon))
 		return NULL;
 
-	/* get the GPU counter frequency, failure is non fatal */
-	radeon_get_clock_crystal_freq(radeon);
-
-	if (radeon->minor_version >= 9)
-		radeon_get_num_backends(radeon);
-
-	radeon->bomgr = r600_bomgr_create(radeon, 1000000);
-	if (radeon->bomgr == NULL) {
-		return NULL;
-	}
-	r = radeon_init_fence(radeon);
-	if (r) {
-		radeon_decref(radeon);
-		return NULL;
-	}
-
-	radeon->bo_handles = util_hash_table_create(handle_hash, handle_compare);
-	pipe_mutex_init(radeon->bo_handles_mutex);
 	return radeon;
 }
 
-struct radeon *r600_drm_winsys_create(int drmfd)
-{
-	return radeon_new(drmfd, 0);
-}
-
-struct radeon *radeon_decref(struct radeon *radeon)
+struct radeon *radeon_destroy(struct radeon *radeon)
 {
 	if (radeon == NULL)
 		return NULL;
-	if (--radeon->refcount > 0) {
-		return NULL;
-	}
 
-	util_hash_table_destroy(radeon->bo_handles);
-	pipe_mutex_destroy(radeon->bo_handles_mutex);
-	if (radeon->fence_bo) {
-		r600_bo_reference(radeon, &radeon->fence_bo, NULL);
-	}
-
-	if (radeon->bomgr)
-		r600_bomgr_destroy(radeon->bomgr);
-
-	free(radeon);
+	FREE(radeon);
 	return NULL;
 }
diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h
index cfce8df..b8a37c7 100644
--- a/src/gallium/winsys/r600/drm/r600_drm_public.h
+++ b/src/gallium/winsys/r600/drm/r600_drm_public.h
@@ -26,8 +26,8 @@
 #ifndef R600_DRM_PUBLIC_H
 #define R600_DRM_PUBLIC_H
 
-struct radeon;
+struct radeon_winsys;
 
-struct radeon *r600_drm_winsys_create(int drmFD);
+struct radeon *radeon_create(struct radeon_winsys *ws);
 
 #endif
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 07bd544..6c5b4b8 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -23,23 +23,97 @@
  * Authors:
  *      Jerome Glisse
  */
-#include <errno.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <pipe/p_compiler.h>
-#include <util/u_inlines.h>
-#include <util/u_memory.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "xf86drm.h"
-#include "radeon_drm.h"
 #include "r600_priv.h"
-#include "bof.h"
 #include "r600d.h"
+#include "util/u_memory.h"
+#include <errno.h>
 
 #define GROUP_FORCE_NEW_BLOCK	0
 
+/* Get backends mask */
+void r600_get_backend_mask(struct r600_context *ctx)
+{
+	struct r600_bo * buffer;
+	u32 * results;
+	unsigned num_backends = r600_get_num_backends(ctx->radeon);
+	unsigned i, mask = 0;
+
+	/* if backend_map query is supported by the kernel */
+	if (ctx->radeon->info.r600_backend_map_valid) {
+		unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon);
+		unsigned backend_map = r600_get_backend_map(ctx->radeon);
+		unsigned item_width, item_mask;
+
+		if (ctx->radeon->chip_class >= EVERGREEN) {
+			item_width = 4;
+			item_mask = 0x7;
+		} else {
+			item_width = 2;
+			item_mask = 0x3;
+		}
+
+		while(num_tile_pipes--) {
+			i = backend_map & item_mask;
+			mask |= (1<<i);
+			backend_map >>= item_width;
+		}
+		if (mask != 0) {
+			ctx->backend_mask = mask;
+			return;
+		}
+	}
+
+	/* otherwise backup path for older kernels */
+
+	/* create buffer for event data */
+	buffer = r600_bo(ctx->radeon, ctx->max_db*16, 1, 0,
+				PIPE_USAGE_STAGING);
+	if (!buffer)
+		goto err;
+
+	/* initialize buffer with zeroes */
+	results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_WRITE);
+	if (results) {
+		memset(results, 0, ctx->max_db * 4 * 4);
+		r600_bo_unmap(ctx->radeon, buffer);
+
+		/* emit EVENT_WRITE for ZPASS_DONE */
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);
+
+		/* execute */
+		r600_context_flush(ctx, 0);
+
+		/* analyze results */
+		results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_READ);
+		if (results) {
+			for(i = 0; i < ctx->max_db; i++) {
+				/* at least highest bit will be set if backend is used */
+				if (results[i*4 + 1])
+					mask |= (1<<i);
+			}
+			r600_bo_unmap(ctx->radeon, buffer);
+		}
+	}
+
+	r600_bo_reference(&buffer, NULL);
+
+	if (mask != 0) {
+		ctx->backend_mask = mask;
+		return;
+	}
+
+err:
+	/* fallback to old method - set num_backends lower bits to 1 */
+	ctx->backend_mask = (~((u32)0))>>(32-num_backends);
+	return;
+}
+
 static inline void r600_context_ps_partial_flush(struct r600_context *ctx)
 {
 	if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
@@ -66,32 +140,6 @@
 	ctx->init_dwords = ctx->pm4_cdwords;
 }
 
-static void INLINE r600_context_update_fenced_list(struct r600_context *ctx)
-{
-	for (int i = 0; i < ctx->creloc; i++) {
-		if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist))
-			LIST_DELINIT(&ctx->bo[i]->fencedlist);
-		LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo);
-		ctx->bo[i]->fence = ctx->radeon->fence;
-		ctx->bo[i]->ctx = ctx;
-	}
-}
-
-static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence)
-{
-	struct radeon_bo *bo = NULL;
-	struct radeon_bo *tmp;
-
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) {
-		if (bo->fence <= *ctx->radeon->cfence) {
-			LIST_DELINIT(&bo->fencedlist);
-			bo->fence = 0;
-		} else {
-			bo->fence = fence;
-		}
-	}
-}
-
 static void r600_init_block(struct r600_context *ctx,
 			    struct r600_block *block,
 			    const struct r600_reg *reg, int index, int nreg,
@@ -680,17 +728,6 @@
 	return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET);
 }
 
-static void r600_context_clear_fenced_bo(struct r600_context *ctx)
-{
-	struct radeon_bo *bo, *tmp;
-
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) {
-		LIST_DELINIT(&bo->fencedlist);
-		bo->fence = 0;
-		bo->ctx = NULL;
-	}
-}
-
 static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks)
 {
 	struct r600_block *block;
@@ -699,7 +736,7 @@
 		block = range->blocks[i];
 		if (block) {
 			for (int k = 1; k <= block->nbo; k++)
-				r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL);
+				r600_bo_reference(&block->reloc[k].bo, NULL);
 			free(block);
 		}
 	}
@@ -724,7 +761,7 @@
 					range->blocks[CTX_BLOCK_ID(offset)] = NULL;
 				}
 				for (int k = 1; k <= block->nbo; k++) {
-					r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL);
+					r600_bo_reference(&block->reloc[k].bo, NULL);
 				}
 				free(block);
 			}
@@ -736,11 +773,9 @@
 	r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources);
 	free(ctx->range);
 	free(ctx->blocks);
-	free(ctx->reloc);
 	free(ctx->bo);
-	free(ctx->pm4);
+	ctx->radeon->ws->cs_destroy(ctx->cs);
 
-	r600_context_clear_fenced_bo(ctx);
 	memset(ctx, 0, sizeof(struct r600_context));
 }
 
@@ -797,6 +832,7 @@
 
 	memset(ctx, 0, sizeof(struct r600_context));
 	ctx->radeon = radeon;
+
 	LIST_INITHEAD(&ctx->query_list);
 
 	/* init dirty list */
@@ -872,33 +908,23 @@
 	if (r)
 		goto out_err;
 
+	ctx->cs = radeon->ws->cs_create(radeon->ws);
+
 	/* allocate cs variables */
-	ctx->nreloc = RADEON_CTX_MAX_PM4;
-	ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
-	if (ctx->reloc == NULL) {
-		r = -ENOMEM;
-		goto out_err;
-	}
-	ctx->bo = calloc(ctx->nreloc, sizeof(void *));
+	ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *));
 	if (ctx->bo == NULL) {
 		r = -ENOMEM;
 		goto out_err;
 	}
-	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
-	ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
-	if (ctx->pm4 == NULL) {
-		r = -ENOMEM;
-		goto out_err;
-	}
+	ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS;
+	ctx->pm4 = ctx->cs->buf;
 
 	r600_init_cs(ctx);
 	/* save 16dwords space for fence mecanism */
 	ctx->pm4_ndwords -= 16;
-
-	LIST_INITHEAD(&ctx->fenced_bo);
-
 	ctx->max_db = 4;
 
+	r600_get_backend_mask(ctx);
 	return 0;
 out_err:
 	r600_context_fini(ctx);
@@ -912,7 +938,7 @@
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing);
@@ -923,11 +949,8 @@
 }
 
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
-				unsigned flush_mask, struct r600_bo *rbo)
+				unsigned flush_mask, struct r600_bo *bo)
 {
-	struct radeon_bo *bo;
-
-	bo = rbo->bo;
 	/* if bo has already been flushed */
 	if (!(~bo->last_flush & flush_flags)) {
 		bo->last_flush &= flush_mask;
@@ -959,29 +982,15 @@
 	} else {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing);
 		ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
-		ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8;
+		ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->base.size + 255) >> 8;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id;
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
 	}
 	bo->last_flush = (bo->last_flush | flush_flags) & flush_mask;
 }
 
-void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo)
-{
-	struct radeon_bo *bo = rbo->bo;
-	bo->reloc = &ctx->reloc[ctx->creloc];
-	bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4;
-	ctx->reloc[ctx->creloc].handle = bo->handle;
-	ctx->reloc[ctx->creloc].read_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM);
-	ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM);
-	ctx->reloc[ctx->creloc].flags = 0;
-	radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo);
-	rbo->fence = ctx->radeon->fence;
-	ctx->creloc++;
-}
-
 void r600_context_reg(struct r600_context *ctx,
 		      unsigned offset, unsigned value,
 		      unsigned mask)
@@ -1057,8 +1066,8 @@
 		if (block->pm4_bo_index[id]) {
 			/* find relocation */
 			reloc_id = block->pm4_bo_index[id];
-			r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, reg->bo);
-			reg->bo->fence = ctx->radeon->fence;
+			r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo);
+			block->reloc[reloc_id].bo_usage = reg->bo_usage;
 			/* always force dirty for relocs for now */
 			dirty |= R600_BLOCK_STATUS_DIRTY;
 		}
@@ -1094,10 +1103,10 @@
 	if (state == NULL) {
 		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY);
 		if (block->reloc[1].bo)
-			block->reloc[1].bo->bo->binding &= ~BO_BOUND_TEXTURE;
+			block->reloc[1].bo->binding &= ~BO_BOUND_TEXTURE;
 
-		r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
-		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
+		r600_bo_reference(&block->reloc[1].bo, NULL);
+		r600_bo_reference(&block->reloc[2].bo, NULL);
 		LIST_DELINIT(&block->list);
 		LIST_DELINIT(&block->enable_list);
 		return;
@@ -1117,39 +1126,32 @@
 
 	if (!dirty) {
 		if (is_vertex) {
-			if (block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle)
+			if (block->reloc[1].bo->buf != state->bo[0]->buf)
 				dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
 		} else {
-			if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) ||
-			    (block->reloc[2].bo->bo->handle != state->bo[1]->bo->handle))
+			if ((block->reloc[1].bo->buf != state->bo[0]->buf) ||
+			    (block->reloc[2].bo->buf != state->bo[1]->buf))
 				dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
 		}
 	}
-	if (!dirty) {
-		if (is_vertex)
-			state->bo[0]->fence = ctx->radeon->fence;
-		else {
-			state->bo[0]->fence = ctx->radeon->fence;
-			state->bo[1]->fence = ctx->radeon->fence;
-		}
-	} else {
+
+	if (dirty) {
 		if (is_vertex) {
 			/* VERTEX RESOURCE, we preted there is 2 bo to relocate so
 			 * we have single case btw VERTEX & TEXTURE resource
 			 */
-			r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]);
-			r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
-			state->bo[0]->fence = ctx->radeon->fence;
+			r600_bo_reference(&block->reloc[1].bo, state->bo[0]);
+			block->reloc[1].bo_usage = state->bo_usage[0];
+			r600_bo_reference(&block->reloc[2].bo, NULL);
 		} else {
 			/* TEXTURE RESOURCE */
-			r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]);
-			r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]);
-			state->bo[0]->fence = ctx->radeon->fence;
-			state->bo[1]->fence = ctx->radeon->fence;
-			state->bo[0]->bo->binding |= BO_BOUND_TEXTURE;
+			r600_bo_reference(&block->reloc[1].bo, state->bo[0]);
+			block->reloc[1].bo_usage = state->bo_usage[0];
+			r600_bo_reference(&block->reloc[2].bo, state->bo[1]);
+			block->reloc[2].bo_usage = state->bo_usage[1];
+			state->bo[0]->binding |= BO_BOUND_TEXTURE;
 		}
-	}
-	if (dirty) {
+
 		if (is_vertex)
 			block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX;
 		else
@@ -1281,7 +1283,6 @@
 
 void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block)
 {
-	int id;
 	int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS);
 	int cp_dwords = block->pm4_ndwords, start_dword = 0;
 	int new_dwords = 0;
@@ -1297,14 +1298,13 @@
 		for (int j = 0; j < block->nreg; j++) {
 			if (block->pm4_bo_index[j]) {
 				/* find relocation */
-				id = block->pm4_bo_index[j];
-				r600_context_bo_reloc(ctx,
-						      &block->pm4[block->reloc[id].bo_pm4_index],
-						      block->reloc[id].bo);
+				struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
+				block->pm4[reloc->bo_pm4_index] =
+					r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
 				r600_context_bo_flush(ctx,
-						      block->reloc[id].flush_flags,
-						      block->reloc[id].flush_mask,
-						      block->reloc[id].bo);
+						      reloc->flush_flags,
+						      reloc->flush_mask,
+						      reloc->bo);
 				nbo--;
 				if (nbo == 0)
 					break;
@@ -1338,7 +1338,6 @@
 
 void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block)
 {
-	int id;
 	int cp_dwords = block->pm4_ndwords;
 	int nbo = block->nbo;
 
@@ -1352,14 +1351,13 @@
 	for (int j = 0; j < nbo; j++) {
 		if (block->pm4_bo_index[j]) {
 			/* find relocation */
-			id = block->pm4_bo_index[j];
-			r600_context_bo_reloc(ctx,
-					      &block->pm4[block->reloc[id].bo_pm4_index],
-					      block->reloc[id].bo);
+			struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
+			block->pm4[reloc->bo_pm4_index] =
+				r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
 			r600_context_bo_flush(ctx,
-					      block->reloc[id].flush_flags,
-					      block->reloc[id].flush_mask,
-					      block->reloc[id].bo);
+					      reloc->flush_flags,
+					      reloc->flush_mask,
+					      reloc->bo);
 		}
 	}
 	ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
@@ -1418,10 +1416,6 @@
 
 	if (draw->indices) {
 		ndwords = 11;
-		/* make sure there is enough relocation space before scheduling draw */
-		if (ctx->creloc >= (ctx->nreloc - 1)) {
-			r600_context_flush(ctx);
-		}
 	}
 
 	/* queries need some special values */
@@ -1440,11 +1434,11 @@
 
 	/* update the max dword count to make sure we have enough space
 	 * reserved for flushing the destination caches */
-	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16;
+	ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16;
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 	/* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
 	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
@@ -1469,13 +1463,12 @@
 	pm4[3] = draw->vgt_num_instances;
 	if (draw->indices) {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
-		pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
+		pm4[5] = draw->indices_bo_offset;
 		pm4[6] = 0;
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		pm4[10] = 0;
-		r600_context_bo_reloc(ctx, &pm4[10], draw->indices);
+		pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ);
 	} else {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
 		pm4[5] = draw->vgt_num_indices;
@@ -1489,13 +1482,8 @@
 	ctx->pm4_dirty_cdwords = 0;
 }
 
-void r600_context_flush(struct r600_context *ctx)
+void r600_context_flush(struct r600_context *ctx, unsigned flags)
 {
-	struct drm_radeon_cs drmib = {};
-	struct drm_radeon_cs_chunk chunks[2];
-	uint64_t chunk_array[2];
-	unsigned fence;
-	int r;
 	struct r600_block *enable_block = NULL;
 
 	if (ctx->pm4_cdwords == ctx->init_dwords)
@@ -1512,54 +1500,19 @@
 	/* partial flush is needed to avoid lockups on some chips with user fences */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
-	/* emit fence */
-	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24);
-	ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence;
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo);
 
-#if 1
-	/* emit cs */
-	drmib.num_chunks = 2;
-	drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
-	chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
-	chunks[0].length_dw = ctx->pm4_cdwords;
-	chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
-	chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
-	chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4;
-	chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
-	chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
-	chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];
-	r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib,
-				sizeof(struct drm_radeon_cs));
-	if (r) {
-		fprintf(stderr, "radeon: The kernel rejected CS, "
-			"see dmesg for more information.\n");
-	}
-#else
-	*ctx->radeon->cfence = ctx->radeon->fence;
-#endif
+	/* Flush the CS. */
+	ctx->cs->cdw = ctx->pm4_cdwords;
+	ctx->radeon->ws->cs_flush(ctx->cs, flags);
 
-	r600_context_update_fenced_list(ctx);
-
-	fence = ctx->radeon->fence + 1;
-	if (fence < ctx->radeon->fence) {
-		/* wrap around */
-		fence = 1;
-		r600_context_fence_wraparound(ctx, fence);
-	}
-	ctx->radeon->fence = fence;
+	/* We need to get the pointer to the other CS,
+	 * the command streams are double-buffered. */
+	ctx->pm4 = ctx->cs->buf;
 
 	/* restart */
 	for (int i = 0; i < ctx->creloc; i++) {
-		ctx->bo[i]->reloc = NULL;
 		ctx->bo[i]->last_flush = 0;
-		radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
+		r600_bo_reference(&ctx->bo[i], NULL);
 	}
 	ctx->creloc = 0;
 	ctx->pm4_dirty_cdwords = 0;
@@ -1596,10 +1549,9 @@
 {
 	unsigned ndwords = 10;
 
-	if (((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) ||
-	    (ctx->creloc >= (ctx->nreloc - 1))) {
+	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
@@ -1611,90 +1563,7 @@
 	ctx->pm4[ctx->pm4_cdwords++] = value;                   /* DATA_LO */
 	ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], fence_bo);
-}
-
-void r600_context_dump_bof(struct r600_context *ctx, const char *file)
-{
-	bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
-	unsigned i;
-
-	root = device_id = bcs = blob = array = bo = size = handle = NULL;
-	root = bof_object();
-	if (root == NULL)
-		goto out_err;
-	device_id = bof_int32(ctx->radeon->device);
-	if (device_id == NULL)
-		goto out_err;
-	if (bof_object_set(root, "device_id", device_id))
-		goto out_err;
-	bof_decref(device_id);
-	device_id = NULL;
-	/* dump relocs */
-	blob = bof_blob(ctx->creloc * 16, ctx->reloc);
-	if (blob == NULL)
-		goto out_err;
-	if (bof_object_set(root, "reloc", blob))
-		goto out_err;
-	bof_decref(blob);
-	blob = NULL;
-	/* dump cs */
-	blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4);
-	if (blob == NULL)
-		goto out_err;
-	if (bof_object_set(root, "pm4", blob))
-		goto out_err;
-	bof_decref(blob);
-	blob = NULL;
-	/* dump bo */
-	array = bof_array();
-	if (array == NULL)
-		goto out_err;
-	for (i = 0; i < ctx->creloc; i++) {
-		struct radeon_bo *rbo = ctx->bo[i];
-		bo = bof_object();
-		if (bo == NULL)
-			goto out_err;
-		size = bof_int32(rbo->size);
-		if (size == NULL)
-			goto out_err;
-		if (bof_object_set(bo, "size", size))
-			goto out_err;
-		bof_decref(size);
-		size = NULL;
-		handle = bof_int32(rbo->handle);
-		if (handle == NULL)
-			goto out_err;
-		if (bof_object_set(bo, "handle", handle))
-			goto out_err;
-		bof_decref(handle);
-		handle = NULL;
-		radeon_bo_map(ctx->radeon, rbo);
-		blob = bof_blob(rbo->size, rbo->data);
-		radeon_bo_unmap(ctx->radeon, rbo);
-		if (blob == NULL)
-			goto out_err;
-		if (bof_object_set(bo, "data", blob))
-			goto out_err;
-		bof_decref(blob);
-		blob = NULL;
-		if (bof_array_append(array, bo))
-			goto out_err;
-		bof_decref(bo);
-		bo = NULL;
-	}
-	if (bof_object_set(root, "bo", array))
-		goto out_err;
-	bof_dump_file(root, file);
-out_err:
-	bof_decref(blob);
-	bof_decref(array);
-	bof_decref(bo);
-	bof_decref(size);
-	bof_decref(handle);
-	bof_decref(device_id);
-	bof_decref(root);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
 }
 
 static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
@@ -1704,9 +1573,9 @@
 	u32 *results, *current_result;
 
 	if (wait)
-		results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_READ, NULL);
+		results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_READ);
 	else
-		results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ, NULL);
+		results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ);
 	if (!results)
 		return FALSE;
 
@@ -1735,7 +1604,6 @@
 void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
 	unsigned required_space, new_results_end;
-	int num_backends = r600_get_num_backends(ctx->radeon);
 
 	/* query request needs 6/8 dwords for begin + 6/8 dwords for end */
 	if (query->type == PIPE_QUERY_TIME_ELAPSED)
@@ -1745,7 +1613,7 @@
 
 	if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 
 	if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
@@ -1756,7 +1624,7 @@
 			query->queries_emitted = 1;
 		} else {
 			if (++query->queries_emitted > query->buffer_size / query->result_size / 2)
-				r600_context_flush(ctx);
+				r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 		}
 	}
 
@@ -1767,7 +1635,7 @@
 	/* collect current results if query buffer is full */
 	if (new_results_end == query->results_start) {
 		if (!(query->state & R600_QUERY_STATE_FLUSHED))
-			r600_context_flush(ctx);
+			r600_context_flush(ctx, 0);
 		r600_query_result(ctx, query, TRUE);
 	}
 
@@ -1775,15 +1643,17 @@
 		u32 *results;
 		int i;
 
-		results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_WRITE, NULL);
+		results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_WRITE);
 		if (results) {
 			results = (u32*)((char*)results + query->results_end);
 			memset(results, 0, query->result_size);
 
 			/* Set top bits for unused backends */
-			for (i = num_backends; i < ctx->max_db; i++) {
-				results[(i * 4)+1] = 0x80000000;
-				results[(i * 4)+3] = 0x80000000;
+			for (i = 0; i < ctx->max_db; i++) {
+				if (!(ctx->backend_mask & (1<<i))) {
+					results[(i * 4)+1] = 0x80000000;
+					results[(i * 4)+3] = 0x80000000;
+				}
 			}
 			r600_bo_unmap(ctx->radeon, query->buffer);
 		}
@@ -1793,19 +1663,18 @@
 	if (query->type == PIPE_QUERY_TIME_ELAPSED) {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
 		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	} else {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
 
 	query->state |= R600_QUERY_STATE_STARTED;
 	query->state ^= R600_QUERY_STATE_ENDED;
@@ -1818,19 +1687,18 @@
 	if (query->type == PIPE_QUERY_TIME_ELAPSED) {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
 		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	} else {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
 
 	query->results_end += query->result_size;
 	if (query->results_end >= query->buffer_size)
@@ -1848,7 +1716,7 @@
 {
 	if (operation == PREDICATION_OP_CLEAR) {
 		if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords)
-			r600_context_flush(ctx);
+			r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
@@ -1864,7 +1732,7 @@
 		count /= query->result_size;
 
 		if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords)
-			r600_context_flush(ctx);
+			r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 
 		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
 				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
@@ -1872,11 +1740,11 @@
 		/* emit predicate packets for all data blocks */
 		while (results_base != query->results_end) {
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
-			ctx->pm4[ctx->pm4_cdwords++] = results_base + r600_bo_offset(query->buffer);
+			ctx->pm4[ctx->pm4_cdwords++] = results_base;
 			ctx->pm4[ctx->pm4_cdwords++] = op;
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-			ctx->pm4[ctx->pm4_cdwords++] = 0;
-			r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+			ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer,
+									     RADEON_USAGE_READ);
 			results_base += query->result_size;
 			if (results_base >= query->buffer_size)
 				results_base = 0;
@@ -1926,7 +1794,7 @@
 
 void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
 {
-	r600_bo_reference(ctx->radeon, &query->buffer, NULL);
+	r600_bo_reference(&query->buffer, NULL);
 	LIST_DELINIT(&query->list);
 	free(query);
 }
@@ -1938,7 +1806,7 @@
 	uint64_t *result = (uint64_t*)vresult;
 
 	if (!(query->state & R600_QUERY_STATE_FLUSHED)) {
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, 0);
 	}
 	if (!r600_query_result(ctx, query, wait))
 		return FALSE;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 69f7251..1e90189 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -26,40 +26,20 @@
 #ifndef R600_PRIV_H
 #define R600_PRIV_H
 
-#include <errno.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <util/u_double_list.h>
-#include <util/u_inlines.h>
-#include "util/u_hash_table.h"
-#include <os/os_thread.h>
 #include "r600.h"
+#include "../../radeon/drm/radeon_winsys.h"
+#include "util/u_hash_table.h"
+#include "os/os_thread.h"
 
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
 
-struct r600_bomgr;
-struct r600_bo;
-
 struct radeon {
-	int				fd;
-	int				refcount;
-	unsigned			device;
+	struct radeon_winsys		*ws;
+	struct radeon_info		info;
 	unsigned			family;
 	enum chip_class			chip_class;
 	struct r600_tiling_info		tiling_info;
-	struct r600_bomgr		*bomgr;
-	unsigned			fence;
-	unsigned			*cfence;
-	struct r600_bo			*fence_bo;
-	unsigned			clock_crystal_freq;
-	unsigned			num_backends;
-	unsigned                        minor_version;
-
-        /* List of buffer handles and its mutex. */
-	struct util_hash_table          *bo_handles;
-	pipe_mutex bo_handles_mutex;
 };
 
 /* these flags are used in register flags and added into block flags */
@@ -79,85 +59,25 @@
 };
 
 #define BO_BOUND_TEXTURE 1
-struct radeon_bo {
-	struct pipe_reference		reference;
-	unsigned			handle;
-	unsigned			size;
-	unsigned			alignment;
-	int				map_count;
-	void				*data;
-	struct list_head		fencedlist;
-	unsigned			fence;
-	struct r600_context		*ctx;
-	boolean				shared;
-	struct r600_reloc		*reloc;
-	unsigned			reloc_id;
-	unsigned			last_flush;
-	unsigned                        name;
-	unsigned                        binding;
-};
 
 struct r600_bo {
 	struct pipe_reference		reference; /* this must be the first member for the r600_bo_reference inline to work */
 	/* DO NOT MOVE THIS ^ */
-	unsigned			size;
-	unsigned			tiling_flags;
-	unsigned			kernel_pitch;
+	struct pb_buffer		*buf;
+	struct radeon_winsys_cs_handle	*cs_buf;
 	unsigned			domains;
-	struct radeon_bo		*bo;
-	unsigned			fence;
-	/* manager data */
-	struct list_head		list;
-	unsigned			manager_id;
-	unsigned			alignment;
-	unsigned			offset;
-	int64_t				start;
-	int64_t				end;
+	unsigned			last_flush;
+	unsigned                        binding;
 };
 
-struct r600_bomgr {
-	struct radeon			*radeon;
-	unsigned			usecs;
-	pipe_mutex			mutex;
-	struct list_head		delayed;
-	unsigned			num_delayed;
-};
-
-/*
- * r600_drm.c
- */
-struct radeon *r600_new(int fd, unsigned device);
-void r600_delete(struct radeon *r600);
-
 /*
  * radeon_pciid.c
  */
 unsigned radeon_family_from_device(unsigned device);
 
 /*
- * radeon_bo.c
- */
-struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			    unsigned size, unsigned alignment, unsigned initial_domain);
-void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
-			 struct radeon_bo *src);
-int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
-int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain);
-int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo);
-int radeon_bo_get_tiling_flags(struct radeon *radeon,
-			       struct radeon_bo *bo,
-			       uint32_t *tiling_flags,
-			       uint32_t *pitch);
-int radeon_bo_get_name(struct radeon *radeon,
-		       struct radeon_bo *bo,
-		       uint32_t *name);
-int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo);
-
-/*
  * r600_hw_context.c
  */
-int r600_context_init_fence(struct r600_context *ctx);
-void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo);
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 				unsigned flush_mask, struct r600_bo *rbo);
 struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset);
@@ -175,70 +95,23 @@
 void r600_init_cs(struct r600_context *ctx);
 int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base);
 
-static INLINE void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo)
+static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo,
+					     enum radeon_bo_usage usage)
 {
-	struct radeon_bo *bo = rbo->bo;
+	enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? rbo->domains : 0;
+	enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? rbo->domains : 0;
 
-	assert(bo != NULL);
+	assert(usage);
 
-	if (!bo->reloc)
-		r600_context_get_reloc(ctx, rbo);
+	unsigned reloc_index =
+		ctx->radeon->ws->cs_add_reloc(ctx->cs, rbo->cs_buf,
+					      rd, wd);
 
-	/* set PKT3 to point to proper reloc */
-	*pm4 = bo->reloc_id;
-}
+	if (reloc_index >= ctx->creloc)
+		ctx->creloc = reloc_index+1;
 
-/*
- * r600_bo.c
- */
-void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
-
-/*
- * r600_bomgr.c
- */
-struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs);
-void r600_bomgr_destroy(struct r600_bomgr *mgr);
-boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo);
-void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo);
-struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
-					unsigned size,
-					unsigned alignment,
-					unsigned cfence);
-
-
-/*
- * helpers
- */
-
-
-/*
- * radeon_bo.c
- */
-static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo)
-{
-	if (bo->map_count == 0 && !bo->data)
-		return radeon_bo_fixed_map(radeon, bo);
-	bo->map_count++;
-	return 0;
-}
-
-static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo)
-{
-	bo->map_count--;
-	assert(bo->map_count >= 0);
-}
-
-/*
- * fence
- */
-static inline boolean fence_is_after(unsigned fence, unsigned ofence)
-{
-	/* handle wrap around */
-	if (fence < 0x80000000 && ofence > 0x80000000)
-		return TRUE;
-	if (fence > ofence)
-		return TRUE;
-	return FALSE;
+	r600_bo_reference(&ctx->bo[reloc_index], rbo);
+	return reloc_index * 4;
 }
 
 #endif
diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h
deleted file mode 100644
index 4a19dcf..0000000
--- a/src/gallium/winsys/r600/drm/r600d.h
+++ /dev/null
@@ -1,2241 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jerome Glisse
- */
-#ifndef R600D_H
-#define R600D_H
-
-/* evergreen values */
-#define EG_RESOURCE_OFFSET                 0x00030000
-#define EG_RESOURCE_END                    0x00034000
-#define EG_LOOP_CONST_OFFSET               0x0003A200
-#define EG_LOOP_CONST_END                  0x0003A26C
-#define EG_BOOL_CONST_OFFSET               0x0003A500
-#define EG_BOOL_CONST_END                  0x0003A506
-
-
-#define R600_CONFIG_REG_OFFSET                 0X00008000
-#define R600_CONFIG_REG_END                    0X0000AC00
-#define R600_CONTEXT_REG_OFFSET                0X00028000
-#define R600_CONTEXT_REG_END                   0X00029000
-#define R600_ALU_CONST_OFFSET                  0X00030000
-#define R600_ALU_CONST_END                     0X00032000
-#define R600_RESOURCE_OFFSET                   0X00038000
-#define R600_RESOURCE_END                      0X0003C000
-#define R600_SAMPLER_OFFSET                    0X0003C000
-#define R600_SAMPLER_END                       0X0003CFF0
-#define R600_CTL_CONST_OFFSET                  0X0003CFF0
-#define R600_CTL_CONST_END                     0X0003E200
-#define R600_LOOP_CONST_OFFSET                 0X0003E200
-#define R600_LOOP_CONST_END                    0X0003E380
-#define R600_BOOL_CONST_OFFSET                 0X0003E380
-#define R600_BOOL_CONST_END                    0X00040000
-
-#define PKT3_NOP                               0x10
-#define PKT3_INDIRECT_BUFFER_END               0x17
-#define PKT3_SET_PREDICATION                   0x20
-#define PKT3_REG_RMW                           0x21
-#define PKT3_COND_EXEC                         0x22
-#define PKT3_PRED_EXEC                         0x23
-#define PKT3_START_3D_CMDBUF                   0x24
-#define PKT3_DRAW_INDEX_2                      0x27
-#define PKT3_CONTEXT_CONTROL                   0x28
-#define PKT3_DRAW_INDEX_IMMD_BE                0x29
-#define PKT3_INDEX_TYPE                        0x2A
-#define PKT3_DRAW_INDEX                        0x2B
-#define PKT3_DRAW_INDEX_AUTO                   0x2D
-#define PKT3_DRAW_INDEX_IMMD                   0x2E
-#define PKT3_NUM_INSTANCES                     0x2F
-#define PKT3_STRMOUT_BUFFER_UPDATE             0x34
-#define PKT3_INDIRECT_BUFFER_MP                0x38
-#define PKT3_MEM_SEMAPHORE                     0x39
-#define PKT3_MPEG_INDEX                        0x3A
-#define PKT3_WAIT_REG_MEM                      0x3C
-#define PKT3_MEM_WRITE                         0x3D
-#define PKT3_INDIRECT_BUFFER                   0x32
-#define PKT3_CP_INTERRUPT                      0x40
-#define PKT3_SURFACE_SYNC                      0x43
-#define PKT3_ME_INITIALIZE                     0x44
-#define PKT3_COND_WRITE                        0x45
-#define PKT3_EVENT_WRITE                       0x46
-#define PKT3_EVENT_WRITE_EOP                   0x47
-#define PKT3_ONE_REG_WRITE                     0x57
-#define PKT3_SET_CONFIG_REG                    0x68
-#define PKT3_SET_CONTEXT_REG                   0x69
-#define PKT3_SET_ALU_CONST                     0x6A
-#define PKT3_SET_BOOL_CONST                    0x6B
-#define PKT3_SET_LOOP_CONST                    0x6C
-#define PKT3_SET_RESOURCE                      0x6D
-#define PKT3_SET_SAMPLER                       0x6E
-#define PKT3_SET_CTL_CONST                     0x6F
-#define PKT3_SURFACE_BASE_UPDATE               0x73
-#define		SURFACE_BASE_UPDATE_DEPTH      (1 << 0)
-#define		SURFACE_BASE_UPDATE_COLOR(x)   (2 << (x))
-#define		SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
-
-#define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
-#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
-#define EVENT_TYPE_ZPASS_DONE                  0x15
-#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
-#define		EVENT_TYPE(x)                           ((x) << 0)
-#define		EVENT_INDEX(x)                          ((x) << 8)
-                /* 0 - any non-TS event
-		 * 1 - ZPASS_DONE
-		 * 2 - SAMPLE_PIPELINESTAT
-		 * 3 - SAMPLE_STREAMOUTSTAT*
-		 * 4 - *S_PARTIAL_FLUSH
-		 * 5 - TS events
-		 */
-
-#define PREDICATION_OP_CLEAR 0x0
-#define PREDICATION_OP_ZPASS 0x1
-#define PREDICATION_OP_PRIMCOUNT 0x2
-
-#define PRED_OP(x) ((x) << 16)
-
-#define PREDICATION_CONTINUE (1 << 31)
-
-#define PREDICATION_HINT_WAIT (0 << 12)
-#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
-
-#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
-#define PREDICATION_DRAW_VISIBLE (1 << 8)
-
-#define PKT_TYPE_S(x)                   (((x) & 0x3) << 30)
-#define PKT_TYPE_G(x)                   (((x) >> 30) & 0x3)
-#define PKT_TYPE_C                      0x3FFFFFFF
-#define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
-#define PKT_COUNT_G(x)                  (((x) >> 16) & 0x3FFF)
-#define PKT_COUNT_C                     0xC000FFFF
-#define PKT0_BASE_INDEX_S(x)            (((x) & 0xFFFF) << 0)
-#define PKT0_BASE_INDEX_G(x)            (((x) >> 0) & 0xFFFF)
-#define PKT0_BASE_INDEX_C               0xFFFF0000
-#define PKT3_IT_OPCODE_S(x)             (((x) & 0xFF) << 8)
-#define PKT3_IT_OPCODE_G(x)             (((x) >> 8) & 0xFF)
-#define PKT3_IT_OPCODE_C                0xFFFF00FF
-#define PKT3_PRED_S(x)               (((x) >> 0) & 0x1)
-#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate))
-
-/* Registers */
-#define R_0280A0_CB_COLOR0_INFO                      0x0280A0
-#define   S_0280A0_ENDIAN(x)                           (((x) & 0x3) << 0)
-#define   G_0280A0_ENDIAN(x)                           (((x) >> 0) & 0x3)
-#define   C_0280A0_ENDIAN                              0xFFFFFFFC
-#define   S_0280A0_FORMAT(x)                           (((x) & 0x3F) << 2)
-#define   G_0280A0_FORMAT(x)                           (((x) >> 2) & 0x3F)
-#define   C_0280A0_FORMAT                              0xFFFFFF03
-#define     V_0280A0_COLOR_INVALID                     0x00000000
-#define     V_0280A0_COLOR_8                           0x00000001
-#define     V_0280A0_COLOR_4_4                         0x00000002
-#define     V_0280A0_COLOR_3_3_2                       0x00000003
-#define     V_0280A0_COLOR_16                          0x00000005
-#define     V_0280A0_COLOR_16_FLOAT                    0x00000006
-#define     V_0280A0_COLOR_8_8                         0x00000007
-#define     V_0280A0_COLOR_5_6_5                       0x00000008
-#define     V_0280A0_COLOR_6_5_5                       0x00000009
-#define     V_0280A0_COLOR_1_5_5_5                     0x0000000A
-#define     V_0280A0_COLOR_4_4_4_4                     0x0000000B
-#define     V_0280A0_COLOR_5_5_5_1                     0x0000000C
-#define     V_0280A0_COLOR_32                          0x0000000D
-#define     V_0280A0_COLOR_32_FLOAT                    0x0000000E
-#define     V_0280A0_COLOR_16_16                       0x0000000F
-#define     V_0280A0_COLOR_16_16_FLOAT                 0x00000010
-#define     V_0280A0_COLOR_8_24                        0x00000011
-#define     V_0280A0_COLOR_8_24_FLOAT                  0x00000012
-#define     V_0280A0_COLOR_24_8                        0x00000013
-#define     V_0280A0_COLOR_24_8_FLOAT                  0x00000014
-#define     V_0280A0_COLOR_10_11_11                    0x00000015
-#define     V_0280A0_COLOR_10_11_11_FLOAT              0x00000016
-#define     V_0280A0_COLOR_11_11_10                    0x00000017
-#define     V_0280A0_COLOR_11_11_10_FLOAT              0x00000018
-#define     V_0280A0_COLOR_2_10_10_10                  0x00000019
-#define     V_0280A0_COLOR_8_8_8_8                     0x0000001A
-#define     V_0280A0_COLOR_10_10_10_2                  0x0000001B
-#define     V_0280A0_COLOR_X24_8_32_FLOAT              0x0000001C
-#define     V_0280A0_COLOR_32_32                       0x0000001D
-#define     V_0280A0_COLOR_32_32_FLOAT                 0x0000001E
-#define     V_0280A0_COLOR_16_16_16_16                 0x0000001F
-#define     V_0280A0_COLOR_16_16_16_16_FLOAT           0x00000020
-#define     V_0280A0_COLOR_32_32_32_32                 0x00000022
-#define     V_0280A0_COLOR_32_32_32_32_FLOAT           0x00000023
-#define   S_0280A0_ARRAY_MODE(x)                       (((x) & 0xF) << 8)
-#define   G_0280A0_ARRAY_MODE(x)                       (((x) >> 8) & 0xF)
-#define   C_0280A0_ARRAY_MODE                          0xFFFFF0FF
-#define     V_0280A0_ARRAY_LINEAR_GENERAL              0x00000000
-#define     V_0280A0_ARRAY_LINEAR_ALIGNED              0x00000001
-#define     V_0280A0_ARRAY_1D_TILED_THIN1              0x00000002
-#define     V_0280A0_ARRAY_2D_TILED_THIN1              0x00000004
-#define   S_0280A0_NUMBER_TYPE(x)                      (((x) & 0x7) << 12)
-#define   G_0280A0_NUMBER_TYPE(x)                      (((x) >> 12) & 0x7)
-#define   C_0280A0_NUMBER_TYPE                         0xFFFF8FFF
-#define   S_0280A0_READ_SIZE(x)                        (((x) & 0x1) << 15)
-#define   G_0280A0_READ_SIZE(x)                        (((x) >> 15) & 0x1)
-#define   C_0280A0_READ_SIZE                           0xFFFF7FFF
-#define   S_0280A0_COMP_SWAP(x)                        (((x) & 0x3) << 16)
-#define   G_0280A0_COMP_SWAP(x)                        (((x) >> 16) & 0x3)
-#define   C_0280A0_COMP_SWAP                           0xFFFCFFFF
-#define   S_0280A0_TILE_MODE(x)                        (((x) & 0x3) << 18)
-#define   G_0280A0_TILE_MODE(x)                        (((x) >> 18) & 0x3)
-#define   C_0280A0_TILE_MODE                           0xFFF3FFFF
-#define   S_0280A0_BLEND_CLAMP(x)                      (((x) & 0x1) << 20)
-#define   G_0280A0_BLEND_CLAMP(x)                      (((x) >> 20) & 0x1)
-#define   C_0280A0_BLEND_CLAMP                         0xFFEFFFFF
-#define   S_0280A0_CLEAR_COLOR(x)                      (((x) & 0x1) << 21)
-#define   G_0280A0_CLEAR_COLOR(x)                      (((x) >> 21) & 0x1)
-#define   C_0280A0_CLEAR_COLOR                         0xFFDFFFFF
-#define   S_0280A0_BLEND_BYPASS(x)                     (((x) & 0x1) << 22)
-#define   G_0280A0_BLEND_BYPASS(x)                     (((x) >> 22) & 0x1)
-#define   C_0280A0_BLEND_BYPASS                        0xFFBFFFFF
-#define   S_0280A0_BLEND_FLOAT32(x)                    (((x) & 0x1) << 23)
-#define   G_0280A0_BLEND_FLOAT32(x)                    (((x) >> 23) & 0x1)
-#define   C_0280A0_BLEND_FLOAT32                       0xFF7FFFFF
-#define   S_0280A0_SIMPLE_FLOAT(x)                     (((x) & 0x1) << 24)
-#define   G_0280A0_SIMPLE_FLOAT(x)                     (((x) >> 24) & 0x1)
-#define   C_0280A0_SIMPLE_FLOAT                        0xFEFFFFFF
-#define   S_0280A0_ROUND_MODE(x)                       (((x) & 0x1) << 25)
-#define   G_0280A0_ROUND_MODE(x)                       (((x) >> 25) & 0x1)
-#define   C_0280A0_ROUND_MODE                          0xFDFFFFFF
-#define   S_0280A0_TILE_COMPACT(x)                     (((x) & 0x1) << 26)
-#define   G_0280A0_TILE_COMPACT(x)                     (((x) >> 26) & 0x1)
-#define   C_0280A0_TILE_COMPACT                        0xFBFFFFFF
-#define   S_0280A0_SOURCE_FORMAT(x)                    (((x) & 0x1) << 27)
-#define   G_0280A0_SOURCE_FORMAT(x)                    (((x) >> 27) & 0x1)
-#define   C_0280A0_SOURCE_FORMAT                       0xF7FFFFFF
-#define R_028060_CB_COLOR0_SIZE                      0x028060
-#define   S_028060_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
-#define   G_028060_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)
-#define   C_028060_PITCH_TILE_MAX                      0xFFFFFC00
-#define   S_028060_SLICE_TILE_MAX(x)                   (((x) & 0xFFFFF) << 10)
-#define   G_028060_SLICE_TILE_MAX(x)                   (((x) >> 10) & 0xFFFFF)
-#define   C_028060_SLICE_TILE_MAX                      0xC00003FF
-#define R_028800_DB_DEPTH_CONTROL                    0x028800
-#define   S_028800_STENCIL_ENABLE(x)                   (((x) & 0x1) << 0)
-#define   G_028800_STENCIL_ENABLE(x)                   (((x) >> 0) & 0x1)
-#define   C_028800_STENCIL_ENABLE                      0xFFFFFFFE
-#define   S_028800_Z_ENABLE(x)                         (((x) & 0x1) << 1)
-#define   G_028800_Z_ENABLE(x)                         (((x) >> 1) & 0x1)
-#define   C_028800_Z_ENABLE                            0xFFFFFFFD
-#define   S_028800_Z_WRITE_ENABLE(x)                   (((x) & 0x1) << 2)
-#define   G_028800_Z_WRITE_ENABLE(x)                   (((x) >> 2) & 0x1)
-#define   C_028800_Z_WRITE_ENABLE                      0xFFFFFFFB
-#define   S_028800_ZFUNC(x)                            (((x) & 0x7) << 4)
-#define   G_028800_ZFUNC(x)                            (((x) >> 4) & 0x7)
-#define   C_028800_ZFUNC                               0xFFFFFF8F
-#define   S_028800_BACKFACE_ENABLE(x)                  (((x) & 0x1) << 7)
-#define   G_028800_BACKFACE_ENABLE(x)                  (((x) >> 7) & 0x1)
-#define   C_028800_BACKFACE_ENABLE                     0xFFFFFF7F
-#define   S_028800_STENCILFUNC(x)                      (((x) & 0x7) << 8)
-#define   G_028800_STENCILFUNC(x)                      (((x) >> 8) & 0x7)
-#define   C_028800_STENCILFUNC                         0xFFFFF8FF
-#define   S_028800_STENCILFAIL(x)                      (((x) & 0x7) << 11)
-#define   G_028800_STENCILFAIL(x)                      (((x) >> 11) & 0x7)
-#define   C_028800_STENCILFAIL                         0xFFFFC7FF
-#define   S_028800_STENCILZPASS(x)                     (((x) & 0x7) << 14)
-#define   G_028800_STENCILZPASS(x)                     (((x) >> 14) & 0x7)
-#define   C_028800_STENCILZPASS                        0xFFFE3FFF
-#define   S_028800_STENCILZFAIL(x)                     (((x) & 0x7) << 17)
-#define   G_028800_STENCILZFAIL(x)                     (((x) >> 17) & 0x7)
-#define   C_028800_STENCILZFAIL                        0xFFF1FFFF
-#define   S_028800_STENCILFUNC_BF(x)                   (((x) & 0x7) << 20)
-#define   G_028800_STENCILFUNC_BF(x)                   (((x) >> 20) & 0x7)
-#define   C_028800_STENCILFUNC_BF                      0xFF8FFFFF
-#define   S_028800_STENCILFAIL_BF(x)                   (((x) & 0x7) << 23)
-#define   G_028800_STENCILFAIL_BF(x)                   (((x) >> 23) & 0x7)
-#define   C_028800_STENCILFAIL_BF                      0xFC7FFFFF
-#define   S_028800_STENCILZPASS_BF(x)                  (((x) & 0x7) << 26)
-#define   G_028800_STENCILZPASS_BF(x)                  (((x) >> 26) & 0x7)
-#define   C_028800_STENCILZPASS_BF                     0xE3FFFFFF
-#define   S_028800_STENCILZFAIL_BF(x)                  (((x) & 0x7) << 29)
-#define   G_028800_STENCILZFAIL_BF(x)                  (((x) >> 29) & 0x7)
-#define   C_028800_STENCILZFAIL_BF                     0x1FFFFFFF
-#define R_028010_DB_DEPTH_INFO                       0x028010
-#define   S_028010_FORMAT(x)                           (((x) & 0x7) << 0)
-#define   G_028010_FORMAT(x)                           (((x) >> 0) & 0x7)
-#define   C_028010_FORMAT                              0xFFFFFFF8
-#define     V_028010_DEPTH_INVALID                     0x00000000
-#define     V_028010_DEPTH_16                          0x00000001
-#define     V_028010_DEPTH_X8_24                       0x00000002
-#define     V_028010_DEPTH_8_24                        0x00000003
-#define     V_028010_DEPTH_X8_24_FLOAT                 0x00000004
-#define     V_028010_DEPTH_8_24_FLOAT                  0x00000005
-#define     V_028010_DEPTH_32_FLOAT                    0x00000006
-#define     V_028010_DEPTH_X24_8_32_FLOAT              0x00000007
-#define   S_028010_READ_SIZE(x)                        (((x) & 0x1) << 3)
-#define   G_028010_READ_SIZE(x)                        (((x) >> 3) & 0x1)
-#define   C_028010_READ_SIZE                           0xFFFFFFF7
-#define   S_028010_ARRAY_MODE(x)                       (((x) & 0xF) << 15)
-#define   G_028010_ARRAY_MODE(x)                       (((x) >> 15) & 0xF)
-#define   C_028010_ARRAY_MODE                          0xFFF87FFF
-#define   S_028010_TILE_SURFACE_ENABLE(x)              (((x) & 0x1) << 25)
-#define   G_028010_TILE_SURFACE_ENABLE(x)              (((x) >> 25) & 0x1)
-#define   C_028010_TILE_SURFACE_ENABLE                 0xFDFFFFFF
-#define   S_028010_TILE_COMPACT(x)                     (((x) & 0x1) << 26)
-#define   G_028010_TILE_COMPACT(x)                     (((x) >> 26) & 0x1)
-#define   C_028010_TILE_COMPACT                        0xFBFFFFFF
-#define   S_028010_ZRANGE_PRECISION(x)                 (((x) & 0x1) << 31)
-#define   G_028010_ZRANGE_PRECISION(x)                 (((x) >> 31) & 0x1)
-#define   C_028010_ZRANGE_PRECISION                    0x7FFFFFFF
-#define R_028000_DB_DEPTH_SIZE                       0x028000
-#define   S_028000_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
-#define   G_028000_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)
-#define   C_028000_PITCH_TILE_MAX                      0xFFFFFC00
-#define   S_028000_SLICE_TILE_MAX(x)                   (((x) & 0xFFFFF) << 10)
-#define   G_028000_SLICE_TILE_MAX(x)                   (((x) >> 10) & 0xFFFFF)
-#define   C_028000_SLICE_TILE_MAX                      0xC00003FF
-#define R_028004_DB_DEPTH_VIEW                       0x028004
-#define   S_028004_SLICE_START(x)                      (((x) & 0x7FF) << 0)
-#define   G_028004_SLICE_START(x)                      (((x) >> 0) & 0x7FF)
-#define   C_028004_SLICE_START                         0xFFFFF800
-#define   S_028004_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
-#define   G_028004_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
-#define   C_028004_SLICE_MAX                           0xFF001FFF
-#define R_028D24_DB_HTILE_SURFACE                    0x028D24
-#define   S_028D24_HTILE_WIDTH(x)                      (((x) & 0x1) << 0)
-#define   G_028D24_HTILE_WIDTH(x)                      (((x) >> 0) & 0x1)
-#define   C_028D24_HTILE_WIDTH                         0xFFFFFFFE
-#define   S_028D24_HTILE_HEIGHT(x)                     (((x) & 0x1) << 1)
-#define   G_028D24_HTILE_HEIGHT(x)                     (((x) >> 1) & 0x1)
-#define   C_028D24_HTILE_HEIGHT                        0xFFFFFFFD
-#define   S_028D24_LINEAR(x)                           (((x) & 0x1) << 2)
-#define   G_028D24_LINEAR(x)                           (((x) >> 2) & 0x1)
-#define   C_028D24_LINEAR                              0xFFFFFFFB
-#define   S_028D24_FULL_CACHE(x)                       (((x) & 0x1) << 3)
-#define   G_028D24_FULL_CACHE(x)                       (((x) >> 3) & 0x1)
-#define   C_028D24_FULL_CACHE                          0xFFFFFFF7
-#define   S_028D24_HTILE_USES_PRELOAD_WIN(x)           (((x) & 0x1) << 4)
-#define   G_028D24_HTILE_USES_PRELOAD_WIN(x)           (((x) >> 4) & 0x1)
-#define   C_028D24_HTILE_USES_PRELOAD_WIN              0xFFFFFFEF
-#define   S_028D24_PRELOAD(x)                          (((x) & 0x1) << 5)
-#define   G_028D24_PRELOAD(x)                          (((x) >> 5) & 0x1)
-#define   C_028D24_PRELOAD                             0xFFFFFFDF
-#define   S_028D24_PREFETCH_WIDTH(x)                   (((x) & 0x3F) << 6)
-#define   G_028D24_PREFETCH_WIDTH(x)                   (((x) >> 6) & 0x3F)
-#define   C_028D24_PREFETCH_WIDTH                      0xFFFFF03F
-#define   S_028D24_PREFETCH_HEIGHT(x)                  (((x) & 0x3F) << 12)
-#define   G_028D24_PREFETCH_HEIGHT(x)                  (((x) >> 12) & 0x3F)
-#define   C_028D24_PREFETCH_HEIGHT                     0xFFFC0FFF
-#define R_028D34_DB_PREFETCH_LIMIT                   0x028D34
-#define   S_028D34_DEPTH_HEIGHT_TILE_MAX(x)            (((x) & 0x3FF) << 0)
-#define   G_028D34_DEPTH_HEIGHT_TILE_MAX(x)            (((x) >> 0) & 0x3FF)
-#define   C_028D34_DEPTH_HEIGHT_TILE_MAX               0xFFFFFC00
-#define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
-#define   S_028D10_FORCE_HIZ_ENABLE(x)                 (((x) & 0x3) << 0)
-#define   G_028D10_FORCE_HIZ_ENABLE(x)                 (((x) >> 0) & 0x3)
-#define   C_028D10_FORCE_HIZ_ENABLE                    0xFFFFFFFC
-#define   S_028D10_FORCE_HIS_ENABLE0(x)                (((x) & 0x3) << 2)
-#define   G_028D10_FORCE_HIS_ENABLE0(x)                (((x) >> 2) & 0x3)
-#define   C_028D10_FORCE_HIS_ENABLE0                   0xFFFFFFF3
-#define   S_028D10_FORCE_HIS_ENABLE1(x)                (((x) & 0x3) << 4)
-#define   G_028D10_FORCE_HIS_ENABLE1(x)                (((x) >> 4) & 0x3)
-#define   C_028D10_FORCE_HIS_ENABLE1                   0xFFFFFFCF
-#define   S_028D10_FORCE_SHADER_Z_ORDER(x)             (((x) & 0x1) << 6)
-#define   G_028D10_FORCE_SHADER_Z_ORDER(x)             (((x) >> 6) & 0x1)
-#define   C_028D10_FORCE_SHADER_Z_ORDER                0xFFFFFFBF
-#define   S_028D10_FAST_Z_DISABLE(x)                   (((x) & 0x1) << 7)
-#define   G_028D10_FAST_Z_DISABLE(x)                   (((x) >> 7) & 0x1)
-#define   C_028D10_FAST_Z_DISABLE                      0xFFFFFF7F
-#define   S_028D10_FAST_STENCIL_DISABLE(x)             (((x) & 0x1) << 8)
-#define   G_028D10_FAST_STENCIL_DISABLE(x)             (((x) >> 8) & 0x1)
-#define   C_028D10_FAST_STENCIL_DISABLE                0xFFFFFEFF
-#define   S_028D10_NOOP_CULL_DISABLE(x)                (((x) & 0x1) << 9)
-#define   G_028D10_NOOP_CULL_DISABLE(x)                (((x) >> 9) & 0x1)
-#define   C_028D10_NOOP_CULL_DISABLE                   0xFFFFFDFF
-#define   S_028D10_FORCE_COLOR_KILL(x)                 (((x) & 0x1) << 10)
-#define   G_028D10_FORCE_COLOR_KILL(x)                 (((x) >> 10) & 0x1)
-#define   C_028D10_FORCE_COLOR_KILL                    0xFFFFFBFF
-#define   S_028D10_FORCE_Z_READ(x)                     (((x) & 0x1) << 11)
-#define   G_028D10_FORCE_Z_READ(x)                     (((x) >> 11) & 0x1)
-#define   C_028D10_FORCE_Z_READ                        0xFFFFF7FF
-#define   S_028D10_FORCE_STENCIL_READ(x)               (((x) & 0x1) << 12)
-#define   G_028D10_FORCE_STENCIL_READ(x)               (((x) >> 12) & 0x1)
-#define   C_028D10_FORCE_STENCIL_READ                  0xFFFFEFFF
-#define   S_028D10_FORCE_FULL_Z_RANGE(x)               (((x) & 0x3) << 13)
-#define   G_028D10_FORCE_FULL_Z_RANGE(x)               (((x) >> 13) & 0x3)
-#define   C_028D10_FORCE_FULL_Z_RANGE                  0xFFFF9FFF
-#define   S_028D10_FORCE_QC_SMASK_CONFLICT(x)          (((x) & 0x1) << 15)
-#define   G_028D10_FORCE_QC_SMASK_CONFLICT(x)          (((x) >> 15) & 0x1)
-#define   C_028D10_FORCE_QC_SMASK_CONFLICT             0xFFFF7FFF
-#define   S_028D10_DISABLE_VIEWPORT_CLAMP(x)           (((x) & 0x1) << 16)
-#define   G_028D10_DISABLE_VIEWPORT_CLAMP(x)           (((x) >> 16) & 0x1)
-#define   C_028D10_DISABLE_VIEWPORT_CLAMP              0xFFFEFFFF
-#define   S_028D10_IGNORE_SC_ZRANGE(x)                 (((x) & 0x1) << 17)
-#define   G_028D10_IGNORE_SC_ZRANGE(x)                 (((x) >> 17) & 0x1)
-#define   C_028D10_IGNORE_SC_ZRANGE                    0xFFFDFFFF
-#define R_028A40_VGT_GS_MODE                         0x028A40
-#define   S_028A40_MODE(x)                             (((x) & 0x3) << 0)
-#define   G_028A40_MODE(x)                             (((x) >> 0) & 0x3)
-#define   C_028A40_MODE                                0xFFFFFFFC
-#define   S_028A40_ES_PASSTHRU(x)                      (((x) & 0x1) << 2)
-#define   G_028A40_ES_PASSTHRU(x)                      (((x) >> 2) & 0x1)
-#define   C_028A40_ES_PASSTHRU                         0xFFFFFFFB
-#define   S_028A40_CUT_MODE(x)                         (((x) & 0x3) << 3)
-#define   G_028A40_CUT_MODE(x)                         (((x) >> 3) & 0x3)
-#define   C_028A40_CUT_MODE                            0xFFFFFFE7
-#define R_008DFC_SQ_CF_WORD0                         0x008DFC
-#define   S_008DFC_ADDR(x)                             (((x) & 0xFFFFFFFF) << 0)
-#define   G_008DFC_ADDR(x)                             (((x) >> 0) & 0xFFFFFFFF)
-#define   C_008DFC_ADDR                                0x00000000
-#define R_008DFC_SQ_CF_WORD1                         0x008DFC
-#define   S_008DFC_POP_COUNT(x)                        (((x) & 0x7) << 0)
-#define   G_008DFC_POP_COUNT(x)                        (((x) >> 0) & 0x7)
-#define   C_008DFC_POP_COUNT                           0xFFFFFFF8
-#define   S_008DFC_CF_CONST(x)                         (((x) & 0x1F) << 3)
-#define   G_008DFC_CF_CONST(x)                         (((x) >> 3) & 0x1F)
-#define   C_008DFC_CF_CONST                            0xFFFFFF07
-#define   S_008DFC_COND(x)                             (((x) & 0x3) << 8)
-#define   G_008DFC_COND(x)                             (((x) >> 8) & 0x3)
-#define   C_008DFC_COND                                0xFFFFFCFF
-#define   S_008DFC_COUNT(x)                            (((x) & 0x7) << 10)
-#define   G_008DFC_COUNT(x)                            (((x) >> 10) & 0x7)
-#define   C_008DFC_COUNT                               0xFFFFE3FF
-#define   S_008DFC_CALL_COUNT(x)                       (((x) & 0x3F) << 13)
-#define   G_008DFC_CALL_COUNT(x)                       (((x) >> 13) & 0x3F)
-#define   C_008DFC_CALL_COUNT                          0xFFF81FFF
-#define   S_008DFC_END_OF_PROGRAM(x)                   (((x) & 0x1) << 21)
-#define   G_008DFC_END_OF_PROGRAM(x)                   (((x) >> 21) & 0x1)
-#define   C_008DFC_END_OF_PROGRAM                      0xFFDFFFFF
-#define   S_008DFC_VALID_PIXEL_MODE(x)                 (((x) & 0x1) << 22)
-#define   G_008DFC_VALID_PIXEL_MODE(x)                 (((x) >> 22) & 0x1)
-#define   C_008DFC_VALID_PIXEL_MODE                    0xFFBFFFFF
-#define   S_008DFC_CF_INST(x)                          (((x) & 0x7F) << 23)
-#define   G_008DFC_CF_INST(x)                          (((x) >> 23) & 0x7F)
-#define   C_008DFC_CF_INST                             0xC07FFFFF
-#define     V_008DFC_SQ_CF_INST_NOP                    0x00000000
-#define     V_008DFC_SQ_CF_INST_TEX                    0x00000001
-#define     V_008DFC_SQ_CF_INST_VTX                    0x00000002
-#define     V_008DFC_SQ_CF_INST_VTX_TC                 0x00000003
-#define     V_008DFC_SQ_CF_INST_LOOP_START             0x00000004
-#define     V_008DFC_SQ_CF_INST_LOOP_END               0x00000005
-#define     V_008DFC_SQ_CF_INST_LOOP_START_DX10        0x00000006
-#define     V_008DFC_SQ_CF_INST_LOOP_START_NO_AL       0x00000007
-#define     V_008DFC_SQ_CF_INST_LOOP_CONTINUE          0x00000008
-#define     V_008DFC_SQ_CF_INST_LOOP_BREAK             0x00000009
-#define     V_008DFC_SQ_CF_INST_JUMP                   0x0000000A
-#define     V_008DFC_SQ_CF_INST_PUSH                   0x0000000B
-#define     V_008DFC_SQ_CF_INST_PUSH_ELSE              0x0000000C
-#define     V_008DFC_SQ_CF_INST_ELSE                   0x0000000D
-#define     V_008DFC_SQ_CF_INST_POP                    0x0000000E
-#define     V_008DFC_SQ_CF_INST_POP_JUMP               0x0000000F
-#define     V_008DFC_SQ_CF_INST_POP_PUSH               0x00000010
-#define     V_008DFC_SQ_CF_INST_POP_PUSH_ELSE          0x00000011
-#define     V_008DFC_SQ_CF_INST_CALL                   0x00000012
-#define     V_008DFC_SQ_CF_INST_CALL_FS                0x00000013
-#define     V_008DFC_SQ_CF_INST_RETURN                 0x00000014
-#define     V_008DFC_SQ_CF_INST_EMIT_VERTEX            0x00000015
-#define     V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX        0x00000016
-#define     V_008DFC_SQ_CF_INST_CUT_VERTEX             0x00000017
-#define     V_008DFC_SQ_CF_INST_KILL                   0x00000018
-#define   S_008DFC_WHOLE_QUAD_MODE(x)                  (((x) & 0x1) << 30)
-#define   G_008DFC_WHOLE_QUAD_MODE(x)                  (((x) >> 30) & 0x1)
-#define   C_008DFC_WHOLE_QUAD_MODE                     0xBFFFFFFF
-#define   S_008DFC_BARRIER(x)                          (((x) & 0x1) << 31)
-#define   G_008DFC_BARRIER(x)                          (((x) >> 31) & 0x1)
-#define   C_008DFC_BARRIER                             0x7FFFFFFF
-#define R_008DFC_SQ_CF_ALU_WORD0                     0x008DFC
-#define   S_008DFC_ALU_ADDR(x)                         (((x) & 0x3FFFFF) << 0)
-#define   G_008DFC_ALU_ADDR(x)                         (((x) >> 0) & 0x3FFFFF)
-#define   C_008DFC_ALU_ADDR                            0xFFC00000
-#define   S_008DFC_KCACHE_BANK0(x)                     (((x) & 0xF) << 22)
-#define   G_008DFC_KCACHE_BANK0(x)                     (((x) >> 22) & 0xF)
-#define   C_008DFC_KCACHE_BANK0                        0xFC3FFFFF
-#define   S_008DFC_KCACHE_BANK1(x)                     (((x) & 0xF) << 26)
-#define   G_008DFC_KCACHE_BANK1(x)                     (((x) >> 26) & 0xF)
-#define   C_008DFC_KCACHE_BANK1                        0xC3FFFFFF
-#define   S_008DFC_KCACHE_MODE0(x)                     (((x) & 0x3) << 30)
-#define   G_008DFC_KCACHE_MODE0(x)                     (((x) >> 30) & 0x3)
-#define   C_008DFC_KCACHE_MODE0                        0x3FFFFFFF
-#define R_008DFC_SQ_CF_ALU_WORD1                     0x008DFC
-#define   S_008DFC_KCACHE_MODE1(x)                     (((x) & 0x3) << 0)
-#define   G_008DFC_KCACHE_MODE1(x)                     (((x) >> 0) & 0x3)
-#define   C_008DFC_KCACHE_MODE1                        0xFFFFFFFC
-#define   S_008DFC_KCACHE_ADDR0(x)                     (((x) & 0xFF) << 2)
-#define   G_008DFC_KCACHE_ADDR0(x)                     (((x) >> 2) & 0xFF)
-#define   C_008DFC_KCACHE_ADDR0                        0xFFFFFC03
-#define   S_008DFC_KCACHE_ADDR1(x)                     (((x) & 0xFF) << 10)
-#define   G_008DFC_KCACHE_ADDR1(x)                     (((x) >> 10) & 0xFF)
-#define   C_008DFC_KCACHE_ADDR1                        0xFFFC03FF
-#define   S_008DFC_ALU_COUNT(x)                        (((x) & 0x7F) << 18)
-#define   G_008DFC_ALU_COUNT(x)                        (((x) >> 18) & 0x7F)
-#define   C_008DFC_ALU_COUNT                           0xFE03FFFF
-#define   S_008DFC_USES_WATERFALL(x)                   (((x) & 0x1) << 25)
-#define   G_008DFC_USES_WATERFALL(x)                   (((x) >> 25) & 0x1)
-#define   C_008DFC_USES_WATERFALL                      0xFDFFFFFF
-#define   S_008DFC_CF_ALU_INST(x)                      (((x) & 0xF) << 26)
-#define   G_008DFC_CF_ALU_INST(x)                      (((x) >> 26) & 0xF)
-#define   C_008DFC_CF_ALU_INST                         0xC3FFFFFF
-#define     V_008DFC_SQ_CF_INST_ALU                    0x00000008
-#define     V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE        0x00000009
-#define     V_008DFC_SQ_CF_INST_ALU_POP_AFTER          0x0000000A
-#define     V_008DFC_SQ_CF_INST_ALU_POP2_AFTER         0x0000000B
-#define     V_008DFC_SQ_CF_INST_ALU_CONTINUE           0x0000000D
-#define     V_008DFC_SQ_CF_INST_ALU_BREAK              0x0000000E
-#define     V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER         0x0000000F
-#define   S_008DFC_WHOLE_QUAD_MODE(x)                  (((x) & 0x1) << 30)
-#define   G_008DFC_WHOLE_QUAD_MODE(x)                  (((x) >> 30) & 0x1)
-#define   C_008DFC_WHOLE_QUAD_MODE                     0xBFFFFFFF
-#define   S_008DFC_BARRIER(x)                          (((x) & 0x1) << 31)
-#define   G_008DFC_BARRIER(x)                          (((x) >> 31) & 0x1)
-#define   C_008DFC_BARRIER                             0x7FFFFFFF
-#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD0            0x008DFC
-#define   S_008DFC_ARRAY_BASE(x)                       (((x) & 0x1FFF) << 0)
-#define   G_008DFC_ARRAY_BASE(x)                       (((x) >> 0) & 0x1FFF)
-#define   C_008DFC_ARRAY_BASE                          0xFFFFE000
-#define   S_008DFC_TYPE(x)                             (((x) & 0x3) << 13)
-#define   G_008DFC_TYPE(x)                             (((x) >> 13) & 0x3)
-#define   C_008DFC_TYPE                                0xFFFF9FFF
-#define   S_008DFC_RW_GPR(x)                           (((x) & 0x7F) << 15)
-#define   G_008DFC_RW_GPR(x)                           (((x) >> 15) & 0x7F)
-#define   C_008DFC_RW_GPR                              0xFFC07FFF
-#define   S_008DFC_RW_REL(x)                           (((x) & 0x1) << 22)
-#define   G_008DFC_RW_REL(x)                           (((x) >> 22) & 0x1)
-#define   C_008DFC_RW_REL                              0xFFBFFFFF
-#define   S_008DFC_INDEX_GPR(x)                        (((x) & 0x7F) << 23)
-#define   G_008DFC_INDEX_GPR(x)                        (((x) >> 23) & 0x7F)
-#define   C_008DFC_INDEX_GPR                           0xC07FFFFF
-#define   S_008DFC_ELEM_SIZE(x)                        (((x) & 0x3) << 30)
-#define   G_008DFC_ELEM_SIZE(x)                        (((x) >> 30) & 0x3)
-#define   C_008DFC_ELEM_SIZE                           0x3FFFFFFF
-#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1            0x008DFC
-#define   S_008DFC_BURST_COUNT(x)                      (((x) & 0xF) << 17)
-#define   G_008DFC_BURST_COUNT(x)                      (((x) >> 17) & 0xF)
-#define   C_008DFC_BURST_COUNT                         0xFFE1FFFF
-#define   S_008DFC_END_OF_PROGRAM(x)                   (((x) & 0x1) << 21)
-#define   G_008DFC_END_OF_PROGRAM(x)                   (((x) >> 21) & 0x1)
-#define   C_008DFC_END_OF_PROGRAM                      0xFFDFFFFF
-#define   S_008DFC_VALID_PIXEL_MODE(x)                 (((x) & 0x1) << 22)
-#define   G_008DFC_VALID_PIXEL_MODE(x)                 (((x) >> 22) & 0x1)
-#define   C_008DFC_VALID_PIXEL_MODE                    0xFFBFFFFF
-#define   S_008DFC_CF_INST(x)                          (((x) & 0x7F) << 23)
-#define   G_008DFC_CF_INST(x)                          (((x) >> 23) & 0x7F)
-#define   C_008DFC_CF_INST                             0xC07FFFFF
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM0            0x00000020
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM1            0x00000021
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM2            0x00000022
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM3            0x00000023
-#define     V_008DFC_SQ_CF_INST_MEM_SCRATCH            0x00000024
-#define     V_008DFC_SQ_CF_INST_MEM_REDUCTION          0x00000025
-#define     V_008DFC_SQ_CF_INST_MEM_RING               0x00000026
-#define     V_008DFC_SQ_CF_INST_EXPORT                 0x00000027
-#define     V_008DFC_SQ_CF_INST_EXPORT_DONE            0x00000028
-#define   S_008DFC_WHOLE_QUAD_MODE(x)                  (((x) & 0x1) << 30)
-#define   G_008DFC_WHOLE_QUAD_MODE(x)                  (((x) >> 30) & 0x1)
-#define   C_008DFC_WHOLE_QUAD_MODE                     0xBFFFFFFF
-#define   S_008DFC_BARRIER(x)                          (((x) & 0x1) << 31)
-#define   G_008DFC_BARRIER(x)                          (((x) >> 31) & 0x1)
-#define   C_008DFC_BARRIER                             0x7FFFFFFF
-#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_BUF        0x008DFC
-#define   S_008DFC_ARRAY_SIZE(x)                       (((x) & 0xFFF) << 0)
-#define   G_008DFC_ARRAY_SIZE(x)                       (((x) >> 0) & 0xFFF)
-#define   C_008DFC_ARRAY_SIZE                          0xFFFFF000
-#define   S_008DFC_COMP_MASK(x)                        (((x) & 0xF) << 12)
-#define   G_008DFC_COMP_MASK(x)                        (((x) >> 12) & 0xF)
-#define   C_008DFC_COMP_MASK                           0xFFFF0FFF
-#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ       0x008DFC
-#define   S_008DFC_SEL_X(x)                            (((x) & 0x7) << 0)
-#define   G_008DFC_SEL_X(x)                            (((x) >> 0) & 0x7)
-#define   C_008DFC_SEL_X                               0xFFFFFFF8
-#define   S_008DFC_SEL_Y(x)                            (((x) & 0x7) << 3)
-#define   G_008DFC_SEL_Y(x)                            (((x) >> 3) & 0x7)
-#define   C_008DFC_SEL_Y                               0xFFFFFFC7
-#define   S_008DFC_SEL_Z(x)                            (((x) & 0x7) << 6)
-#define   G_008DFC_SEL_Z(x)                            (((x) >> 6) & 0x7)
-#define   C_008DFC_SEL_Z                               0xFFFFFE3F
-#define   S_008DFC_SEL_W(x)                            (((x) & 0x7) << 9)
-#define   G_008DFC_SEL_W(x)                            (((x) >> 9) & 0x7)
-#define   C_008DFC_SEL_W                               0xFFFFF1FF
-#define R_008DFC_SQ_VTX_WORD0                        0x008DFC
-#define   S_008DFC_VTX_INST(x)                         (((x) & 0x1F) << 0)
-#define   G_008DFC_VTX_INST(x)                         (((x) >> 0) & 0x1F)
-#define   C_008DFC_VTX_INST                            0xFFFFFFE0
-#define   S_008DFC_FETCH_TYPE(x)                       (((x) & 0x3) << 5)
-#define   G_008DFC_FETCH_TYPE(x)                       (((x) >> 5) & 0x3)
-#define   C_008DFC_FETCH_TYPE                          0xFFFFFF9F
-#define   S_008DFC_FETCH_WHOLE_QUAD(x)                 (((x) & 0x1) << 7)
-#define   G_008DFC_FETCH_WHOLE_QUAD(x)                 (((x) >> 7) & 0x1)
-#define   C_008DFC_FETCH_WHOLE_QUAD                    0xFFFFFF7F
-#define   S_008DFC_BUFFER_ID(x)                        (((x) & 0xFF) << 8)
-#define   G_008DFC_BUFFER_ID(x)                        (((x) >> 8) & 0xFF)
-#define   C_008DFC_BUFFER_ID                           0xFFFF00FF
-#define   S_008DFC_SRC_GPR(x)                          (((x) & 0x7F) << 16)
-#define   G_008DFC_SRC_GPR(x)                          (((x) >> 16) & 0x7F)
-#define   C_008DFC_SRC_GPR                             0xFF80FFFF
-#define   S_008DFC_SRC_REL(x)                          (((x) & 0x1) << 23)
-#define   G_008DFC_SRC_REL(x)                          (((x) >> 23) & 0x1)
-#define   C_008DFC_SRC_REL                             0xFF7FFFFF
-#define   S_008DFC_SRC_SEL_X(x)                        (((x) & 0x3) << 24)
-#define   G_008DFC_SRC_SEL_X(x)                        (((x) >> 24) & 0x3)
-#define   C_008DFC_SRC_SEL_X                           0xFCFFFFFF
-#define   S_008DFC_MEGA_FETCH_COUNT(x)                 (((x) & 0x3F) << 26)
-#define   G_008DFC_MEGA_FETCH_COUNT(x)                 (((x) >> 26) & 0x3F)
-#define   C_008DFC_MEGA_FETCH_COUNT                    0x03FFFFFF
-#define R_008DFC_SQ_VTX_WORD1                        0x008DFC
-#define   S_008DFC_DST_SEL_X(x)                        (((x) & 0x7) << 9)
-#define   G_008DFC_DST_SEL_X(x)                        (((x) >> 9) & 0x7)
-#define   C_008DFC_DST_SEL_X                           0xFFFFF1FF
-#define   S_008DFC_DST_SEL_Y(x)                        (((x) & 0x7) << 12)
-#define   G_008DFC_DST_SEL_Y(x)                        (((x) >> 12) & 0x7)
-#define   C_008DFC_DST_SEL_Y                           0xFFFF8FFF
-#define   S_008DFC_DST_SEL_Z(x)                        (((x) & 0x7) << 15)
-#define   G_008DFC_DST_SEL_Z(x)                        (((x) >> 15) & 0x7)
-#define   C_008DFC_DST_SEL_Z                           0xFFFC7FFF
-#define   S_008DFC_DST_SEL_W(x)                        (((x) & 0x7) << 18)
-#define   G_008DFC_DST_SEL_W(x)                        (((x) >> 18) & 0x7)
-#define   C_008DFC_DST_SEL_W                           0xFFE3FFFF
-#define   S_008DFC_USE_CONST_FIELDS(x)                 (((x) & 0x1) << 21)
-#define   G_008DFC_USE_CONST_FIELDS(x)                 (((x) >> 21) & 0x1)
-#define   C_008DFC_USE_CONST_FIELDS                    0xFFDFFFFF
-#define   S_008DFC_DATA_FORMAT(x)                      (((x) & 0x3F) << 22)
-#define   G_008DFC_DATA_FORMAT(x)                      (((x) >> 22) & 0x3F)
-#define   C_008DFC_DATA_FORMAT                         0xF03FFFFF
-#define   S_008DFC_NUM_FORMAT_ALL(x)                   (((x) & 0x3) << 28)
-#define   G_008DFC_NUM_FORMAT_ALL(x)                   (((x) >> 28) & 0x3)
-#define   C_008DFC_NUM_FORMAT_ALL                      0xCFFFFFFF
-#define   S_008DFC_FORMAT_COMP_ALL(x)                  (((x) & 0x1) << 30)
-#define   G_008DFC_FORMAT_COMP_ALL(x)                  (((x) >> 30) & 0x1)
-#define   C_008DFC_FORMAT_COMP_ALL                     0xBFFFFFFF
-#define   S_008DFC_SRF_MODE_ALL(x)                     (((x) & 0x1) << 31)
-#define   G_008DFC_SRF_MODE_ALL(x)                     (((x) >> 31) & 0x1)
-#define   C_008DFC_SRF_MODE_ALL                        0x7FFFFFFF
-#define R_008DFC_SQ_VTX_WORD1_GPR                    0x008DFC
-#define   S_008DFC_DST_GPR(x)                          (((x) & 0x7F) << 0)
-#define   G_008DFC_DST_GPR(x)                          (((x) >> 0) & 0x7F)
-#define   C_008DFC_DST_GPR                             0xFFFFFF80
-#define   S_008DFC_DST_REL(x)                          (((x) & 0x1) << 7)
-#define   G_008DFC_DST_REL(x)                          (((x) >> 7) & 0x1)
-#define   C_008DFC_DST_REL                             0xFFFFFF7F
-#define R_008DFC_SQ_VTX_WORD2                        0x008DFC
-#define   S_008DFC_OFFSET(x)                           (((x) & 0xFFFF) << 0)
-#define   G_008DFC_OFFSET(x)                           (((x) >> 0) & 0xFFFF)
-#define   C_008DFC_OFFSET                              0xFFFF0000
-#define   S_008DFC_ENDIAN_SWAP(x)                      (((x) & 0x3) << 16)
-#define   G_008DFC_ENDIAN_SWAP(x)                      (((x) >> 16) & 0x3)
-#define   C_008DFC_ENDIAN_SWAP                         0xFFFCFFFF
-#define   S_008DFC_CONST_BUF_NO_STRIDE(x)              (((x) & 0x1) << 18)
-#define   G_008DFC_CONST_BUF_NO_STRIDE(x)              (((x) >> 18) & 0x1)
-#define   C_008DFC_CONST_BUF_NO_STRIDE                 0xFFFBFFFF
-#define   S_008DFC_MEGA_FETCH(x)                       (((x) & 0x1) << 19)
-#define   G_008DFC_MEGA_FETCH(x)                       (((x) >> 19) & 0x1)
-#define   C_008DFC_MEGA_FETCH                          0xFFF7FFFF
-#define   S_008DFC_ALT_CONST(x)                        (((x) & 0x1) << 20)
-#define   G_008DFC_ALT_CONST(x)                        (((x) >> 20) & 0x1)
-#define   C_008DFC_ALT_CONST                           0xFFEFFFFF
-#define R_008040_WAIT_UNTIL                          0x008040
-#define   S_008040_WAIT_CP_DMA_IDLE(x)                 (((x) & 0x1) << 8)
-#define   G_008040_WAIT_CP_DMA_IDLE(x)                 (((x) >> 8) & 0x1)
-#define   C_008040_WAIT_CP_DMA_IDLE                    0xFFFFFEFF
-#define   S_008040_WAIT_CMDFIFO(x)                     (((x) & 0x1) << 10)
-#define   G_008040_WAIT_CMDFIFO(x)                     (((x) >> 10) & 0x1)
-#define   C_008040_WAIT_CMDFIFO                        0xFFFFFBFF
-#define   S_008040_WAIT_2D_IDLE(x)                     (((x) & 0x1) << 14)
-#define   G_008040_WAIT_2D_IDLE(x)                     (((x) >> 14) & 0x1)
-#define   C_008040_WAIT_2D_IDLE                        0xFFFFBFFF
-#define   S_008040_WAIT_3D_IDLE(x)                     (((x) & 0x1) << 15)
-#define   G_008040_WAIT_3D_IDLE(x)                     (((x) >> 15) & 0x1)
-#define   C_008040_WAIT_3D_IDLE                        0xFFFF7FFF
-#define   S_008040_WAIT_2D_IDLECLEAN(x)                (((x) & 0x1) << 16)
-#define   G_008040_WAIT_2D_IDLECLEAN(x)                (((x) >> 16) & 0x1)
-#define   C_008040_WAIT_2D_IDLECLEAN                   0xFFFEFFFF
-#define   S_008040_WAIT_3D_IDLECLEAN(x)                (((x) & 0x1) << 17)
-#define   G_008040_WAIT_3D_IDLECLEAN(x)                (((x) >> 17) & 0x1)
-#define   C_008040_WAIT_3D_IDLECLEAN                   0xFFFDFFFF
-#define   S_008040_WAIT_EXTERN_SIG(x)                  (((x) & 0x1) << 19)
-#define   G_008040_WAIT_EXTERN_SIG(x)                  (((x) >> 19) & 0x1)
-#define   C_008040_WAIT_EXTERN_SIG                     0xFFF7FFFF
-#define   S_008040_CMDFIFO_ENTRIES(x)                  (((x) & 0x1F) << 20)
-#define   G_008040_CMDFIFO_ENTRIES(x)                  (((x) >> 20) & 0x1F)
-#define   C_008040_CMDFIFO_ENTRIES                     0xFE0FFFFF
-#define R_0286CC_SPI_PS_IN_CONTROL_0                 0x0286CC
-#define   S_0286CC_NUM_INTERP(x)                       (((x) & 0x3F) << 0)
-#define   G_0286CC_NUM_INTERP(x)                       (((x) >> 0) & 0x3F)
-#define   C_0286CC_NUM_INTERP                          0xFFFFFFC0
-#define   S_0286CC_POSITION_ENA(x)                     (((x) & 0x1) << 8)
-#define   G_0286CC_POSITION_ENA(x)                     (((x) >> 8) & 0x1)
-#define   C_0286CC_POSITION_ENA                        0xFFFFFEFF
-#define   S_0286CC_POSITION_CENTROID(x)                (((x) & 0x1) << 9)
-#define   G_0286CC_POSITION_CENTROID(x)                (((x) >> 9) & 0x1)
-#define   C_0286CC_POSITION_CENTROID                   0xFFFFFDFF
-#define   S_0286CC_POSITION_ADDR(x)                    (((x) & 0x1F) << 10)
-#define   G_0286CC_POSITION_ADDR(x)                    (((x) >> 10) & 0x1F)
-#define   C_0286CC_POSITION_ADDR                       0xFFFF83FF
-#define   S_0286CC_PARAM_GEN(x)                        (((x) & 0xF) << 15)
-#define   G_0286CC_PARAM_GEN(x)                        (((x) >> 15) & 0xF)
-#define   C_0286CC_PARAM_GEN                           0xFFF87FFF
-#define   S_0286CC_PARAM_GEN_ADDR(x)                   (((x) & 0x7F) << 19)
-#define   G_0286CC_PARAM_GEN_ADDR(x)                   (((x) >> 19) & 0x7F)
-#define   C_0286CC_PARAM_GEN_ADDR                      0xFC07FFFF
-#define   S_0286CC_BARYC_SAMPLE_CNTL(x)                (((x) & 0x3) << 26)
-#define   G_0286CC_BARYC_SAMPLE_CNTL(x)                (((x) >> 26) & 0x3)
-#define   C_0286CC_BARYC_SAMPLE_CNTL                   0xF3FFFFFF
-#define   S_0286CC_PERSP_GRADIENT_ENA(x)               (((x) & 0x1) << 28)
-#define   G_0286CC_PERSP_GRADIENT_ENA(x)               (((x) >> 28) & 0x1)
-#define   C_0286CC_PERSP_GRADIENT_ENA                  0xEFFFFFFF
-#define   S_0286CC_LINEAR_GRADIENT_ENA(x)              (((x) & 0x1) << 29)
-#define   G_0286CC_LINEAR_GRADIENT_ENA(x)              (((x) >> 29) & 0x1)
-#define   C_0286CC_LINEAR_GRADIENT_ENA                 0xDFFFFFFF
-#define   S_0286CC_POSITION_SAMPLE(x)                  (((x) & 0x1) << 30)
-#define   G_0286CC_POSITION_SAMPLE(x)                  (((x) >> 30) & 0x1)
-#define   C_0286CC_POSITION_SAMPLE                     0xBFFFFFFF
-#define   S_0286CC_BARYC_AT_SAMPLE_ENA(x)              (((x) & 0x1) << 31)
-#define   G_0286CC_BARYC_AT_SAMPLE_ENA(x)              (((x) >> 31) & 0x1)
-#define   C_0286CC_BARYC_AT_SAMPLE_ENA                 0x7FFFFFFF
-#define R_0286D0_SPI_PS_IN_CONTROL_1                 0x0286D0
-#define   S_0286D0_GEN_INDEX_PIX(x)                    (((x) & 0x1) << 0)
-#define   G_0286D0_GEN_INDEX_PIX(x)                    (((x) >> 0) & 0x1)
-#define   C_0286D0_GEN_INDEX_PIX                       0xFFFFFFFE
-#define   S_0286D0_GEN_INDEX_PIX_ADDR(x)               (((x) & 0x7F) << 1)
-#define   G_0286D0_GEN_INDEX_PIX_ADDR(x)               (((x) >> 1) & 0x7F)
-#define   C_0286D0_GEN_INDEX_PIX_ADDR                  0xFFFFFF01
-#define   S_0286D0_FRONT_FACE_ENA(x)                   (((x) & 0x1) << 8)
-#define   G_0286D0_FRONT_FACE_ENA(x)                   (((x) >> 8) & 0x1)
-#define   C_0286D0_FRONT_FACE_ENA                      0xFFFFFEFF
-#define   S_0286D0_FRONT_FACE_CHAN(x)                  (((x) & 0x3) << 9)
-#define   G_0286D0_FRONT_FACE_CHAN(x)                  (((x) >> 9) & 0x3)
-#define   C_0286D0_FRONT_FACE_CHAN                     0xFFFFF9FF
-#define   S_0286D0_FRONT_FACE_ALL_BITS(x)              (((x) & 0x1) << 11)
-#define   G_0286D0_FRONT_FACE_ALL_BITS(x)              (((x) >> 11) & 0x1)
-#define   C_0286D0_FRONT_FACE_ALL_BITS                 0xFFFFF7FF
-#define   S_0286D0_FRONT_FACE_ADDR(x)                  (((x) & 0x1F) << 12)
-#define   G_0286D0_FRONT_FACE_ADDR(x)                  (((x) >> 12) & 0x1F)
-#define   C_0286D0_FRONT_FACE_ADDR                     0xFFFE0FFF
-#define   S_0286D0_FOG_ADDR(x)                         (((x) & 0x7F) << 17)
-#define   G_0286D0_FOG_ADDR(x)                         (((x) >> 17) & 0x7F)
-#define   C_0286D0_FOG_ADDR                            0xFF01FFFF
-#define   S_0286D0_FIXED_PT_POSITION_ENA(x)            (((x) & 0x1) << 24)
-#define   G_0286D0_FIXED_PT_POSITION_ENA(x)            (((x) >> 24) & 0x1)
-#define   C_0286D0_FIXED_PT_POSITION_ENA               0xFEFFFFFF
-#define   S_0286D0_FIXED_PT_POSITION_ADDR(x)           (((x) & 0x1F) << 25)
-#define   G_0286D0_FIXED_PT_POSITION_ADDR(x)           (((x) >> 25) & 0x1F)
-#define   C_0286D0_FIXED_PT_POSITION_ADDR              0xC1FFFFFF
-#define R_0286C4_SPI_VS_OUT_CONFIG                   0x0286C4
-#define   S_0286C4_VS_PER_COMPONENT(x)                 (((x) & 0x1) << 0)
-#define   G_0286C4_VS_PER_COMPONENT(x)                 (((x) >> 0) & 0x1)
-#define   C_0286C4_VS_PER_COMPONENT                    0xFFFFFFFE
-#define   S_0286C4_VS_EXPORT_COUNT(x)                  (((x) & 0x1F) << 1)
-#define   G_0286C4_VS_EXPORT_COUNT(x)                  (((x) >> 1) & 0x1F)
-#define   C_0286C4_VS_EXPORT_COUNT                     0xFFFFFFC1
-#define   S_0286C4_VS_EXPORTS_FOG(x)                   (((x) & 0x1) << 8)
-#define   G_0286C4_VS_EXPORTS_FOG(x)                   (((x) >> 8) & 0x1)
-#define   C_0286C4_VS_EXPORTS_FOG                      0xFFFFFEFF
-#define   S_0286C4_VS_OUT_FOG_VEC_ADDR(x)              (((x) & 0x1F) << 9)
-#define   G_0286C4_VS_OUT_FOG_VEC_ADDR(x)              (((x) >> 9) & 0x1F)
-#define   C_0286C4_VS_OUT_FOG_VEC_ADDR                 0xFFFFC1FF
-#define R_028240_PA_SC_GENERIC_SCISSOR_TL            0x028240
-#define   S_028240_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028240_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028240_TL_X                                0xFFFFC000
-#define   S_028240_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028240_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028240_TL_Y                                0xC000FFFF
-#define   S_028240_WINDOW_OFFSET_DISABLE(x)            (((x) & 0x1) << 31)
-#define   G_028240_WINDOW_OFFSET_DISABLE(x)            (((x) >> 31) & 0x1)
-#define   C_028240_WINDOW_OFFSET_DISABLE               0x7FFFFFFF
-#define R_028244_PA_SC_GENERIC_SCISSOR_BR            0x028244
-#define   S_028244_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028244_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028244_BR_X                                0xFFFFC000
-#define   S_028244_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028244_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028244_BR_Y                                0xC000FFFF
-#define R_028030_PA_SC_SCREEN_SCISSOR_TL             0x028030
-#define   S_028030_TL_X(x)                             (((x) & 0x7FFF) << 0)
-#define   G_028030_TL_X(x)                             (((x) >> 0) & 0x7FFF)
-#define   C_028030_TL_X                                0xFFFF8000
-#define   S_028030_TL_Y(x)                             (((x) & 0x7FFF) << 16)
-#define   G_028030_TL_Y(x)                             (((x) >> 16) & 0x7FFF)
-#define   C_028030_TL_Y                                0x8000FFFF
-#define R_028034_PA_SC_SCREEN_SCISSOR_BR             0x028034
-#define   S_028034_BR_X(x)                             (((x) & 0x7FFF) << 0)
-#define   G_028034_BR_X(x)                             (((x) >> 0) & 0x7FFF)
-#define   C_028034_BR_X                                0xFFFF8000
-#define   S_028034_BR_Y(x)                             (((x) & 0x7FFF) << 16)
-#define   G_028034_BR_Y(x)                             (((x) >> 16) & 0x7FFF)
-#define   C_028034_BR_Y                                0x8000FFFF
-#define R_028204_PA_SC_WINDOW_SCISSOR_TL             0x028204
-#define   S_028204_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028204_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028204_TL_X                                0xFFFFC000
-#define   S_028204_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028204_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028204_TL_Y                                0xC000FFFF
-#define   S_028204_WINDOW_OFFSET_DISABLE(x)            (((x) & 0x1) << 31)
-#define   G_028204_WINDOW_OFFSET_DISABLE(x)            (((x) >> 31) & 0x1)
-#define   C_028204_WINDOW_OFFSET_DISABLE               0x7FFFFFFF
-#define R_028208_PA_SC_WINDOW_SCISSOR_BR             0x028208
-#define   S_028208_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028208_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028208_BR_X                                0xFFFFC000
-#define   S_028208_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028208_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028208_BR_Y                                0xC000FFFF
-#define R_0287F0_VGT_DRAW_INITIATOR                  0x0287F0
-#define   S_0287F0_SOURCE_SELECT(x)                    (((x) & 0x3) << 0)
-#define   G_0287F0_SOURCE_SELECT(x)                    (((x) >> 0) & 0x3)
-#define   C_0287F0_SOURCE_SELECT                       0xFFFFFFFC
-#define   S_0287F0_MAJOR_MODE(x)                       (((x) & 0x3) << 2)
-#define   G_0287F0_MAJOR_MODE(x)                       (((x) >> 2) & 0x3)
-#define   C_0287F0_MAJOR_MODE                          0xFFFFFFF3
-#define   S_0287F0_SPRITE_EN(x)                        (((x) & 0x1) << 4)
-#define   G_0287F0_SPRITE_EN(x)                        (((x) >> 4) & 0x1)
-#define   C_0287F0_SPRITE_EN                           0xFFFFFFEF
-#define   S_0287F0_NOT_EOP(x)                          (((x) & 0x1) << 5)
-#define   G_0287F0_NOT_EOP(x)                          (((x) >> 5) & 0x1)
-#define   C_0287F0_NOT_EOP                             0xFFFFFFDF
-#define   S_0287F0_USE_OPAQUE(x)                       (((x) & 0x1) << 6)
-#define   G_0287F0_USE_OPAQUE(x)                       (((x) >> 6) & 0x1)
-#define   C_0287F0_USE_OPAQUE                          0xFFFFFFBF
-#define R_0280A0_CB_COLOR0_INFO                      0x0280A0
-#define R_0280A4_CB_COLOR1_INFO                      0x0280A4
-#define R_0280A8_CB_COLOR2_INFO                      0x0280A8
-#define R_0280AC_CB_COLOR3_INFO                      0x0280AC
-#define R_0280B0_CB_COLOR4_INFO                      0x0280B0
-#define R_0280B4_CB_COLOR5_INFO                      0x0280B4
-#define R_0280B8_CB_COLOR6_INFO                      0x0280B8
-#define R_0280BC_CB_COLOR7_INFO                      0x0280BC
-#define R_02800C_DB_DEPTH_BASE                       0x02800C
-#define R_028000_DB_DEPTH_SIZE                       0x028000
-#define R_028004_DB_DEPTH_VIEW                       0x028004
-#define R_028010_DB_DEPTH_INFO                       0x028010
-#define R_028D24_DB_HTILE_SURFACE                    0x028D24
-#define R_028D34_DB_PREFETCH_LIMIT                   0x028D34
-#define R_0286D4_SPI_INTERP_CONTROL_0                0x0286D4
-#define R_028A48_PA_SC_MPASS_PS_CNTL                 0x028A48
-#define R_028C00_PA_SC_LINE_CNTL                     0x028C00
-#define R_028C04_PA_SC_AA_CONFIG                     0x028C04
-#define R_028C08_PA_SU_VTX_CNTL                      0x028C08
-#define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX           0x028C1C
-#define R_028C48_PA_SC_AA_MASK                       0x028C48
-#define R_028810_PA_CL_CLIP_CNTL                     0x028810
-#define R_02881C_PA_CL_VS_OUT_CNTL                   0x02881C
-#define R_028820_PA_CL_NANINF_CNTL                   0x028820
-#define R_028C0C_PA_CL_GB_VERT_CLIP_ADJ              0x028C0C
-#define R_028C10_PA_CL_GB_VERT_DISC_ADJ              0x028C10
-#define R_028C14_PA_CL_GB_HORZ_CLIP_ADJ              0x028C14
-#define R_028C18_PA_CL_GB_HORZ_DISC_ADJ              0x028C18
-#define R_028814_PA_SU_SC_MODE_CNTL                  0x028814
-#define R_028A00_PA_SU_POINT_SIZE                    0x028A00
-#define R_028A04_PA_SU_POINT_MINMAX                  0x028A04
-#define R_028A08_PA_SU_LINE_CNTL                     0x028A08
-#define R_028A0C_PA_SC_LINE_STIPPLE                  0x028A0C
-#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL       0x028DF8
-#define R_028DFC_PA_SU_POLY_OFFSET_CLAMP             0x028DFC
-#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE       0x028E00
-#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET      0x028E04
-#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE        0x028E08
-#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET       0x028E0C
-#define R_028818_PA_CL_VTE_CNTL                      0x028818
-#define R_02843C_PA_CL_VPORT_XSCALE_0                0x02843C
-#define R_028444_PA_CL_VPORT_YSCALE_0                0x028444
-#define R_02844C_PA_CL_VPORT_ZSCALE_0                0x02844C
-#define R_028440_PA_CL_VPORT_XOFFSET_0               0x028440
-#define R_028448_PA_CL_VPORT_YOFFSET_0               0x028448
-#define R_028450_PA_CL_VPORT_ZOFFSET_0               0x028450
-#define R_028250_PA_SC_VPORT_SCISSOR_0_TL            0x028250
-#define R_028254_PA_SC_VPORT_SCISSOR_0_BR            0x028254
-#define R_028780_CB_BLEND0_CONTROL                   0x028780
-#define R_028784_CB_BLEND1_CONTROL                   0x028784
-#define R_028788_CB_BLEND2_CONTROL                   0x028788
-#define R_02878C_CB_BLEND3_CONTROL                   0x02878C
-#define R_028790_CB_BLEND4_CONTROL                   0x028790
-#define R_028794_CB_BLEND5_CONTROL                   0x028794
-#define R_028798_CB_BLEND6_CONTROL                   0x028798
-#define R_02879C_CB_BLEND7_CONTROL                   0x02879C
-#define R_028804_CB_BLEND_CONTROL                    0x028804
-#define R_028028_DB_STENCIL_CLEAR                    0x028028
-#define R_02802C_DB_DEPTH_CLEAR                      0x02802C
-#define R_028430_DB_STENCILREFMASK                   0x028430
-#define R_028434_DB_STENCILREFMASK_BF                0x028434
-#define R_028800_DB_DEPTH_CONTROL                    0x028800
-#define R_02880C_DB_SHADER_CONTROL                   0x02880C
-#define R_028D0C_DB_RENDER_CONTROL                   0x028D0C
-#define   S_028D0C_DEPTH_CLEAR_ENABLE(x)               (((x) & 0x1) << 0)
-#define   S_028D0C_STENCIL_CLEAR_ENABLE(x)             (((x) & 0x1) << 1)
-#define   S_028D0C_DEPTH_COPY_ENABLE(x)                (((x) & 0x1) << 2)
-#define   S_028D0C_STENCIL_COPY_ENABLE(x)              (((x) & 0x1) << 3)
-#define   S_028D0C_RESUMMARIZE_ENABLE(x)               (((x) & 0x1) << 4)
-#define   S_028D0C_STENCIL_COMPRESS_DISABLE(x)         (((x) & 0x1) << 5)
-#define   S_028D0C_DEPTH_COMPRESS_DISABLE(x)           (((x) & 0x1) << 6)
-#define   S_028D0C_COPY_CENTROID(x)                    (((x) & 0x1) << 7)
-#define   S_028D0C_COPY_SAMPLE(x)                      (((x) & 0x1) << 8)
-#define   S_028D0C_R700_PERFECT_ZPASS_COUNTS(x)        (((x) & 0x1) << 15)
-#define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
-#define R_028D2C_DB_SRESULTS_COMPARE_STATE1          0x028D2C
-#define R_028D30_DB_PRELOAD_CONTROL                  0x028D30
-#define R_028D44_DB_ALPHA_TO_MASK                    0x028D44
-#define R_028868_SQ_PGM_RESOURCES_VS                 0x028868
-#define R_0286CC_SPI_PS_IN_CONTROL_0                 0x0286CC
-#define R_0286D0_SPI_PS_IN_CONTROL_1                 0x0286D0
-#define R_028644_SPI_PS_INPUT_CNTL_0                 0x028644
-#define R_028648_SPI_PS_INPUT_CNTL_1                 0x028648
-#define R_02864C_SPI_PS_INPUT_CNTL_2                 0x02864C
-#define R_028650_SPI_PS_INPUT_CNTL_3                 0x028650
-#define R_028654_SPI_PS_INPUT_CNTL_4                 0x028654
-#define R_028658_SPI_PS_INPUT_CNTL_5                 0x028658
-#define R_02865C_SPI_PS_INPUT_CNTL_6                 0x02865C
-#define R_028660_SPI_PS_INPUT_CNTL_7                 0x028660
-#define R_028664_SPI_PS_INPUT_CNTL_8                 0x028664
-#define R_028668_SPI_PS_INPUT_CNTL_9                 0x028668
-#define R_02866C_SPI_PS_INPUT_CNTL_10                0x02866C
-#define R_028670_SPI_PS_INPUT_CNTL_11                0x028670
-#define R_028674_SPI_PS_INPUT_CNTL_12                0x028674
-#define R_028678_SPI_PS_INPUT_CNTL_13                0x028678
-#define R_02867C_SPI_PS_INPUT_CNTL_14                0x02867C
-#define R_028680_SPI_PS_INPUT_CNTL_15                0x028680
-#define R_028684_SPI_PS_INPUT_CNTL_16                0x028684
-#define R_028688_SPI_PS_INPUT_CNTL_17                0x028688
-#define R_02868C_SPI_PS_INPUT_CNTL_18                0x02868C
-#define R_028690_SPI_PS_INPUT_CNTL_19                0x028690
-#define R_028694_SPI_PS_INPUT_CNTL_20                0x028694
-#define R_028698_SPI_PS_INPUT_CNTL_21                0x028698
-#define R_02869C_SPI_PS_INPUT_CNTL_22                0x02869C
-#define R_0286A0_SPI_PS_INPUT_CNTL_23                0x0286A0
-#define R_0286A4_SPI_PS_INPUT_CNTL_24                0x0286A4
-#define R_0286A8_SPI_PS_INPUT_CNTL_25                0x0286A8
-#define R_0286AC_SPI_PS_INPUT_CNTL_26                0x0286AC
-#define R_0286B0_SPI_PS_INPUT_CNTL_27                0x0286B0
-#define R_0286B4_SPI_PS_INPUT_CNTL_28                0x0286B4
-#define R_0286B8_SPI_PS_INPUT_CNTL_29                0x0286B8
-#define R_0286BC_SPI_PS_INPUT_CNTL_30                0x0286BC
-#define R_0286C0_SPI_PS_INPUT_CNTL_31                0x0286C0
-#define R_028850_SQ_PGM_RESOURCES_PS                 0x028850
-#define R_028854_SQ_PGM_EXPORTS_PS                   0x028854
-#define R_008958_VGT_PRIMITIVE_TYPE                  0x008958
-#define R_028A7C_VGT_DMA_INDEX_TYPE                  0x028A7C
-#define R_028A88_VGT_DMA_NUM_INSTANCES               0x028A88
-#define R_008970_VGT_NUM_INDICES                     0x008970
-#define R_0287F0_VGT_DRAW_INITIATOR                  0x0287F0
-#define R_028238_CB_TARGET_MASK                      0x028238
-#define R_02823C_CB_SHADER_MASK                      0x02823C
-#define R_028060_CB_COLOR0_SIZE                      0x028060
-#define   S_028060_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
-#define   G_028060_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)
-#define   C_028060_PITCH_TILE_MAX                      0xFFFFFC00
-#define   S_028060_SLICE_TILE_MAX(x)                   (((x) & 0xFFFFF) << 10)
-#define   G_028060_SLICE_TILE_MAX(x)                   (((x) >> 10) & 0xFFFFF)
-#define   C_028060_SLICE_TILE_MAX                      0xC00003FF
-#define R_028064_CB_COLOR1_SIZE                      0x028064
-#define R_028068_CB_COLOR2_SIZE                      0x028068
-#define R_02806C_CB_COLOR3_SIZE                      0x02806C
-#define R_028070_CB_COLOR4_SIZE                      0x028070
-#define R_028074_CB_COLOR5_SIZE                      0x028074
-#define R_028078_CB_COLOR6_SIZE                      0x028078
-#define R_02807C_CB_COLOR7_SIZE                      0x02807C
-#define R_028040_CB_COLOR0_BASE                      0x028040
-#define R_028044_CB_COLOR1_BASE                      0x028044
-#define R_028048_CB_COLOR2_BASE                      0x028048
-#define R_02804C_CB_COLOR3_BASE                      0x02804C
-#define R_028050_CB_COLOR4_BASE                      0x028050
-#define R_028054_CB_COLOR5_BASE                      0x028054
-#define R_028058_CB_COLOR6_BASE                      0x028058
-#define R_02805C_CB_COLOR7_BASE                      0x02805C
-#define R_028240_PA_SC_GENERIC_SCISSOR_TL            0x028240
-#define   S_028240_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028240_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028240_TL_X                                0xFFFFC000
-#define   S_028240_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028240_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028240_TL_Y                                0xC000FFFF
-#define R_028C04_PA_SC_AA_CONFIG                     0x028C04
-#define   S_028C04_MSAA_NUM_SAMPLES(x)                 (((x) & 0x3) << 0)
-#define   G_028C04_MSAA_NUM_SAMPLES(x)                 (((x) >> 0) & 0x3)
-#define   C_028C04_MSAA_NUM_SAMPLES                    0xFFFFFFFC
-#define   S_028C04_AA_MASK_CENTROID_DTMN(x)            (((x) & 0x1) << 4)
-#define   G_028C04_AA_MASK_CENTROID_DTMN(x)            (((x) >> 4) & 0x1)
-#define   C_028C04_AA_MASK_CENTROID_DTMN               0xFFFFFFEF
-#define   S_028C04_MAX_SAMPLE_DIST(x)                  (((x) & 0xF) << 13)
-#define   G_028C04_MAX_SAMPLE_DIST(x)                  (((x) >> 13) & 0xF)
-#define   C_028C04_MAX_SAMPLE_DIST                     0xFFFE1FFF
-#define R_0288CC_SQ_PGM_CF_OFFSET_PS                 0x0288CC
-#define R_0288DC_SQ_PGM_CF_OFFSET_FS                 0x0288DC
-#define R_0288D0_SQ_PGM_CF_OFFSET_VS                 0x0288D0
-#define R_028840_SQ_PGM_START_PS                     0x028840
-#define R_028894_SQ_PGM_START_FS                     0x028894
-#define R_028858_SQ_PGM_START_VS                     0x028858
-#define R_028080_CB_COLOR0_VIEW                      0x028080
-#define   S_028080_SLICE_START(x)                      (((x) & 0x7FF) << 0)
-#define   G_028080_SLICE_START(x)                      (((x) >> 0) & 0x7FF)
-#define   C_028080_SLICE_START                         0xFFFFF800
-#define   S_028080_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
-#define   G_028080_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
-#define   C_028080_SLICE_MAX                           0xFF001FFF
-#define R_028084_CB_COLOR1_VIEW                      0x028084
-#define R_028088_CB_COLOR2_VIEW                      0x028088
-#define R_02808C_CB_COLOR3_VIEW                      0x02808C
-#define R_028090_CB_COLOR4_VIEW                      0x028090
-#define R_028094_CB_COLOR5_VIEW                      0x028094
-#define R_028098_CB_COLOR6_VIEW                      0x028098
-#define R_02809C_CB_COLOR7_VIEW                      0x02809C
-#define R_028100_CB_COLOR0_MASK                      0x028100
-#define   S_028100_CMASK_BLOCK_MAX(x)                  (((x) & 0xFFF) << 0)
-#define   G_028100_CMASK_BLOCK_MAX(x)                  (((x) >> 0) & 0xFFF)
-#define   C_028100_CMASK_BLOCK_MAX                     0xFFFFF000
-#define   S_028100_FMASK_TILE_MAX(x)                   (((x) & 0xFFFFF) << 12)
-#define   G_028100_FMASK_TILE_MAX(x)                   (((x) >> 12) & 0xFFFFF)
-#define   C_028100_FMASK_TILE_MAX                      0x00000FFF
-#define R_028104_CB_COLOR1_MASK                      0x028104
-#define R_028108_CB_COLOR2_MASK                      0x028108
-#define R_02810C_CB_COLOR3_MASK                      0x02810C
-#define R_028110_CB_COLOR4_MASK                      0x028110
-#define R_028114_CB_COLOR5_MASK                      0x028114
-#define R_028118_CB_COLOR6_MASK                      0x028118
-#define R_02811C_CB_COLOR7_MASK                      0x02811C
-#define R_028040_CB_COLOR0_BASE                      0x028040
-#define   S_028040_BASE_256B(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028040_BASE_256B(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028040_BASE_256B                           0x00000000
-#define R_0280E0_CB_COLOR0_FRAG                      0x0280E0
-#define   S_0280E0_BASE_256B(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_0280E0_BASE_256B(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0280E0_BASE_256B                           0x00000000
-#define R_0280E4_CB_COLOR1_FRAG                      0x0280E4
-#define R_0280E8_CB_COLOR2_FRAG                      0x0280E8
-#define R_0280EC_CB_COLOR3_FRAG                      0x0280EC
-#define R_0280F0_CB_COLOR4_FRAG                      0x0280F0
-#define R_0280F4_CB_COLOR5_FRAG                      0x0280F4
-#define R_0280F8_CB_COLOR6_FRAG                      0x0280F8
-#define R_0280FC_CB_COLOR7_FRAG                      0x0280FC
-#define R_0280C0_CB_COLOR0_TILE                      0x0280C0
-#define   S_0280C0_BASE_256B(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_0280C0_BASE_256B(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0280C0_BASE_256B                           0x00000000
-#define R_0280C4_CB_COLOR1_TILE                      0x0280C4
-#define R_0280C8_CB_COLOR2_TILE                      0x0280C8
-#define R_0280CC_CB_COLOR3_TILE                      0x0280CC
-#define R_0280D0_CB_COLOR4_TILE                      0x0280D0
-#define R_0280D4_CB_COLOR5_TILE                      0x0280D4
-#define R_0280D8_CB_COLOR6_TILE                      0x0280D8
-#define R_0280DC_CB_COLOR7_TILE                      0x0280DC
-#define R_028808_CB_COLOR_CONTROL                    0x028808
-#define   S_028808_FOG_ENABLE(x)                       (((x) & 0x1) << 0)
-#define   G_028808_FOG_ENABLE(x)                       (((x) >> 0) & 0x1)
-#define   C_028808_FOG_ENABLE                          0xFFFFFFFE
-#define   S_028808_MULTIWRITE_ENABLE(x)                (((x) & 0x1) << 1)
-#define   G_028808_MULTIWRITE_ENABLE(x)                (((x) >> 1) & 0x1)
-#define   C_028808_MULTIWRITE_ENABLE                   0xFFFFFFFD
-#define   S_028808_DITHER_ENABLE(x)                    (((x) & 0x1) << 2)
-#define   G_028808_DITHER_ENABLE(x)                    (((x) >> 2) & 0x1)
-#define   C_028808_DITHER_ENABLE                       0xFFFFFFFB
-#define   S_028808_DEGAMMA_ENABLE(x)                   (((x) & 0x1) << 3)
-#define   G_028808_DEGAMMA_ENABLE(x)                   (((x) >> 3) & 0x1)
-#define   C_028808_DEGAMMA_ENABLE                      0xFFFFFFF7
-#define   S_028808_SPECIAL_OP(x)                       (((x) & 0x7) << 4)
-#define   G_028808_SPECIAL_OP(x)                       (((x) >> 4) & 0x7)
-#define   C_028808_SPECIAL_OP                          0xFFFFFF8F
-#define   S_028808_PER_MRT_BLEND(x)                    (((x) & 0x1) << 7)
-#define   G_028808_PER_MRT_BLEND(x)                    (((x) >> 7) & 0x1)
-#define   C_028808_PER_MRT_BLEND                       0xFFFFFF7F
-#define   S_028808_TARGET_BLEND_ENABLE(x)              (((x) & 0xFF) << 8)
-#define   G_028808_TARGET_BLEND_ENABLE(x)              (((x) >> 8) & 0xFF)
-#define   C_028808_TARGET_BLEND_ENABLE                 0xFFFF00FF
-#define   S_028808_ROP3(x)                             (((x) & 0xFF) << 16)
-#define   G_028808_ROP3(x)                             (((x) >> 16) & 0xFF)
-#define   C_028808_ROP3                                0xFF00FFFF
-#define R_028614_SPI_VS_OUT_ID_0                     0x028614
-#define   S_028614_SEMANTIC_0(x)                       (((x) & 0xFF) << 0)
-#define   G_028614_SEMANTIC_0(x)                       (((x) >> 0) & 0xFF)
-#define   C_028614_SEMANTIC_0                          0xFFFFFF00
-#define   S_028614_SEMANTIC_1(x)                       (((x) & 0xFF) << 8)
-#define   G_028614_SEMANTIC_1(x)                       (((x) >> 8) & 0xFF)
-#define   C_028614_SEMANTIC_1                          0xFFFF00FF
-#define   S_028614_SEMANTIC_2(x)                       (((x) & 0xFF) << 16)
-#define   G_028614_SEMANTIC_2(x)                       (((x) >> 16) & 0xFF)
-#define   C_028614_SEMANTIC_2                          0xFF00FFFF
-#define   S_028614_SEMANTIC_3(x)                       (((x) & 0xFF) << 24)
-#define   G_028614_SEMANTIC_3(x)                       (((x) >> 24) & 0xFF)
-#define   C_028614_SEMANTIC_3                          0x00FFFFFF
-#define R_028618_SPI_VS_OUT_ID_1                     0x028618
-#define R_02861C_SPI_VS_OUT_ID_2                     0x02861C
-#define R_028620_SPI_VS_OUT_ID_3                     0x028620
-#define R_028624_SPI_VS_OUT_ID_4                     0x028624
-#define R_028628_SPI_VS_OUT_ID_5                     0x028628
-#define R_02862C_SPI_VS_OUT_ID_6                     0x02862C
-#define R_028630_SPI_VS_OUT_ID_7                     0x028630
-#define R_028634_SPI_VS_OUT_ID_8                     0x028634
-#define R_028638_SPI_VS_OUT_ID_9                     0x028638
-#define R_038000_SQ_TEX_RESOURCE_WORD0_0             0x038000
-#define   S_038000_DIM(x)                              (((x) & 0x7) << 0)
-#define   G_038000_DIM(x)                              (((x) >> 0) & 0x7)
-#define   C_038000_DIM                                 0xFFFFFFF8
-#define   S_038000_TILE_MODE(x)                        (((x) & 0xF) << 3)
-#define   G_038000_TILE_MODE(x)                        (((x) >> 3) & 0xF)
-#define   C_038000_TILE_MODE                           0xFFFFFF87
-#define   S_038000_TILE_TYPE(x)                        (((x) & 0x1) << 7)
-#define   G_038000_TILE_TYPE(x)                        (((x) >> 7) & 0x1)
-#define   C_038000_TILE_TYPE                           0xFFFFFF7F
-#define   S_038000_PITCH(x)                            (((x) & 0x7FF) << 8)
-#define   G_038000_PITCH(x)                            (((x) >> 8) & 0x7FF)
-#define   C_038000_PITCH                               0xFFF800FF
-#define   S_038000_TEX_WIDTH(x)                        (((x) & 0x1FFF) << 19)
-#define   G_038000_TEX_WIDTH(x)                        (((x) >> 19) & 0x1FFF)
-#define   C_038000_TEX_WIDTH                           0x0007FFFF
-#define R_038004_SQ_TEX_RESOURCE_WORD1_0             0x038004
-#define   S_038004_TEX_HEIGHT(x)                       (((x) & 0x1FFF) << 0)
-#define   G_038004_TEX_HEIGHT(x)                       (((x) >> 0) & 0x1FFF)
-#define   C_038004_TEX_HEIGHT                          0xFFFFE000
-#define   S_038004_TEX_DEPTH(x)                        (((x) & 0x1FFF) << 13)
-#define   G_038004_TEX_DEPTH(x)                        (((x) >> 13) & 0x1FFF)
-#define   C_038004_TEX_DEPTH                           0xFC001FFF
-#define   S_038004_DATA_FORMAT(x)                      (((x) & 0x3F) << 26)
-#define   G_038004_DATA_FORMAT(x)                      (((x) >> 26) & 0x3F)
-#define   C_038004_DATA_FORMAT                         0x03FFFFFF
-#define     V_038004_COLOR_INVALID                     0x00000000
-#define     V_038004_COLOR_8                           0x00000001
-#define     V_038004_COLOR_4_4                         0x00000002
-#define     V_038004_COLOR_3_3_2                       0x00000003
-#define     V_038004_COLOR_16                          0x00000005
-#define     V_038004_COLOR_16_FLOAT                    0x00000006
-#define     V_038004_COLOR_8_8                         0x00000007
-#define     V_038004_COLOR_5_6_5                       0x00000008
-#define     V_038004_COLOR_6_5_5                       0x00000009
-#define     V_038004_COLOR_1_5_5_5                     0x0000000A
-#define     V_038004_COLOR_4_4_4_4                     0x0000000B
-#define     V_038004_COLOR_5_5_5_1                     0x0000000C
-#define     V_038004_COLOR_32                          0x0000000D
-#define     V_038004_COLOR_32_FLOAT                    0x0000000E
-#define     V_038004_COLOR_16_16                       0x0000000F
-#define     V_038004_COLOR_16_16_FLOAT                 0x00000010
-#define     V_038004_COLOR_8_24                        0x00000011
-#define     V_038004_COLOR_8_24_FLOAT                  0x00000012
-#define     V_038004_COLOR_24_8                        0x00000013
-#define     V_038004_COLOR_24_8_FLOAT                  0x00000014
-#define     V_038004_COLOR_10_11_11                    0x00000015
-#define     V_038004_COLOR_10_11_11_FLOAT              0x00000016
-#define     V_038004_COLOR_11_11_10                    0x00000017
-#define     V_038004_COLOR_11_11_10_FLOAT              0x00000018
-#define     V_038004_COLOR_2_10_10_10                  0x00000019
-#define     V_038004_COLOR_8_8_8_8                     0x0000001A
-#define     V_038004_COLOR_10_10_10_2                  0x0000001B
-#define     V_038004_COLOR_X24_8_32_FLOAT              0x0000001C
-#define     V_038004_COLOR_32_32                       0x0000001D
-#define     V_038004_COLOR_32_32_FLOAT                 0x0000001E
-#define     V_038004_COLOR_16_16_16_16                 0x0000001F
-#define     V_038004_COLOR_16_16_16_16_FLOAT           0x00000020
-#define     V_038004_COLOR_32_32_32_32                 0x00000022
-#define     V_038004_COLOR_32_32_32_32_FLOAT           0x00000023
-#define R_038008_SQ_TEX_RESOURCE_WORD2_0             0x038008
-#define   S_038008_BASE_ADDRESS(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_038008_BASE_ADDRESS(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_038008_BASE_ADDRESS                        0x00000000
-#define R_03800C_SQ_TEX_RESOURCE_WORD3_0             0x03800C
-#define   S_03800C_MIP_ADDRESS(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_03800C_MIP_ADDRESS(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_03800C_MIP_ADDRESS                         0x00000000
-#define R_038010_SQ_TEX_RESOURCE_WORD4_0             0x038010
-#define   S_038010_FORMAT_COMP_X(x)                    (((x) & 0x3) << 0)
-#define   G_038010_FORMAT_COMP_X(x)                    (((x) >> 0) & 0x3)
-#define   C_038010_FORMAT_COMP_X                       0xFFFFFFFC
-#define   S_038010_FORMAT_COMP_Y(x)                    (((x) & 0x3) << 2)
-#define   G_038010_FORMAT_COMP_Y(x)                    (((x) >> 2) & 0x3)
-#define   C_038010_FORMAT_COMP_Y                       0xFFFFFFF3
-#define   S_038010_FORMAT_COMP_Z(x)                    (((x) & 0x3) << 4)
-#define   G_038010_FORMAT_COMP_Z(x)                    (((x) >> 4) & 0x3)
-#define   C_038010_FORMAT_COMP_Z                       0xFFFFFFCF
-#define   S_038010_FORMAT_COMP_W(x)                    (((x) & 0x3) << 6)
-#define   G_038010_FORMAT_COMP_W(x)                    (((x) >> 6) & 0x3)
-#define   C_038010_FORMAT_COMP_W                       0xFFFFFF3F
-#define   S_038010_NUM_FORMAT_ALL(x)                   (((x) & 0x3) << 8)
-#define   G_038010_NUM_FORMAT_ALL(x)                   (((x) >> 8) & 0x3)
-#define   C_038010_NUM_FORMAT_ALL                      0xFFFFFCFF
-#define   S_038010_SRF_MODE_ALL(x)                     (((x) & 0x1) << 10)
-#define   G_038010_SRF_MODE_ALL(x)                     (((x) >> 10) & 0x1)
-#define   C_038010_SRF_MODE_ALL                        0xFFFFFBFF
-#define   S_038010_FORCE_DEGAMMA(x)                    (((x) & 0x1) << 11)
-#define   G_038010_FORCE_DEGAMMA(x)                    (((x) >> 11) & 0x1)
-#define   C_038010_FORCE_DEGAMMA                       0xFFFFF7FF
-#define   S_038010_ENDIAN_SWAP(x)                      (((x) & 0x3) << 12)
-#define   G_038010_ENDIAN_SWAP(x)                      (((x) >> 12) & 0x3)
-#define   C_038010_ENDIAN_SWAP                         0xFFFFCFFF
-#define   S_038010_REQUEST_SIZE(x)                     (((x) & 0x3) << 14)
-#define   G_038010_REQUEST_SIZE(x)                     (((x) >> 14) & 0x3)
-#define   C_038010_REQUEST_SIZE                        0xFFFF3FFF
-#define   S_038010_DST_SEL_X(x)                        (((x) & 0x7) << 16)
-#define   G_038010_DST_SEL_X(x)                        (((x) >> 16) & 0x7)
-#define   C_038010_DST_SEL_X                           0xFFF8FFFF
-#define   S_038010_DST_SEL_Y(x)                        (((x) & 0x7) << 19)
-#define   G_038010_DST_SEL_Y(x)                        (((x) >> 19) & 0x7)
-#define   C_038010_DST_SEL_Y                           0xFFC7FFFF
-#define   S_038010_DST_SEL_Z(x)                        (((x) & 0x7) << 22)
-#define   G_038010_DST_SEL_Z(x)                        (((x) >> 22) & 0x7)
-#define   C_038010_DST_SEL_Z                           0xFE3FFFFF
-#define   S_038010_DST_SEL_W(x)                        (((x) & 0x7) << 25)
-#define   G_038010_DST_SEL_W(x)                        (((x) >> 25) & 0x7)
-#define   C_038010_DST_SEL_W                           0xF1FFFFFF
-#define   S_038010_BASE_LEVEL(x)                       (((x) & 0xF) << 28)
-#define   G_038010_BASE_LEVEL(x)                       (((x) >> 28) & 0xF)
-#define   C_038010_BASE_LEVEL                          0x0FFFFFFF
-#define R_038014_SQ_TEX_RESOURCE_WORD5_0             0x038014
-#define   S_038014_LAST_LEVEL(x)                       (((x) & 0xF) << 0)
-#define   G_038014_LAST_LEVEL(x)                       (((x) >> 0) & 0xF)
-#define   C_038014_LAST_LEVEL                          0xFFFFFFF0
-#define   S_038014_BASE_ARRAY(x)                       (((x) & 0x1FFF) << 4)
-#define   G_038014_BASE_ARRAY(x)                       (((x) >> 4) & 0x1FFF)
-#define   C_038014_BASE_ARRAY                          0xFFFE000F
-#define   S_038014_LAST_ARRAY(x)                       (((x) & 0x1FFF) << 17)
-#define   G_038014_LAST_ARRAY(x)                       (((x) >> 17) & 0x1FFF)
-#define   C_038014_LAST_ARRAY                          0xC001FFFF
-#define R_038018_SQ_TEX_RESOURCE_WORD6_0             0x038018
-#define   S_038018_MPEG_CLAMP(x)                       (((x) & 0x3) << 0)
-#define   G_038018_MPEG_CLAMP(x)                       (((x) >> 0) & 0x3)
-#define   C_038018_MPEG_CLAMP                          0xFFFFFFFC
-#define   S_038018_PERF_MODULATION(x)                  (((x) & 0x7) << 5)
-#define   G_038018_PERF_MODULATION(x)                  (((x) >> 5) & 0x7)
-#define   C_038018_PERF_MODULATION                     0xFFFFFF1F
-#define   S_038018_INTERLACED(x)                       (((x) & 0x1) << 8)
-#define   G_038018_INTERLACED(x)                       (((x) >> 8) & 0x1)
-#define   C_038018_INTERLACED                          0xFFFFFEFF
-#define   S_038018_TYPE(x)                             (((x) & 0x3) << 30)
-#define   G_038018_TYPE(x)                             (((x) >> 30) & 0x3)
-#define   C_038018_TYPE                                0x3FFFFFFF
-#define R_008040_WAIT_UNTIL                          0x008040
-#define   S_008040_WAIT_CP_DMA_IDLE(x)                 (((x) & 0x1) << 8)
-#define   G_008040_WAIT_CP_DMA_IDLE(x)                 (((x) >> 8) & 0x1)
-#define   C_008040_WAIT_CP_DMA_IDLE                    0xFFFFFEFF
-#define   S_008040_WAIT_CMDFIFO(x)                     (((x) & 0x1) << 10)
-#define   G_008040_WAIT_CMDFIFO(x)                     (((x) >> 10) & 0x1)
-#define   C_008040_WAIT_CMDFIFO                        0xFFFFFBFF
-#define   S_008040_WAIT_2D_IDLE(x)                     (((x) & 0x1) << 14)
-#define   G_008040_WAIT_2D_IDLE(x)                     (((x) >> 14) & 0x1)
-#define   C_008040_WAIT_2D_IDLE                        0xFFFFBFFF
-#define   S_008040_WAIT_3D_IDLE(x)                     (((x) & 0x1) << 15)
-#define   G_008040_WAIT_3D_IDLE(x)                     (((x) >> 15) & 0x1)
-#define   C_008040_WAIT_3D_IDLE                        0xFFFF7FFF
-#define   S_008040_WAIT_2D_IDLECLEAN(x)                (((x) & 0x1) << 16)
-#define   G_008040_WAIT_2D_IDLECLEAN(x)                (((x) >> 16) & 0x1)
-#define   C_008040_WAIT_2D_IDLECLEAN                   0xFFFEFFFF
-#define   S_008040_WAIT_3D_IDLECLEAN(x)                (((x) & 0x1) << 17)
-#define   G_008040_WAIT_3D_IDLECLEAN(x)                (((x) >> 17) & 0x1)
-#define   C_008040_WAIT_3D_IDLECLEAN                   0xFFFDFFFF
-#define   S_008040_WAIT_EXTERN_SIG(x)                  (((x) & 0x1) << 19)
-#define   G_008040_WAIT_EXTERN_SIG(x)                  (((x) >> 19) & 0x1)
-#define   C_008040_WAIT_EXTERN_SIG                     0xFFF7FFFF
-#define   S_008040_CMDFIFO_ENTRIES(x)                  (((x) & 0x1F) << 20)
-#define   G_008040_CMDFIFO_ENTRIES(x)                  (((x) >> 20) & 0x1F)
-#define   C_008040_CMDFIFO_ENTRIES                     0xFE0FFFFF
-#define R_008958_VGT_PRIMITIVE_TYPE                  0x008958
-#define   S_008958_PRIM_TYPE(x)                        (((x) & 0x3F) << 0)
-#define   G_008958_PRIM_TYPE(x)                        (((x) >> 0) & 0x3F)
-#define   C_008958_PRIM_TYPE                           0xFFFFFFC0
-#define R_008C00_SQ_CONFIG                           0x008C00
-#define   S_008C00_VC_ENABLE(x)                        (((x) & 0x1) << 0)
-#define   G_008C00_VC_ENABLE(x)                        (((x) >> 0) & 0x1)
-#define   C_008C00_VC_ENABLE                           0xFFFFFFFE
-#define   S_008C00_EXPORT_SRC_C(x)                     (((x) & 0x1) << 1)
-#define   G_008C00_EXPORT_SRC_C(x)                     (((x) >> 1) & 0x1)
-#define   C_008C00_EXPORT_SRC_C                        0xFFFFFFFD
-#define   S_008C00_DX9_CONSTS(x)                       (((x) & 0x1) << 2)
-#define   G_008C00_DX9_CONSTS(x)                       (((x) >> 2) & 0x1)
-#define   C_008C00_DX9_CONSTS                          0xFFFFFFFB
-#define   S_008C00_ALU_INST_PREFER_VECTOR(x)           (((x) & 0x1) << 3)
-#define   G_008C00_ALU_INST_PREFER_VECTOR(x)           (((x) >> 3) & 0x1)
-#define   C_008C00_ALU_INST_PREFER_VECTOR              0xFFFFFFF7
-#define   S_008C00_DX10_CLAMP(x)                       (((x) & 0x1) << 4)
-#define   G_008C00_DX10_CLAMP(x)                       (((x) >> 4) & 0x1)
-#define   C_008C00_DX10_CLAMP                          0xFFFFFFEF
-#define   S_008C00_ALU_PREFER_ONE_WATERFALL(x)         (((x) & 0x1) << 5)
-#define   G_008C00_ALU_PREFER_ONE_WATERFALL(x)         (((x) >> 5) & 0x1)
-#define   C_008C00_ALU_PREFER_ONE_WATERFALL            0xFFFFFFDF
-#define   S_008C00_ALU_MAX_ONE_WATERFALL(x)            (((x) & 0x1) << 6)
-#define   G_008C00_ALU_MAX_ONE_WATERFALL(x)            (((x) >> 6) & 0x1)
-#define   C_008C00_ALU_MAX_ONE_WATERFALL               0xFFFFFFBF
-#define   S_008C00_CLAUSE_SEQ_PRIO(x)                  (((x) & 0x3) << 8)
-#define   G_008C00_CLAUSE_SEQ_PRIO(x)                  (((x) >> 8) & 0x3)
-#define   C_008C00_CLAUSE_SEQ_PRIO                     0xFFFFFCFF
-#define   S_008C00_PS_PRIO(x)                          (((x) & 0x3) << 24)
-#define   G_008C00_PS_PRIO(x)                          (((x) >> 24) & 0x3)
-#define   C_008C00_PS_PRIO                             0xFCFFFFFF
-#define   S_008C00_VS_PRIO(x)                          (((x) & 0x3) << 26)
-#define   G_008C00_VS_PRIO(x)                          (((x) >> 26) & 0x3)
-#define   C_008C00_VS_PRIO                             0xF3FFFFFF
-#define   S_008C00_GS_PRIO(x)                          (((x) & 0x3) << 28)
-#define   G_008C00_GS_PRIO(x)                          (((x) >> 28) & 0x3)
-#define   C_008C00_GS_PRIO                             0xCFFFFFFF
-#define   S_008C00_ES_PRIO(x)                          (((x) & 0x3) << 30)
-#define   G_008C00_ES_PRIO(x)                          (((x) >> 30) & 0x3)
-#define   C_008C00_ES_PRIO                             0x3FFFFFFF
-#define R_008C04_SQ_GPR_RESOURCE_MGMT_1              0x008C04
-#define   S_008C04_NUM_PS_GPRS(x)                      (((x) & 0xFF) << 0)
-#define   G_008C04_NUM_PS_GPRS(x)                      (((x) >> 0) & 0xFF)
-#define   C_008C04_NUM_PS_GPRS                         0xFFFFFF00
-#define   S_008C04_NUM_VS_GPRS(x)                      (((x) & 0xFF) << 16)
-#define   G_008C04_NUM_VS_GPRS(x)                      (((x) >> 16) & 0xFF)
-#define   C_008C04_NUM_VS_GPRS                         0xFF00FFFF
-#define   S_008C04_NUM_CLAUSE_TEMP_GPRS(x)             (((x) & 0xF) << 28)
-#define   G_008C04_NUM_CLAUSE_TEMP_GPRS(x)             (((x) >> 28) & 0xF)
-#define   C_008C04_NUM_CLAUSE_TEMP_GPRS                0x0FFFFFFF
-#define R_008C08_SQ_GPR_RESOURCE_MGMT_2              0x008C08
-#define   S_008C08_NUM_GS_GPRS(x)                      (((x) & 0xFF) << 0)
-#define   G_008C08_NUM_GS_GPRS(x)                      (((x) >> 0) & 0xFF)
-#define   C_008C08_NUM_GS_GPRS                         0xFFFFFF00
-#define   S_008C08_NUM_ES_GPRS(x)                      (((x) & 0xFF) << 16)
-#define   G_008C08_NUM_ES_GPRS(x)                      (((x) >> 16) & 0xFF)
-#define   C_008C08_NUM_ES_GPRS                         0xFF00FFFF
-#define R_008C0C_SQ_THREAD_RESOURCE_MGMT             0x008C0C
-#define   S_008C0C_NUM_PS_THREADS(x)                   (((x) & 0xFF) << 0)
-#define   G_008C0C_NUM_PS_THREADS(x)                   (((x) >> 0) & 0xFF)
-#define   C_008C0C_NUM_PS_THREADS                      0xFFFFFF00
-#define   S_008C0C_NUM_VS_THREADS(x)                   (((x) & 0xFF) << 8)
-#define   G_008C0C_NUM_VS_THREADS(x)                   (((x) >> 8) & 0xFF)
-#define   C_008C0C_NUM_VS_THREADS                      0xFFFF00FF
-#define   S_008C0C_NUM_GS_THREADS(x)                   (((x) & 0xFF) << 16)
-#define   G_008C0C_NUM_GS_THREADS(x)                   (((x) >> 16) & 0xFF)
-#define   C_008C0C_NUM_GS_THREADS                      0xFF00FFFF
-#define   S_008C0C_NUM_ES_THREADS(x)                   (((x) & 0xFF) << 24)
-#define   G_008C0C_NUM_ES_THREADS(x)                   (((x) >> 24) & 0xFF)
-#define   C_008C0C_NUM_ES_THREADS                      0x00FFFFFF
-#define R_008C10_SQ_STACK_RESOURCE_MGMT_1            0x008C10
-#define   S_008C10_NUM_PS_STACK_ENTRIES(x)             (((x) & 0xFFF) << 0)
-#define   G_008C10_NUM_PS_STACK_ENTRIES(x)             (((x) >> 0) & 0xFFF)
-#define   C_008C10_NUM_PS_STACK_ENTRIES                0xFFFFF000
-#define   S_008C10_NUM_VS_STACK_ENTRIES(x)             (((x) & 0xFFF) << 16)
-#define   G_008C10_NUM_VS_STACK_ENTRIES(x)             (((x) >> 16) & 0xFFF)
-#define   C_008C10_NUM_VS_STACK_ENTRIES                0xF000FFFF
-#define R_008C14_SQ_STACK_RESOURCE_MGMT_2            0x008C14
-#define   S_008C14_NUM_GS_STACK_ENTRIES(x)             (((x) & 0xFFF) << 0)
-#define   G_008C14_NUM_GS_STACK_ENTRIES(x)             (((x) >> 0) & 0xFFF)
-#define   C_008C14_NUM_GS_STACK_ENTRIES                0xFFFFF000
-#define   S_008C14_NUM_ES_STACK_ENTRIES(x)             (((x) & 0xFFF) << 16)
-#define   G_008C14_NUM_ES_STACK_ENTRIES(x)             (((x) >> 16) & 0xFFF)
-#define   C_008C14_NUM_ES_STACK_ENTRIES                0xF000FFFF
-#define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ        0x008D8C
-#define   S_008D8C_RING0_OFFSET(x)                     (((x) & 0xFF) << 0)
-#define   G_008D8C_RING0_OFFSET(x)                     (((x) >> 0) & 0xFF)
-#define   C_008D8C_RING0_OFFSET                        0xFFFFFF00
-#define   S_008D8C_ISOLATE_ES_ENABLE(x)                (((x) & 0x1) << 12)
-#define   G_008D8C_ISOLATE_ES_ENABLE(x)                (((x) >> 12) & 0x1)
-#define   C_008D8C_ISOLATE_ES_ENABLE                   0xFFFFEFFF
-#define   S_008D8C_ISOLATE_GS_ENABLE(x)                (((x) & 0x1) << 13)
-#define   G_008D8C_ISOLATE_GS_ENABLE(x)                (((x) >> 13) & 0x1)
-#define   C_008D8C_ISOLATE_GS_ENABLE                   0xFFFFDFFF
-#define   S_008D8C_VS_PC_LIMIT_ENABLE(x)               (((x) & 0x1) << 14)
-#define   G_008D8C_VS_PC_LIMIT_ENABLE(x)               (((x) >> 14) & 0x1)
-#define   C_008D8C_VS_PC_LIMIT_ENABLE                  0xFFFFBFFF
-#define R_009508_TA_CNTL_AUX                         0x009508
-#define   S_009508_DISABLE_CUBE_WRAP(x)                (((x) & 0x1) << 0)
-#define   G_009508_DISABLE_CUBE_WRAP(x)                (((x) >> 0) & 0x1)
-#define   C_009508_DISABLE_CUBE_WRAP                   0xFFFFFFFE
-#define   S_009508_SYNC_GRADIENT(x)                    (((x) & 0x1) << 24)
-#define   G_009508_SYNC_GRADIENT(x)                    (((x) >> 24) & 0x1)
-#define   C_009508_SYNC_GRADIENT                       0xFEFFFFFF
-#define   S_009508_SYNC_WALKER(x)                      (((x) & 0x1) << 25)
-#define   G_009508_SYNC_WALKER(x)                      (((x) >> 25) & 0x1)
-#define   C_009508_SYNC_WALKER                         0xFDFFFFFF
-#define   S_009508_SYNC_ALIGNER(x)                     (((x) & 0x1) << 26)
-#define   G_009508_SYNC_ALIGNER(x)                     (((x) >> 26) & 0x1)
-#define   C_009508_SYNC_ALIGNER                        0xFBFFFFFF
-#define   S_009508_BILINEAR_PRECISION(x)               (((x) & 0x1) << 31)
-#define   G_009508_BILINEAR_PRECISION(x)               (((x) >> 31) & 0x1)
-#define   C_009508_BILINEAR_PRECISION                  0x7FFFFFFF
-#define R_009714_VC_ENHANCE                          0x009714
-#define R_009830_DB_DEBUG                            0x009830
-#define R_009838_DB_WATERMARKS                       0x009838
-#define   S_009838_DEPTH_FREE(x)                       (((x) & 0x1F) << 0)
-#define   G_009838_DEPTH_FREE(x)                       (((x) >> 0) & 0x1F)
-#define   C_009838_DEPTH_FREE                          0xFFFFFFE0
-#define   S_009838_DEPTH_FLUSH(x)                      (((x) & 0x3F) << 5)
-#define   G_009838_DEPTH_FLUSH(x)                      (((x) >> 5) & 0x3F)
-#define   C_009838_DEPTH_FLUSH                         0xFFFFF81F
-#define   S_009838_FORCE_SUMMARIZE(x)                  (((x) & 0xF) << 11)
-#define   G_009838_FORCE_SUMMARIZE(x)                  (((x) >> 11) & 0xF)
-#define   C_009838_FORCE_SUMMARIZE                     0xFFFF87FF
-#define   S_009838_DEPTH_PENDING_FREE(x)               (((x) & 0x1F) << 15)
-#define   G_009838_DEPTH_PENDING_FREE(x)               (((x) >> 15) & 0x1F)
-#define   C_009838_DEPTH_PENDING_FREE                  0xFFF07FFF
-#define   S_009838_DEPTH_CACHELINE_FREE(x)             (((x) & 0x1F) << 20)
-#define   G_009838_DEPTH_CACHELINE_FREE(x)             (((x) >> 20) & 0x1F)
-#define   C_009838_DEPTH_CACHELINE_FREE                0xFE0FFFFF
-#define   S_009838_EARLY_Z_PANIC_DISABLE(x)            (((x) & 0x1) << 25)
-#define   G_009838_EARLY_Z_PANIC_DISABLE(x)            (((x) >> 25) & 0x1)
-#define   C_009838_EARLY_Z_PANIC_DISABLE               0xFDFFFFFF
-#define   S_009838_LATE_Z_PANIC_DISABLE(x)             (((x) & 0x1) << 26)
-#define   G_009838_LATE_Z_PANIC_DISABLE(x)             (((x) >> 26) & 0x1)
-#define   C_009838_LATE_Z_PANIC_DISABLE                0xFBFFFFFF
-#define   S_009838_RE_Z_PANIC_DISABLE(x)               (((x) & 0x1) << 27)
-#define   G_009838_RE_Z_PANIC_DISABLE(x)               (((x) >> 27) & 0x1)
-#define   C_009838_RE_Z_PANIC_DISABLE                  0xF7FFFFFF
-#define   S_009838_DB_EXTRA_DEBUG(x)                   (((x) & 0xF) << 28)
-#define   G_009838_DB_EXTRA_DEBUG(x)                   (((x) >> 28) & 0xF)
-#define   C_009838_DB_EXTRA_DEBUG                      0x0FFFFFFF
-#define R_028030_PA_SC_SCREEN_SCISSOR_TL             0x028030
-#define   S_028030_TL_X(x)                             (((x) & 0x7FFF) << 0)
-#define   G_028030_TL_X(x)                             (((x) >> 0) & 0x7FFF)
-#define   C_028030_TL_X                                0xFFFF8000
-#define   S_028030_TL_Y(x)                             (((x) & 0x7FFF) << 16)
-#define   G_028030_TL_Y(x)                             (((x) >> 16) & 0x7FFF)
-#define   C_028030_TL_Y                                0x8000FFFF
-#define R_028034_PA_SC_SCREEN_SCISSOR_BR             0x028034
-#define   S_028034_BR_X(x)                             (((x) & 0x7FFF) << 0)
-#define   G_028034_BR_X(x)                             (((x) >> 0) & 0x7FFF)
-#define   C_028034_BR_X                                0xFFFF8000
-#define   S_028034_BR_Y(x)                             (((x) & 0x7FFF) << 16)
-#define   G_028034_BR_Y(x)                             (((x) >> 16) & 0x7FFF)
-#define   C_028034_BR_Y                                0x8000FFFF
-#define R_028200_PA_SC_WINDOW_OFFSET                 0x028200
-#define   S_028200_WINDOW_X_OFFSET(x)                  (((x) & 0x7FFF) << 0)
-#define   G_028200_WINDOW_X_OFFSET(x)                  (((x) >> 0) & 0x7FFF)
-#define   C_028200_WINDOW_X_OFFSET                     0xFFFF8000
-#define   S_028200_WINDOW_Y_OFFSET(x)                  (((x) & 0x7FFF) << 16)
-#define   G_028200_WINDOW_Y_OFFSET(x)                  (((x) >> 16) & 0x7FFF)
-#define   C_028200_WINDOW_Y_OFFSET                     0x8000FFFF
-#define R_028204_PA_SC_WINDOW_SCISSOR_TL             0x028204
-#define   S_028204_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028204_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028204_TL_X                                0xFFFFC000
-#define   S_028204_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028204_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028204_TL_Y                                0xC000FFFF
-#define   S_028204_WINDOW_OFFSET_DISABLE(x)            (((x) & 0x1) << 31)
-#define   G_028204_WINDOW_OFFSET_DISABLE(x)            (((x) >> 31) & 0x1)
-#define   C_028204_WINDOW_OFFSET_DISABLE               0x7FFFFFFF
-#define R_028208_PA_SC_WINDOW_SCISSOR_BR             0x028208
-#define   S_028208_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028208_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028208_BR_X                                0xFFFFC000
-#define   S_028208_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028208_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028208_BR_Y                                0xC000FFFF
-#define R_02820C_PA_SC_CLIPRECT_RULE                 0x02820C
-#define   S_02820C_CLIP_RULE(x)                        (((x) & 0xFFFF) << 0)
-#define   G_02820C_CLIP_RULE(x)                        (((x) >> 0) & 0xFFFF)
-#define   C_02820C_CLIP_RULE                           0xFFFF0000
-#define R_028210_PA_SC_CLIPRECT_0_TL                 0x028210
-#define   S_028210_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028210_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028210_TL_X                                0xFFFFC000
-#define   S_028210_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028210_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028210_TL_Y                                0xC000FFFF
-#define R_028214_PA_SC_CLIPRECT_0_BR                 0x028214
-#define   S_028214_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028214_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028214_BR_X                                0xFFFFC000
-#define   S_028214_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028214_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028214_BR_Y                                0xC000FFFF
-#define R_028218_PA_SC_CLIPRECT_1_TL                 0x028218
-#define R_02821C_PA_SC_CLIPRECT_1_BR                 0x02821C
-#define R_028220_PA_SC_CLIPRECT_2_TL                 0x028220
-#define R_028224_PA_SC_CLIPRECT_2_BR                 0x028224
-#define R_028228_PA_SC_CLIPRECT_3_TL                 0x028228
-#define R_02822C_PA_SC_CLIPRECT_3_BR                 0x02822C
-#define R_028230_PA_SC_EDGERULE                      0x028230
-#define R_028240_PA_SC_GENERIC_SCISSOR_TL            0x028240
-#define   S_028240_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028240_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028240_TL_X                                0xFFFFC000
-#define   S_028240_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028240_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028240_TL_Y                                0xC000FFFF
-#define   S_028240_WINDOW_OFFSET_DISABLE(x)            (((x) & 0x1) << 31)
-#define   G_028240_WINDOW_OFFSET_DISABLE(x)            (((x) >> 31) & 0x1)
-#define   C_028240_WINDOW_OFFSET_DISABLE               0x7FFFFFFF
-#define R_028244_PA_SC_GENERIC_SCISSOR_BR            0x028244
-#define   S_028244_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028244_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028244_BR_X                                0xFFFFC000
-#define   S_028244_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028244_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028244_BR_Y                                0xC000FFFF
-#define R_0282D0_PA_SC_VPORT_ZMIN_0                  0x0282D0
-#define   S_0282D0_VPORT_ZMIN(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_0282D0_VPORT_ZMIN(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0282D0_VPORT_ZMIN                          0x00000000
-#define R_0282D4_PA_SC_VPORT_ZMAX_0                  0x0282D4
-#define   S_0282D4_VPORT_ZMAX(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_0282D4_VPORT_ZMAX(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0282D4_VPORT_ZMAX                          0x00000000
-#define R_028350_SX_MISC                             0x028350
-#define   S_028350_MULTIPASS(x)                        (((x) & 0x1) << 0)
-#define   G_028350_MULTIPASS(x)                        (((x) >> 0) & 0x1)
-#define   C_028350_MULTIPASS                           0xFFFFFFFE
-#define R_028380_SQ_VTX_SEMANTIC_0                   0x028380
-#define   S_028380_SEMANTIC_ID(x)                      (((x) & 0xFF) << 0)
-#define   G_028380_SEMANTIC_ID(x)                      (((x) >> 0) & 0xFF)
-#define   C_028380_SEMANTIC_ID                         0xFFFFFF00
-#define R_028384_SQ_VTX_SEMANTIC_1                   0x028384
-#define R_028388_SQ_VTX_SEMANTIC_2                   0x028388
-#define R_02838C_SQ_VTX_SEMANTIC_3                   0x02838C
-#define R_028390_SQ_VTX_SEMANTIC_4                   0x028390
-#define R_028394_SQ_VTX_SEMANTIC_5                   0x028394
-#define R_028398_SQ_VTX_SEMANTIC_6                   0x028398
-#define R_02839C_SQ_VTX_SEMANTIC_7                   0x02839C
-#define R_0283A0_SQ_VTX_SEMANTIC_8                   0x0283A0
-#define R_0283A4_SQ_VTX_SEMANTIC_9                   0x0283A4
-#define R_0283A8_SQ_VTX_SEMANTIC_10                  0x0283A8
-#define R_0283AC_SQ_VTX_SEMANTIC_11                  0x0283AC
-#define R_0283B0_SQ_VTX_SEMANTIC_12                  0x0283B0
-#define R_0283B4_SQ_VTX_SEMANTIC_13                  0x0283B4
-#define R_0283B8_SQ_VTX_SEMANTIC_14                  0x0283B8
-#define R_0283BC_SQ_VTX_SEMANTIC_15                  0x0283BC
-#define R_0283C0_SQ_VTX_SEMANTIC_16                  0x0283C0
-#define R_0283C4_SQ_VTX_SEMANTIC_17                  0x0283C4
-#define R_0283C8_SQ_VTX_SEMANTIC_18                  0x0283C8
-#define R_0283CC_SQ_VTX_SEMANTIC_19                  0x0283CC
-#define R_0283D0_SQ_VTX_SEMANTIC_20                  0x0283D0
-#define R_0283D4_SQ_VTX_SEMANTIC_21                  0x0283D4
-#define R_0283D8_SQ_VTX_SEMANTIC_22                  0x0283D8
-#define R_0283DC_SQ_VTX_SEMANTIC_23                  0x0283DC
-#define R_0283E0_SQ_VTX_SEMANTIC_24                  0x0283E0
-#define R_0283E4_SQ_VTX_SEMANTIC_25                  0x0283E4
-#define R_0283E8_SQ_VTX_SEMANTIC_26                  0x0283E8
-#define R_0283EC_SQ_VTX_SEMANTIC_27                  0x0283EC
-#define R_0283F0_SQ_VTX_SEMANTIC_28                  0x0283F0
-#define R_0283F4_SQ_VTX_SEMANTIC_29                  0x0283F4
-#define R_0283F8_SQ_VTX_SEMANTIC_30                  0x0283F8
-#define R_0283FC_SQ_VTX_SEMANTIC_31                  0x0283FC
-#define R_028400_VGT_MAX_VTX_INDX                    0x028400
-#define   S_028400_MAX_INDX(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_028400_MAX_INDX(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028400_MAX_INDX                            0x00000000
-#define R_028404_VGT_MIN_VTX_INDX                    0x028404
-#define   S_028404_MIN_INDX(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_028404_MIN_INDX(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028404_MIN_INDX                            0x00000000
-#define R_028408_VGT_INDX_OFFSET                     0x028408
-#define   S_028408_INDX_OFFSET(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_028408_INDX_OFFSET(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028408_INDX_OFFSET                         0x00000000
-#define R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX        0x02840C
-#define   S_02840C_RESET_INDX(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_02840C_RESET_INDX(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_02840C_RESET_INDX                          0x00000000
-#define R_028410_SX_ALPHA_TEST_CONTROL               0x028410
-#define   S_028410_ALPHA_FUNC(x)                       (((x) & 0x7) << 0)
-#define   G_028410_ALPHA_FUNC(x)                       (((x) >> 0) & 0x7)
-#define   C_028410_ALPHA_FUNC                          0xFFFFFFF8
-#define   S_028410_ALPHA_TEST_ENABLE(x)                (((x) & 0x1) << 3)
-#define   G_028410_ALPHA_TEST_ENABLE(x)                (((x) >> 3) & 0x1)
-#define   C_028410_ALPHA_TEST_ENABLE                   0xFFFFFFF7
-#define   S_028410_ALPHA_TEST_BYPASS(x)                (((x) & 0x1) << 8)
-#define   G_028410_ALPHA_TEST_BYPASS(x)                (((x) >> 8) & 0x1)
-#define   C_028410_ALPHA_TEST_BYPASS                   0xFFFFFEFF
-#define R_028414_CB_BLEND_RED                        0x028414
-#define   S_028414_BLEND_RED(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028414_BLEND_RED(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028414_BLEND_RED                           0x00000000
-#define R_028418_CB_BLEND_GREEN                      0x028418
-#define   S_028418_BLEND_GREEN(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_028418_BLEND_GREEN(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028418_BLEND_GREEN                         0x00000000
-#define R_02841C_CB_BLEND_BLUE                       0x02841C
-#define   S_02841C_BLEND_BLUE(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_02841C_BLEND_BLUE(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_02841C_BLEND_BLUE                          0x00000000
-#define R_028420_CB_BLEND_ALPHA                      0x028420
-#define   S_028420_BLEND_ALPHA(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_028420_BLEND_ALPHA(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028420_BLEND_ALPHA                         0x00000000
-#define R_028438_SX_ALPHA_REF                        0x028438
-#define   S_028438_ALPHA_REF(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028438_ALPHA_REF(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028438_ALPHA_REF                           0x00000000
-#define R_0286C8_SPI_THREAD_GROUPING                 0x0286C8
-#define   S_0286C8_PS_GROUPING(x)                      (((x) & 0x1F) << 0)
-#define   G_0286C8_PS_GROUPING(x)                      (((x) >> 0) & 0x1F)
-#define   C_0286C8_PS_GROUPING                         0xFFFFFFE0
-#define   S_0286C8_VS_GROUPING(x)                      (((x) & 0x1F) << 8)
-#define   G_0286C8_VS_GROUPING(x)                      (((x) >> 8) & 0x1F)
-#define   C_0286C8_VS_GROUPING                         0xFFFFE0FF
-#define   S_0286C8_GS_GROUPING(x)                      (((x) & 0x1F) << 16)
-#define   G_0286C8_GS_GROUPING(x)                      (((x) >> 16) & 0x1F)
-#define   C_0286C8_GS_GROUPING                         0xFFE0FFFF
-#define   S_0286C8_ES_GROUPING(x)                      (((x) & 0x1F) << 24)
-#define   G_0286C8_ES_GROUPING(x)                      (((x) >> 24) & 0x1F)
-#define   C_0286C8_ES_GROUPING                         0xE0FFFFFF
-#define R_0286D8_SPI_INPUT_Z                         0x0286D8
-#define   S_0286D8_PROVIDE_Z_TO_SPI(x)                 (((x) & 0x1) << 0)
-#define   G_0286D8_PROVIDE_Z_TO_SPI(x)                 (((x) >> 0) & 0x1)
-#define   C_0286D8_PROVIDE_Z_TO_SPI                    0xFFFFFFFE
-#define R_0286DC_SPI_FOG_CNTL                        0x0286DC
-#define   S_0286DC_PASS_FOG_THROUGH_PS(x)              (((x) & 0x1) << 0)
-#define   G_0286DC_PASS_FOG_THROUGH_PS(x)              (((x) >> 0) & 0x1)
-#define   C_0286DC_PASS_FOG_THROUGH_PS                 0xFFFFFFFE
-#define   S_0286DC_PIXEL_FOG_FUNC(x)                   (((x) & 0x3) << 1)
-#define   G_0286DC_PIXEL_FOG_FUNC(x)                   (((x) >> 1) & 0x3)
-#define   C_0286DC_PIXEL_FOG_FUNC                      0xFFFFFFF9
-#define   S_0286DC_PIXEL_FOG_SRC_SEL(x)                (((x) & 0x1) << 3)
-#define   G_0286DC_PIXEL_FOG_SRC_SEL(x)                (((x) >> 3) & 0x1)
-#define   C_0286DC_PIXEL_FOG_SRC_SEL                   0xFFFFFFF7
-#define   S_0286DC_VS_FOG_CLAMP_DISABLE(x)             (((x) & 0x1) << 4)
-#define   G_0286DC_VS_FOG_CLAMP_DISABLE(x)             (((x) >> 4) & 0x1)
-#define   C_0286DC_VS_FOG_CLAMP_DISABLE                0xFFFFFFEF
-#define R_0286E0_SPI_FOG_FUNC_SCALE                  0x0286E0
-#define   S_0286E0_VALUE(x)                            (((x) & 0xFFFFFFFF) << 0)
-#define   G_0286E0_VALUE(x)                            (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0286E0_VALUE                               0x00000000
-#define R_0286E4_SPI_FOG_FUNC_BIAS                   0x0286E4
-#define   S_0286E4_VALUE(x)                            (((x) & 0xFFFFFFFF) << 0)
-#define   G_0286E4_VALUE(x)                            (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0286E4_VALUE                               0x00000000
-#define R_0287A0_CB_SHADER_CONTROL                   0x0287A0
-#define   S_0287A0_RT0_ENABLE(x)                       (((x) & 0x1) << 0)
-#define   G_0287A0_RT0_ENABLE(x)                       (((x) >> 0) & 0x1)
-#define   C_0287A0_RT0_ENABLE                          0xFFFFFFFE
-#define   S_0287A0_RT1_ENABLE(x)                       (((x) & 0x1) << 1)
-#define   G_0287A0_RT1_ENABLE(x)                       (((x) >> 1) & 0x1)
-#define   C_0287A0_RT1_ENABLE                          0xFFFFFFFD
-#define   S_0287A0_RT2_ENABLE(x)                       (((x) & 0x1) << 2)
-#define   G_0287A0_RT2_ENABLE(x)                       (((x) >> 2) & 0x1)
-#define   C_0287A0_RT2_ENABLE                          0xFFFFFFFB
-#define   S_0287A0_RT3_ENABLE(x)                       (((x) & 0x1) << 3)
-#define   G_0287A0_RT3_ENABLE(x)                       (((x) >> 3) & 0x1)
-#define   C_0287A0_RT3_ENABLE                          0xFFFFFFF7
-#define   S_0287A0_RT4_ENABLE(x)                       (((x) & 0x1) << 4)
-#define   G_0287A0_RT4_ENABLE(x)                       (((x) >> 4) & 0x1)
-#define   C_0287A0_RT4_ENABLE                          0xFFFFFFEF
-#define   S_0287A0_RT5_ENABLE(x)                       (((x) & 0x1) << 5)
-#define   G_0287A0_RT5_ENABLE(x)                       (((x) >> 5) & 0x1)
-#define   C_0287A0_RT5_ENABLE                          0xFFFFFFDF
-#define   S_0287A0_RT6_ENABLE(x)                       (((x) & 0x1) << 6)
-#define   G_0287A0_RT6_ENABLE(x)                       (((x) >> 6) & 0x1)
-#define   C_0287A0_RT6_ENABLE                          0xFFFFFFBF
-#define   S_0287A0_RT7_ENABLE(x)                       (((x) & 0x1) << 7)
-#define   G_0287A0_RT7_ENABLE(x)                       (((x) >> 7) & 0x1)
-#define   C_0287A0_RT7_ENABLE                          0xFFFFFF7F
-#define R_028894_SQ_PGM_START_FS                     0x028894
-#define   S_028894_PGM_START(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028894_PGM_START(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028894_PGM_START                           0x00000000
-#define R_0288A4_SQ_PGM_RESOURCES_FS                 0x0288A4
-#define   S_0288A4_NUM_GPRS(x)                         (((x) & 0xFF) << 0)
-#define   G_0288A4_NUM_GPRS(x)                         (((x) >> 0) & 0xFF)
-#define   C_0288A4_NUM_GPRS                            0xFFFFFF00
-#define   S_0288A4_STACK_SIZE(x)                       (((x) & 0xFF) << 8)
-#define   G_0288A4_STACK_SIZE(x)                       (((x) >> 8) & 0xFF)
-#define   C_0288A4_STACK_SIZE                          0xFFFF00FF
-#define   S_0288A4_DX10_CLAMP(x)                       (((x) & 0x1) << 21)
-#define   G_0288A4_DX10_CLAMP(x)                       (((x) >> 21) & 0x1)
-#define   C_0288A4_DX10_CLAMP                          0xFFDFFFFF
-#define R_0288A8_SQ_ESGS_RING_ITEMSIZE               0x0288A8
-#define   S_0288A8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288A8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288A8_ITEMSIZE                            0xFFFF8000
-#define R_0288AC_SQ_GSVS_RING_ITEMSIZE               0x0288AC
-#define   S_0288AC_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288AC_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288AC_ITEMSIZE                            0xFFFF8000
-#define R_0288B0_SQ_ESTMP_RING_ITEMSIZE              0x0288B0
-#define   S_0288B0_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288B0_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288B0_ITEMSIZE                            0xFFFF8000
-#define R_0288B4_SQ_GSTMP_RING_ITEMSIZE              0x0288B4
-#define   S_0288B4_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288B4_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288B4_ITEMSIZE                            0xFFFF8000
-#define R_0288B8_SQ_VSTMP_RING_ITEMSIZE              0x0288B8
-#define   S_0288B8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288B8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288B8_ITEMSIZE                            0xFFFF8000
-#define R_0288BC_SQ_PSTMP_RING_ITEMSIZE              0x0288BC
-#define   S_0288BC_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288BC_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288BC_ITEMSIZE                            0xFFFF8000
-#define R_0288C0_SQ_FBUF_RING_ITEMSIZE               0x0288C0
-#define   S_0288C0_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288C0_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288C0_ITEMSIZE                            0xFFFF8000
-#define R_0288C4_SQ_REDUC_RING_ITEMSIZE              0x0288C4
-#define   S_0288C4_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288C4_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288C4_ITEMSIZE                            0xFFFF8000
-#define R_0288C8_SQ_GS_VERT_ITEMSIZE                 0x0288C8
-#define   S_0288C8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288C8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288C8_ITEMSIZE                            0xFFFF8000
-#define R_0288DC_SQ_PGM_CF_OFFSET_FS                 0x0288DC
-#define   S_0288DC_PGM_CF_OFFSET(x)                    (((x) & 0xFFFFF) << 0)
-#define   G_0288DC_PGM_CF_OFFSET(x)                    (((x) >> 0) & 0xFFFFF)
-#define   C_0288DC_PGM_CF_OFFSET                       0xFFF00000
-#define R_028A10_VGT_OUTPUT_PATH_CNTL                0x028A10
-#define   S_028A10_PATH_SELECT(x)                      (((x) & 0x3) << 0)
-#define   G_028A10_PATH_SELECT(x)                      (((x) >> 0) & 0x3)
-#define   C_028A10_PATH_SELECT                         0xFFFFFFFC
-#define R_028A14_VGT_HOS_CNTL                        0x028A14
-#define   S_028A14_TESS_MODE(x)                        (((x) & 0x3) << 0)
-#define   G_028A14_TESS_MODE(x)                        (((x) >> 0) & 0x3)
-#define   C_028A14_TESS_MODE                           0xFFFFFFFC
-#define R_028A18_VGT_HOS_MAX_TESS_LEVEL              0x028A18
-#define   S_028A18_MAX_TESS(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_028A18_MAX_TESS(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028A18_MAX_TESS                            0x00000000
-#define R_028A1C_VGT_HOS_MIN_TESS_LEVEL              0x028A1C
-#define   S_028A1C_MIN_TESS(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_028A1C_MIN_TESS(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028A1C_MIN_TESS                            0x00000000
-#define R_028A20_VGT_HOS_REUSE_DEPTH                 0x028A20
-#define   S_028A20_REUSE_DEPTH(x)                      (((x) & 0xFF) << 0)
-#define   G_028A20_REUSE_DEPTH(x)                      (((x) >> 0) & 0xFF)
-#define   C_028A20_REUSE_DEPTH                         0xFFFFFF00
-#define R_028A24_VGT_GROUP_PRIM_TYPE                 0x028A24
-#define   S_028A24_PRIM_TYPE(x)                        (((x) & 0x1F) << 0)
-#define   G_028A24_PRIM_TYPE(x)                        (((x) >> 0) & 0x1F)
-#define   C_028A24_PRIM_TYPE                           0xFFFFFFE0
-#define   S_028A24_RETAIN_ORDER(x)                     (((x) & 0x1) << 14)
-#define   G_028A24_RETAIN_ORDER(x)                     (((x) >> 14) & 0x1)
-#define   C_028A24_RETAIN_ORDER                        0xFFFFBFFF
-#define   S_028A24_RETAIN_QUADS(x)                     (((x) & 0x1) << 15)
-#define   G_028A24_RETAIN_QUADS(x)                     (((x) >> 15) & 0x1)
-#define   C_028A24_RETAIN_QUADS                        0xFFFF7FFF
-#define   S_028A24_PRIM_ORDER(x)                       (((x) & 0x7) << 16)
-#define   G_028A24_PRIM_ORDER(x)                       (((x) >> 16) & 0x7)
-#define   C_028A24_PRIM_ORDER                          0xFFF8FFFF
-#define R_028A28_VGT_GROUP_FIRST_DECR                0x028A28
-#define   S_028A28_FIRST_DECR(x)                       (((x) & 0xF) << 0)
-#define   G_028A28_FIRST_DECR(x)                       (((x) >> 0) & 0xF)
-#define   C_028A28_FIRST_DECR                          0xFFFFFFF0
-#define R_028A2C_VGT_GROUP_DECR                      0x028A2C
-#define   S_028A2C_DECR(x)                             (((x) & 0xF) << 0)
-#define   G_028A2C_DECR(x)                             (((x) >> 0) & 0xF)
-#define   C_028A2C_DECR                                0xFFFFFFF0
-#define R_028A30_VGT_GROUP_VECT_0_CNTL               0x028A30
-#define   S_028A30_COMP_X_EN(x)                        (((x) & 0x1) << 0)
-#define   G_028A30_COMP_X_EN(x)                        (((x) >> 0) & 0x1)
-#define   C_028A30_COMP_X_EN                           0xFFFFFFFE
-#define   S_028A30_COMP_Y_EN(x)                        (((x) & 0x1) << 1)
-#define   G_028A30_COMP_Y_EN(x)                        (((x) >> 1) & 0x1)
-#define   C_028A30_COMP_Y_EN                           0xFFFFFFFD
-#define   S_028A30_COMP_Z_EN(x)                        (((x) & 0x1) << 2)
-#define   G_028A30_COMP_Z_EN(x)                        (((x) >> 2) & 0x1)
-#define   C_028A30_COMP_Z_EN                           0xFFFFFFFB
-#define   S_028A30_COMP_W_EN(x)                        (((x) & 0x1) << 3)
-#define   G_028A30_COMP_W_EN(x)                        (((x) >> 3) & 0x1)
-#define   C_028A30_COMP_W_EN                           0xFFFFFFF7
-#define   S_028A30_STRIDE(x)                           (((x) & 0xFF) << 8)
-#define   G_028A30_STRIDE(x)                           (((x) >> 8) & 0xFF)
-#define   C_028A30_STRIDE                              0xFFFF00FF
-#define   S_028A30_SHIFT(x)                            (((x) & 0xFF) << 16)
-#define   G_028A30_SHIFT(x)                            (((x) >> 16) & 0xFF)
-#define   C_028A30_SHIFT                               0xFF00FFFF
-#define R_028A34_VGT_GROUP_VECT_1_CNTL               0x028A34
-#define   S_028A34_COMP_X_EN(x)                        (((x) & 0x1) << 0)
-#define   G_028A34_COMP_X_EN(x)                        (((x) >> 0) & 0x1)
-#define   C_028A34_COMP_X_EN                           0xFFFFFFFE
-#define   S_028A34_COMP_Y_EN(x)                        (((x) & 0x1) << 1)
-#define   G_028A34_COMP_Y_EN(x)                        (((x) >> 1) & 0x1)
-#define   C_028A34_COMP_Y_EN                           0xFFFFFFFD
-#define   S_028A34_COMP_Z_EN(x)                        (((x) & 0x1) << 2)
-#define   G_028A34_COMP_Z_EN(x)                        (((x) >> 2) & 0x1)
-#define   C_028A34_COMP_Z_EN                           0xFFFFFFFB
-#define   S_028A34_COMP_W_EN(x)                        (((x) & 0x1) << 3)
-#define   G_028A34_COMP_W_EN(x)                        (((x) >> 3) & 0x1)
-#define   C_028A34_COMP_W_EN                           0xFFFFFFF7
-#define   S_028A34_STRIDE(x)                           (((x) & 0xFF) << 8)
-#define   G_028A34_STRIDE(x)                           (((x) >> 8) & 0xFF)
-#define   C_028A34_STRIDE                              0xFFFF00FF
-#define   S_028A34_SHIFT(x)                            (((x) & 0xFF) << 16)
-#define   G_028A34_SHIFT(x)                            (((x) >> 16) & 0xFF)
-#define   C_028A34_SHIFT                               0xFF00FFFF
-#define R_028A38_VGT_GROUP_VECT_0_FMT_CNTL           0x028A38
-#define   S_028A38_X_CONV(x)                           (((x) & 0xF) << 0)
-#define   G_028A38_X_CONV(x)                           (((x) >> 0) & 0xF)
-#define   C_028A38_X_CONV                              0xFFFFFFF0
-#define   S_028A38_X_OFFSET(x)                         (((x) & 0xF) << 4)
-#define   G_028A38_X_OFFSET(x)                         (((x) >> 4) & 0xF)
-#define   C_028A38_X_OFFSET                            0xFFFFFF0F
-#define   S_028A38_Y_CONV(x)                           (((x) & 0xF) << 8)
-#define   G_028A38_Y_CONV(x)                           (((x) >> 8) & 0xF)
-#define   C_028A38_Y_CONV                              0xFFFFF0FF
-#define   S_028A38_Y_OFFSET(x)                         (((x) & 0xF) << 12)
-#define   G_028A38_Y_OFFSET(x)                         (((x) >> 12) & 0xF)
-#define   C_028A38_Y_OFFSET                            0xFFFF0FFF
-#define   S_028A38_Z_CONV(x)                           (((x) & 0xF) << 16)
-#define   G_028A38_Z_CONV(x)                           (((x) >> 16) & 0xF)
-#define   C_028A38_Z_CONV                              0xFFF0FFFF
-#define   S_028A38_Z_OFFSET(x)                         (((x) & 0xF) << 20)
-#define   G_028A38_Z_OFFSET(x)                         (((x) >> 20) & 0xF)
-#define   C_028A38_Z_OFFSET                            0xFF0FFFFF
-#define   S_028A38_W_CONV(x)                           (((x) & 0xF) << 24)
-#define   G_028A38_W_CONV(x)                           (((x) >> 24) & 0xF)
-#define   C_028A38_W_CONV                              0xF0FFFFFF
-#define   S_028A38_W_OFFSET(x)                         (((x) & 0xF) << 28)
-#define   G_028A38_W_OFFSET(x)                         (((x) >> 28) & 0xF)
-#define   C_028A38_W_OFFSET                            0x0FFFFFFF
-#define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL           0x028A3C
-#define   S_028A3C_X_CONV(x)                           (((x) & 0xF) << 0)
-#define   G_028A3C_X_CONV(x)                           (((x) >> 0) & 0xF)
-#define   C_028A3C_X_CONV                              0xFFFFFFF0
-#define   S_028A3C_X_OFFSET(x)                         (((x) & 0xF) << 4)
-#define   G_028A3C_X_OFFSET(x)                         (((x) >> 4) & 0xF)
-#define   C_028A3C_X_OFFSET                            0xFFFFFF0F
-#define   S_028A3C_Y_CONV(x)                           (((x) & 0xF) << 8)
-#define   G_028A3C_Y_CONV(x)                           (((x) >> 8) & 0xF)
-#define   C_028A3C_Y_CONV                              0xFFFFF0FF
-#define   S_028A3C_Y_OFFSET(x)                         (((x) & 0xF) << 12)
-#define   G_028A3C_Y_OFFSET(x)                         (((x) >> 12) & 0xF)
-#define   C_028A3C_Y_OFFSET                            0xFFFF0FFF
-#define   S_028A3C_Z_CONV(x)                           (((x) & 0xF) << 16)
-#define   G_028A3C_Z_CONV(x)                           (((x) >> 16) & 0xF)
-#define   C_028A3C_Z_CONV                              0xFFF0FFFF
-#define   S_028A3C_Z_OFFSET(x)                         (((x) & 0xF) << 20)
-#define   G_028A3C_Z_OFFSET(x)                         (((x) >> 20) & 0xF)
-#define   C_028A3C_Z_OFFSET                            0xFF0FFFFF
-#define   S_028A3C_W_CONV(x)                           (((x) & 0xF) << 24)
-#define   G_028A3C_W_CONV(x)                           (((x) >> 24) & 0xF)
-#define   C_028A3C_W_CONV                              0xF0FFFFFF
-#define   S_028A3C_W_OFFSET(x)                         (((x) & 0xF) << 28)
-#define   G_028A3C_W_OFFSET(x)                         (((x) >> 28) & 0xF)
-#define   C_028A3C_W_OFFSET                            0x0FFFFFFF
-#define R_028A40_VGT_GS_MODE                         0x028A40
-#define   S_028A40_MODE(x)                             (((x) & 0x3) << 0)
-#define   G_028A40_MODE(x)                             (((x) >> 0) & 0x3)
-#define   C_028A40_MODE                                0xFFFFFFFC
-#define   S_028A40_ES_PASSTHRU(x)                      (((x) & 0x1) << 2)
-#define   G_028A40_ES_PASSTHRU(x)                      (((x) >> 2) & 0x1)
-#define   C_028A40_ES_PASSTHRU                         0xFFFFFFFB
-#define   S_028A40_CUT_MODE(x)                         (((x) & 0x3) << 3)
-#define   G_028A40_CUT_MODE(x)                         (((x) >> 3) & 0x3)
-#define   C_028A40_CUT_MODE                            0xFFFFFFE7
-#define R_028A4C_PA_SC_MODE_CNTL                     0x028A4C
-#define   S_028A4C_MSAA_ENABLE(x)                      (((x) & 0x1) << 0)
-#define   G_028A4C_MSAA_ENABLE(x)                      (((x) >> 0) & 0x1)
-#define   C_028A4C_MSAA_ENABLE                         0xFFFFFFFE
-#define   S_028A4C_CLIPRECT_ENABLE(x)                  (((x) & 0x1) << 1)
-#define   G_028A4C_CLIPRECT_ENABLE(x)                  (((x) >> 1) & 0x1)
-#define   C_028A4C_CLIPRECT_ENABLE                     0xFFFFFFFD
-#define   S_028A4C_LINE_STIPPLE_ENABLE(x)              (((x) & 0x1) << 2)
-#define   G_028A4C_LINE_STIPPLE_ENABLE(x)              (((x) >> 2) & 0x1)
-#define   C_028A4C_LINE_STIPPLE_ENABLE                 0xFFFFFFFB
-#define   S_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x)     (((x) & 0x1) << 3)
-#define   G_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x)     (((x) >> 3) & 0x1)
-#define   C_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB        0xFFFFFFF7
-#define   S_028A4C_WALK_ORDER_ENABLE(x)                (((x) & 0x1) << 4)
-#define   G_028A4C_WALK_ORDER_ENABLE(x)                (((x) >> 4) & 0x1)
-#define   C_028A4C_WALK_ORDER_ENABLE                   0xFFFFFFEF
-#define   S_028A4C_HALVE_DETAIL_SAMPLE_PERF(x)         (((x) & 0x1) << 5)
-#define   G_028A4C_HALVE_DETAIL_SAMPLE_PERF(x)         (((x) >> 5) & 0x1)
-#define   C_028A4C_HALVE_DETAIL_SAMPLE_PERF            0xFFFFFFDF
-#define   S_028A4C_WALK_SIZE(x)                        (((x) & 0x1) << 6)
-#define   G_028A4C_WALK_SIZE(x)                        (((x) >> 6) & 0x1)
-#define   C_028A4C_WALK_SIZE                           0xFFFFFFBF
-#define   S_028A4C_WALK_ALIGNMENT(x)                   (((x) & 0x1) << 7)
-#define   G_028A4C_WALK_ALIGNMENT(x)                   (((x) >> 7) & 0x1)
-#define   C_028A4C_WALK_ALIGNMENT                      0xFFFFFF7F
-#define   S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x)         (((x) & 0x1) << 8)
-#define   G_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x)         (((x) >> 8) & 0x1)
-#define   C_028A4C_WALK_ALIGN8_PRIM_FITS_ST            0xFFFFFEFF
-#define   S_028A4C_TILE_COVER_NO_SCISSOR(x)            (((x) & 0x1) << 9)
-#define   G_028A4C_TILE_COVER_NO_SCISSOR(x)            (((x) >> 9) & 0x1)
-#define   C_028A4C_TILE_COVER_NO_SCISSOR               0xFFFFFDFF
-#define   S_028A4C_KILL_PIX_POST_HI_Z(x)               (((x) & 0x1) << 10)
-#define   G_028A4C_KILL_PIX_POST_HI_Z(x)               (((x) >> 10) & 0x1)
-#define   C_028A4C_KILL_PIX_POST_HI_Z                  0xFFFFFBFF
-#define   S_028A4C_KILL_PIX_POST_DETAIL_MASK(x)        (((x) & 0x1) << 11)
-#define   G_028A4C_KILL_PIX_POST_DETAIL_MASK(x)        (((x) >> 11) & 0x1)
-#define   C_028A4C_KILL_PIX_POST_DETAIL_MASK           0xFFFFF7FF
-#define   S_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x)      (((x) & 0x1) << 12)
-#define   G_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x)      (((x) >> 12) & 0x1)
-#define   C_028A4C_MULTI_CHIP_SUPERTILE_ENABLE         0xFFFFEFFF
-#define   S_028A4C_TILE_COVER_DISABLE(x)               (((x) & 0x1) << 13)
-#define   G_028A4C_TILE_COVER_DISABLE(x)               (((x) >> 13) & 0x1)
-#define   C_028A4C_TILE_COVER_DISABLE                  0xFFFFDFFF
-#define   S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x)          (((x) & 0x1) << 14)
-#define   G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x)          (((x) >> 14) & 0x1)
-#define   C_028A4C_FORCE_EOV_CNTDWN_ENABLE             0xFFFFBFFF
-#define   S_028A4C_FORCE_EOV_TILE_ENABLE(x)            (((x) & 0x1) << 15)
-#define   G_028A4C_FORCE_EOV_TILE_ENABLE(x)            (((x) >> 15) & 0x1)
-#define   C_028A4C_FORCE_EOV_TILE_ENABLE               0xFFFF7FFF
-#define   S_028A4C_FORCE_EOV_REZ_ENABLE(x)             (((x) & 0x1) << 16)
-#define   G_028A4C_FORCE_EOV_REZ_ENABLE(x)             (((x) >> 16) & 0x1)
-#define   C_028A4C_FORCE_EOV_REZ_ENABLE                0xFFFEFFFF
-#define   S_028A4C_PS_ITER_SAMPLE(x)                   (((x) & 0x1) << 17)
-#define   G_028A4C_PS_ITER_SAMPLE(x)                   (((x) >> 17) & 0x1)
-#define   C_028A4C_PS_ITER_SAMPLE                      0xFFFDFFFF
-#define R_028A84_VGT_PRIMITIVEID_EN                  0x028A84
-#define   S_028A84_PRIMITIVEID_EN(x)                   (((x) & 0x1) << 0)
-#define   G_028A84_PRIMITIVEID_EN(x)                   (((x) >> 0) & 0x1)
-#define   C_028A84_PRIMITIVEID_EN                      0xFFFFFFFE
-#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN          0x028A94
-#define   S_028A94_RESET_EN(x)                         (((x) & 0x1) << 0)
-#define   G_028A94_RESET_EN(x)                         (((x) >> 0) & 0x1)
-#define   C_028A94_RESET_EN                            0xFFFFFFFE
-#define R_028AA0_VGT_INSTANCE_STEP_RATE_0            0x028AA0
-#define   S_028AA0_STEP_RATE(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028AA0_STEP_RATE(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028AA0_STEP_RATE                           0x00000000
-#define R_028AA4_VGT_INSTANCE_STEP_RATE_1            0x028AA4
-#define   S_028AA4_STEP_RATE(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028AA4_STEP_RATE(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028AA4_STEP_RATE                           0x00000000
-#define R_028AB0_VGT_STRMOUT_EN                      0x028AB0
-#define   S_028AB0_STREAMOUT(x)                        (((x) & 0x1) << 0)
-#define   G_028AB0_STREAMOUT(x)                        (((x) >> 0) & 0x1)
-#define   C_028AB0_STREAMOUT                           0xFFFFFFFE
-#define R_028AB4_VGT_REUSE_OFF                       0x028AB4
-#define   S_028AB4_REUSE_OFF(x)                        (((x) & 0x1) << 0)
-#define   G_028AB4_REUSE_OFF(x)                        (((x) >> 0) & 0x1)
-#define   C_028AB4_REUSE_OFF                           0xFFFFFFFE
-#define R_028AB8_VGT_VTX_CNT_EN                      0x028AB8
-#define   S_028AB8_VTX_CNT_EN(x)                       (((x) & 0x1) << 0)
-#define   G_028AB8_VTX_CNT_EN(x)                       (((x) >> 0) & 0x1)
-#define   C_028AB8_VTX_CNT_EN                          0xFFFFFFFE
-#define R_028B20_VGT_STRMOUT_BUFFER_EN               0x028B20
-#define   S_028B20_BUFFER_0_EN(x)                      (((x) & 0x1) << 0)
-#define   G_028B20_BUFFER_0_EN(x)                      (((x) >> 0) & 0x1)
-#define   C_028B20_BUFFER_0_EN                         0xFFFFFFFE
-#define   S_028B20_BUFFER_1_EN(x)                      (((x) & 0x1) << 1)
-#define   G_028B20_BUFFER_1_EN(x)                      (((x) >> 1) & 0x1)
-#define   C_028B20_BUFFER_1_EN                         0xFFFFFFFD
-#define   S_028B20_BUFFER_2_EN(x)                      (((x) & 0x1) << 2)
-#define   G_028B20_BUFFER_2_EN(x)                      (((x) >> 2) & 0x1)
-#define   C_028B20_BUFFER_2_EN                         0xFFFFFFFB
-#define   S_028B20_BUFFER_3_EN(x)                      (((x) & 0x1) << 3)
-#define   G_028B20_BUFFER_3_EN(x)                      (((x) >> 3) & 0x1)
-#define   C_028B20_BUFFER_3_EN                         0xFFFFFFF7
-#define R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX    0x028C20
-#define   S_028C20_S4_X(x)                             (((x) & 0xF) << 0)
-#define   G_028C20_S4_X(x)                             (((x) >> 0) & 0xF)
-#define   C_028C20_S4_X                                0xFFFFFFF0
-#define   S_028C20_S4_Y(x)                             (((x) & 0xF) << 4)
-#define   G_028C20_S4_Y(x)                             (((x) >> 4) & 0xF)
-#define   C_028C20_S4_Y                                0xFFFFFF0F
-#define   S_028C20_S5_X(x)                             (((x) & 0xF) << 8)
-#define   G_028C20_S5_X(x)                             (((x) >> 8) & 0xF)
-#define   C_028C20_S5_X                                0xFFFFF0FF
-#define   S_028C20_S5_Y(x)                             (((x) & 0xF) << 12)
-#define   G_028C20_S5_Y(x)                             (((x) >> 12) & 0xF)
-#define   C_028C20_S5_Y                                0xFFFF0FFF
-#define   S_028C20_S6_X(x)                             (((x) & 0xF) << 16)
-#define   G_028C20_S6_X(x)                             (((x) >> 16) & 0xF)
-#define   C_028C20_S6_X                                0xFFF0FFFF
-#define   S_028C20_S6_Y(x)                             (((x) & 0xF) << 20)
-#define   G_028C20_S6_Y(x)                             (((x) >> 20) & 0xF)
-#define   C_028C20_S6_Y                                0xFF0FFFFF
-#define   S_028C20_S7_X(x)                             (((x) & 0xF) << 24)
-#define   G_028C20_S7_X(x)                             (((x) >> 24) & 0xF)
-#define   C_028C20_S7_X                                0xF0FFFFFF
-#define   S_028C20_S7_Y(x)                             (((x) & 0xF) << 28)
-#define   G_028C20_S7_Y(x)                             (((x) >> 28) & 0xF)
-#define   C_028C20_S7_Y                                0x0FFFFFFF
-#define R_028C30_CB_CLRCMP_CONTROL                   0x028C30
-#define   S_028C30_CLRCMP_FCN_SRC(x)                   (((x) & 0x7) << 0)
-#define   G_028C30_CLRCMP_FCN_SRC(x)                   (((x) >> 0) & 0x7)
-#define   C_028C30_CLRCMP_FCN_SRC                      0xFFFFFFF8
-#define   S_028C30_CLRCMP_FCN_DST(x)                   (((x) & 0x7) << 8)
-#define   G_028C30_CLRCMP_FCN_DST(x)                   (((x) >> 8) & 0x7)
-#define   C_028C30_CLRCMP_FCN_DST                      0xFFFFF8FF
-#define   S_028C30_CLRCMP_FCN_SEL(x)                   (((x) & 0x3) << 24)
-#define   G_028C30_CLRCMP_FCN_SEL(x)                   (((x) >> 24) & 0x3)
-#define   C_028C30_CLRCMP_FCN_SEL                      0xFCFFFFFF
-#define R_028C34_CB_CLRCMP_SRC                       0x028C34
-#define   S_028C34_CLRCMP_SRC(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_028C34_CLRCMP_SRC(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028C34_CLRCMP_SRC                          0x00000000
-#define R_028C38_CB_CLRCMP_DST                       0x028C38
-#define   S_028C38_CLRCMP_DST(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_028C38_CLRCMP_DST(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028C38_CLRCMP_DST                          0x00000000
-#define R_028C3C_CB_CLRCMP_MSK                       0x028C3C
-#define   S_028C3C_CLRCMP_MSK(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_028C3C_CLRCMP_MSK(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028C3C_CLRCMP_MSK                          0x00000000
-#define R_0085F0_CP_COHER_CNTL                       0x0085F0
-#define   S_0085F0_DEST_BASE_0_ENA(x)                  (((x) & 0x1) << 0)
-#define   G_0085F0_DEST_BASE_0_ENA(x)                  (((x) >> 0) & 0x1)
-#define   C_0085F0_DEST_BASE_0_ENA                     0xFFFFFFFE
-#define   S_0085F0_DEST_BASE_1_ENA(x)                  (((x) & 0x1) << 1)
-#define   G_0085F0_DEST_BASE_1_ENA(x)                  (((x) >> 1) & 0x1)
-#define   C_0085F0_DEST_BASE_1_ENA                     0xFFFFFFFD
-#define   S_0085F0_SO0_DEST_BASE_ENA(x)                (((x) & 0x1) << 2)
-#define   G_0085F0_SO0_DEST_BASE_ENA(x)                (((x) >> 2) & 0x1)
-#define   C_0085F0_SO0_DEST_BASE_ENA                   0xFFFFFFFB
-#define   S_0085F0_SO1_DEST_BASE_ENA(x)                (((x) & 0x1) << 3)
-#define   G_0085F0_SO1_DEST_BASE_ENA(x)                (((x) >> 3) & 0x1)
-#define   C_0085F0_SO1_DEST_BASE_ENA                   0xFFFFFFF7
-#define   S_0085F0_SO2_DEST_BASE_ENA(x)                (((x) & 0x1) << 4)
-#define   G_0085F0_SO2_DEST_BASE_ENA(x)                (((x) >> 4) & 0x1)
-#define   C_0085F0_SO2_DEST_BASE_ENA                   0xFFFFFFEF
-#define   S_0085F0_SO3_DEST_BASE_ENA(x)                (((x) & 0x1) << 5)
-#define   G_0085F0_SO3_DEST_BASE_ENA(x)                (((x) >> 5) & 0x1)
-#define   C_0085F0_SO3_DEST_BASE_ENA                   0xFFFFFFDF
-#define   S_0085F0_CB0_DEST_BASE_ENA(x)                (((x) & 0x1) << 6)
-#define   G_0085F0_CB0_DEST_BASE_ENA(x)                (((x) >> 6) & 0x1)
-#define   C_0085F0_CB0_DEST_BASE_ENA                   0xFFFFFFBF
-#define   S_0085F0_CB1_DEST_BASE_ENA(x)                (((x) & 0x1) << 7)
-#define   G_0085F0_CB1_DEST_BASE_ENA(x)                (((x) >> 7) & 0x1)
-#define   C_0085F0_CB1_DEST_BASE_ENA                   0xFFFFFF7F
-#define   S_0085F0_CB2_DEST_BASE_ENA(x)                (((x) & 0x1) << 8)
-#define   G_0085F0_CB2_DEST_BASE_ENA(x)                (((x) >> 8) & 0x1)
-#define   C_0085F0_CB2_DEST_BASE_ENA                   0xFFFFFEFF
-#define   S_0085F0_CB3_DEST_BASE_ENA(x)                (((x) & 0x1) << 9)
-#define   G_0085F0_CB3_DEST_BASE_ENA(x)                (((x) >> 9) & 0x1)
-#define   C_0085F0_CB3_DEST_BASE_ENA                   0xFFFFFDFF
-#define   S_0085F0_CB4_DEST_BASE_ENA(x)                (((x) & 0x1) << 10)
-#define   G_0085F0_CB4_DEST_BASE_ENA(x)                (((x) >> 10) & 0x1)
-#define   C_0085F0_CB4_DEST_BASE_ENA                   0xFFFFFBFF
-#define   S_0085F0_CB5_DEST_BASE_ENA(x)                (((x) & 0x1) << 11)
-#define   G_0085F0_CB5_DEST_BASE_ENA(x)                (((x) >> 11) & 0x1)
-#define   C_0085F0_CB5_DEST_BASE_ENA                   0xFFFFF7FF
-#define   S_0085F0_CB6_DEST_BASE_ENA(x)                (((x) & 0x1) << 12)
-#define   G_0085F0_CB6_DEST_BASE_ENA(x)                (((x) >> 12) & 0x1)
-#define   C_0085F0_CB6_DEST_BASE_ENA                   0xFFFFEFFF
-#define   S_0085F0_CB7_DEST_BASE_ENA(x)                (((x) & 0x1) << 13)
-#define   G_0085F0_CB7_DEST_BASE_ENA(x)                (((x) >> 13) & 0x1)
-#define   C_0085F0_CB7_DEST_BASE_ENA                   0xFFFFDFFF
-#define   S_0085F0_DB_DEST_BASE_ENA(x)                 (((x) & 0x1) << 14)
-#define   G_0085F0_DB_DEST_BASE_ENA(x)                 (((x) >> 14) & 0x1)
-#define   C_0085F0_DB_DEST_BASE_ENA                    0xFFFFBFFF
-#define   S_0085F0_CR_DEST_BASE_ENA(x)                 (((x) & 0x1) << 15)
-#define   G_0085F0_CR_DEST_BASE_ENA(x)                 (((x) >> 15) & 0x1)
-#define   C_0085F0_CR_DEST_BASE_ENA                    0xFFFF7FFF
-#define   S_0085F0_TC_ACTION_ENA(x)                    (((x) & 0x1) << 23)
-#define   G_0085F0_TC_ACTION_ENA(x)                    (((x) >> 23) & 0x1)
-#define   C_0085F0_TC_ACTION_ENA                       0xFF7FFFFF
-#define   S_0085F0_VC_ACTION_ENA(x)                    (((x) & 0x1) << 24)
-#define   G_0085F0_VC_ACTION_ENA(x)                    (((x) >> 24) & 0x1)
-#define   C_0085F0_VC_ACTION_ENA                       0xFEFFFFFF
-#define   S_0085F0_CB_ACTION_ENA(x)                    (((x) & 0x1) << 25)
-#define   G_0085F0_CB_ACTION_ENA(x)                    (((x) >> 25) & 0x1)
-#define   C_0085F0_CB_ACTION_ENA                       0xFDFFFFFF
-#define   S_0085F0_DB_ACTION_ENA(x)                    (((x) & 0x1) << 26)
-#define   G_0085F0_DB_ACTION_ENA(x)                    (((x) >> 26) & 0x1)
-#define   C_0085F0_DB_ACTION_ENA                       0xFBFFFFFF
-#define   S_0085F0_SH_ACTION_ENA(x)                    (((x) & 0x1) << 27)
-#define   G_0085F0_SH_ACTION_ENA(x)                    (((x) >> 27) & 0x1)
-#define   C_0085F0_SH_ACTION_ENA                       0xF7FFFFFF
-#define   S_0085F0_SMX_ACTION_ENA(x)                   (((x) & 0x1) << 28)
-#define   G_0085F0_SMX_ACTION_ENA(x)                   (((x) >> 28) & 0x1)
-#define   C_0085F0_SMX_ACTION_ENA                      0xEFFFFFFF
-#define   S_0085F0_CR0_ACTION_ENA(x)                   (((x) & 0x1) << 29)
-#define   G_0085F0_CR0_ACTION_ENA(x)                   (((x) >> 29) & 0x1)
-#define   C_0085F0_CR0_ACTION_ENA                      0xDFFFFFFF
-#define   S_0085F0_CR1_ACTION_ENA(x)                   (((x) & 0x1) << 30)
-#define   G_0085F0_CR1_ACTION_ENA(x)                   (((x) >> 30) & 0x1)
-#define   C_0085F0_CR1_ACTION_ENA                      0xBFFFFFFF
-#define   S_0085F0_CR2_ACTION_ENA(x)                   (((x) & 0x1) << 31)
-#define   G_0085F0_CR2_ACTION_ENA(x)                   (((x) >> 31) & 0x1)
-#define   C_0085F0_CR2_ACTION_ENA                      0x7FFFFFFF
-
-
-#define R_02812C_CB_CLEAR_ALPHA                      0x02812C
-#define   S_02812C_CLEAR_ALPHA(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_02812C_CLEAR_ALPHA(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_02812C_CLEAR_ALPHA                         0x00000000
-#define R_028128_CB_CLEAR_BLUE                       0x028128
-#define   S_028128_CLEAR_BLUE(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_028128_CLEAR_BLUE(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028128_CLEAR_BLUE                          0x00000000
-#define R_028124_CB_CLEAR_GREEN                      0x028124
-#define   S_028124_CLEAR_GREEN(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_028124_CLEAR_GREEN(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028124_CLEAR_GREEN                         0x00000000
-#define R_028120_CB_CLEAR_RED                        0x028120
-#define   S_028120_CLEAR_RED(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028120_CLEAR_RED(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028120_CLEAR_RED                           0x00000000
-#define R_02842C_CB_FOG_BLUE                         0x02842C
-#define   S_02842C_FOG_BLUE(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_02842C_FOG_BLUE(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_02842C_FOG_BLUE                            0x00000000
-#define R_028428_CB_FOG_GREEN                        0x028428
-#define   S_028428_FOG_GREEN(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028428_FOG_GREEN(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028428_FOG_GREEN                           0x00000000
-#define R_028424_CB_FOG_RED                          0x028424
-#define   S_028424_FOG_RED(x)                          (((x) & 0xFFFFFFFF) << 0)
-#define   G_028424_FOG_RED(x)                          (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028424_FOG_RED                             0x00000000
-#define R_03C000_SQ_TEX_SAMPLER_WORD0_0              0x03C000
-#define   S_03C000_CLAMP_X(x)                          (((x) & 0x7) << 0)
-#define   G_03C000_CLAMP_X(x)                          (((x) >> 0) & 0x7)
-#define   C_03C000_CLAMP_X                             0xFFFFFFF8
-#define   S_03C000_CLAMP_Y(x)                          (((x) & 0x7) << 3)
-#define   G_03C000_CLAMP_Y(x)                          (((x) >> 3) & 0x7)
-#define   C_03C000_CLAMP_Y                             0xFFFFFFC7
-#define   S_03C000_CLAMP_Z(x)                          (((x) & 0x7) << 6)
-#define   G_03C000_CLAMP_Z(x)                          (((x) >> 6) & 0x7)
-#define   C_03C000_CLAMP_Z                             0xFFFFFE3F
-#define   S_03C000_XY_MAG_FILTER(x)                    (((x) & 0x7) << 9)
-#define   G_03C000_XY_MAG_FILTER(x)                    (((x) >> 9) & 0x7)
-#define   C_03C000_XY_MAG_FILTER                       0xFFFFF1FF
-#define   S_03C000_XY_MIN_FILTER(x)                    (((x) & 0x7) << 12)
-#define   G_03C000_XY_MIN_FILTER(x)                    (((x) >> 12) & 0x7)
-#define   C_03C000_XY_MIN_FILTER                       0xFFFF8FFF
-#define   S_03C000_Z_FILTER(x)                         (((x) & 0x3) << 15)
-#define   G_03C000_Z_FILTER(x)                         (((x) >> 15) & 0x3)
-#define   C_03C000_Z_FILTER                            0xFFFE7FFF
-#define   S_03C000_MIP_FILTER(x)                       (((x) & 0x3) << 17)
-#define   G_03C000_MIP_FILTER(x)                       (((x) >> 17) & 0x3)
-#define   C_03C000_MIP_FILTER                          0xFFF9FFFF
-#define   S_03C000_BORDER_COLOR_TYPE(x)                (((x) & 0x3) << 22)
-#define   G_03C000_BORDER_COLOR_TYPE(x)                (((x) >> 22) & 0x3)
-#define   C_03C000_BORDER_COLOR_TYPE                   0xFF3FFFFF
-#define   S_03C000_POINT_SAMPLING_CLAMP(x)             (((x) & 0x1) << 24)
-#define   G_03C000_POINT_SAMPLING_CLAMP(x)             (((x) >> 24) & 0x1)
-#define   C_03C000_POINT_SAMPLING_CLAMP                0xFEFFFFFF
-#define   S_03C000_TEX_ARRAY_OVERRIDE(x)               (((x) & 0x1) << 25)
-#define   G_03C000_TEX_ARRAY_OVERRIDE(x)               (((x) >> 25) & 0x1)
-#define   C_03C000_TEX_ARRAY_OVERRIDE                  0xFDFFFFFF
-#define   S_03C000_DEPTH_COMPARE_FUNCTION(x)           (((x) & 0x7) << 26)
-#define   G_03C000_DEPTH_COMPARE_FUNCTION(x)           (((x) >> 26) & 0x7)
-#define   C_03C000_DEPTH_COMPARE_FUNCTION              0xE3FFFFFF
-#define   S_03C000_CHROMA_KEY(x)                       (((x) & 0x3) << 29)
-#define   G_03C000_CHROMA_KEY(x)                       (((x) >> 29) & 0x3)
-#define   C_03C000_CHROMA_KEY                          0x9FFFFFFF
-#define   S_03C000_LOD_USES_MINOR_AXIS(x)              (((x) & 0x1) << 31)
-#define   G_03C000_LOD_USES_MINOR_AXIS(x)              (((x) >> 31) & 0x1)
-#define   C_03C000_LOD_USES_MINOR_AXIS                 0x7FFFFFFF
-#define R_03C004_SQ_TEX_SAMPLER_WORD1_0              0x03C004
-#define   S_03C004_MIN_LOD(x)                          (((x) & 0x3FF) << 0)
-#define   G_03C004_MIN_LOD(x)                          (((x) >> 0) & 0x3FF)
-#define   C_03C004_MIN_LOD                             0xFFFFFC00
-#define   S_03C004_MAX_LOD(x)                          (((x) & 0x3FF) << 10)
-#define   G_03C004_MAX_LOD(x)                          (((x) >> 10) & 0x3FF)
-#define   C_03C004_MAX_LOD                             0xFFF003FF
-#define   S_03C004_LOD_BIAS(x)                         (((x) & 0xFFF) << 20)
-#define   G_03C004_LOD_BIAS(x)                         (((x) >> 20) & 0xFFF)
-#define   C_03C004_LOD_BIAS                            0x000FFFFF
-#define R_03C008_SQ_TEX_SAMPLER_WORD2_0              0x03C008
-#define   S_03C008_LOD_BIAS_SEC(x)                     (((x) & 0xFFF) << 0)
-#define   G_03C008_LOD_BIAS_SEC(x)                     (((x) >> 0) & 0xFFF)
-#define   C_03C008_LOD_BIAS_SEC                        0xFFFFF000
-#define   S_03C008_MC_COORD_TRUNCATE(x)                (((x) & 0x1) << 12)
-#define   G_03C008_MC_COORD_TRUNCATE(x)                (((x) >> 12) & 0x1)
-#define   C_03C008_MC_COORD_TRUNCATE                   0xFFFFEFFF
-#define   S_03C008_FORCE_DEGAMMA(x)                    (((x) & 0x1) << 13)
-#define   G_03C008_FORCE_DEGAMMA(x)                    (((x) >> 13) & 0x1)
-#define   C_03C008_FORCE_DEGAMMA                       0xFFFFDFFF
-#define   S_03C008_HIGH_PRECISION_FILTER(x)            (((x) & 0x1) << 14)
-#define   G_03C008_HIGH_PRECISION_FILTER(x)            (((x) >> 14) & 0x1)
-#define   C_03C008_HIGH_PRECISION_FILTER               0xFFFFBFFF
-#define   S_03C008_PERF_MIP(x)                         (((x) & 0x7) << 15)
-#define   G_03C008_PERF_MIP(x)                         (((x) >> 15) & 0x7)
-#define   C_03C008_PERF_MIP                            0xFFFC7FFF
-#define   S_03C008_PERF_Z(x)                           (((x) & 0x3) << 18)
-#define   G_03C008_PERF_Z(x)                           (((x) >> 18) & 0x3)
-#define   C_03C008_PERF_Z                              0xFFF3FFFF
-#define   S_03C008_FETCH_4(x)                          (((x) & 0x1) << 26)
-#define   G_03C008_FETCH_4(x)                          (((x) >> 26) & 0x1)
-#define   C_03C008_FETCH_4                             0xFBFFFFFF
-#define   S_03C008_SAMPLE_IS_PCF(x)                    (((x) & 0x1) << 27)
-#define   G_03C008_SAMPLE_IS_PCF(x)                    (((x) >> 27) & 0x1)
-#define   C_03C008_SAMPLE_IS_PCF                       0xF7FFFFFF
-#define   S_03C008_TYPE(x)                             (((x) & 0x1) << 31)
-#define   G_03C008_TYPE(x)                             (((x) >> 31) & 0x1)
-#define   C_03C008_TYPE                                0x7FFFFFFF
-#define R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA         0x00A40C
-#define   S_00A40C_BORDER_ALPHA(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A40C_BORDER_ALPHA(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A40C_BORDER_ALPHA                        0x00000000
-#define R_00A408_TD_PS_SAMPLER0_BORDER_BLUE          0x00A408
-#define   S_00A408_BORDER_BLUE(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A408_BORDER_BLUE(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A408_BORDER_BLUE                         0x00000000
-#define R_00A404_TD_PS_SAMPLER0_BORDER_GREEN         0x00A404
-#define   S_00A404_BORDER_GREEN(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A404_BORDER_GREEN(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A404_BORDER_GREEN                        0x00000000
-#define R_00A400_TD_PS_SAMPLER0_BORDER_RED           0x00A400
-#define   S_00A400_BORDER_RED(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A400_BORDER_RED(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A400_BORDER_RED                          0x00000000
-#define R_00A60C_TD_VS_SAMPLER0_BORDER_ALPHA         0x00A60C
-#define   S_00A60C_BORDER_ALPHA(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A60C_BORDER_ALPHA(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A60C_BORDER_ALPHA                        0x00000000
-#define R_00A608_TD_VS_SAMPLER0_BORDER_BLUE          0x00A608
-#define   S_00A608_BORDER_BLUE(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A608_BORDER_BLUE(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A608_BORDER_BLUE                         0x00000000
-#define R_00A604_TD_VS_SAMPLER0_BORDER_GREEN         0x00A604
-#define   S_00A604_BORDER_GREEN(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A604_BORDER_GREEN(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A604_BORDER_GREEN                        0x00000000
-#define R_00A600_TD_VS_SAMPLER0_BORDER_RED           0x00A600
-#define   S_00A600_BORDER_RED(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A600_BORDER_RED(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A600_BORDER_RED                          0x00000000
-#define R_00A80C_TD_GS_SAMPLER0_BORDER_ALPHA         0x00A80C
-#define   S_00A80C_BORDER_ALPHA(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A80C_BORDER_ALPHA(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A80C_BORDER_ALPHA                        0x00000000
-#define R_00A808_TD_GS_SAMPLER0_BORDER_BLUE          0x00A808
-#define   S_00A808_BORDER_BLUE(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A808_BORDER_BLUE(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A808_BORDER_BLUE                         0x00000000
-#define R_00A804_TD_GS_SAMPLER0_BORDER_GREEN         0x00A804
-#define   S_00A804_BORDER_GREEN(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A804_BORDER_GREEN(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A804_BORDER_GREEN                        0x00000000
-#define R_00A800_TD_GS_SAMPLER0_BORDER_RED           0x00A800
-#define   S_00A800_BORDER_RED(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A800_BORDER_RED(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A800_BORDER_RED                          0x00000000
-#define R_030000_SQ_ALU_CONSTANT0_0                  0x030000
-#define   S_030000_X(x)                                (((x) & 0xFFFFFFFF) << 0)
-#define   G_030000_X(x)                                (((x) >> 0) & 0xFFFFFFFF)
-#define   C_030000_X                                   0x00000000
-#define R_030004_SQ_ALU_CONSTANT1_0                  0x030004
-#define   S_030004_Y(x)                                (((x) & 0xFFFFFFFF) << 0)
-#define   G_030004_Y(x)                                (((x) >> 0) & 0xFFFFFFFF)
-#define   C_030004_Y                                   0x00000000
-#define R_030008_SQ_ALU_CONSTANT2_0                  0x030008
-#define   S_030008_Z(x)                                (((x) & 0xFFFFFFFF) << 0)
-#define   G_030008_Z(x)                                (((x) >> 0) & 0xFFFFFFFF)
-#define   C_030008_Z                                   0x00000000
-#define R_03000C_SQ_ALU_CONSTANT3_0                  0x03000C
-#define   S_03000C_W(x)                                (((x) & 0xFFFFFFFF) << 0)
-#define   G_03000C_W(x)                                (((x) >> 0) & 0xFFFFFFFF)
-#define   C_03000C_W                                   0x00000000
-#define R_0287E4_VGT_DMA_BASE_HI                     0x0287E4
-#define R_0287E8_VGT_DMA_BASE                        0x0287E8
-#define R_028E20_PA_CL_UCP0_X                        0x028E20
-#define R_028E24_PA_CL_UCP0_Y                        0x028E24
-#define R_028E28_PA_CL_UCP0_Z                        0x028E28
-#define R_028E2C_PA_CL_UCP0_W                        0x028E2C
-#define R_028E30_PA_CL_UCP1_X                        0x028E30
-#define R_028E34_PA_CL_UCP1_Y                        0x028E34
-#define R_028E38_PA_CL_UCP1_Z                        0x028E38
-#define R_028E3C_PA_CL_UCP1_W                        0x028E3C
-#define R_028E40_PA_CL_UCP2_X                        0x028E40
-#define R_028E44_PA_CL_UCP2_Y                        0x028E44
-#define R_028E48_PA_CL_UCP2_Z                        0x028E48
-#define R_028E4C_PA_CL_UCP2_W                        0x028E4C
-#define R_028E50_PA_CL_UCP3_X                        0x028E50
-#define R_028E54_PA_CL_UCP3_Y                        0x028E54
-#define R_028E58_PA_CL_UCP3_Z                        0x028E58
-#define R_028E5C_PA_CL_UCP3_W                        0x028E5C
-#define R_028E60_PA_CL_UCP4_X                        0x028E60
-#define R_028E64_PA_CL_UCP4_Y                        0x028E64
-#define R_028E68_PA_CL_UCP4_Z                        0x028E68
-#define R_028E6C_PA_CL_UCP4_W                        0x028E6C
-#define R_028E70_PA_CL_UCP5_X                        0x028E70
-#define R_028E74_PA_CL_UCP5_Y                        0x028E74
-#define R_028E78_PA_CL_UCP5_Z                        0x028E78
-#define R_028E7C_PA_CL_UCP5_W                        0x028E7C
-#define R_038000_RESOURCE0_WORD0                     0x038000
-#define R_038004_RESOURCE0_WORD1                     0x038004
-#define R_038008_RESOURCE0_WORD2                     0x038008
-#define R_03800C_RESOURCE0_WORD3                     0x03800C
-#define R_038010_RESOURCE0_WORD4                     0x038010
-#define R_038014_RESOURCE0_WORD5                     0x038014
-#define R_038018_RESOURCE0_WORD6                     0x038018
-
-#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0          0x00028140
-#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0          0x00028180
-#define R_028940_ALU_CONST_CACHE_PS_0                0x00028940
-#define R_028980_ALU_CONST_CACHE_VS_0                0x00028980
-
-#define R_03CFF0_SQ_VTX_BASE_VTX_LOC                 0x03CFF0
-#define R_03CFF4_SQ_VTX_START_INST_LOC               0x03CFF4
-
-#endif
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
deleted file mode 100644
index 45cf6f0..0000000
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jerome Glisse
- */
-#define _FILE_OFFSET_BITS 64
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <errno.h>
-#include "r600_priv.h"
-#include "xf86drm.h"
-#include "radeon_drm.h"
-
-int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo)
-{
-	struct drm_radeon_gem_mmap args;
-	void *ptr;
-	int r;
-
-	/* Zero out args to make valgrind happy */
-	memset(&args, 0, sizeof(args));
-	args.handle = bo->handle;
-	args.offset = 0;
-	args.size = (uint64_t)bo->size;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP,
-				&args, sizeof(args));
-	if (r) {
-		fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n",
-			bo, bo->handle, r);
-		return r;
-	}
-	ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr);
-	if (ptr == MAP_FAILED) {
-		fprintf(stderr, "%s failed to map bo\n", __func__);
-		return -errno;
-	}
-	bo->data = ptr;
-
-	bo->map_count++;
-	return 0;
-}
-
-static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo)
-{
-	if (bo->data) {
-		munmap(bo->data, bo->size);
-		bo->data = NULL;
-	}
-}
-
-struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			    unsigned size, unsigned alignment, unsigned initial_domain)
-{
-	struct radeon_bo *bo;
-	int r;
-
-	if (handle) {
-		pipe_mutex_lock(radeon->bo_handles_mutex);
-		bo = util_hash_table_get(radeon->bo_handles,
-					 (void *)(uintptr_t)handle);
-		if (bo) {
-			struct radeon_bo *b = NULL;
-			radeon_bo_reference(radeon, &b, bo);
-			goto done;
-		}
-	}
-	bo = calloc(1, sizeof(*bo));
-	if (bo == NULL) {
-		return NULL;
-	}
-	bo->size = size;
-	bo->handle = handle;
-	pipe_reference_init(&bo->reference, 1);
-	bo->alignment = alignment;
-	LIST_INITHEAD(&bo->fencedlist);
-
-	if (handle) {
-		struct drm_gem_open open_arg;
-
-		memset(&open_arg, 0, sizeof(open_arg));
-		open_arg.name = handle;
-		r = drmIoctl(radeon->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
-		if (r != 0) {
-			free(bo);
-			return NULL;
-		}
-		bo->name = handle;
-		bo->handle = open_arg.handle;
-		bo->size = open_arg.size;
-		bo->shared = TRUE;
-	} else {
-		struct drm_radeon_gem_create args = {};
-
-		args.size = size;
-		args.alignment = alignment;
-		args.initial_domain = initial_domain;
-		args.flags = 0;
-		args.handle = 0;
-		r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_CREATE,
-					&args, sizeof(args));
-		bo->handle = args.handle;
-		if (r) {
-			fprintf(stderr, "Failed to allocate :\n");
-			fprintf(stderr, "   size      : %d bytes\n", size);
-			fprintf(stderr, "   alignment : %d bytes\n", alignment);
-			free(bo);
-			return NULL;
-		}
-	}
-
-	if (handle)
-		util_hash_table_set(radeon->bo_handles, (void *)(uintptr_t)handle, bo);
-done:
-	if (handle)
-		pipe_mutex_unlock(radeon->bo_handles_mutex);
-
-	return bo;
-}
-
-static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo)
-{
-	struct drm_gem_close args;
-
-	if (bo->name) {
-		pipe_mutex_lock(radeon->bo_handles_mutex);
-		util_hash_table_remove(radeon->bo_handles,
-				       (void *)(uintptr_t)bo->name);
-		pipe_mutex_unlock(radeon->bo_handles_mutex);
-	}
-	LIST_DEL(&bo->fencedlist);
-	radeon_bo_fixed_unmap(radeon, bo);
-	memset(&args, 0, sizeof(args));
-	args.handle = bo->handle;
-	drmIoctl(radeon->fd, DRM_IOCTL_GEM_CLOSE, &args);
-	memset(bo, 0, sizeof(struct radeon_bo));
-	free(bo);
-}
-
-void radeon_bo_reference(struct radeon *radeon,
-			 struct radeon_bo **dst,
-			 struct radeon_bo *src)
-{
-	struct radeon_bo *old = *dst;
-	if (pipe_reference(&(*dst)->reference, &src->reference)) {
-		radeon_bo_destroy(radeon, old);
-	}
-	*dst = src;
-}
-
-int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo)
-{
-	struct drm_radeon_gem_wait_idle args;
-	int ret;
-
-        if (!bo->shared) {
-                if (!bo->fence)
-			return 0;
-		if (bo->fence <= *radeon->cfence) {
-			LIST_DELINIT(&bo->fencedlist);
-			bo->fence = 0;
-			return 0;
-		}
-        }
-
-	/* Zero out args to make valgrind happy */
-	memset(&args, 0, sizeof(args));
-	args.handle = bo->handle;
-	do {
-		ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_WAIT_IDLE,
-					&args, sizeof(args));
-	} while (ret == -EBUSY);
-	return ret;
-}
-
-int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain)
-{
-	struct drm_radeon_gem_busy args;
-	int ret;
-
-	if (!bo->shared) {
-		if (!bo->fence)
-			return 0;
-		if (bo->fence <= *radeon->cfence) {
-			LIST_DELINIT(&bo->fencedlist);
-			bo->fence = 0;
-			return 0;
-		}
-	}
-
-	memset(&args, 0, sizeof(args));
-	args.handle = bo->handle;
-	args.domain = 0;
-
-	ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY,
-			&args, sizeof(args));
-
-	*domain = args.domain;
-	return ret;
-}
-
-int radeon_bo_get_tiling_flags(struct radeon *radeon,
-			       struct radeon_bo *bo,
-			       uint32_t *tiling_flags,
-			       uint32_t *pitch)
-{
-	struct drm_radeon_gem_get_tiling args = {};
-	int ret;
-
-	args.handle = bo->handle;
-	ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING,
-				  &args, sizeof(args));
-	if (ret)
-		return ret;
-
-	*tiling_flags = args.tiling_flags;
-	*pitch = args.pitch;
-	return ret;
-}
-
-int radeon_bo_get_name(struct radeon *radeon,
-		       struct radeon_bo *bo,
-		       uint32_t *name)
-{
-	struct drm_gem_flink flink;
-	int ret;
-
-	flink.handle = bo->handle;
-	ret = drmIoctl(radeon->fd, DRM_IOCTL_GEM_FLINK, &flink);
-	if (ret)
-		return ret;
-
-	*name = flink.name;
-	return ret;
-}
diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c
index f54a7c8..8757241 100644
--- a/src/gallium/winsys/r600/drm/radeon_pciid.c
+++ b/src/gallium/winsys/r600/drm/radeon_pciid.c
@@ -23,7 +23,6 @@
  * Authors:
  *      Jerome Glisse
  */
-#include <stdlib.h>
 #include "r600_priv.h"
 
 struct pci_id {
diff --git a/src/gallium/winsys/radeon/drm/Android.mk b/src/gallium/winsys/radeon/drm/Android.mk
new file mode 100644
index 0000000..c192249
--- /dev/null
+++ b/src/gallium/winsys/radeon/drm/Android.mk
@@ -0,0 +1,40 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_C_INCLUDES := \
+	$(DRM_TOP) \
+	$(DRM_TOP)/include/drm
+
+LOCAL_MODULE := libmesa_winsys_radeon
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile
index 913e6ad..68b9efe 100644
--- a/src/gallium/winsys/radeon/drm/Makefile
+++ b/src/gallium/winsys/radeon/drm/Makefile
@@ -4,10 +4,8 @@
 
 LIBNAME = radeonwinsys
 
-C_SOURCES = \
-	radeon_drm_bo.c \
-	radeon_drm_cs.c \
-	radeon_drm_winsys.c
+# get C_SOURCES
+include Makefile.sources
 
 LIBRARY_INCLUDES = -I$(TOP)/include \
 		   $(shell pkg-config libdrm --cflags-only-I)
diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources
new file mode 100644
index 0000000..1d18d61
--- /dev/null
+++ b/src/gallium/winsys/radeon/drm/Makefile.sources
@@ -0,0 +1,4 @@
+C_SOURCES := \
+	radeon_drm_bo.c \
+	radeon_drm_cs.c \
+	radeon_drm_winsys.c
diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript
index 2edb1e9..e5048d6 100644
--- a/src/gallium/winsys/radeon/drm/SConscript
+++ b/src/gallium/winsys/radeon/drm/SConscript
@@ -2,11 +2,7 @@
 
 env = env.Clone()
 
-radeon_sources = [
-    'radeon_drm_bo.c',
-    'radeon_drm_cs.c',
-    'radeon_drm_winsys.c',
-]
+radeon_sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
 
 env.PkgUseModules('DRM')
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index b6f1272..b45efe5 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -31,11 +31,11 @@
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
 #include "os/os_thread.h"
+#include "os/os_mman.h"
 
 #include "state_tracker/drm_driver.h"
 
 #include <sys/ioctl.h>
-#include <sys/mman.h>
 #include <xf86drm.h>
 #include <errno.h>
 
@@ -43,6 +43,21 @@
 #define RADEON_BO_FLAGS_MICRO_TILE  2
 #define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20
 
+#ifndef DRM_RADEON_GEM_WAIT
+#define DRM_RADEON_GEM_WAIT		0x2b
+
+#define RADEON_GEM_NO_WAIT	0x1
+#define RADEON_GEM_USAGE_READ	0x2
+#define RADEON_GEM_USAGE_WRITE	0x4
+
+struct drm_radeon_gem_wait {
+	uint32_t	handle;
+	uint32_t        flags;  /* one of RADEON_GEM_* */
+};
+
+#endif
+
+
 extern const struct pb_vtbl radeon_bo_vtbl;
 
 
@@ -87,39 +102,49 @@
     return bo;
 }
 
-static void radeon_bo_wait(struct pb_buffer *_buf)
+static void radeon_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage)
 {
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
-    struct drm_radeon_gem_wait_idle args = {};
+    struct radeon_bo *bo = get_radeon_bo(_buf);
 
     while (p_atomic_read(&bo->num_active_ioctls)) {
         sched_yield();
     }
 
-    args.handle = bo->handle;
-    while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
-                               &args, sizeof(args)) == -EBUSY);
-
-    bo->busy_for_write = FALSE;
+    if (bo->rws->info.drm_minor >= 12) {
+        struct drm_radeon_gem_wait args = {};
+        args.handle = bo->handle;
+        args.flags = usage;
+        while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT,
+                                   &args, sizeof(args)) == -EBUSY);
+    } else {
+        struct drm_radeon_gem_wait_idle args = {};
+        args.handle = bo->handle;
+        while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
+                                   &args, sizeof(args)) == -EBUSY);
+    }
 }
 
-static boolean radeon_bo_is_busy(struct pb_buffer *_buf)
+static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
+                                 enum radeon_bo_usage usage)
 {
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
-    struct drm_radeon_gem_busy args = {};
-    boolean busy;
+    struct radeon_bo *bo = get_radeon_bo(_buf);
 
     if (p_atomic_read(&bo->num_active_ioctls)) {
         return TRUE;
     }
 
-    args.handle = bo->handle;
-    busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
-                               &args, sizeof(args)) != 0;
-
-    if (!busy)
-        bo->busy_for_write = FALSE;
-    return busy;
+    if (bo->rws->info.drm_minor >= 12) {
+        struct drm_radeon_gem_wait args = {};
+        args.handle = bo->handle;
+        args.flags = usage | RADEON_GEM_NO_WAIT;
+        return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT,
+                                   &args, sizeof(args)) != 0;
+    } else {
+        struct drm_radeon_gem_busy args = {};
+        args.handle = bo->handle;
+        return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
+                                   &args, sizeof(args)) != 0;
+    }
 }
 
 static void radeon_bo_destroy(struct pb_buffer *_buf)
@@ -135,7 +160,7 @@
     }
 
     if (bo->ptr)
-        munmap(bo->ptr, bo->size);
+        os_munmap(bo->ptr, bo->size);
 
     /* Close object. */
     args.handle = bo->handle;
@@ -172,13 +197,33 @@
     if (!(flags & PB_USAGE_UNSYNCHRONIZED)) {
         /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
         if (flags & PB_USAGE_DONTBLOCK) {
-            if (radeon_bo_is_referenced_by_cs(cs, bo)) {
-                cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
-                return NULL;
-            }
+            if (!(flags & PB_USAGE_CPU_WRITE)) {
+                /* Mapping for read.
+                 *
+                 * Since we are mapping for read, we don't need to wait
+                 * if the GPU is using the buffer for read too
+                 * (neither one is changing it).
+                 *
+                 * Only check whether the buffer is being used for write. */
+                if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
+                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
+                    return NULL;
+                }
 
-            if (radeon_bo_is_busy((struct pb_buffer*)bo)) {
-                return NULL;
+                if (radeon_bo_is_busy((struct pb_buffer*)bo,
+                                      RADEON_USAGE_WRITE)) {
+                    return NULL;
+                }
+            } else {
+                if (radeon_bo_is_referenced_by_cs(cs, bo)) {
+                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
+                    return NULL;
+                }
+
+                if (radeon_bo_is_busy((struct pb_buffer*)bo,
+                                      RADEON_USAGE_READWRITE)) {
+                    return NULL;
+                }
             }
         } else {
             if (!(flags & PB_USAGE_CPU_WRITE)) {
@@ -191,14 +236,9 @@
                  * Only check whether the buffer is being used for write. */
                 if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                     cs->flush_cs(cs->flush_data, 0);
-                    radeon_bo_wait((struct pb_buffer*)bo);
-                } else if (bo->busy_for_write) {
-                    /* Update the busy_for_write field (done by radeon_bo_is_busy)
-                     * and wait if needed. */
-                    if (radeon_bo_is_busy((struct pb_buffer*)bo)) {
-                        radeon_bo_wait((struct pb_buffer*)bo);
-                    }
                 }
+                radeon_bo_wait((struct pb_buffer*)bo,
+                               RADEON_USAGE_WRITE);
             } else {
                 /* Mapping for write. */
                 if (radeon_bo_is_referenced_by_cs(cs, bo)) {
@@ -209,7 +249,7 @@
                         radeon_drm_cs_sync_flush(cs);
                 }
 
-                radeon_bo_wait((struct pb_buffer*)bo);
+                radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
             }
         }
     }
@@ -238,7 +278,7 @@
         return NULL;
     }
 
-    ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
+    ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                bo->rws->fd, args.addr_ptr);
     if (ptr == MAP_FAILED) {
         pipe_mutex_unlock(bo->map_mutex);
@@ -345,7 +385,7 @@
        return TRUE;
    }
 
-   if (radeon_bo_is_busy((struct pb_buffer*)bo)) {
+   if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) {
        return TRUE;
    }
 
@@ -395,16 +435,14 @@
                            struct radeon_winsys_cs *cs,
                            enum pipe_transfer_usage usage)
 {
-    struct pb_buffer *_buf = pb_buffer(buf);
-
-    return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), cs);
+    return pb_map(buf, get_pb_usage_from_transfer_flags(usage), cs);
 }
 
 static void radeon_bo_get_tiling(struct pb_buffer *_buf,
                                  enum radeon_bo_layout *microtiled,
                                  enum radeon_bo_layout *macrotiled)
 {
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+    struct radeon_bo *bo = get_radeon_bo(_buf);
     struct drm_radeon_gem_set_tiling args = {};
 
     args.handle = bo->handle;
@@ -429,7 +467,7 @@
                                  enum radeon_bo_layout macrotiled,
                                  uint32_t pitch)
 {
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+    struct radeon_bo *bo = get_radeon_bo(_buf);
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct drm_radeon_gem_set_tiling args = {};
 
@@ -464,12 +502,10 @@
         struct pb_buffer *_buf)
 {
     /* return radeon_bo. */
-    return (struct radeon_winsys_cs_handle*)
-            get_radeon_bo(pb_buffer(_buf));
+    return (struct radeon_winsys_cs_handle*)get_radeon_bo(_buf);
 }
 
-static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage,
-                                               enum radeon_bo_domain domain)
+static unsigned get_pb_usage_from_create_flags(enum radeon_bo_domain domain)
 {
     unsigned res = 0;
 
@@ -487,7 +523,6 @@
                         unsigned size,
                         unsigned alignment,
                         unsigned bind,
-                        unsigned usage,
                         enum radeon_bo_domain domain)
 {
     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
@@ -497,10 +532,11 @@
 
     memset(&desc, 0, sizeof(desc));
     desc.alignment = alignment;
-    desc.usage = get_pb_usage_from_create_flags(bind, usage, domain);
+    desc.usage = get_pb_usage_from_create_flags(domain);
 
     /* Assign a buffer manager. */
-    if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
+    if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
+                PIPE_BIND_CONSTANT_BUFFER))
 	provider = ws->cman;
     else
         provider = ws->kman;
@@ -587,7 +623,7 @@
                                            struct winsys_handle *whandle)
 {
     struct drm_gem_flink flink = {};
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(buffer));
+    struct radeon_bo *bo = get_radeon_bo(buffer);
 
     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
         if (!bo->flinked) {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index b94881b..047ea6b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -60,13 +60,6 @@
      * thread, is this bo referenced in? */
     int num_active_ioctls;
 
-    /* Whether the buffer has been relocated for write and is busy since then.
-     * This field is updated in:
-     * - radeon_drm_cs_flush (to TRUE if it's relocated for write)
-     * - radeon_bo_is_busy (to FALSE if it's not busy)
-     * - radeon_bo_wait (to FALSE) */
-    boolean busy_for_write;
-
     boolean flinked;
     uint32_t flink;
 };
@@ -80,10 +73,4 @@
     pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
 }
 
-static INLINE struct pb_buffer *
-pb_buffer(struct pb_buffer *buffer)
-{
-    return (struct pb_buffer *)buffer;
-}
-
 #endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 0139de1..c309354 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -115,6 +115,7 @@
     }
 
     csc->crelocs = 0;
+    csc->validated_crelocs = 0;
     csc->chunks[0].length_dw = 0;
     csc->chunks[1].length_dw = 0;
     csc->used_gart = 0;
@@ -218,11 +219,11 @@
     return -1;
 }
 
-static void radeon_add_reloc(struct radeon_cs_context *csc,
-                             struct radeon_bo *bo,
-                             enum radeon_bo_domain rd,
-                             enum radeon_bo_domain wd,
-                             enum radeon_bo_domain *added_domains)
+static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
+                                 struct radeon_bo *bo,
+                                 enum radeon_bo_domain rd,
+                                 enum radeon_bo_domain wd,
+                                 enum radeon_bo_domain *added_domains)
 {
     struct drm_radeon_cs_reloc *reloc;
     unsigned i;
@@ -232,7 +233,7 @@
         reloc = csc->relocs_hashlist[hash];
         if (reloc->handle == bo->handle) {
             update_domains(reloc, rd, wd, added_domains);
-            return;
+            return csc->reloc_indices_hashlist[hash];
         }
 
         /* Hash collision, look for the BO in the list of relocs linearly. */
@@ -245,7 +246,7 @@
                 csc->relocs_hashlist[hash] = reloc;
                 csc->reloc_indices_hashlist[hash] = i;
                 /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
-                return;
+                return i;
             }
         }
     }
@@ -279,37 +280,64 @@
     csc->reloc_indices_hashlist[hash] = csc->crelocs;
 
     csc->chunks[1].length_dw += RELOC_DWORDS;
-    csc->crelocs++;
 
     *added_domains = rd | wd;
+    return csc->crelocs++;
 }
 
-static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
-                                    struct radeon_winsys_cs_handle *buf,
-                                    enum radeon_bo_domain rd,
-                                    enum radeon_bo_domain wd)
+static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
+                                        struct radeon_winsys_cs_handle *buf,
+                                        enum radeon_bo_domain rd,
+                                        enum radeon_bo_domain wd)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct radeon_bo *bo = (struct radeon_bo*)buf;
     enum radeon_bo_domain added_domains;
 
-    radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains);
-
-    if (!added_domains)
-        return;
+    unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains);
 
     if (added_domains & RADEON_DOMAIN_GTT)
         cs->csc->used_gart += bo->size;
     if (added_domains & RADEON_DOMAIN_VRAM)
         cs->csc->used_vram += bo->size;
+
+    return index;
 }
 
 static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+    boolean status =
+        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
+        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;
 
-    return cs->csc->used_gart < cs->ws->gart_size * 0.8 &&
-           cs->csc->used_vram < cs->ws->vram_size * 0.8;
+    if (status) {
+        cs->csc->validated_crelocs = cs->csc->crelocs;
+    } else {
+        /* Remove lately-added relocations. The validation failed with them
+         * and the CS is about to be flushed because of that. Keep only
+         * the already-validated relocations. */
+        unsigned i;
+
+        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
+            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
+            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
+        }
+        cs->csc->crelocs = cs->csc->validated_crelocs;
+
+        /* Flush if there are any relocs. Clean up otherwise. */
+        if (cs->csc->crelocs) {
+            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
+        } else {
+            radeon_cs_context_cleanup(cs->csc);
+
+            assert(cs->base.cdw == 0);
+            if (cs->base.cdw != 0) {
+                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
+            }
+        }
+    }
+    return status;
 }
 
 static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
@@ -351,6 +379,8 @@
 
     for (i = 0; i < csc->crelocs; i++)
         p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
+
+    radeon_cs_context_cleanup(csc);
     return NULL;
 }
 
@@ -381,11 +411,6 @@
         for (i = 0; i < crelocs; i++) {
             /* Update the number of active asynchronous CS ioctls for the buffer. */
             p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls);
-
-            /* Update whether the buffer is busy for write. */
-            if (cs->csc->relocs[i].write_domain) {
-                cs->csc->relocs_bo[i]->busy_for_write = TRUE;
-            }
         }
 
         if (cs->ws->num_cpus > 1 && debug_get_option_thread() &&
@@ -395,6 +420,8 @@
         } else {
             radeon_drm_cs_emit_ioctl(cs->csc);
         }
+    } else {
+        radeon_cs_context_cleanup(cs->csc);
     }
 
     /* Flip command streams. */
@@ -403,8 +430,6 @@
     cs->cst = tmp;
 
     /* Prepare a new CS. */
-    radeon_cs_context_cleanup(cs->csc);
-
     cs->base.buf = cs->csc->buf;
     cs->base.cdw = 0;
 }
@@ -447,6 +472,6 @@
     ws->base.cs_validate = radeon_drm_cs_validate;
     ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
     ws->base.cs_flush = radeon_drm_cs_flush;
-    ws->base.cs_set_flush = radeon_drm_cs_set_flush;
+    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
     ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 339beed..fe28532 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -41,6 +41,7 @@
     /* Relocs. */
     unsigned                    nrelocs;
     unsigned                    crelocs;
+    unsigned			validated_crelocs;
     struct radeon_bo            **relocs_bo;
     struct drm_radeon_cs_reloc  *relocs;
 
@@ -88,8 +89,9 @@
 radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
                               struct radeon_bo *bo)
 {
-    return bo->num_cs_references == bo->rws->num_cs ||
-           (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1);
+    int num_refs = bo->num_cs_references;
+    return num_refs == bo->rws->num_cs ||
+           (num_refs && radeon_get_reloc(cs->csc, bo) != -1);
 }
 
 static INLINE boolean
@@ -111,7 +113,7 @@
 static INLINE boolean
 radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
 {
-    return bo->num_cs_references;
+    return bo->num_cs_references != 0;
 }
 
 void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 0474b38..e234321 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -41,13 +41,34 @@
 #include <xf86drm.h>
 #include <stdio.h>
 
+#ifndef RADEON_INFO_TILING_CONFIG
+#define RADEON_INFO_TILING_CONFIG 6
+#endif
+
 #ifndef RADEON_INFO_WANT_HYPERZ
 #define RADEON_INFO_WANT_HYPERZ 7
 #endif
+
 #ifndef RADEON_INFO_WANT_CMASK
 #define RADEON_INFO_WANT_CMASK 8
 #endif
 
+#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ
+#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 9
+#endif
+
+#ifndef RADEON_INFO_NUM_BACKENDS
+#define RADEON_INFO_NUM_BACKENDS 0xa
+#endif
+
+#ifndef RADEON_INFO_NUM_TILE_PIPES
+#define RADEON_INFO_NUM_TILE_PIPES 0xb
+#endif
+
+#ifndef RADEON_INFO_BACKEND_MAP
+#define RADEON_INFO_BACKEND_MAP 0xd
+#endif
+
 /* Enable/disable feature access for one command stream.
  * If enable == TRUE, return TRUE on success.
  * Otherwise, return FALSE.
@@ -103,17 +124,31 @@
     return FALSE;
 }
 
+static boolean radeon_get_drm_value(int fd, unsigned request,
+                                    const char *errname, uint32_t *out)
+{
+    struct drm_radeon_info info = {0};
+    int retval;
+
+    info.value = (unsigned long)out;
+    info.request = request;
+
+    retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
+    if (retval && errname) {
+        fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
+                errname, retval);
+        return FALSE;
+    }
+    return TRUE;
+}
+
 /* Helper function to do the ioctls needed for setup and init. */
-static void do_ioctls(struct radeon_drm_winsys *winsys)
+static boolean do_winsys_init(struct radeon_drm_winsys *ws)
 {
     struct drm_radeon_gem_info gem_info = {0};
-    struct drm_radeon_info info = {0};
-    int target = 0;
     int retval;
     drmVersionPtr version;
 
-    info.value = (unsigned long)&target;
-
     /* We do things in a specific order here.
      *
      * DRM version first. We need to be sure we're running on a KMS chipset.
@@ -123,71 +158,108 @@
      * for all Radeons. If this fails, we probably got handed an FD for some
      * non-Radeon card.
      *
+     * The GEM info is actually bogus on the kernel side, as well as our side
+     * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because
+     * we don't actually use the info for anything yet.
+     *
      * The GB and Z pipe requests should always succeed, but they might not
      * return sensical values for all chipsets, but that's alright because
      * the pipe drivers already know that.
-     *
-     * The GEM info is actually bogus on the kernel side, as well as our side
-     * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because
-     * we don't actually use the info for anything yet. */
+     */
 
-    version = drmGetVersion(winsys->fd);
+    /* Get DRM version. */
+    version = drmGetVersion(ws->fd);
     if (version->version_major != 2 ||
         version->version_minor < 3) {
         fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
-                "only compatible with 2.3.x (kernel 2.6.34) and later.\n",
+                "only compatible with 2.3.x (kernel 2.6.34) or later.\n",
                 __FUNCTION__,
                 version->version_major,
                 version->version_minor,
                 version->version_patchlevel);
         drmFreeVersion(version);
-        exit(1);
+        return FALSE;
     }
 
-    winsys->drm_major = version->version_major;
-    winsys->drm_minor = version->version_minor;
-    winsys->drm_patchlevel = version->version_patchlevel;
-
-    info.request = RADEON_INFO_DEVICE_ID;
-    retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info));
-    if (retval) {
-        fprintf(stderr, "%s: Failed to get PCI ID, "
-                "error number %d\n", __FUNCTION__, retval);
-        exit(1);
-    }
-    winsys->pci_id = target;
-
-    info.request = RADEON_INFO_NUM_GB_PIPES;
-    retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info));
-    if (retval) {
-        fprintf(stderr, "%s: Failed to get GB pipe count, "
-                "error number %d\n", __FUNCTION__, retval);
-        exit(1);
-    }
-    winsys->gb_pipes = target;
-
-    info.request = RADEON_INFO_NUM_Z_PIPES;
-    retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info));
-    if (retval) {
-        fprintf(stderr, "%s: Failed to get Z pipe count, "
-                "error number %d\n", __FUNCTION__, retval);
-        exit(1);
-    }
-    winsys->z_pipes = target;
-
-    retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO,
-            &gem_info, sizeof(gem_info));
-    if (retval) {
-        fprintf(stderr, "%s: Failed to get MM info, error number %d\n",
-                __FUNCTION__, retval);
-        exit(1);
-    }
-    winsys->gart_size = gem_info.gart_size;
-    winsys->vram_size = gem_info.vram_size;
-
+    ws->info.drm_major = version->version_major;
+    ws->info.drm_minor = version->version_minor;
+    ws->info.drm_patchlevel = version->version_patchlevel;
     drmFreeVersion(version);
 
-    winsys->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+    /* Get PCI ID. */
+    if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID",
+                              &ws->info.pci_id))
+        return FALSE;
+
+    /* Check PCI ID. */
+    switch (ws->info.pci_id) {
+#define CHIPSET(pci_id, name, family) case pci_id:
+#include "pci_ids/r300_pci_ids.h"
+#undef CHIPSET
+        ws->gen = R300;
+        break;
+
+#define CHIPSET(pci_id, name, family) case pci_id:
+#include "pci_ids/r600_pci_ids.h"
+#undef CHIPSET
+        ws->gen = R600;
+        break;
+
+    default:
+        fprintf(stderr, "radeon: Invalid PCI ID.\n");
+        return FALSE;
+    }
+
+    /* Get GEM info. */
+    retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
+            &gem_info, sizeof(gem_info));
+    if (retval) {
+        fprintf(stderr, "radeon: Failed to get MM info, error number %d\n",
+                retval);
+        return FALSE;
+    }
+    ws->info.gart_size = gem_info.gart_size;
+    ws->info.vram_size = gem_info.vram_size;
+
+    ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+    /* Generation-specific queries. */
+    if (ws->gen == R300) {
+        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES,
+                                  "GB pipe count",
+                                  &ws->info.r300_num_gb_pipes))
+            return FALSE;
+
+        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES,
+                                  "Z pipe count",
+                                  &ws->info.r300_num_z_pipes))
+            return FALSE;
+    }
+    else if (ws->gen == R600) {
+        if (ws->info.drm_minor >= 9 &&
+            !radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
+                                  "num backends",
+                                  &ws->info.r600_num_backends))
+            return FALSE;
+
+        /* get the GPU counter frequency, failure is not fatal */
+        radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
+                             &ws->info.r600_clock_crystal_freq);
+
+        radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
+                             &ws->info.r600_tiling_config);
+
+        if (ws->info.drm_minor >= 11) {
+            radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
+                                 &ws->info.r600_num_tile_pipes);
+
+            if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
+                                      &ws->info.r600_backend_map))
+                ws->info.r600_backend_map_valid = TRUE;
+        }
+    }
+
+    return TRUE;
 }
 
 static void radeon_winsys_destroy(struct radeon_winsys *rws)
@@ -202,34 +274,10 @@
     FREE(rws);
 }
 
-static uint32_t radeon_get_value(struct radeon_winsys *rws,
-                                 enum radeon_value_id id)
+static void radeon_query_info(struct radeon_winsys *rws,
+                              struct radeon_info *info)
 {
-    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws;
-
-    switch(id) {
-    case RADEON_VID_PCI_ID:
-	return ws->pci_id;
-    case RADEON_VID_R300_GB_PIPES:
-	return ws->gb_pipes;
-    case RADEON_VID_R300_Z_PIPES:
-	return ws->z_pipes;
-    case RADEON_VID_GART_SIZE:
-        return ws->gart_size;
-    case RADEON_VID_VRAM_SIZE:
-        return ws->vram_size;
-    case RADEON_VID_DRM_MAJOR:
-        return ws->drm_major;
-    case RADEON_VID_DRM_MINOR:
-        return ws->drm_minor;
-    case RADEON_VID_DRM_PATCHLEVEL:
-        return ws->drm_patchlevel;
-    case RADEON_VID_DRM_2_6_0:
-        return ws->drm_major*100 + ws->drm_minor >= 206;
-    case RADEON_VID_DRM_2_8_0:
-        return ws->drm_major*100 + ws->drm_minor >= 208;
-    }
-    return 0;
+    *info = ((struct radeon_drm_winsys *)rws)->info;
 }
 
 static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs,
@@ -239,7 +287,7 @@
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
 
     switch (fid) {
-    case RADEON_FID_HYPERZ_RAM_ACCESS:
+    case RADEON_FID_R300_HYPERZ_ACCESS:
         if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) {
             return radeon_set_fd_access(cs, &cs->ws->hyperz_owner,
                                         &cs->ws->hyperz_owner_mutex,
@@ -248,7 +296,7 @@
             return FALSE;
         }
 
-    case RADEON_FID_CMASK_RAM_ACCESS:
+    case RADEON_FID_R300_CMASK_ACCESS:
         if (debug_get_bool_option("RADEON_CMASK", FALSE)) {
             return radeon_set_fd_access(cs, &cs->ws->cmask_owner,
                                         &cs->ws->cmask_owner_mutex,
@@ -268,16 +316,9 @@
     }
 
     ws->fd = fd;
-    do_ioctls(ws);
 
-    switch (ws->pci_id) {
-#define CHIPSET(pci_id, name, family) case pci_id:
-#include "pci_ids/r300_pci_ids.h"
-#undef CHIPSET
-       break;
-    default:
-       goto fail;
-    }
+    if (!do_winsys_init(ws))
+        goto fail;
 
     /* Create managers. */
     ws->kman = radeon_bomgr_create(ws);
@@ -289,7 +330,7 @@
 
     /* Set functions. */
     ws->base.destroy = radeon_winsys_destroy;
-    ws->base.get_value = radeon_get_value;
+    ws->base.query_info = radeon_query_info;
     ws->base.cs_request_feature = radeon_cs_request_feature;
 
     radeon_bomgr_init_functions(ws);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index d5186bc..6921644 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -31,29 +31,27 @@
 #define RADEON_DRM_WINSYS_H
 
 #include "radeon_winsys.h"
-
 #include "os/os_thread.h"
 
+enum radeon_generation {
+    R300,
+    R600
+};
+
 struct radeon_drm_winsys {
     struct radeon_winsys base;
 
     int fd; /* DRM file descriptor */
     int num_cs; /* The number of command streams created. */
 
+    enum radeon_generation gen;
+    struct radeon_info info;
+
     struct pb_manager *kman;
     struct pb_manager *cman;
 
-    uint32_t pci_id;        /* PCI ID */
-    uint32_t gb_pipes;      /* GB pipe count */
-    uint32_t z_pipes;       /* Z pipe count (rv530 only) */
-    uint32_t gart_size;     /* GART size. */
-    uint32_t vram_size;     /* VRAM size. */
     uint32_t num_cpus;      /* Number of CPUs. */
 
-    unsigned drm_major;
-    unsigned drm_minor;
-    unsigned drm_patchlevel;
-
     struct radeon_drm_cs *hyperz_owner;
     pipe_mutex hyperz_owner_mutex;
     struct radeon_drm_cs *cmask_owner;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 3a64e4a..90583e3 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -26,6 +26,20 @@
 
 /* The public winsys interface header for the radeon driver. */
 
+/* R300 features in DRM.
+ *
+ * 2.6.0:
+ * - Hyper-Z
+ * - GB_Z_PEQ_CONFIG on rv350->r4xx
+ * - R500 FG_ALPHA_VALUE
+ *
+ * 2.8.0:
+ * - R500 US_FORMAT regs
+ * - R500 ARGB2101010 colorbuffer
+ * - CMask and AA regs
+ * - R16F/RG16F
+ */
+
 #include "pipebuffer/pb_bufmgr.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
@@ -47,6 +61,12 @@
     RADEON_DOMAIN_VRAM = 4
 };
 
+enum radeon_bo_usage { /* bitfield */
+    RADEON_USAGE_READ = 2,
+    RADEON_USAGE_WRITE = 4,
+    RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
+};
+
 struct winsys_handle;
 struct radeon_winsys_cs_handle;   /* for write_reloc etc. */
 
@@ -55,43 +75,29 @@
     uint32_t *buf; /* The command buffer. */
 };
 
-enum radeon_value_id {
-    RADEON_VID_PCI_ID,
-    RADEON_VID_R300_GB_PIPES,
-    RADEON_VID_R300_Z_PIPES,
-    RADEON_VID_GART_SIZE,
-    RADEON_VID_VRAM_SIZE,
-    RADEON_VID_DRM_MAJOR,
-    RADEON_VID_DRM_MINOR,
-    RADEON_VID_DRM_PATCHLEVEL,
+struct radeon_info {
+    uint32_t pci_id;
+    uint32_t gart_size;
+    uint32_t vram_size;
 
-    /* These should probably go away: */
+    uint32_t drm_major; /* version */
+    uint32_t drm_minor;
+    uint32_t drm_patchlevel;
 
-    /* R300 features:
-     * - Hyper-Z
-     * - GB_Z_PEQ_CONFIG on rv350->r4xx
-     * - R500 FG_ALPHA_VALUE
-     *
-     * R600 features:
-     * - TBD
-     */
-    RADEON_VID_DRM_2_6_0,
+    uint32_t r300_num_gb_pipes;
+    uint32_t r300_num_z_pipes;
 
-    /* R300 features:
-     * - R500 US_FORMAT regs
-     * - R500 ARGB2101010 colorbuffer
-     * - CMask and AA regs
-     * - R16F/RG16F
-     *
-     * R600 features:
-     * - TBD
-     */
-    RADEON_VID_DRM_2_8_0,
+    uint32_t r600_num_backends;
+    uint32_t r600_clock_crystal_freq;
+    uint32_t r600_tiling_config;
+    uint32_t r600_num_tile_pipes;
+    uint32_t r600_backend_map;
+    boolean r600_backend_map_valid;
 };
 
 enum radeon_feature_id {
-    RADEON_FID_HYPERZ_RAM_ACCESS,     /* ZMask + HiZ */
-    RADEON_FID_CMASK_RAM_ACCESS,
+    RADEON_FID_R300_HYPERZ_ACCESS,     /* ZMask + HiZ */
+    RADEON_FID_R300_CMASK_ACCESS,
 };
 
 struct radeon_winsys {
@@ -103,13 +109,13 @@
     void (*destroy)(struct radeon_winsys *ws);
 
     /**
-     * Query a system value from a winsys.
+     * Query an info structure from winsys.
      *
      * \param ws        The winsys this function is called from.
-     * \param vid       One of the RADEON_VID_* enums.
+     * \param info      Return structure
      */
-    uint32_t (*get_value)(struct radeon_winsys *ws,
-                          enum radeon_value_id vid);
+    void (*query_info)(struct radeon_winsys *ws,
+                       struct radeon_info *info);
 
     /**************************************************************************
      * Buffer management. Buffer attributes are mostly fixed over its lifetime.
@@ -126,7 +132,6 @@
      * \param size      The size to allocate.
      * \param alignment An alignment of the buffer in memory.
      * \param bind      A bitmask of the PIPE_BIND_* flags.
-     * \param usage     A bitmask of the PIPE_USAGE_* flags.
      * \param domain    A bitmask of the RADEON_DOMAIN_* flags.
      * \return          The created buffer object.
      */
@@ -134,7 +139,6 @@
                                        unsigned size,
                                        unsigned alignment,
                                        unsigned bind,
-                                       unsigned usage,
                                        enum radeon_bo_domain domain);
 
     struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
@@ -164,8 +168,10 @@
      * Return TRUE if a buffer object is being used by the GPU.
      *
      * \param buf       A winsys buffer object.
+     * \param usage     Only check whether the buffer is busy for the given usage.
      */
-    boolean (*buffer_is_busy)(struct pb_buffer *buf);
+    boolean (*buffer_is_busy)(struct pb_buffer *buf,
+                              enum radeon_bo_usage usage);
 
     /**
      * Wait for a buffer object until it is not used by a GPU. This is
@@ -173,8 +179,10 @@
      * and synchronizing to the fence.
      *
      * \param buf       A winsys buffer object to wait for.
+     * \param usage     Only wait until the buffer is idle for the given usage,
+     *                  but may still be busy for some other usage.
      */
-    void (*buffer_wait)(struct pb_buffer *buf);
+    void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
 
     /**
      * Return tiling flags describing a memory layout of a buffer object.
@@ -263,15 +271,18 @@
      * \param buf A winsys buffer to validate.
      * \param rd  A read domain containing a bitmask of the RADEON_DOMAIN_* flags.
      * \param wd  A write domain containing a bitmask of the RADEON_DOMAIN_* flags.
+     * \return Relocation index.
      */
-    void (*cs_add_reloc)(struct radeon_winsys_cs *cs,
-                         struct radeon_winsys_cs_handle *buf,
-                         enum radeon_bo_domain rd,
-                         enum radeon_bo_domain wd);
+    unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
+                             struct radeon_winsys_cs_handle *buf,
+                             enum radeon_bo_domain rd,
+                             enum radeon_bo_domain wd);
 
     /**
      * Return TRUE if there is enough memory in VRAM and GTT for the relocs
-     * added so far.
+     * added so far. If the validation fails, all the relocations which have
+     * been added since the last call of cs_validate will be removed and
+     * the CS will be flushed (provided there are still any relocations).
      *
      * \param cs        A command stream to validate.
      */
@@ -304,9 +315,9 @@
      * \param flush     A flush callback function associated with the command stream.
      * \param user      A user pointer that will be passed to the flush callback.
      */
-    void (*cs_set_flush)(struct radeon_winsys_cs *cs,
-                         void (*flush)(void *ctx, unsigned flags),
-                         void *user);
+    void (*cs_set_flush_callback)(struct radeon_winsys_cs *cs,
+                                  void (*flush)(void *ctx, unsigned flags),
+                                  void *ctx);
 
     /**
      * Return TRUE if a buffer is referenced by a command stream.
@@ -321,7 +332,8 @@
      * Request access to a feature for a command stream.
      *
      * \param cs        A command stream.
-     * \param fid       A winsys buffer.
+     * \param fid       Feature ID, one of RADEON_FID_*
+     * \param enable	Whether to enable or disable the feature.
      */
     boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
                                   enum radeon_feature_id fid,
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
index d92ba38..afdbd44 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
@@ -42,7 +42,8 @@
 #include "xf86drm.h"
 #include "vmwgfx_drm.h"
 
-#include <sys/mman.h>
+#include "os/os_mman.h"
+
 #include <errno.h>
 #include <unistd.h>
 
@@ -94,7 +95,7 @@
 vmw_ioctl_fifo_unmap(struct vmw_winsys_screen *vws, void *mapping)
 {
    VMW_FUNC;
-   (void)munmap(mapping, getpagesize());
+   (void)os_munmap(mapping, getpagesize());
 }
 
 
@@ -106,7 +107,7 @@
 
    VMW_FUNC;
 
-   map = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED,
+   map = os_mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED,
 	      vws->ioctl.drm_fd, fifo_offset);
 
    if (map == MAP_FAILED) {
@@ -362,7 +363,7 @@
               region->ptr.gmrId, region->ptr.offset);
 
    if (region->data) {
-      munmap(region->data, region->size);
+      os_munmap(region->data, region->size);
       region->data = NULL;
    }
 
@@ -388,7 +389,7 @@
               region->ptr.gmrId, region->ptr.offset);
 
    if (region->data == NULL) {
-      map = mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+      map = os_mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED,
 		 region->drm_fd, region->map_handle);
       if (map == MAP_FAILED) {
 	 debug_printf("%s: Map failed.\n", __FUNCTION__);
diff --git a/src/gallium/winsys/sw/android/Android.mk b/src/gallium/winsys/sw/android/Android.mk
new file mode 100644
index 0000000..4fb2715
--- /dev/null
+++ b/src/gallium/winsys/sw/android/Android.mk
@@ -0,0 +1,34 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	android_sw_winsys.cpp
+
+LOCAL_MODULE := libmesa_winsys_sw_android
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.cpp b/src/gallium/winsys/sw/android/android_sw_winsys.cpp
new file mode 100644
index 0000000..02faf1e
--- /dev/null
+++ b/src/gallium/winsys/sw/android/android_sw_winsys.cpp
@@ -0,0 +1,255 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.12
+ *
+ * Copyright (C) 2010-2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "state_tracker/sw_winsys.h"
+
+#include <utils/Errors.h>
+#include <private/ui/sw_gralloc_handle.h>
+
+#include <hardware/gralloc.h>
+
+#include "android_sw_winsys.h"
+
+struct android_sw_winsys
+{
+   struct sw_winsys base;
+
+   const gralloc_module_t *grmod;
+};
+
+struct android_sw_displaytarget
+{
+   buffer_handle_t handle;
+   int stride;
+   int width, height;
+   int usage; /* gralloc usage */
+
+   void *mapped;
+};
+
+static INLINE struct android_sw_winsys *
+android_sw_winsys(struct sw_winsys *ws)
+{
+   return (struct android_sw_winsys *) ws;
+}
+
+static INLINE struct android_sw_displaytarget *
+android_sw_displaytarget(struct sw_displaytarget *dt)
+{
+   return (struct android_sw_displaytarget *) dt;
+}
+
+namespace android {
+
+static void
+android_displaytarget_display(struct sw_winsys *ws,
+                              struct sw_displaytarget *dt,
+                              void *context_private)
+{
+}
+
+static struct sw_displaytarget *
+android_displaytarget_create(struct sw_winsys *ws,
+                             unsigned tex_usage,
+                             enum pipe_format format,
+                             unsigned width, unsigned height,
+                             unsigned alignment,
+                             unsigned *stride)
+{
+   return NULL;
+}
+
+static void
+android_displaytarget_destroy(struct sw_winsys *ws,
+                              struct sw_displaytarget *dt)
+{
+   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
+
+   assert(!adt->mapped);
+   FREE(adt);
+}
+
+static void
+android_displaytarget_unmap(struct sw_winsys *ws,
+                            struct sw_displaytarget *dt)
+{
+   struct android_sw_winsys *droid = android_sw_winsys(ws);
+   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
+
+   if (adt->mapped) {
+      if (sw_gralloc_handle_t::validate(adt->handle) >= 0) {
+         adt->mapped = NULL;
+      }
+      else {
+         droid->grmod->unlock(droid->grmod, adt->handle);
+         adt->mapped = NULL;
+      }
+   }
+}
+
+static void *
+android_displaytarget_map(struct sw_winsys *ws,
+                          struct sw_displaytarget *dt,
+                          unsigned flags)
+{
+   struct android_sw_winsys *droid = android_sw_winsys(ws);
+   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
+
+   if (!adt->mapped) {
+      if (sw_gralloc_handle_t::validate(adt->handle) >= 0) {
+         const sw_gralloc_handle_t *swhandle =
+            reinterpret_cast<const sw_gralloc_handle_t *>(adt->handle);
+         adt->mapped = reinterpret_cast<void *>(swhandle->base);
+      }
+      else {
+         /* lock the buffer for CPU access */
+         droid->grmod->lock(droid->grmod, adt->handle,
+               adt->usage, 0, 0, adt->width, adt->height, &adt->mapped);
+      }
+   }
+
+   return adt->mapped;
+}
+
+static struct sw_displaytarget *
+android_displaytarget_from_handle(struct sw_winsys *ws,
+                                  const struct pipe_resource *templ,
+                                  struct winsys_handle *whandle,
+                                  unsigned *stride)
+{
+   struct android_winsys_handle *ahandle =
+      (struct android_winsys_handle *) whandle;
+   struct android_sw_displaytarget *adt;
+
+   adt = CALLOC_STRUCT(android_sw_displaytarget);
+   if (!adt)
+      return NULL;
+
+   adt->handle = ahandle->handle;
+   adt->stride = ahandle->stride;
+   adt->width = templ->width0;
+   adt->height = templ->height0;
+
+   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_TRANSFER_WRITE))
+      adt->usage |= GRALLOC_USAGE_SW_WRITE_OFTEN;
+   if (templ->bind & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_TRANSFER_READ))
+      adt->usage |= GRALLOC_USAGE_SW_READ_OFTEN;
+
+   if (stride)
+      *stride = adt->stride;
+
+   return reinterpret_cast<struct sw_displaytarget *>(adt);
+}
+
+static boolean
+android_displaytarget_get_handle(struct sw_winsys *ws,
+                                 struct sw_displaytarget *dt,
+                                 struct winsys_handle *whandle)
+{
+   return FALSE;
+}
+
+static boolean
+android_is_displaytarget_format_supported(struct sw_winsys *ws,
+                                          unsigned tex_usage,
+                                          enum pipe_format format)
+{
+   struct android_sw_winsys *droid = android_sw_winsys(ws);
+   int fmt = -1;
+
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      fmt = HAL_PIXEL_FORMAT_RGBA_8888;
+      break;
+   case PIPE_FORMAT_R8G8B8X8_UNORM:
+      fmt = HAL_PIXEL_FORMAT_RGBX_8888;
+      break;
+   case PIPE_FORMAT_R8G8B8_UNORM:
+      fmt = HAL_PIXEL_FORMAT_RGB_888;
+      break;
+   case PIPE_FORMAT_B5G6R5_UNORM:
+      fmt = HAL_PIXEL_FORMAT_RGB_565;
+      break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      fmt = HAL_PIXEL_FORMAT_BGRA_8888;
+      break;
+   default:
+      break;
+   }
+
+   return (fmt != -1);
+}
+
+static void
+android_destroy(struct sw_winsys *ws)
+{
+   struct android_sw_winsys *droid = android_sw_winsys(ws);
+
+   FREE(droid);
+}
+
+}; /* namespace android */
+
+using namespace android;
+
+struct sw_winsys *
+android_create_sw_winsys(void)
+{
+   struct android_sw_winsys *droid;
+   const hw_module_t *mod;
+
+   droid = CALLOC_STRUCT(android_sw_winsys);
+   if (!droid)
+      return NULL;
+
+   if (hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod)) {
+      FREE(droid);
+      return NULL;
+   }
+
+   droid->grmod = (const gralloc_module_t *) mod;
+
+   droid->base.destroy = android_destroy;
+   droid->base.is_displaytarget_format_supported =
+      android_is_displaytarget_format_supported;
+
+   droid->base.displaytarget_create = android_displaytarget_create;
+   droid->base.displaytarget_destroy = android_displaytarget_destroy;
+   droid->base.displaytarget_from_handle = android_displaytarget_from_handle;
+   droid->base.displaytarget_get_handle = android_displaytarget_get_handle;
+
+   droid->base.displaytarget_map = android_displaytarget_map;
+   droid->base.displaytarget_unmap = android_displaytarget_unmap;
+   droid->base.displaytarget_display = android_displaytarget_display;
+
+   return &droid->base;
+}
diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.h b/src/gallium/winsys/sw/android/android_sw_winsys.h
new file mode 100644
index 0000000..79392dc
--- /dev/null
+++ b/src/gallium/winsys/sw/android/android_sw_winsys.h
@@ -0,0 +1,49 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.12
+ *
+ * Copyright (C) 2010-2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ANDROID_SW_WINSYS
+#define ANDROID_SW_WINSYS
+
+#include <sys/cdefs.h>
+#include <hardware/gralloc.h>
+
+__BEGIN_DECLS
+
+struct sw_winsys;
+
+struct android_winsys_handle {
+   buffer_handle_t handle;
+   int stride;
+};
+
+struct sw_winsys *
+android_create_sw_winsys(void);
+
+__END_DECLS
+
+#endif /* ANDROID_SW_WINSYS */
diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 6bb7848..9de8cb6 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -194,6 +194,8 @@
    dri->screen = dri->dri2->createNewScreen(0, dri->base.base.fd,
                                             dri->extensions,
                                             &dri->driver_configs, dri);
+   if (dri->screen == NULL)
+      return -1;
 
    extensions = dri->core->getExtensions(dri->screen);
    if (dri_bind_extensions(dri, dri_core_extensions, extensions) < 0) {
diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index d79a03e..05d2292 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -28,6 +28,11 @@
 #ifndef _GBM_H_
 #define _GBM_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
 #define __GBM__ 1
 
 #include <stdint.h>
@@ -97,4 +102,8 @@
 void
 gbm_bo_destroy(struct gbm_bo *bo);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif
diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore
index dfbd572..d26839a 100644
--- a/src/glsl/.gitignore
+++ b/src/glsl/.gitignore
@@ -5,3 +5,4 @@
 glsl_parser.output
 builtin_function.cpp
 builtin_compiler
+glsl_test
diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
new file mode 100644
index 0000000..e4ccb72
--- /dev/null
+++ b/src/glsl/Android.gen.mk
@@ -0,0 +1,98 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# included by glsl Android.mk for source generation
+
+ifeq ($(LOCAL_MODULE_CLASS),)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+endif
+
+intermediates := $(call local-intermediates-dir)
+
+sources := \
+	glsl_lexer.cpp \
+	glsl_parser.cpp \
+	glcpp/glcpp-lex.c \
+	glcpp/glcpp-parse.c
+
+ifneq ($(LOCAL_IS_HOST_MODULE),true)
+sources += builtin_function.cpp
+endif
+
+LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+
+LOCAL_C_INCLUDES += $(intermediates) $(intermediates)/glcpp $(MESA_TOP)/src/glsl/glcpp
+
+sources := $(addprefix $(intermediates)/, $(sources))
+LOCAL_GENERATED_SOURCES += $(sources)
+
+define local-l-or-ll-to-c-or-cpp
+	@mkdir -p $(dir $@)
+	@echo "Mesa Lex: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(LEX) --nounistd -o$@ $<
+endef
+
+define local-y-to-c-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -o $@ $<
+endef
+
+define local-yy-to-cpp-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -p "_mesa_glsl_" -o $@ $<
+	touch $(@:$1=$(YACC_HEADER_SUFFIX))
+	echo '#ifndef '$(@F:$1=_h) > $(@:$1=.h)
+	echo '#define '$(@F:$1=_h) >> $(@:$1=.h)
+	cat $(@:$1=$(YACC_HEADER_SUFFIX)) >> $(@:$1=.h)
+	echo '#endif' >> $(@:$1=.h)
+	rm -f $(@:$1=$(YACC_HEADER_SUFFIX))
+endef
+
+$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll
+	$(call local-l-or-ll-to-c-or-cpp)
+
+$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy
+	$(call local-yy-to-cpp-and-h,.cpp)
+
+$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l
+	$(call local-l-or-ll-to-c-or-cpp)
+
+$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
+	$(call local-y-to-c-and-h)
+
+BUILTIN_COMPILER := $(BUILD_OUT_EXECUTABLES)/mesa_builtin_compiler$(BUILD_EXECUTABLE_SUFFIX)
+
+builtin_function_deps := \
+	$(LOCAL_PATH)/builtins/tools/generate_builtins.py \
+	$(LOCAL_PATH)/builtins/tools/texture_builtins.py \
+	$(BUILTIN_COMPILER) \
+	$(wildcard $(LOCAL_PATH)/builtins/profiles/*) \
+       	$(wildcard $(LOCAL_PATH)/builtins/ir/*)
+
+$(intermediates)/builtin_function.cpp: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/builtins/tools/generate_builtins.py
+$(intermediates)/builtin_function.cpp: $(builtin_function_deps)
+	@mkdir -p $(dir $@)
+	@echo "Gen GLSL: $(PRIVATE_MODULE) <= $(notdir $@)"
+	$(hide) $(PRIVATE_SCRIPT) $(BUILTIN_COMPILER) > $@ || rm -f $@
diff --git a/src/glsl/Android.mk b/src/glsl/Android.mk
new file mode 100644
index 0000000..d0b3ff3
--- /dev/null
+++ b/src/glsl/Android.mk
@@ -0,0 +1,171 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for glsl
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+LIBGLCPP_SOURCES = \
+	glcpp/glcpp-lex.c \
+	glcpp/glcpp-parse.c \
+	glcpp/pp.c
+
+C_SOURCES = \
+	strtod.c \
+	ralloc.c \
+	$(LIBGLCPP_SOURCES)
+
+CXX_SOURCES = \
+	ast_expr.cpp \
+	ast_function.cpp \
+	ast_to_hir.cpp \
+	ast_type.cpp \
+	glsl_lexer.cpp \
+	glsl_parser.cpp \
+	glsl_parser_extras.cpp \
+	glsl_types.cpp \
+	glsl_symbol_table.cpp \
+	hir_field_selection.cpp \
+	ir_basic_block.cpp \
+	ir_clone.cpp \
+	ir_constant_expression.cpp \
+	ir.cpp \
+	ir_expression_flattening.cpp \
+	ir_function_can_inline.cpp \
+	ir_function_detect_recursion.cpp \
+	ir_function.cpp \
+	ir_hierarchical_visitor.cpp \
+	ir_hv_accept.cpp \
+	ir_import_prototypes.cpp \
+	ir_print_visitor.cpp \
+	ir_reader.cpp \
+	ir_rvalue_visitor.cpp \
+	ir_set_program_inouts.cpp \
+	ir_validate.cpp \
+	ir_variable.cpp \
+	ir_variable_refcount.cpp \
+	linker.cpp \
+	link_functions.cpp \
+	loop_analysis.cpp \
+	loop_controls.cpp \
+	loop_unroll.cpp \
+	lower_discard.cpp \
+	lower_if_to_cond_assign.cpp \
+	lower_instructions.cpp \
+	lower_jumps.cpp \
+	lower_mat_op_to_vec.cpp \
+	lower_noise.cpp \
+	lower_texture_projection.cpp \
+	lower_variable_index_to_cond_assign.cpp \
+	lower_vec_index_to_cond_assign.cpp \
+	lower_vec_index_to_swizzle.cpp \
+	lower_vector.cpp \
+	opt_algebraic.cpp \
+	opt_constant_folding.cpp \
+	opt_constant_propagation.cpp \
+	opt_constant_variable.cpp \
+	opt_copy_propagation.cpp \
+	opt_copy_propagation_elements.cpp \
+	opt_dead_code.cpp \
+	opt_dead_code_local.cpp \
+	opt_dead_functions.cpp \
+	opt_discard_simplification.cpp \
+	opt_function_inlining.cpp \
+	opt_if_simplification.cpp \
+	opt_noop_swizzle.cpp \
+	opt_redundant_jumps.cpp \
+	opt_structure_splitting.cpp \
+	opt_swizzle_swizzle.cpp \
+	opt_tree_grafting.cpp \
+	s_expression.cpp
+
+# ---------------------------------------
+# Build libmesa_glsl
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES) \
+	$(CXX_SOURCES) \
+	builtin_function.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_MODULE := libmesa_glsl
+
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
+# ---------------------------------------
+# Build mesa_builtin_compiler for host
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES) \
+	$(CXX_SOURCES) \
+	builtin_stubs.cpp \
+	main.cpp \
+	standalone_scaffolding.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_STATIC_LIBRARIES := libmesa_glsl_utils
+
+LOCAL_MODULE := mesa_builtin_compiler
+
+LOCAL_MODULE_CLASS := EXECUTABLES
+LOCAL_IS_HOST_MODULE := true
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_EXECUTABLE)
+
+# ---------------------------------------
+# Build glsl_compiler
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	main.cpp \
+	standalone_scaffolding.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils
+
+LOCAL_MODULE_TAGS := eng
+LOCAL_MODULE := glsl_compiler
+
+include $(MESA_COMMON_MK)
+include $(BUILD_EXECUTABLE)
diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index e0776c1..c20a6c9 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -39,6 +39,7 @@
 	ir.cpp \
 	ir_expression_flattening.cpp \
 	ir_function_can_inline.cpp \
+	ir_function_detect_recursion.cpp \
 	ir_function.cpp \
 	ir_hierarchical_visitor.cpp \
 	ir_hv_accept.cpp \
@@ -88,18 +89,32 @@
 LIBS = \
 	$(TOP)/src/glsl/libglsl.a
 
-APPS = glsl_compiler glcpp/glcpp
+APPS = glsl_compiler glsl_test glcpp/glcpp
 
 GLSL2_C_SOURCES = \
 	../mesa/program/hash_table.c \
 	../mesa/program/symbol_table.c
 GLSL2_CXX_SOURCES = \
-	main.cpp
+	main.cpp \
+	standalone_scaffolding.cpp
 
 GLSL2_OBJECTS = \
 	$(GLSL2_C_SOURCES:.c=.o) \
 	$(GLSL2_CXX_SOURCES:.cpp=.o)
 
+TEST_C_SOURCES = \
+	../mesa/program/hash_table.c \
+	../mesa/program/symbol_table.c
+
+TEST_CXX_SOURCES = \
+	standalone_scaffolding.cpp \
+	test.cpp \
+	test_optpass.cpp
+
+TEST_OBJECTS = \
+	$(TEST_C_SOURCES:.c=.o) \
+	$(TEST_CXX_SOURCES:.cpp=.o)
+
 ### Basic defines ###
 
 DEFINES += \
@@ -128,7 +143,9 @@
 	$(C_SOURCES) \
 	$(CXX_SOURCES) \
 	$(GLSL2_CXX_SOURCES) \
-	$(GLSL2_C_SOURCES)
+	$(GLSL2_C_SOURCES) \
+	$(TEST_CXX_SOURCES) \
+	$(TEST_C_SOURCES)
 
 ##### TARGETS #####
 
@@ -147,10 +164,11 @@
 	rm -f depend
 	touch depend
 	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null
+	$(MKDEP) $(MKDEP_OPTIONS) -a -p $(DRICORE_OBJ_DIR)/ $(INCLUDES) $(ALL_SOURCES) 2> /dev/null
 
 # Remove .o and backup files
 clean: clean-dricore
-	rm -f $(GLCPP_OBJECTS) $(GLSL2_OBJECTS) $(OBJECTS) lib$(LIBNAME).a depend depend.bak builtin_function.cpp builtin_function.o builtin_stubs.o builtin_compiler
+	rm -f $(GLCPP_OBJECTS) $(GLSL2_OBJECTS) $(TEST_OBJECTS) $(OBJECTS) lib$(LIBNAME).a depend depend.bak builtin_function.cpp builtin_function.o builtin_stubs.o builtin_compiler
 	-rm -f $(APPS)
 
 clean-dricore:
@@ -173,6 +191,9 @@
 glsl_compiler: $(GLSL2_OBJECTS) libglsl.a builtin_stubs.o
 	$(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLSL2_OBJECTS) builtin_stubs.o $(LIBS) -o $@
 
+glsl_test: $(TEST_OBJECTS) libglsl.a builtin_stubs.o
+	$(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(TEST_OBJECTS) builtin_stubs.o $(LIBS) -o $@
+
 glcpp: glcpp/glcpp
 glcpp/glcpp: $(GLCPP_OBJECTS)
 	$(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLCPP_OBJECTS) -o $@
diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index 1441cc7..1da58a9 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -50,6 +50,7 @@
     'ir.cpp',
     'ir_expression_flattening.cpp',
     'ir_function_can_inline.cpp',
+    'ir_function_detect_recursion.cpp',
     'ir_function.cpp',
     'ir_hierarchical_visitor.cpp',
     'ir_hv_accept.cpp',
@@ -95,6 +96,7 @@
     'opt_tree_grafting.cpp',
     'ralloc.c',
     's_expression.cpp',
+    'standalone_scaffolding.cpp',
     'strtod.c',
 ] 
 
diff --git a/src/glsl/TODO b/src/glsl/TODO
index a376238..c99d7e1 100644
--- a/src/glsl/TODO
+++ b/src/glsl/TODO
@@ -9,30 +9,19 @@
 - Implement support for ir_binop_dot in ir_algebraic.cpp.  Perform
   transformations such as "dot(v, vec3(0.0, 1.0, 0.0))" -> v.y.
 
+- Track source locations throughout the IR.  There are currently several
+  places where we cannot emit line numbers for errors (and currently emit 0:0)
+  because we've "lost" the line number information.  This is particularly
+  noticeable at link time.
+
 1.30 features:
 
-- Implement AST-to-HIR conversion of bit-shift operators.
-
-- Implement AST-to-HIR conversion of bit-wise {&,|,^,!} operators.
-
 - Implement AST-to-HIR conversion of switch-statements
   - switch
   - case
   - Update break to correcly handle mixed nexting of switch-statements
     and loops.
 
-- Handle currently unsupported constant expression types
-  - ir_unop_bit_not
-  - ir_binop_mod
-  - ir_binop_lshift
-  - ir_binop_rshift
-  - ir_binop_bit_and
-  - ir_binop_bit_xor
-  - ir_binop_bit_or
-
-- Implement support for 1.30 style shadow compares which only return a float
-  instead of a vec4.
-
 - Implement support for gl_ClipDistance.  This is non-trivial because
   gl_ClipDistance is exposed as a float[8], but all hardware actually
   implements it as vec4[2].
\ No newline at end of file
diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 878f48b..d1de227 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -730,7 +730,6 @@
 				 struct _mesa_glsl_parse_state *state);
 
 void
-emit_function(_mesa_glsl_parse_state *state, exec_list *instructions,
-	      ir_function *f);
+emit_function(_mesa_glsl_parse_state *state, ir_function *f);
 
 #endif /* AST_H */
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 60a2c61..ca45934 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -62,8 +62,10 @@
  *
  * \param return_type Return type of the function.  May be \c NULL.
  * \param name        Name of the function.
- * \param parameters  Parameter list for the function.  This may be either a
- *                    formal or actual parameter list.  Only the type is used.
+ * \param parameters  List of \c ir_instruction nodes representing the
+ *                    parameter list for the function.  This may be either a
+ *                    formal (\c ir_variable) or actual (\c ir_rvalue)
+ *                    parameter list.  Only the type is used.
  *
  * \return
  * A ralloced string representing the prototype of the function.
@@ -123,7 +125,7 @@
 	    if (f == NULL) {
 	       f = new(ctx) ir_function(name);
 	       state->symbols->add_global_function(f);
-	       emit_function(state, instructions, f);
+	       emit_function(state, f);
 	    }
 
 	    f->add_signature(sig->clone_prototype(f, NULL));
@@ -132,6 +134,8 @@
       }
    }
 
+   exec_list post_call_conversions;
+
    if (sig != NULL) {
       /* Verify that 'out' and 'inout' actual parameters are lvalues.  This
        * isn't done in ir_function::matching_signature because that function
@@ -139,6 +143,12 @@
        *
        * Also, validate that 'const_in' formal parameters (an extension of our
        * IR) correspond to ir_constant actual parameters.
+       *
+       * Also, perform implicit conversion of arguments.  Note: to implicitly
+       * convert out parameters, we need to place them in a temporary
+       * variable, and do the conversion after the call takes place.  Since we
+       * haven't emitted the call yet, we'll place the post-call conversions
+       * in a temporary exec_list, and emit them later.
        */
       exec_list_iterator actual_iter = actual_parameters->iterator();
       exec_list_iterator formal_iter = sig->parameters.iterator();
@@ -154,6 +164,7 @@
 	    _mesa_glsl_error(loc, state,
 			     "parameter `%s' must be a constant expression",
 			     formal->name);
+	    return ir_call::get_error_instruction(ctx);
 	 }
 
 	 if ((formal->mode == ir_var_out)
@@ -183,8 +194,64 @@
 	 }
 
 	 if (formal->type->is_numeric() || formal->type->is_boolean()) {
-	    ir_rvalue *converted = convert_component(actual, formal->type);
-	    actual->replace_with(converted);
+            switch (formal->mode) {
+            case ir_var_const_in:
+            case ir_var_in: {
+               ir_rvalue *converted
+                  = convert_component(actual, formal->type);
+               actual->replace_with(converted);
+               break;
+            }
+            case ir_var_out:
+               if (actual->type != formal->type) {
+                  /* To convert an out parameter, we need to create a
+                   * temporary variable to hold the value before conversion,
+                   * and then perform the conversion after the function call
+                   * returns.
+                   *
+                   * This has the effect of transforming code like this:
+                   *
+                   *   void f(out int x);
+                   *   float value;
+                   *   f(value);
+                   *
+                   * Into IR that's equivalent to this:
+                   *
+                   *   void f(out int x);
+                   *   float value;
+                   *   int out_parameter_conversion;
+                   *   f(out_parameter_conversion);
+                   *   value = float(out_parameter_conversion);
+                   */
+                  ir_variable *tmp =
+                     new(ctx) ir_variable(formal->type,
+                                          "out_parameter_conversion",
+                                          ir_var_temporary);
+                  instructions->push_tail(tmp);
+                  ir_dereference_variable *deref_tmp_1
+                     = new(ctx) ir_dereference_variable(tmp);
+                  ir_dereference_variable *deref_tmp_2
+                     = new(ctx) ir_dereference_variable(tmp);
+                  ir_rvalue *converted_tmp
+                     = convert_component(deref_tmp_1, actual->type);
+                  ir_assignment *assignment
+                     = new(ctx) ir_assignment(actual, converted_tmp);
+                  post_call_conversions.push_tail(assignment);
+                  actual->replace_with(deref_tmp_2);
+               }
+               break;
+            case ir_var_inout:
+               /* Inout parameters should never require conversion, since that
+                * would require an implicit conversion to exist both to and
+                * from the formal parameter type, and there are no
+                * bidirectional implicit conversions.
+                */
+               assert (actual->type == formal->type);
+               break;
+            default:
+               assert (!"Illegal formal parameter mode");
+               break;
+            }
 	 }
 
 	 actual_iter.next();
@@ -194,11 +261,27 @@
       /* Always insert the call in the instruction stream, and return a deref
        * of its return val if it returns a value, since we don't know if
        * the rvalue is going to be assigned to anything or not.
+       *
+       * Also insert any out parameter conversions after the call.
        */
       ir_call *call = new(ctx) ir_call(sig, actual_parameters);
+      ir_dereference_variable *deref;
       if (!sig->return_type->is_void()) {
+         /* If the function call is a constant expression, don't
+          * generate the instructions to call it; just generate an
+          * ir_constant representing the constant value.
+          *
+          * Function calls can only be constant expressions starting
+          * in GLSL 1.20.
+          */
+         if (state->language_version >= 120) {
+            ir_constant *const_val = call->constant_expression_value();
+            if (const_val) {
+               return const_val;
+            }
+         }
+
 	 ir_variable *var;
-	 ir_dereference_variable *deref;
 
 	 var = new(ctx) ir_variable(sig->return_type,
 				    ralloc_asprintf(ctx, "%s_retval",
@@ -209,15 +292,14 @@
 	 deref = new(ctx) ir_dereference_variable(var);
 	 ir_assignment *assign = new(ctx) ir_assignment(deref, call, NULL);
 	 instructions->push_tail(assign);
-	 if (state->language_version >= 120)
-	    var->constant_value = call->constant_expression_value();
 
 	 deref = new(ctx) ir_dereference_variable(var);
-	 return deref;
       } else {
 	 instructions->push_tail(call);
-	 return NULL;
+	 deref = NULL;
       }
+      instructions->append_list(&post_call_conversions);
+      return deref;
    } else {
       char *str = prototype_string(NULL, name, actual_parameters);
 
@@ -440,13 +522,21 @@
       ir_rvalue *ir = (ir_rvalue *) n;
       ir_rvalue *result = ir;
 
-      /* Apply implicit conversions (not the scalar constructor rules!) */
+      /* Apply implicit conversions (not the scalar constructor rules!). See
+       * the spec quote above. */
       if (constructor_type->element_type()->is_float()) {
 	 const glsl_type *desired_type =
 	    glsl_type::get_instance(GLSL_TYPE_FLOAT,
 				    ir->type->vector_elements,
 				    ir->type->matrix_columns);
-	 result = convert_component(ir, desired_type);
+	 if (result->type->can_implicitly_convert_to(desired_type)) {
+	    /* Even though convert_component() implements the constructor
+	     * conversion rules (not the implicit conversion rules), its safe
+	     * to use it here because we already checked that the implicit
+	     * conversion is legal.
+	     */
+	    result = convert_component(ir, desired_type);
+	 }
       }
 
       if (result->type != constructor_type->element_type()) {
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 2312c29..9e7496b 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -66,6 +66,8 @@
 
    state->current_function = NULL;
 
+   state->toplevel_ir = instructions;
+
    /* Section 4.2 of the GLSL 1.20 specification states:
     * "The built-in functions are scoped in a scope outside the global scope
     *  users declare global variables in.  That is, a shader's global scope,
@@ -83,6 +85,10 @@
 
    foreach_list_typed (ast_node, ast, link, & state->translation_unit)
       ast->hir(instructions, state);
+
+   detect_recursion_unlinked(state, instructions);
+
+   state->toplevel_ir = NULL;
 }
 
 
@@ -647,6 +653,16 @@
    return NULL;
 }
 
+static void
+mark_whole_array_access(ir_rvalue *access)
+{
+   ir_dereference_variable *deref = access->as_dereference_variable();
+
+   if (deref && deref->var) {
+      deref->var->max_array_access = deref->type->length - 1;
+   }
+}
+
 ir_rvalue *
 do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
 	      ir_rvalue *lhs, ir_rvalue *rhs, bool is_initializer,
@@ -707,6 +723,7 @@
 						   rhs->type->array_size());
 	 d->type = var->type;
       }
+      mark_whole_array_access(lhs);
    }
 
    /* Most callers of do_assignment (assign, add_assign, pre_inc/dec,
@@ -767,16 +784,6 @@
    return NULL;
 }
 
-static void
-mark_whole_array_access(ir_rvalue *access)
-{
-   ir_dereference_variable *deref = access->as_dereference_variable();
-
-   if (deref) {
-      deref->var->max_array_access = deref->type->length - 1;
-   }
-}
-
 static ir_rvalue *
 do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
 {
@@ -1763,11 +1770,6 @@
       ir_rvalue *const ir = array_size->hir(& dummy_instructions, state);
       YYLTYPE loc = array_size->get_location();
 
-      /* FINISHME: Verify that the grammar forbids side-effects in array
-       * FINISHME: sizes.   i.e., 'vec4 [x = 12] data'
-       */
-      assert(dummy_instructions.is_empty());
-
       if (ir != NULL) {
 	 if (!ir->type->is_integer()) {
 	    _mesa_glsl_error(& loc, state, "array size must be integer type");
@@ -1784,6 +1786,14 @@
 	    } else {
 	       assert(size->type == ir->type);
 	       length = size->value.u[0];
+
+               /* If the array size is const (and we've verified that
+                * it is) then no instructions should have been emitted
+                * when we converted it to HIR.  If they were emitted,
+                * then either the array size isn't const after all, or
+                * we are emitting unnecessary instructions.
+                */
+               assert(dummy_instructions.is_empty());
 	    }
 	 }
       }
@@ -2397,12 +2407,12 @@
 
    decl_type = this->type->specifier->glsl_type(& type_name, state);
    if (this->declarations.is_empty()) {
-      /* The only valid case where the declaration list can be empty is when
-       * the declaration is setting the default precision of a built-in type
-       * (e.g., 'precision highp vec4;').
-       */
-
       if (decl_type != NULL) {
+	 /* Warn if this empty declaration is not for declaring a structure.
+	  */
+	 if (this->type->specifier->structure == NULL) {
+	    _mesa_glsl_warning(&loc, state, "empty declaration");
+	 }
       } else {
 	    _mesa_glsl_error(& loc, state, "incomplete declaration");
       }
@@ -2924,23 +2934,16 @@
 
 
 void
-emit_function(_mesa_glsl_parse_state *state, exec_list *instructions,
-	      ir_function *f)
+emit_function(_mesa_glsl_parse_state *state, ir_function *f)
 {
-   /* Emit the new function header */
-   if (state->current_function == NULL) {
-      instructions->push_tail(f);
-   } else {
-      /* IR invariants disallow function declarations or definitions nested
-       * within other function definitions.  Insert the new ir_function
-       * block in the instruction sequence before the ir_function block
-       * containing the current ir_function_signature.
-       */
-      ir_function *const curr =
-	 const_cast<ir_function *>(state->current_function->function());
-
-      curr->insert_before(f);
-   }
+   /* IR invariants disallow function declarations or definitions
+    * nested within other function definitions.  But there is no
+    * requirement about the relative order of function declarations
+    * and definitions with respect to one another.  So simply insert
+    * the new ir_function block at the end of the toplevel instruction
+    * list.
+    */
+   state->toplevel_ir->push_tail(f);
 }
 
 
@@ -3067,7 +3070,7 @@
 	 return NULL;
       }
 
-      emit_function(state, instructions, f);
+      emit_function(state, f);
    }
 
    /* Verify the return type of main() */
diff --git a/src/glsl/builtins/ir/asin b/src/glsl/builtins/ir/asin
index e230ad6..45d9e67 100644
--- a/src/glsl/builtins/ir/asin
+++ b/src/glsl/builtins/ir/asin
@@ -5,23 +5,26 @@
      ((return (expression float *
 	       (expression float sign (var_ref x))
 	       (expression float -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression float *
 		 (expression float sqrt
 		  (expression float -
 		   (constant float (1.0))
 		   (expression float abs (var_ref x))))
 		 (expression float +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression float *
 		   (expression float abs (var_ref x))
 		   (expression float +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression float *
-		     (constant float (0.0742610))
-		     (expression float abs (var_ref x))))))))))))
+		     (expression float abs (var_ref x))
+                     (expression float +
+                      (constant float (0.086566724))
+                      (expression float *
+                       (expression float abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 
    (signature vec2
      (parameters
@@ -29,23 +32,26 @@
      ((return (expression vec2 *
 	       (expression vec2 sign (var_ref x))
 	       (expression vec2 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec2 *
 		 (expression vec2 sqrt
 		  (expression vec2 -
 		   (constant float (1.0))
 		   (expression vec2 abs (var_ref x))))
 		 (expression vec2 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec2 *
 		   (expression vec2 abs (var_ref x))
 		   (expression vec2 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec2 *
-		     (constant float (0.0742610))
-		     (expression vec2 abs (var_ref x))))))))))))
+		     (expression vec2 abs (var_ref x))
+                     (expression vec2 +
+                      (constant float (0.086566724))
+                      (expression vec2 *
+                       (expression vec2 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 
    (signature vec3
      (parameters
@@ -53,23 +59,26 @@
      ((return (expression vec3 *
 	       (expression vec3 sign (var_ref x))
 	       (expression vec3 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec3 *
 		 (expression vec3 sqrt
 		  (expression vec3 -
 		   (constant float (1.0))
 		   (expression vec3 abs (var_ref x))))
 		 (expression vec3 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec3 *
 		   (expression vec3 abs (var_ref x))
 		   (expression vec3 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec3 *
-		     (constant float (0.0742610))
-		     (expression vec3 abs (var_ref x))))))))))))
+		     (expression vec3 abs (var_ref x))
+                     (expression vec3 +
+                      (constant float (0.086566724))
+                      (expression vec3 *
+                       (expression vec3 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 
    (signature vec4
      (parameters
@@ -77,21 +86,24 @@
      ((return (expression vec4 *
 	       (expression vec4 sign (var_ref x))
 	       (expression vec4 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec4 *
 		 (expression vec4 sqrt
 		  (expression vec4 -
 		   (constant float (1.0))
 		   (expression vec4 abs (var_ref x))))
 		 (expression vec4 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec4 *
 		   (expression vec4 abs (var_ref x))
 		   (expression vec4 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec4 *
-		     (constant float (0.0742610))
-		     (expression vec4 abs (var_ref x))))))))))))
+		     (expression vec4 abs (var_ref x))
+                     (expression vec4 +
+                      (constant float (0.086566724))
+                      (expression vec4 *
+                       (expression vec4 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 ))
diff --git a/src/glsl/builtins/ir/atan b/src/glsl/builtins/ir/atan
index cfecc1f..7b5ea13 100644
--- a/src/glsl/builtins/ir/atan
+++ b/src/glsl/builtins/ir/atan
@@ -54,7 +54,9 @@
     )
     (
       (declare () float r)
-      (if (expression bool > (expression float abs (var_ref x)) (constant float (0.000100))) (
+      (if (expression bool >
+           (expression float abs (var_ref x))
+           (expression float * (constant float (1.0e-8)) (expression float abs (var_ref y)))) (
         (assign (x) (var_ref r) (call atan ((expression float / (var_ref y) (var_ref x)))))
         (if (expression bool < (var_ref x) (constant float (0.000000)) ) (
           (if (expression bool >= (var_ref y) (constant float (0.000000)) )
diff --git a/src/glsl/builtins/ir/radians b/src/glsl/builtins/ir/radians
index 6a0f5d2..a419101 100644
--- a/src/glsl/builtins/ir/radians
+++ b/src/glsl/builtins/ir/radians
@@ -2,20 +2,20 @@
    (signature float
      (parameters
        (declare (in) float arg0))
-     ((return (expression float * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression float * (var_ref arg0) (constant float (0.0174532925))))))
 
    (signature vec2
      (parameters
        (declare (in) vec2 arg0))
-     ((return (expression vec2 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec2 * (var_ref arg0) (constant float (0.0174532925))))))
 
    (signature vec3
      (parameters
        (declare (in) vec3 arg0))
-     ((return (expression vec3 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec3 * (var_ref arg0) (constant float (0.0174532925))))))
 
    (signature vec4
      (parameters
        (declare (in) vec4 arg0))
-     ((return (expression vec4 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec4 * (var_ref arg0) (constant float (0.0174532925))))))
 ))
diff --git a/src/glsl/builtins/profiles/130.frag b/src/glsl/builtins/profiles/130.frag
index 0e3c7ac..c121859 100644
--- a/src/glsl/builtins/profiles/130.frag
+++ b/src/glsl/builtins/profiles/130.frag
@@ -465,7 +465,6 @@
  * 8.7 - Texture Lookup Functions
  */
 
-#if 0
 /* textureSize */
 int   textureSize( sampler1D sampler, int lod);
 int   textureSize(isampler1D sampler, int lod);
@@ -496,7 +495,6 @@
 
 ivec2 textureSize(sampler1DArrayShadow sampler, int lod);
 ivec3 textureSize(sampler2DArrayShadow sampler, int lod);
-#endif
 
 /* texture - no bias */
  vec4 texture( sampler1D sampler, float P);
diff --git a/src/glsl/builtins/profiles/130.vert b/src/glsl/builtins/profiles/130.vert
index f85b27f..ebd9a50 100644
--- a/src/glsl/builtins/profiles/130.vert
+++ b/src/glsl/builtins/profiles/130.vert
@@ -467,7 +467,6 @@
  * 8.7 - Texture Lookup Functions
  */
 
-#if 0
 /* textureSize */
 int   textureSize( sampler1D sampler, int lod);
 int   textureSize(isampler1D sampler, int lod);
@@ -498,7 +497,6 @@
 
 ivec2 textureSize(sampler1DArrayShadow sampler, int lod);
 ivec3 textureSize(sampler2DArrayShadow sampler, int lod);
-#endif
 
 /* texture - no bias */
  vec4 texture( sampler1D sampler, float P);
diff --git a/src/glsl/builtins/tools/texture_builtins.py b/src/glsl/builtins/tools/texture_builtins.py
index a4054ca..7e569bf 100755
--- a/src/glsl/builtins/tools/texture_builtins.py
+++ b/src/glsl/builtins/tools/texture_builtins.py
@@ -44,6 +44,11 @@
         extra_dim += 1
     return extra_dim
 
+def get_txs_dim(sampler_type):
+    if sampler_type.startswith("Cube"):
+        return 2
+    return get_coord_dim(sampler_type)
+
 def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0):
     coord_dim = get_coord_dim(sampler_type)
     extra_dim = get_extra_dim(sampler_type, variant & Proj, unused_fields)
@@ -51,17 +56,20 @@
 
     if variant & Single:
         return_type = "float"
+    elif tex_inst == "txs":
+        return_type = vec_type("i", get_txs_dim(sampler_type))
     else:
         return_type = g + "vec4"
 
     # Print parameters
     print "   (signature", return_type
     print "     (parameters"
-    print "       (declare (in) " + g + "sampler" + sampler_type + " sampler)"
-    print "       (declare (in) " + vec_type("i" if tex_inst == "txf" else "", coord_dim + extra_dim) + " P)",
+    print "       (declare (in) " + g + "sampler" + sampler_type + " sampler)",
+    if tex_inst != "txs":
+        print "\n       (declare (in) " + vec_type("i" if tex_inst == "txf" else "", coord_dim + extra_dim) + " P)",
     if tex_inst == "txl":
         print "\n       (declare (in) float lod)",
-    elif tex_inst == "txf":
+    elif tex_inst == "txf" or tex_inst == "txs":
         print "\n       (declare (in) int lod)",
     elif tex_inst == "txd":
         grad_type = vec_type("", coord_dim)
@@ -75,18 +83,19 @@
 
     print ")\n     ((return (" + tex_inst, return_type, "(var_ref sampler)",
 
-    # Coordinate
-    if extra_dim > 0:
-        print "(swiz " + "xyzw"[:coord_dim] + " (var_ref P))",
-    else:
-        print "(var_ref P)",
+    if tex_inst != "txs":
+        # Coordinate
+        if extra_dim > 0:
+            print "(swiz " + "xyzw"[:coord_dim] + " (var_ref P))",
+        else:
+            print "(var_ref P)",
 
-    if variant & Offset:
-        print "(var_ref offset)",
-    else:
-        print "0",
+        if variant & Offset:
+            print "(var_ref offset)",
+        else:
+            print "0",
 
-    if tex_inst != "txf":
+    if tex_inst != "txf" and tex_inst != "txs":
         # Projective divisor
         if variant & Proj:
             print "(swiz " + "xyzw"[coord_dim + extra_dim-1] + " (var_ref P))",
@@ -104,7 +113,7 @@
     # Bias/explicit LOD/gradient:
     if tex_inst == "txb":
         print "(var_ref bias)",
-    elif tex_inst == "txl" or tex_inst == "txf":
+    elif tex_inst == "txl" or tex_inst == "txf" or tex_inst == "txs":
         print "(var_ref lod)",
     elif tex_inst == "txd":
         print "((var_ref dPdx) (var_ref dPdy))",
@@ -130,6 +139,19 @@
 #
 # Takes a dictionary as an argument.
 def generate_texture_functions(fs):
+    start_function("textureSize")
+    generate_fiu_sigs("txs", "1D")
+    generate_fiu_sigs("txs", "2D")
+    generate_fiu_sigs("txs", "3D")
+    generate_fiu_sigs("txs", "Cube")
+    generate_fiu_sigs("txs", "1DArray")
+    generate_fiu_sigs("txs", "2DArray")
+    generate_sigs("", "txs", "1DShadow")
+    generate_sigs("", "txs", "2DShadow")
+    generate_sigs("", "txs", "1DArrayShadow")
+    generate_sigs("", "txs", "2DArrayShadow")
+    end_function(fs, "textureSize")
+
     start_function("texture")
     generate_fiu_sigs("tex", "1D")
     generate_fiu_sigs("tex", "2D")
diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 0a35e88..9408304 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -1132,8 +1132,10 @@
 	   if (extensions->ARB_shader_texture_lod)
 	      add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1);
 
-	   if (extensions->AMD_conservative_depth)
+	   if (extensions->AMD_conservative_depth) {
 	      add_builtin_define(parser, "GL_AMD_conservative_depth", 1);
+	      add_builtin_define(parser, "GL_ARB_conservative_depth", 1);
+	   }
 	}
 
 	language_version = 110;
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 2c0498e..25d02fb 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -971,13 +971,9 @@
 	fully_specified_type
 	{
 	   void *ctx = state;
-	   if ($1->specifier->type_specifier != ast_struct) {
-	      _mesa_glsl_error(& @1, state, "empty declaration list\n");
-	      YYERROR;
-	   } else {
-	      $$ = new(ctx) ast_declarator_list($1);
-	      $$->set_location(yylloc);
-	   }
+	   /* Empty declaration list is valid. */
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
 	}
 	| fully_specified_type any_identifier
 	{
@@ -1115,7 +1111,7 @@
 	      }
 	   }
 
-	   /* Layout qualifiers for AMD_conservative_depth. */
+	   /* Layout qualifiers for AMD/ARB_conservative_depth. */
 	   if (!got_one && state->AMD_conservative_depth_enable) {
 	      if (strcmp($1, "depth_any") == 0) {
 	         got_one = true;
@@ -1133,7 +1129,7 @@
 	
 	      if (got_one && state->AMD_conservative_depth_warn) {
 	         _mesa_glsl_warning(& @1, state,
-	                            "GL_AMD_conservative_depth "
+	                            "GL_ARB_conservative_depth "
 	                            "layout qualifier `%s' is used\n", $1);
 	      }
 	   }
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index cc78137..8f740e6 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -253,6 +253,7 @@
 static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    /*                                  target availability  API availability */
    /* name                             VS     GS     FS     GL     ES         supported flag */
+   EXT(ARB_conservative_depth,         true,  false, true,  true,  false,     AMD_conservative_depth),
    EXT(ARB_draw_buffers,               false, false, true,  true,  false,     dummy_true),
    EXT(ARB_draw_instanced,             true,  false, false, true,  false,     ARB_draw_instanced),
    EXT(ARB_explicit_attrib_location,   true,  false, true,  true,  false,     ARB_explicit_attrib_location),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 2f4d3cb..dc6911d 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -129,6 +129,12 @@
     */
    class ir_function_signature *current_function;
 
+   /**
+    * During AST to IR conversion, pointer to the toplevel IR
+    * instruction list being generated.
+    */
+   exec_list *toplevel_ir;
+
    /** Have we found a return statement in this function? */
    bool found_return;
 
@@ -174,6 +180,8 @@
    bool ARB_shader_stencil_export_warn;
    bool AMD_conservative_depth_enable;
    bool AMD_conservative_depth_warn;
+   bool ARB_conservative_depth_enable;
+   bool ARB_conservative_depth_warn;
    bool AMD_shader_stencil_export_enable;
    bool AMD_shader_stencil_export_warn;
    bool OES_texture_3D_enable;
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index a5e21bb..c94aec0 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -523,3 +523,19 @@
       return 0;
    }
 }
+
+bool
+glsl_type::can_implicitly_convert_to(const glsl_type *desired) const
+{
+   if (this == desired)
+      return true;
+
+   /* There is no conversion among matrix types. */
+   if (this->matrix_columns > 1 || desired->matrix_columns > 1)
+      return false;
+
+   /* int and uint can be converted to float. */
+   return desired->is_float()
+          && this->is_integer()
+          && this->vector_elements == desired->vector_elements;
+}
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 87f57e7..0486966 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -224,6 +224,41 @@
     */
    unsigned component_slots() const;
 
+   /**
+    * \brief Can this type be implicitly converted to another?
+    *
+    * \return True if the types are identical or if this type can be converted
+    *         to \c desired according to Section 4.1.10 of the GLSL spec.
+    *
+    * \verbatim
+    * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10
+    * Implicit Conversions:
+    *
+    *     In some situations, an expression and its type will be implicitly
+    *     converted to a different type. The following table shows all allowed
+    *     implicit conversions:
+    *
+    *     Type of expression | Can be implicitly converted to
+    *     --------------------------------------------------
+    *     int                  float
+    *     uint
+    *
+    *     ivec2                vec2
+    *     uvec2
+    *
+    *     ivec3                vec3
+    *     uvec3
+    *
+    *     ivec4                vec4
+    *     uvec4
+    *
+    *     There are no implicit array or structure conversions. For example,
+    *     an array of int cannot be implicitly converted to an array of float.
+    *     There are no implicit conversions between signed and unsigned
+    *     integers.
+    * \endverbatim
+    */
+   bool can_implicitly_convert_to(const glsl_type *desired) const;
 
    /**
     * Query whether or not a type is a scalar (non-vector and non-matrix).
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 827fe8e..41ed4f1 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1096,7 +1096,7 @@
 }
 
 bool
-ir_dereference::is_lvalue()
+ir_dereference::is_lvalue() const
 {
    ir_variable *var = this->variable_referenced();
 
@@ -1121,7 +1121,7 @@
 }
 
 
-const char *tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf" };
+const char *tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txs" };
 
 const char *ir_texture::opcode_string()
 {
@@ -1150,11 +1150,15 @@
    this->sampler = sampler;
    this->type = type;
 
-   assert(sampler->type->sampler_type == (int) type->base_type);
-   if (sampler->type->sampler_shadow)
-      assert(type->vector_elements == 4 || type->vector_elements == 1);
-   else
-      assert(type->vector_elements == 4);
+   if (this->op == ir_txs) {
+      assert(type->base_type == GLSL_TYPE_INT);
+   } else {
+      assert(sampler->type->sampler_type == (int) type->base_type);
+      if (sampler->type->sampler_shadow)
+	 assert(type->vector_elements == 4 || type->vector_elements == 1);
+      else
+	 assert(type->vector_elements == 4);
+   }
 }
 
 
@@ -1310,7 +1314,7 @@
 #undef I
 
 ir_variable *
-ir_swizzle::variable_referenced()
+ir_swizzle::variable_referenced() const
 {
    return this->val->variable_referenced();
 }
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 80ad3dd..2e899f3 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -144,7 +144,7 @@
 
    ir_rvalue *as_rvalue_to_saturate();
 
-   virtual bool is_lvalue()
+   virtual bool is_lvalue() const
    {
       return false;
    }
@@ -152,7 +152,7 @@
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced()
+   virtual ir_variable *variable_referenced() const
    {
       return NULL;
    }
@@ -236,7 +236,7 @@
 /**
  * \brief Layout qualifiers for gl_FragDepth.
  *
- * The AMD_conservative_depth extension allows gl_FragDepth to be redeclared
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
  * with a layout qualifier.
  */
 enum ir_depth_layout {
@@ -1212,7 +1212,8 @@
    ir_txb,		/**< Texture look-up with LOD bias */
    ir_txl,		/**< Texture look-up with explicit LOD */
    ir_txd,		/**< Texture look-up with partial derivatvies */
-   ir_txf		/**< Texel fetch with explicit LOD */
+   ir_txf,		/**< Texel fetch with explicit LOD */
+   ir_txs		/**< Texture size */
 };
 
 
@@ -1233,6 +1234,7 @@
  * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>)
  * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy))
  * (txf <type> <sampler> <coordinate> 0       <lod>)
+ * (txs <type> <sampler> <lod>)
  */
 class ir_texture : public ir_rvalue {
 public:
@@ -1355,7 +1357,7 @@
 
    virtual ir_visitor_status accept(ir_hierarchical_visitor *);
 
-   bool is_lvalue()
+   bool is_lvalue() const
    {
       return val->is_lvalue() && !mask.has_duplicates;
    }
@@ -1363,7 +1365,7 @@
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced();
+   virtual ir_variable *variable_referenced() const;
 
    ir_rvalue *val;
    ir_swizzle_mask mask;
@@ -1387,12 +1389,12 @@
       return this;
    }
 
-   bool is_lvalue();
+   bool is_lvalue() const;
 
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced() = 0;
+   virtual ir_variable *variable_referenced() const = 0;
 };
 
 
@@ -1413,7 +1415,7 @@
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced()
+   virtual ir_variable *variable_referenced() const
    {
       return this->var;
    }
@@ -1462,7 +1464,7 @@
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced()
+   virtual ir_variable *variable_referenced() const
    {
       return this->array->variable_referenced();
    }
@@ -1496,7 +1498,7 @@
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced()
+   virtual ir_variable *variable_referenced() const
    {
       return this->record->variable_referenced();
    }
@@ -1635,6 +1637,32 @@
  */
 void validate_ir_tree(exec_list *instructions);
 
+struct _mesa_glsl_parse_state;
+struct gl_shader_program;
+
+/**
+ * Detect whether an unlinked shader contains static recursion
+ *
+ * If the list of instructions is determined to contain static recursion,
+ * \c _mesa_glsl_error will be called to emit error messages for each function
+ * that is in the recursion cycle.
+ */
+void
+detect_recursion_unlinked(struct _mesa_glsl_parse_state *state,
+			  exec_list *instructions);
+
+/**
+ * Detect whether a linked shader contains static recursion
+ *
+ * If the list of instructions is determined to contain static recursion,
+ * \c link_error_printf will be called to emit error messages for each function
+ * that is in the recursion cycle.  In addition,
+ * \c gl_shader_program::LinkStatus will be set to false.
+ */
+void
+detect_recursion_linked(struct gl_shader_program *prog,
+			exec_list *instructions);
+
 /**
  * Make a clone of each IR instruction in a list
  *
@@ -1669,4 +1697,8 @@
 extern void
 do_set_program_inouts(exec_list *instructions, struct gl_program *prog);
 
+extern char *
+prototype_string(const glsl_type *return_type, const char *name,
+		 exec_list *parameters);
+
 #endif /* IR_H */
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 069bb85..f075736 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -222,7 +222,8 @@
    new_tex->type = this->type;
 
    new_tex->sampler = this->sampler->clone(mem_ctx, ht);
-   new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
+   if (this->coordinate)
+      new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
    if (this->projector)
       new_tex->projector = this->projector->clone(mem_ctx, ht);
    if (this->shadow_comparitor) {
@@ -240,6 +241,7 @@
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht);
       break;
    case ir_txd:
diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
index 0f2f1a0..51d32b4 100644
--- a/src/glsl/ir_function.cpp
+++ b/src/glsl/ir_function.cpp
@@ -24,73 +24,28 @@
 #include "glsl_types.h"
 #include "ir.h"
 
-int
-type_compare(const glsl_type *a, const glsl_type *b)
-{
-   /* If the types are the same, they trivially match.
-    */
-   if (a == b)
-      return 0;
+typedef enum {
+   PARAMETER_LIST_NO_MATCH,
+   PARAMETER_LIST_EXACT_MATCH,
+   PARAMETER_LIST_INEXACT_MATCH /*< Match requires implicit conversion. */
+} parameter_list_match_t;
 
-   switch (a->base_type) {
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_BOOL:
-      /* There is no implicit conversion to or from integer types or bool.
-       */
-      if ((a->is_integer() != b->is_integer())
-	  || (a->is_boolean() != b->is_boolean()))
-	 return -1;
-
-      /* FALLTHROUGH */
-
-   case GLSL_TYPE_FLOAT:
-      if ((a->vector_elements != b->vector_elements)
-	  || (a->matrix_columns != b->matrix_columns))
-	 return -1;
-
-      return 1;
-
-   case GLSL_TYPE_SAMPLER:
-   case GLSL_TYPE_STRUCT:
-      /* Samplers and structures must match exactly.
-       */
-      return -1;
-
-   case GLSL_TYPE_ARRAY:
-      if ((b->base_type != GLSL_TYPE_ARRAY)
-	  || (a->length != b->length))
-	 return -1;
-
-      /* From GLSL 1.50 spec, page 27 (page 33 of the PDF):
-       *    "There are no implicit array or structure conversions."
-       *
-       * If the comparison of the array element types detects that a conversion
-       * would be required, the array types do not match.
-       */
-      return (type_compare(a->fields.array, b->fields.array) == 0) ? 0 : -1;
-
-   case GLSL_TYPE_VOID:
-   case GLSL_TYPE_ERROR:
-   default:
-      /* These are all error conditions.  It is invalid for a parameter to
-       * a function to be declared as error, void, or a function.
-       */
-      return -1;
-   }
-
-   /* This point should be unreachable.
-    */
-   assert(0);
-}
-
-
-static int
+/**
+ * \brief Check if two parameter lists match.
+ *
+ * \param list_a Parameters of the function definition.
+ * \param list_b Actual parameters passed to the function.
+ * \see matching_signature()
+ */
+static parameter_list_match_t
 parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 {
    const exec_node *node_a = list_a->head;
    const exec_node *node_b = list_b->head;
-   int total_score = 0;
+
+   /* This is set to true if there is an inexact match requiring an implicit
+    * conversion. */
+   bool inexact_match = false;
 
    for (/* empty */
 	; !node_a->is_tail_sentinel()
@@ -100,18 +55,17 @@
        * do not match.
        */
       if (node_b->is_tail_sentinel())
-	 return -1;
+	 return PARAMETER_LIST_NO_MATCH;
 
 
       const ir_variable *const param = (ir_variable *) node_a;
       const ir_instruction *const actual = (ir_instruction *) node_b;
 
-      /* Determine whether or not the types match.  If the types are an
-       * exact match, the match score is zero.  If the types don't match
-       * but the actual parameter can be coerced to the type of the declared
-       * parameter, the match score is one.
-       */
-      int score;
+      if (param->type == actual->type)
+	 continue;
+
+      /* Try to find an implicit conversion from actual to param. */
+      inexact_match = true;
       switch ((enum ir_variable_mode)(param->mode)) {
       case ir_var_auto:
       case ir_var_uniform:
@@ -121,15 +75,17 @@
 	  * as uniform.
 	  */
 	 assert(0);
-	 return -1;
+	 return PARAMETER_LIST_NO_MATCH;
 
       case ir_var_const_in:
       case ir_var_in:
-	 score = type_compare(param->type, actual->type);
+	 if (!actual->type->can_implicitly_convert_to(param->type))
+	    return PARAMETER_LIST_NO_MATCH;
 	 break;
 
       case ir_var_out:
-	 score = type_compare(actual->type, param->type);
+	 if (!param->type->can_implicitly_convert_to(actual->type))
+	    return PARAMETER_LIST_NO_MATCH;
 	 break;
 
       case ir_var_inout:
@@ -137,17 +93,12 @@
 	  * there is int -> float but no float -> int), inout parameters must
 	  * be exact matches.
 	  */
-	 score = (type_compare(actual->type, param->type) == 0) ? 0 : -1;
-	 break;
+	 return PARAMETER_LIST_NO_MATCH;
 
       default:
 	 assert(false);
+	 return PARAMETER_LIST_NO_MATCH;
       }
-
-      if (score < 0)
-	 return -1;
-
-      total_score += score;
    }
 
    /* If all of the parameters from the other parameter list have been
@@ -155,9 +106,12 @@
     * match.
     */
    if (!node_b->is_tail_sentinel())
-      return -1;
+      return PARAMETER_LIST_NO_MATCH;
 
-   return total_score;
+   if (inexact_match)
+      return PARAMETER_LIST_INEXACT_MATCH;
+   else
+      return PARAMETER_LIST_EXACT_MATCH;
 }
 
 
@@ -181,18 +135,20 @@
       ir_function_signature *const sig =
 	 (ir_function_signature *) iter.get();
 
-      const int score = parameter_lists_match(& sig->parameters,
-					      actual_parameters);
-
-      /* If we found an exact match, simply return it */
-      if (score == 0)
+      switch (parameter_lists_match(& sig->parameters, actual_parameters)) {
+      case PARAMETER_LIST_EXACT_MATCH:
 	 return sig;
-
-      if (score > 0) {
+      case PARAMETER_LIST_INEXACT_MATCH:
 	 if (match == NULL)
 	    match = sig;
 	 else
 	    multiple_inexact_matches = true;
+	 continue;
+      case PARAMETER_LIST_NO_MATCH:
+	 continue;
+      default:
+	 assert(false);
+	 return NULL;
       }
    }
 
diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp
new file mode 100644
index 0000000..8f805bf
--- /dev/null
+++ b/src/glsl/ir_function_detect_recursion.cpp
@@ -0,0 +1,370 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_function_detect_recursion.cpp
+ * Determine whether a shader contains static recursion.
+ *
+ * Consider the (possibly disjoint) graph of function calls in a shader.  If a
+ * program contains recursion, this graph will contain a cycle.  If a function
+ * is part of a cycle, it will have a caller and it will have a callee (it
+ * calls another function).
+ *
+ * To detect recursion, the function call graph is constructed.  The graph is
+ * repeatedly reduced by removing any function that either has no callees
+ * (leaf functions) or has no caller.  Eventually the only functions that
+ * remain will be the functions in the cycles.
+ *
+ * The GLSL spec is a bit wishy-washy about recursion.
+ *
+ * From page 39 (page 45 of the PDF) of the GLSL 1.10 spec:
+ *
+ *     "Behavior is undefined if recursion is used. Recursion means having any
+ *     function appearing more than once at any one time in the run-time stack
+ *     of function calls. That is, a function may not call itself either
+ *     directly or indirectly. Compilers may give diagnostic messages when
+ *     this is detectable at compile time, but not all such cases can be
+ *     detected at compile time."
+ *
+ * From page 79 (page 85 of the PDF):
+ *
+ *     "22) Should recursion be supported?
+ *
+ *      DISCUSSION: Probably not necessary, but another example of limiting
+ *      the language based on how it would directly map to hardware. One
+ *      thought is that recursion would benefit ray tracing shaders. On the
+ *      other hand, many recursion operations can also be implemented with the
+ *      user managing the recursion through arrays. RenderMan doesn't support
+ *      recursion. This could be added at a later date, if it proved to be
+ *      necessary.
+ *
+ *      RESOLVED on September 10, 2002: Implementations are not required to
+ *      support recursion.
+ *
+ *      CLOSED on September 10, 2002."
+ *
+ * From page 79 (page 85 of the PDF):
+ *
+ *     "56) Is it an error for an implementation to support recursion if the
+ *     specification says recursion is not supported?
+ *
+ *     ADDED on September 10, 2002.
+ *
+ *     DISCUSSION: This issues is related to Issue (22). If we say that
+ *     recursion (or some other piece of functionality) is not supported, is
+ *     it an error for an implementation to support it? Perhaps the
+ *     specification should remain silent on these kind of things so that they
+ *     could be gracefully added later as an extension or as part of the
+ *     standard.
+ *
+ *     RESOLUTION: Languages, in general, have programs that are not
+ *     well-formed in ways a compiler cannot detect. Portability is only
+ *     ensured for well-formed programs. Detecting recursion is an example of
+ *     this. The language will say a well-formed program may not recurse, but
+ *     compilers are not forced to detect that recursion may happen.
+ *
+ *     CLOSED: November 29, 2002."
+ *
+ * In GLSL 1.10 the behavior of recursion is undefined.  Compilers don't have
+ * to reject shaders (at compile-time or link-time) that contain recursion.
+ * Instead they could work, or crash, or kill a kitten.
+ *
+ * From page 44 (page 50 of the PDF) of the GLSL 1.20 spec:
+ *
+ *     "Recursion is not allowed, not even statically. Static recursion is
+ *     present if the static function call graph of the program contains
+ *     cycles."
+ *
+ * This langauge clears things up a bit, but it still leaves a lot of
+ * questions unanswered.
+ *
+ *     - Is the error generated at compile-time or link-time?
+ *
+ *     - Is it an error to have a recursive function that is never statically
+ *       called by main or any function called directly or indirectly by main?
+ *       Technically speaking, such a function is not in the "static function
+ *       call graph of the program" at all.
+ *
+ * \bug
+ * If a shader has multiple cycles, this algorithm may erroneously complain
+ * about functions that aren't in any cycle, but are in the part of the call
+ * tree that connects them.  For example, if the call graph consists of a
+ * cycle between A and B, and a cycle between D and E, and B also calls C
+ * which calls D, then this algorithm will report C as a function which "has
+ * static recursion" even though it is not part of any cycle.
+ *
+ * A better algorithm for cycle detection that doesn't have this drawback can
+ * be found here:
+ *
+ * http://en.wikipedia.org/wiki/Tarjan%E2%80%99s_strongly_connected_components_algorithm
+ *
+ * \author Ian Romanick <ian.d.romanick@intel.com>
+ */
+#include "main/core.h"
+#include "ir.h"
+#include "glsl_parser_extras.h"
+#include "linker.h"
+#include "program/hash_table.h"
+#include "program.h"
+
+struct call_node : public exec_node {
+   class function *func;
+};
+
+class function {
+public:
+   function(ir_function_signature *sig)
+      : sig(sig)
+   {
+      /* empty */
+   }
+
+
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   /* If the user *does* call delete, that's OK, we will just
+    * ralloc_free in that case. */
+   static void operator delete(void *node)
+   {
+      ralloc_free(node);
+   }
+
+   ir_function_signature *sig;
+
+   /** List of functions called by this function. */
+   exec_list callees;
+
+   /** List of functions that call this function. */
+   exec_list callers;
+};
+
+class has_recursion_visitor : public ir_hierarchical_visitor {
+public:
+   has_recursion_visitor()
+      : current(NULL)
+   {
+      this->mem_ctx = ralloc_context(NULL);
+      this->function_hash = hash_table_ctor(0, hash_table_pointer_hash,
+					    hash_table_pointer_compare);
+   }
+
+   ~has_recursion_visitor()
+   {
+      hash_table_dtor(this->function_hash);
+      ralloc_free(this->mem_ctx);
+   }
+
+   function *get_function(ir_function_signature *sig)
+   {
+      function *f = (function *) hash_table_find(this->function_hash, sig);
+      if (f == NULL) {
+	 f = new(mem_ctx) function(sig);
+	 hash_table_insert(this->function_hash, f, sig);
+      }
+
+      return f;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_function_signature *sig)
+   {
+      this->current = this->get_function(sig);
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_leave(ir_function_signature *sig)
+   {
+      (void) sig;
+      this->current = NULL;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_call *call)
+   {
+      /* At global scope this->current will be NULL.  Since there is no way to
+       * call global scope, it can never be part of a cycle.  Don't bother
+       * adding calls from global scope to the graph.
+       */
+      if (this->current == NULL)
+	 return visit_continue;
+
+      function *const target = this->get_function(call->get_callee());
+
+      /* Create a link from the caller to the callee.
+       */
+      call_node *node = new(mem_ctx) call_node;
+      node->func = target;
+      this->current->callees.push_tail(node);
+
+      /* Create a link from the callee to the caller.
+       */
+      node = new(mem_ctx) call_node;
+      node->func = this->current;
+      target->callers.push_tail(node);
+      return visit_continue;
+   }
+
+   function *current;
+   struct hash_table *function_hash;
+   void *mem_ctx;
+   bool progress;
+};
+
+static void
+destroy_links(exec_list *list, function *f)
+{
+   foreach_list_safe(node, list) {
+      struct call_node *n = (struct call_node *) node;
+
+      /* If this is the right function, remove it.  Note that the loop cannot
+       * terminate now.  There can be multiple links to a function if it is
+       * either called multiple times or calls multiple times.
+       */
+      if (n->func == f)
+	 n->remove();
+   }
+}
+
+
+/**
+ * Remove a function if it has either no in or no out links
+ */
+static void
+remove_unlinked_functions(const void *key, void *data, void *closure)
+{
+   has_recursion_visitor *visitor = (has_recursion_visitor *) closure;
+   function *f = (function *) data;
+
+   if (f->callers.is_empty() || f->callees.is_empty()) {
+      while (!f->callers.is_empty()) {
+	 struct call_node *n = (struct call_node *) f->callers.pop_head();
+	 destroy_links(& n->func->callees, f);
+      }
+
+      while (!f->callees.is_empty()) {
+	 struct call_node *n = (struct call_node *) f->callees.pop_head();
+	 destroy_links(& n->func->callers, f);
+      }
+
+      hash_table_remove(visitor->function_hash, key);
+      visitor->progress = true;
+   }
+}
+
+
+static void
+emit_errors_unlinked(const void *key, void *data, void *closure)
+{
+   struct _mesa_glsl_parse_state *state =
+      (struct _mesa_glsl_parse_state *) closure;
+   function *f = (function *) data;
+   YYLTYPE loc;
+
+   char *proto = prototype_string(f->sig->return_type,
+				  f->sig->function_name(),
+				  &f->sig->parameters);
+
+   memset(&loc, 0, sizeof(loc));
+   _mesa_glsl_error(&loc, state,
+		    "function `%s' has static recursion.",
+		    proto);
+   ralloc_free(proto);
+}
+
+
+static void
+emit_errors_linked(const void *key, void *data, void *closure)
+{
+   struct gl_shader_program *prog =
+      (struct gl_shader_program *) closure;
+   function *f = (function *) data;
+
+   char *proto = prototype_string(f->sig->return_type,
+				  f->sig->function_name(),
+				  &f->sig->parameters);
+
+   linker_error(prog, "function `%s' has static recursion.\n", proto);
+   ralloc_free(proto);
+   prog->LinkStatus = false;
+}
+
+
+void
+detect_recursion_unlinked(struct _mesa_glsl_parse_state *state,
+			  exec_list *instructions)
+{
+   has_recursion_visitor v;
+
+   /* Collect all of the information about which functions call which other
+    * functions.
+    */
+   v.run(instructions);
+
+   /* Remove from the set all of the functions that either have no caller or
+    * call no other functions.  Repeat until no functions are removed.
+    */
+   do {
+      v.progress = false;
+      hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v);
+   } while (v.progress);
+
+
+   /* At this point any functions still in the hash must be part of a cycle.
+    */
+   hash_table_call_foreach(v.function_hash, emit_errors_unlinked, state);
+}
+
+
+void
+detect_recursion_linked(struct gl_shader_program *prog,
+			exec_list *instructions)
+{
+   has_recursion_visitor v;
+
+   /* Collect all of the information about which functions call which other
+    * functions.
+    */
+   v.run(instructions);
+
+   /* Remove from the set all of the functions that either have no caller or
+    * call no other functions.  Repeat until no functions are removed.
+    */
+   do {
+      v.progress = false;
+      hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v);
+   } while (v.progress);
+
+
+   /* At this point any functions still in the hash must be part of a cycle.
+    */
+   hash_table_call_foreach(v.function_hash, emit_errors_linked, prog);
+}
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index 4a607dc..d33fc85 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -171,9 +171,11 @@
    if (s != visit_continue)
       return (s == visit_continue_with_parent) ? visit_continue : s;
 
-   s = this->coordinate->accept(v);
-   if (s != visit_continue)
-      return (s == visit_continue_with_parent) ? visit_continue : s;
+   if (this->coordinate) {
+      s = this->coordinate->accept(v);
+      if (s != visit_continue)
+	 return (s == visit_continue_with_parent) ? visit_continue : s;
+   }
 
    if (this->projector) {
       s = this->projector->accept(v);
@@ -203,6 +205,7 @@
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       s = this->lod_info.lod->accept(v);
       if (s != visit_continue)
 	 return (s == visit_continue_with_parent) ? visit_continue : s;
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 59a0407..f7808bd 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -69,3 +69,7 @@
     bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
 bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool optimize_redundant_jumps(exec_list *instructions);
+
+ir_rvalue *
+compare_index_block(exec_list *instructions, ir_variable *index,
+		    unsigned base, unsigned components, void *mem_ctx);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 518910b..ea78582 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -244,19 +244,21 @@
    ir->sampler->accept(this);
    printf(" ");
 
-   ir->coordinate->accept(this);
+   if (ir->op != ir_txs) {
+      ir->coordinate->accept(this);
 
-   printf(" ");
+      printf(" ");
 
-   if (ir->offset != NULL) {
-      ir->offset->accept(this);
-   } else {
-      printf("0");
+      if (ir->offset != NULL) {
+	 ir->offset->accept(this);
+      } else {
+	 printf("0");
+      }
+
+      printf(" ");
    }
 
-   printf(" ");
-
-   if (ir->op != ir_txf) {
+   if (ir->op != ir_txf && ir->op != ir_txs) {
       if (ir->projector)
 	 ir->projector->accept(this);
       else
@@ -280,6 +282,7 @@
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       ir->lod_info.lod->accept(this);
       break;
    case ir_txd:
diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp
index f3a6217..22009ee 100644
--- a/src/glsl/ir_reader.cpp
+++ b/src/glsl/ir_reader.cpp
@@ -885,6 +885,8 @@
       { "tex", s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow };
    s_pattern txf_pattern[] =
       { "txf", s_type, s_sampler, s_coord, s_offset, s_lod };
+   s_pattern txs_pattern[] =
+      { "txs", s_type, s_sampler, s_lod };
    s_pattern other_pattern[] =
       { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod };
 
@@ -892,6 +894,8 @@
       op = ir_tex;
    } else if (MATCH(expr, txf_pattern)) {
       op = ir_txf;
+   } else if (MATCH(expr, txs_pattern)) {
+      op = ir_txs;
    } else if (MATCH(expr, other_pattern)) {
       op = ir_texture::get_opcode(tag->value());
       if (op == -1)
@@ -920,25 +924,27 @@
    }
    tex->set_sampler(sampler, type);
 
-   // Read coordinate (any rvalue)
-   tex->coordinate = read_rvalue(s_coord);
-   if (tex->coordinate == NULL) {
-      ir_read_error(NULL, "when reading coordinate in (%s ...)",
-		    tex->opcode_string());
-      return NULL;
-   }
-
-   // Read texel offset - either 0 or an rvalue.
-   s_int *si_offset = SX_AS_INT(s_offset);
-   if (si_offset == NULL || si_offset->value() != 0) {
-      tex->offset = read_rvalue(s_offset);
-      if (tex->offset == NULL) {
-	 ir_read_error(s_offset, "expected 0 or an expression");
+   if (op != ir_txs) {
+      // Read coordinate (any rvalue)
+      tex->coordinate = read_rvalue(s_coord);
+      if (tex->coordinate == NULL) {
+	 ir_read_error(NULL, "when reading coordinate in (%s ...)",
+		       tex->opcode_string());
 	 return NULL;
       }
+
+      // Read texel offset - either 0 or an rvalue.
+      s_int *si_offset = SX_AS_INT(s_offset);
+      if (si_offset == NULL || si_offset->value() != 0) {
+	 tex->offset = read_rvalue(s_offset);
+	 if (tex->offset == NULL) {
+	    ir_read_error(s_offset, "expected 0 or an expression");
+	    return NULL;
+	 }
+      }
    }
 
-   if (op != ir_txf) {
+   if (op != ir_txf && op != ir_txs) {
       s_int *proj_as_int = SX_AS_INT(s_proj);
       if (proj_as_int && proj_as_int->value() == 1) {
 	 tex->projector = NULL;
@@ -973,6 +979,7 @@
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       tex->lod_info.lod = read_rvalue(s_lod);
       if (tex->lod_info.lod == NULL) {
 	 ir_read_error(NULL, "when reading LOD in (%s ...)",
diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp
index ed6c7cb..193bcd2 100644
--- a/src/glsl/ir_rvalue_visitor.cpp
+++ b/src/glsl/ir_rvalue_visitor.cpp
@@ -63,6 +63,7 @@
       break;
    case ir_txf:
    case ir_txl:
+   case ir_txs:
       handle_rvalue(&ir->lod_info.lod);
       break;
    case ir_txd:
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index f3fceb2..2d1c609 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -59,7 +59,8 @@
 
    virtual ir_visitor_status visit(ir_variable *v);
    virtual ir_visitor_status visit(ir_dereference_variable *ir);
-   virtual ir_visitor_status visit(ir_if *ir);
+
+   virtual ir_visitor_status visit_enter(ir_if *ir);
 
    virtual ir_visitor_status visit_leave(ir_loop *ir);
    virtual ir_visitor_status visit_enter(ir_function *ir);
@@ -102,7 +103,7 @@
 }
 
 ir_visitor_status
-ir_validate::visit(ir_if *ir)
+ir_validate::visit_enter(ir_if *ir)
 {
    if (ir->condition->type != glsl_type::bool_type) {
       printf("ir_if condition %s type instead of bool.\n",
@@ -541,7 +542,43 @@
       abort();
    }
 
+   const exec_node *formal_param_node = callee->parameters.head;
+   const exec_node *actual_param_node = ir->actual_parameters.head;
+   while (true) {
+      if (formal_param_node->is_tail_sentinel()
+          != actual_param_node->is_tail_sentinel()) {
+         printf("ir_call has the wrong number of parameters:\n");
+         goto dump_ir;
+      }
+      if (formal_param_node->is_tail_sentinel()) {
+         break;
+      }
+      const ir_variable *formal_param
+         = (const ir_variable *) formal_param_node;
+      const ir_rvalue *actual_param
+         = (const ir_rvalue *) actual_param_node;
+      if (formal_param->type != actual_param->type) {
+         printf("ir_call parameter type mismatch:\n");
+         goto dump_ir;
+      }
+      if (formal_param->mode == ir_var_out
+          || formal_param->mode == ir_var_inout) {
+         if (!actual_param->is_lvalue()) {
+            printf("ir_call out/inout parameters must be lvalues:\n");
+            goto dump_ir;
+         }
+      }
+      formal_param_node = formal_param_node->next;
+      actual_param_node = actual_param_node->next;
+   }
+
    return visit_continue;
+
+dump_ir:
+   ir->print();
+   printf("callee:\n");
+   callee->print();
+   abort();
 }
 
 void
diff --git a/src/glsl/link_functions.cpp b/src/glsl/link_functions.cpp
index 7ba760d..acee327 100644
--- a/src/glsl/link_functions.cpp
+++ b/src/glsl/link_functions.cpp
@@ -91,8 +91,8 @@
       if (sig == NULL) {
 	 /* FINISHME: Log the full signature of unresolved function.
 	  */
-	 linker_error_printf(this->prog, "unresolved reference to function "
-			     "`%s'\n", name);
+	 linker_error(this->prog, "unresolved reference to function `%s'\n",
+		      name);
 	 this->success = false;
 	 return visit_stop;
       }
@@ -104,10 +104,12 @@
       if (f == NULL) {
 	 f = new(linked) ir_function(name);
 
-	 /* Add the new function to the linked IR.
+	 /* Add the new function to the linked IR.  Put it at the end
+          * so that it comes after any global variable declarations
+          * that it refers to.
 	  */
 	 linked->symbols->add_function(f);
-	 linked->ir->push_head(f);
+	 linked->ir->push_tail(f);
       }
 
       ir_function_signature *linked_sig =
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 34b6483..ba81c59 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -164,7 +164,7 @@
 
 
 void
-linker_error_printf(gl_shader_program *prog, const char *fmt, ...)
+linker_error(gl_shader_program *prog, const char *fmt, ...)
 {
    va_list ap;
 
@@ -172,6 +172,21 @@
    va_start(ap, fmt);
    ralloc_vasprintf_append(&prog->InfoLog, fmt, ap);
    va_end(ap);
+
+   prog->LinkStatus = false;
+}
+
+
+void
+linker_warning(gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list ap;
+
+   ralloc_strcat(&prog->InfoLog, "error: ");
+   va_start(ap, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, ap);
+   va_end(ap);
+
 }
 
 
@@ -243,8 +258,7 @@
    find_assignment_visitor find("gl_Position");
    find.run(shader->ir);
    if (!find.variable_found()) {
-      linker_error_printf(prog,
-			  "vertex shader does not write to `gl_Position'\n");
+      linker_error(prog, "vertex shader does not write to `gl_Position'\n");
       return false;
    }
 
@@ -271,8 +285,8 @@
    frag_data.run(shader->ir);
 
    if (frag_color.variable_found() && frag_data.variable_found()) {
-      linker_error_printf(prog,  "fragment shader writes to both "
-			  "`gl_FragColor' and `gl_FragData'\n");
+      linker_error(prog,  "fragment shader writes to both "
+		   "`gl_FragColor' and `gl_FragData'\n");
       return false;
    }
 
@@ -357,11 +371,11 @@
 		     existing->type = var->type;
 		  }
 	       } else {
-		  linker_error_printf(prog, "%s `%s' declared as type "
-				      "`%s' and type `%s'\n",
-				      mode_string(var),
-				      var->name, var->type->name,
-				      existing->type->name);
+		  linker_error(prog, "%s `%s' declared as type "
+			       "`%s' and type `%s'\n",
+			       mode_string(var),
+			       var->name, var->type->name,
+			       existing->type->name);
 		  return false;
 	       }
 	    }
@@ -369,9 +383,9 @@
 	    if (var->explicit_location) {
 	       if (existing->explicit_location
 		   && (var->location != existing->location)) {
-		     linker_error_printf(prog, "explicit locations for %s "
-					 "`%s' have differing values\n",
-					 mode_string(var), var->name);
+		     linker_error(prog, "explicit locations for %s "
+				  "`%s' have differing values\n",
+				  mode_string(var), var->name);
 		     return false;
 	       }
 
@@ -381,7 +395,7 @@
 
         /* Validate layout qualifiers for gl_FragDepth.
          *
-         * From the AMD_conservative_depth spec:
+         * From the AMD/ARB_conservative_depth specs:
          *    "If gl_FragDepth is redeclared in any fragment shader in
          *    a program, it must be redeclared in all fragment shaders in that
          *    program that have static assignments to gl_FragDepth. All
@@ -392,12 +406,12 @@
            bool layout_declared = var->depth_layout != ir_depth_layout_none;
            bool layout_differs = var->depth_layout != existing->depth_layout;
            if (layout_declared && layout_differs) {
-              linker_error_printf(prog,
+              linker_error(prog,
                  "All redeclarations of gl_FragDepth in all fragment shaders "
                  "in a single program must have the same set of qualifiers.");
            }
            if (var->used && layout_differs) {
-              linker_error_printf(prog,
+              linker_error(prog,
                     "If gl_FragDepth is redeclared with a layout qualifier in"
                     "any fragment shader, it must be redeclared with the same"
                     "layout qualifier in all fragment shaders that have"
@@ -410,9 +424,9 @@
 	    if (var->constant_value != NULL) {
 	       if (existing->constant_value != NULL) {
 		  if (!var->constant_value->has_value(existing->constant_value)) {
-		     linker_error_printf(prog, "initializers for %s "
-					 "`%s' have differing values\n",
-					 mode_string(var), var->name);
+		     linker_error(prog, "initializers for %s "
+				  "`%s' have differing values\n",
+				  mode_string(var), var->name);
 		     return false;
 		  }
 	       } else
@@ -433,15 +447,15 @@
 	    }
 
 	    if (existing->invariant != var->invariant) {
-	       linker_error_printf(prog, "declarations for %s `%s' have "
-	                           "mismatching invariant qualifiers\n",
-	                           mode_string(var), var->name);
+	       linker_error(prog, "declarations for %s `%s' have "
+			    "mismatching invariant qualifiers\n",
+			    mode_string(var), var->name);
 	       return false;
 	    }
             if (existing->centroid != var->centroid) {
-               linker_error_printf(prog, "declarations for %s `%s' have "
-                                   "mismatching centroid qualifiers\n",
-                                   mode_string(var), var->name);
+               linker_error(prog, "declarations for %s `%s' have "
+			    "mismatching centroid qualifiers\n",
+			    mode_string(var), var->name);
                return false;
             }
 	 } else
@@ -529,13 +543,12 @@
 	     */
 	    if (!output->type->is_array()
 		|| (strncmp("gl_", output->name, 3) != 0)) {
-	       linker_error_printf(prog,
-				   "%s shader output `%s' declared as "
-				   "type `%s', but %s shader input declared "
-				   "as type `%s'\n",
-				   producer_stage, output->name,
-				   output->type->name,
-				   consumer_stage, input->type->name);
+	       linker_error(prog,
+			    "%s shader output `%s' declared as type `%s', "
+			    "but %s shader input declared as type `%s'\n",
+			    producer_stage, output->name,
+			    output->type->name,
+			    consumer_stage, input->type->name);
 	       return false;
 	    }
 	 }
@@ -543,40 +556,40 @@
 	 /* Check that all of the qualifiers match between stages.
 	  */
 	 if (input->centroid != output->centroid) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' %s centroid qualifier, "
-				"but %s shader input %s centroid qualifier\n",
-				producer_stage,
-				output->name,
-				(output->centroid) ? "has" : "lacks",
-				consumer_stage,
-				(input->centroid) ? "has" : "lacks");
+	    linker_error(prog,
+			 "%s shader output `%s' %s centroid qualifier, "
+			 "but %s shader input %s centroid qualifier\n",
+			 producer_stage,
+			 output->name,
+			 (output->centroid) ? "has" : "lacks",
+			 consumer_stage,
+			 (input->centroid) ? "has" : "lacks");
 	    return false;
 	 }
 
 	 if (input->invariant != output->invariant) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' %s invariant qualifier, "
-				"but %s shader input %s invariant qualifier\n",
-				producer_stage,
-				output->name,
-				(output->invariant) ? "has" : "lacks",
-				consumer_stage,
-				(input->invariant) ? "has" : "lacks");
+	    linker_error(prog,
+			 "%s shader output `%s' %s invariant qualifier, "
+			 "but %s shader input %s invariant qualifier\n",
+			 producer_stage,
+			 output->name,
+			 (output->invariant) ? "has" : "lacks",
+			 consumer_stage,
+			 (input->invariant) ? "has" : "lacks");
 	    return false;
 	 }
 
 	 if (input->interpolation != output->interpolation) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' specifies %s "
-				"interpolation qualifier, "
-				"but %s shader input specifies %s "
-				"interpolation qualifier\n",
-				producer_stage,
-				output->name,
-				output->interpolation_string(),
-				consumer_stage,
-				input->interpolation_string());
+	    linker_error(prog,
+			 "%s shader output `%s' specifies %s "
+			 "interpolation qualifier, "
+			 "but %s shader input specifies %s "
+			 "interpolation qualifier\n",
+			 producer_stage,
+			 output->name,
+			 output->interpolation_string(),
+			 consumer_stage,
+			 input->interpolation_string());
 	    return false;
 	 }
       }
@@ -823,9 +836,8 @@
 
 	       if ((other_sig != NULL) && other_sig->is_defined
 		   && !other_sig->is_builtin) {
-		  linker_error_printf(prog,
-				      "function `%s' is multiply defined",
-				      f->name);
+		  linker_error(prog, "function `%s' is multiply defined",
+			       f->name);
 		  return NULL;
 	       }
 	    }
@@ -849,9 +861,9 @@
    }
 
    if (main == NULL) {
-      linker_error_printf(prog, "%s shader lacks `main'\n",
-			  (shader_list[0]->Type == GL_VERTEX_SHADER)
-			  ? "vertex" : "fragment");
+      linker_error(prog, "%s shader lacks `main'\n",
+		   (shader_list[0]->Type == GL_VERTEX_SHADER)
+		   ? "vertex" : "fragment");
       return NULL;
    }
 
@@ -910,6 +922,14 @@
 
    free(linking_shaders);
 
+#ifdef DEBUG
+   /* At this point linked should contain all of the linked IR, so
+    * validate it to make sure nothing went wrong.
+    */
+   if (linked)
+      validate_ir_tree(linked->ir);
+#endif
+
    /* Make a pass over all variable declarations to ensure that arrays with
     * unspecified sizes have a size specified.  The size is inferred from the
     * max_array_access field.
@@ -1309,10 +1329,10 @@
 	  * attribute overlaps any previously allocated bits.
 	  */
 	 if ((~(use_mask << attr) & used_locations) != used_locations) {
-	    linker_error_printf(prog,
-				"insufficient contiguous attribute locations "
-				"available for vertex shader input `%s'",
-				var->name);
+	    linker_error(prog,
+			 "insufficient contiguous attribute locations "
+			 "available for vertex shader input `%s'",
+			 var->name);
 	    return false;
 	 }
 
@@ -1343,7 +1363,7 @@
    foreach_list(node, sh->ir) {
       ir_variable *const var = ((ir_instruction *) node)->as_variable();
 
-      if ((var == NULL) || (var->mode != direction))
+      if ((var == NULL) || (var->mode != (unsigned) direction))
 	 continue;
 
       if (var->explicit_location) {
@@ -1353,11 +1373,10 @@
 
 	 if ((var->location >= (int)(max_index + generic_base))
 	     || (var->location < 0)) {
-	    linker_error_printf(prog,
-				"invalid explicit location %d specified for "
-				"`%s'\n",
-				(var->location < 0) ? var->location : attr,
-				var->name);
+	    linker_error(prog,
+			 "invalid explicit location %d specified for `%s'\n",
+			 (var->location < 0) ? var->location : attr,
+			 var->name);
 	    return false;
 	 } else if (var->location >= generic_base) {
 	    used_locations |= (use_mask << attr);
@@ -1406,10 +1425,10 @@
 	 const char *const string = (target_index == MESA_SHADER_VERTEX)
 	    ? "vertex shader input" : "fragment shader output";
 
-	 linker_error_printf(prog,
-			     "insufficient contiguous attribute locations "
-			     "available for %s `%s'",
-			     string, to_assign[i].var->name);
+	 linker_error(prog,
+		      "insufficient contiguous attribute locations "
+		      "available for %s `%s'",
+		      string, to_assign[i].var->name);
 	 return false;
       }
 
@@ -1525,9 +1544,8 @@
 	     * "glsl1-varying read but not written" in piglit.
 	     */
 
-	    linker_error_printf(prog, "fragment shader varying %s not written "
-				"by vertex shader\n.", var->name);
-	    prog->LinkStatus = false;
+	    linker_error(prog, "fragment shader varying %s not written "
+			 "by vertex shader\n.", var->name);
 	 }
 
 	 /* An 'in' variable is only really a shader input if its
@@ -1544,17 +1562,17 @@
 
    if (ctx->API == API_OPENGLES2 || prog->Version == 100) {
       if (varying_vectors > ctx->Const.MaxVarying) {
-	 linker_error_printf(prog, "shader uses too many varying vectors "
-			     "(%u > %u)\n",
-			     varying_vectors, ctx->Const.MaxVarying);
+	 linker_error(prog, "shader uses too many varying vectors "
+		      "(%u > %u)\n",
+		      varying_vectors, ctx->Const.MaxVarying);
 	 return false;
       }
    } else {
       const unsigned float_components = varying_vectors * 4;
       if (float_components > ctx->Const.MaxVarying * 4) {
-	 linker_error_printf(prog, "shader uses too many varying components "
-			     "(%u > %u)\n",
-			     float_components, ctx->Const.MaxVarying * 4);
+	 linker_error(prog, "shader uses too many varying components "
+		      "(%u > %u)\n",
+		      float_components, ctx->Const.MaxVarying * 4);
 	 return false;
       }
    }
@@ -1618,8 +1636,8 @@
    assert(max_version <= 130);
    if ((max_version >= 130 || min_version == 100)
        && min_version != max_version) {
-      linker_error_printf(prog, "all shaders must use same shading "
-			  "language version\n");
+      linker_error(prog, "all shaders must use same shading "
+		   "language version\n");
       goto done;
    }
 
@@ -1702,6 +1720,10 @@
       if (prog->_LinkedShaders[i] == NULL)
 	 continue;
 
+      detect_recursion_linked(prog, prog->_LinkedShaders[i]->ir);
+      if (!prog->LinkStatus)
+	 goto done;
+
       while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, 32))
 	 ;
    }
@@ -1716,12 +1738,10 @@
     * FINISHME: at least 16, so hardcode 16 for now.
     */
    if (!assign_attribute_or_color_locations(prog, MESA_SHADER_VERTEX, 16)) {
-      prog->LinkStatus = false;
       goto done;
    }
 
    if (!assign_attribute_or_color_locations(prog, MESA_SHADER_FRAGMENT, ctx->Const.MaxDrawBuffers)) {
-      prog->LinkStatus = false;
       goto done;
    }
 
@@ -1738,7 +1758,6 @@
       if (!assign_varying_locations(ctx, prog,
 				    prog->_LinkedShaders[prev],
 				    prog->_LinkedShaders[i])) {
-	 prog->LinkStatus = false;
 	 goto done;
       }
 
@@ -1770,11 +1789,9 @@
     */
    if (ctx->API == API_OPENGLES2 || prog->Version == 100) {
       if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
-	 linker_error_printf(prog, "program lacks a vertex shader\n");
-	 prog->LinkStatus = false;
+	 linker_error(prog, "program lacks a vertex shader\n");
       } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
-	 linker_error_printf(prog, "program lacks a fragment shader\n");
-	 prog->LinkStatus = false;
+	 linker_error(prog, "program lacks a fragment shader\n");
       }
    }
 
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index a8ce16a..769cf68 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -25,9 +25,6 @@
 #ifndef GLSL_LINKER_H
 #define GLSL_LINKER_H
 
-extern void
-linker_error_printf(gl_shader_program *prog, const char *fmt, ...);
-
 extern bool
 link_function_calls(gl_shader_program *prog, gl_shader *main,
 		    gl_shader **shader_list, unsigned num_shaders);
diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp
index b637eb4..7b89a15 100644
--- a/src/glsl/lower_if_to_cond_assign.cpp
+++ b/src/glsl/lower_if_to_cond_assign.cpp
@@ -47,6 +47,7 @@
 
 #include "glsl_types.h"
 #include "ir.h"
+#include "program/hash_table.h"
 
 class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
 public:
@@ -55,6 +56,14 @@
       this->progress = false;
       this->max_depth = max_depth;
       this->depth = 0;
+
+      this->condition_variables = hash_table_ctor(0, hash_table_pointer_hash,
+						  hash_table_pointer_compare);
+   }
+
+   ~ir_if_to_cond_assign_visitor()
+   {
+      hash_table_dtor(this->condition_variables);
    }
 
    ir_visitor_status visit_enter(ir_if *);
@@ -63,6 +72,8 @@
    bool progress;
    unsigned max_depth;
    unsigned depth;
+
+   struct hash_table *condition_variables;
 };
 
 bool
@@ -94,40 +105,43 @@
 
 void
 move_block_to_cond_assign(void *mem_ctx,
-			  ir_if *if_ir, ir_variable *cond_var, bool then)
+			  ir_if *if_ir, ir_rvalue *cond_expr,
+			  exec_list *instructions,
+			  struct hash_table *ht)
 {
-   exec_list *instructions;
-
-   if (then) {
-      instructions = &if_ir->then_instructions;
-   } else {
-      instructions = &if_ir->else_instructions;
-   }
-
-   foreach_iter(exec_list_iterator, iter, *instructions) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list_safe(node, instructions) {
+      ir_instruction *ir = (ir_instruction *) node;
 
       if (ir->ir_type == ir_type_assignment) {
 	 ir_assignment *assign = (ir_assignment *)ir;
-	 ir_rvalue *cond_expr;
-	 ir_dereference *deref = new(mem_ctx) ir_dereference_variable(cond_var);
 
-	 if (then) {
-	    cond_expr = deref;
-	 } else {
-	    cond_expr = new(mem_ctx) ir_expression(ir_unop_logic_not,
-						   glsl_type::bool_type,
-						   deref,
-						   NULL);
-	 }
+	 if (hash_table_find(ht, assign) == NULL) {
+	    hash_table_insert(ht, assign, assign);
 
-	 if (!assign->condition) {
-	    assign->condition = cond_expr;
-	 } else {
-	    assign->condition = new(mem_ctx) ir_expression(ir_binop_logic_and,
-							   glsl_type::bool_type,
-							   cond_expr,
-							   assign->condition);
+	    /* If the LHS of the assignment is a condition variable that was
+	     * previously added, insert an additional assignment of false to
+	     * the variable.
+	     */
+	    const bool assign_to_cv =
+	       hash_table_find(ht, assign->lhs->variable_referenced()) != NULL;
+
+	    if (!assign->condition) {
+	       if (assign_to_cv) {
+		  assign->rhs =
+		     new(mem_ctx) ir_expression(ir_binop_logic_and,
+						glsl_type::bool_type,
+						cond_expr->clone(mem_ctx, NULL),
+						assign->rhs);
+	       } else {
+		  assign->condition = cond_expr->clone(mem_ctx, NULL);
+	       }
+	    } else {
+	       assign->condition =
+		  new(mem_ctx) ir_expression(ir_binop_logic_and,
+					     glsl_type::bool_type,
+					     cond_expr->clone(mem_ctx, NULL),
+					     assign->condition);
+	    }
 	 }
       }
 
@@ -142,6 +156,7 @@
 {
    (void) ir;
    this->depth++;
+
    return visit_continue;
 }
 
@@ -153,9 +168,7 @@
       return visit_continue;
 
    bool found_control_flow = false;
-   ir_variable *cond_var;
    ir_assignment *assign;
-   ir_dereference_variable *deref;
 
    /* Check that both blocks don't contain anything we can't support. */
    foreach_iter(exec_list_iterator, then_iter, ir->then_instructions) {
@@ -171,24 +184,62 @@
 
    void *mem_ctx = ralloc_parent(ir);
 
-   /* Store the condition to a variable so the assignment conditions are
-    * simpler.
+   /* Store the condition to a variable.  Move all of the instructions from
+    * the then-clause of the if-statement.  Use the condition variable as a
+    * condition for all assignments.
     */
-   cond_var = new(mem_ctx) ir_variable(glsl_type::bool_type,
-				       "if_to_cond_assign_condition",
-				       ir_var_temporary);
-   ir->insert_before(cond_var);
+   ir_variable *const then_var =
+      new(mem_ctx) ir_variable(glsl_type::bool_type,
+			       "if_to_cond_assign_then",
+			       ir_var_temporary);
+   ir->insert_before(then_var);
 
-   deref = new(mem_ctx) ir_dereference_variable(cond_var);
-   assign = new(mem_ctx) ir_assignment(deref,
-				       ir->condition, NULL);
+   ir_dereference_variable *then_cond =
+      new(mem_ctx) ir_dereference_variable(then_var);
+
+   assign = new(mem_ctx) ir_assignment(then_cond, ir->condition);
    ir->insert_before(assign);
 
-   /* Now, move all of the instructions out of the if blocks, putting
-    * conditions on assignments.
+   move_block_to_cond_assign(mem_ctx, ir, then_cond,
+			     &ir->then_instructions,
+			     this->condition_variables);
+
+   /* Add the new condition variable to the hash table.  This allows us to
+    * find this variable when lowering other (enclosing) if-statements.
     */
-   move_block_to_cond_assign(mem_ctx, ir, cond_var, true);
-   move_block_to_cond_assign(mem_ctx, ir, cond_var, false);
+   hash_table_insert(this->condition_variables, then_var, then_var);
+
+   /* If there are instructions in the else-clause, store the inverse of the
+    * condition to a variable.  Move all of the instructions from the
+    * else-clause if the if-statement.  Use the (inverse) condition variable
+    * as a condition for all assignments.
+    */
+   if (!ir->else_instructions.is_empty()) {
+      ir_variable *const else_var =
+	 new(mem_ctx) ir_variable(glsl_type::bool_type,
+				  "if_to_cond_assign_else",
+				  ir_var_temporary);
+      ir->insert_before(else_var);
+
+      ir_dereference_variable *else_cond =
+	 new(mem_ctx) ir_dereference_variable(else_var);
+
+      ir_rvalue *inverse =
+	 new(mem_ctx) ir_expression(ir_unop_logic_not,
+				    then_cond->clone(mem_ctx, NULL));
+
+      assign = new(mem_ctx) ir_assignment(else_cond, inverse);
+      ir->insert_before(assign);
+
+      move_block_to_cond_assign(mem_ctx, ir, else_cond,
+				&ir->else_instructions,
+				this->condition_variables);
+
+      /* Add the new condition variable to the hash table.  This allows us to
+       * find this variable when lowering other (enclosing) if-statements.
+       */
+      hash_table_insert(this->condition_variables, else_var, else_var);
+   }
 
    ir->remove();
 
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 806f863..23aa19b 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -166,6 +166,10 @@
       else
 	 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
 
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+					 ir->type->vector_elements,
+					 ir->type->matrix_columns);
+
       op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
 
       if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index 8eb1612..f8e4a1d 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -29,6 +29,21 @@
  *
  * Pre-DX10 GPUs often don't have a native way to do this operation,
  * and this works around that.
+ *
+ * The lowering process proceeds as follows.  Each non-constant index
+ * found in an r-value is converted to a canonical form \c array[i].  Each
+ * element of the array is conditionally assigned to a temporary by comparing
+ * \c i to a constant index.  This is done by cloning the canonical form and
+ * replacing all occurances of \c i with a constant.  Each remaining occurance
+ * of the canonical form in the IR is replaced with a dereference of the
+ * temporary variable.
+ *
+ * L-values with non-constant indices are handled similarly.  In this case,
+ * the RHS of the assignment is assigned to a temporary.  The non-constant
+ * index is replace with the canonical form (just like for r-values).  The
+ * temporary is conditionally assigned to each element of the canonical form
+ * by comparing \c i with each index.  The same clone-and-replace scheme is
+ * used.
  */
 
 #include "ir.h"
@@ -37,10 +52,140 @@
 #include "glsl_types.h"
 #include "main/macros.h"
 
+/**
+ * Generate a comparison value for a block of indices
+ *
+ * Lowering passes for non-constant indexing of arrays, matrices, or vectors
+ * can use this to generate blocks of index comparison values.
+ *
+ * \param instructions  List where new instructions will be appended
+ * \param index         \c ir_variable containing the desired index
+ * \param base          Base value for this block of comparisons
+ * \param components    Number of unique index values to compare.  This must
+ *                      be on the range [1, 4].
+ * \param mem_ctx       ralloc memory context to be used for all allocations.
+ *
+ * \returns
+ * An \c ir_rvalue that \b must be cloned for each use in conditional
+ * assignments, etc.
+ */
+ir_rvalue *
+compare_index_block(exec_list *instructions, ir_variable *index,
+		    unsigned base, unsigned components, void *mem_ctx)
+{
+   ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index);
+
+   assert(index->type->is_scalar());
+   assert(index->type->base_type == GLSL_TYPE_INT);
+   assert(components >= 1 && components <= 4);
+
+   if (components > 1) {
+      const ir_swizzle_mask m = { 0, 0, 0, 0, components, false };
+      broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m);
+   }
+
+   /* Compare the desired index value with the next block of four indices.
+    */
+   ir_constant_data test_indices_data;
+   memset(&test_indices_data, 0, sizeof(test_indices_data));
+   test_indices_data.i[0] = base;
+   test_indices_data.i[1] = base + 1;
+   test_indices_data.i[2] = base + 2;
+   test_indices_data.i[3] = base + 3;
+
+   ir_constant *const test_indices =
+      new(mem_ctx) ir_constant(broadcast_index->type,
+			       &test_indices_data);
+
+   ir_rvalue *const condition_val =
+      new(mem_ctx) ir_expression(ir_binop_equal,
+				 &glsl_type::bool_type[components - 1],
+				 broadcast_index,
+				 test_indices);
+
+   ir_variable *const condition =
+      new(mem_ctx) ir_variable(condition_val->type,
+			       "dereference_condition",
+			       ir_var_temporary);
+   instructions->push_tail(condition);
+
+   ir_rvalue *const cond_deref =
+      new(mem_ctx) ir_dereference_variable(condition);
+   instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0));
+
+   return cond_deref;
+}
+
+static inline bool
+is_array_or_matrix(const ir_instruction *ir)
+{
+   return (ir->type->is_array() || ir->type->is_matrix());
+}
+
+/**
+ * Replace a dereference of a variable with a specified r-value
+ *
+ * Each time a dereference of the specified value is replaced, the r-value
+ * tree is cloned.
+ */
+class deref_replacer : public ir_rvalue_visitor {
+public:
+   deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value)
+      : variable_to_replace(variable_to_replace), value(value),
+	progress(false)
+   {
+      assert(this->variable_to_replace != NULL);
+      assert(this->value != NULL);
+   }
+
+   virtual void handle_rvalue(ir_rvalue **rvalue)
+   {
+      ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable();
+
+      if ((dv != NULL) && (dv->var == this->variable_to_replace)) {
+	 this->progress = true;
+	 *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
+      }
+   }
+
+   const ir_variable *variable_to_replace;
+   ir_rvalue *value;
+   bool progress;
+};
+
+/**
+ * Find a variable index dereference of an array in an rvalue tree
+ */
+class find_variable_index : public ir_hierarchical_visitor {
+public:
+   find_variable_index()
+      : deref(NULL)
+   {
+      /* empty */
+   }
+
+   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
+   {
+      if (is_array_or_matrix(ir->array)
+	  && (ir->array_index->as_constant() == NULL)) {
+	 this->deref = ir;
+	 return visit_stop;
+      }
+
+      return visit_continue;
+   }
+
+   /**
+    * First array dereference found in the tree that has a non-constant index.
+    */
+   ir_dereference_array *deref;
+};
+
 struct assignment_generator
 {
    ir_instruction* base_ir;
-   ir_rvalue* array;
+   ir_dereference *rvalue;
+   ir_variable *old_index;
    bool is_write;
    unsigned int write_mask;
    ir_variable* var;
@@ -55,18 +200,23 @@
        * underlying variable.
        */
       void *mem_ctx = ralloc_parent(base_ir);
-      ir_dereference *element =
-	 new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL),
-					   new(mem_ctx) ir_constant(i));
-      ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
 
-      ir_assignment *assignment;
-      if (is_write) {
-	 assignment = new(mem_ctx) ir_assignment(element, variable, condition,
-						 write_mask);
-      } else {
-	 assignment = new(mem_ctx) ir_assignment(variable, element, condition);
-      }
+      /* Clone the old r-value in its entirety.  Then replace any occurances of
+       * the old variable index with the new constant index.
+       */
+      ir_dereference *element = this->rvalue->clone(mem_ctx, NULL);
+      ir_constant *const index = new(mem_ctx) ir_constant(i);
+      deref_replacer r(this->old_index, index);
+      element->accept(&r);
+      assert(r.progress);
+
+      /* Generate a conditional assignment to (or from) the constant indexed
+       * array dereference.
+       */
+      ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
+      ir_assignment *const assignment = (is_write)
+	 ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask)
+	 : new(mem_ctx) ir_assignment(variable, element, condition);
 
       list->push_tail(assignment);
    }
@@ -118,54 +268,17 @@
       for (unsigned i = first; i < end; i += 4) {
          const unsigned comps = MIN2(condition_components, end - i);
 
-         ir_rvalue *broadcast_index =
-	    new(this->mem_ctx) ir_dereference_variable(index);
-
-         if (comps) {
-	    const ir_swizzle_mask m = { 0, 0, 0, 0, comps, false };
-	    broadcast_index = new(this->mem_ctx) ir_swizzle(broadcast_index, m);
-	 }
-
-	 /* Compare the desired index value with the next block of four indices.
-	  */
-         ir_constant_data test_indices_data;
-         memset(&test_indices_data, 0, sizeof(test_indices_data));
-         test_indices_data.i[0] = i;
-         test_indices_data.i[1] = i + 1;
-         test_indices_data.i[2] = i + 2;
-         test_indices_data.i[3] = i + 3;
-         ir_constant *const test_indices =
-	    new(this->mem_ctx) ir_constant(broadcast_index->type,
-					   &test_indices_data);
-
-         ir_rvalue *const condition_val =
-	    new(this->mem_ctx) ir_expression(ir_binop_equal,
-					     &glsl_type::bool_type[comps - 1],
-					     broadcast_index,
-					     test_indices);
-
-         ir_variable *const condition =
-	    new(this->mem_ctx) ir_variable(condition_val->type,
-					   "dereference_array_condition",
-					   ir_var_temporary);
-         list->push_tail(condition);
-
 	 ir_rvalue *const cond_deref =
-	    new(this->mem_ctx) ir_dereference_variable(condition);
-         list->push_tail(new(this->mem_ctx) ir_assignment(cond_deref,
-							  condition_val, 0));
+	    compare_index_block(list, index, i, comps, this->mem_ctx);
 
          if (comps == 1) {
-	    ir_rvalue *const cond_deref =
-	       new(this->mem_ctx) ir_dereference_variable(condition);
-
-            this->generator.generate(i, cond_deref, list);
+            this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL),
+				     list);
          } else {
             for (unsigned j = 0; j < comps; j++) {
-	       ir_rvalue *const cond_deref =
-		  new(this->mem_ctx) ir_dereference_variable(condition);
 	       ir_rvalue *const cond_swiz =
-		  new(this->mem_ctx) ir_swizzle(cond_deref, j, 0, 0, 0, 1);
+		  new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL),
+						j, 0, 0, 0, 1);
 
                this->generator.generate(i + j, cond_swiz, list);
             }
@@ -233,21 +346,18 @@
    bool lower_temps;
    bool lower_uniforms;
 
-   bool is_array_or_matrix(const ir_instruction *ir) const
+   bool storage_type_needs_lowering(ir_dereference_array *deref) const
    {
-      return (ir->type->is_array() || ir->type->is_matrix());
-   }
-
-   bool needs_lowering(ir_dereference_array *deref) const
-   {
-      if (deref == NULL || deref->array_index->as_constant()
-	  || !is_array_or_matrix(deref->array))
-	 return false;
-
-      if (deref->array->ir_type == ir_type_constant)
+      /* If a variable isn't eventually the target of this dereference, then
+       * it must be a constant or some sort of anonymous temporary storage.
+       *
+       * FINISHME: Is this correct?  Most drivers treat arrays of constants as
+       * FINISHME: uniforms.  It seems like this should do the same.
+       */
+      const ir_variable *const var = deref->array->variable_referenced();
+      if (var == NULL)
 	 return this->lower_temps;
 
-      const ir_variable *const var = deref->array->variable_referenced();
       switch (var->mode) {
       case ir_var_auto:
       case ir_var_temporary:
@@ -267,8 +377,18 @@
       return false;
    }
 
+   bool needs_lowering(ir_dereference_array *deref) const
+   {
+      if (deref == NULL || deref->array_index->as_constant()
+	  || !is_array_or_matrix(deref->array))
+	 return false;
+
+      return this->storage_type_needs_lowering(deref);
+   }
+
    ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
-					  ir_assignment* orig_assign)
+					  ir_assignment* orig_assign,
+					  ir_dereference *orig_base)
    {
       assert(is_array_or_matrix(orig_deref->array));
 
@@ -314,9 +434,12 @@
 	 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
       base_ir->insert_before(assign);
 
+      orig_deref->array_index = lhs->clone(mem_ctx, NULL);
+
       assignment_generator ag;
-      ag.array = orig_deref->array;
+      ag.rvalue = orig_base;
       ag.base_ir = base_ir;
+      ag.old_index = index;
       ag.var = var;
       if (orig_assign) {
 	 ag.is_write = true;
@@ -327,21 +450,40 @@
 
       switch_generator sg(ag, index, 4, 4);
 
-      exec_list list;
-      sg.generate(0, length, &list);
-      base_ir->insert_before(&list);
+      /* If the original assignment has a condition, respect that original
+       * condition!  This is acomplished by wrapping the new conditional
+       * assignments in an if-statement that uses the original condition.
+       */
+      if ((orig_assign != NULL) && (orig_assign->condition != NULL)) {
+	 /* No need to clone the condition because the IR that it hangs on is
+	  * going to be removed from the instruction sequence.
+	  */
+	 ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);
+
+	 sg.generate(0, length, &if_stmt->then_instructions);
+	 base_ir->insert_before(if_stmt);
+      } else {
+	 exec_list list;
+
+	 sg.generate(0, length, &list);
+	 base_ir->insert_before(&list);
+      }
 
       return var;
    }
 
    virtual void handle_rvalue(ir_rvalue **pir)
    {
+      if (this->in_assignee)
+	 return;
+
       if (!*pir)
          return;
 
       ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
       if (needs_lowering(orig_deref)) {
-         ir_variable* var = convert_dereference_array(orig_deref, 0);
+         ir_variable *var =
+	    convert_dereference_array(orig_deref, NULL, orig_deref);
          assert(var);
          *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
          this->progress = true;
@@ -353,10 +495,11 @@
    {
       ir_rvalue_visitor::visit_leave(ir);
 
-      ir_dereference_array *orig_deref = ir->lhs->as_dereference_array();
+      find_variable_index f;
+      ir->lhs->accept(&f);
 
-      if (needs_lowering(orig_deref)) {
-         convert_dereference_array(orig_deref, ir);
+      if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) {
+         convert_dereference_array(f.deref, ir, ir->lhs);
          ir->remove();
          this->progress = true;
       }
@@ -377,7 +520,17 @@
 					   lower_temp,
 					   lower_uniform);
 
-   visit_list_elements(&v, instructions);
+   /* Continue lowering until no progress is made.  If there are multiple
+    * levels of indirection (e.g., non-constant indexing of array elements and
+    * matrix columns of an array of matrix), each pass will only lower one
+    * level of indirection.
+    */
+   bool progress_ever = false;
+   do {
+      v.progress = false;
+      visit_list_elements(&v, instructions);
+      progress_ever = v.progress || progress_ever;
+   } while (v.progress);
 
-   return v.progress;
+   return progress_ever;
 }
diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp
index 3c4d932..fce9c34 100644
--- a/src/glsl/lower_vec_index_to_cond_assign.cpp
+++ b/src/glsl/lower_vec_index_to_cond_assign.cpp
@@ -71,8 +71,6 @@
    ir_assignment *assign;
    ir_variable *index, *var;
    ir_dereference *deref;
-   ir_expression *condition;
-   ir_swizzle *swizzle;
    int i;
 
    if (!orig_deref)
@@ -86,39 +84,52 @@
 
    assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT);
 
+   exec_list list;
+
    /* Store the index to a temporary to avoid reusing its tree. */
    index = new(base_ir) ir_variable(glsl_type::int_type,
 				    "vec_index_tmp_i",
 				    ir_var_temporary);
-   base_ir->insert_before(index);
+   list.push_tail(index);
    deref = new(base_ir) ir_dereference_variable(index);
    assign = new(base_ir) ir_assignment(deref, orig_deref->array_index, NULL);
-   base_ir->insert_before(assign);
+   list.push_tail(assign);
 
    /* Temporary where we store whichever value we swizzle out. */
    var = new(base_ir) ir_variable(ir->type, "vec_index_tmp_v",
 				  ir_var_temporary);
-   base_ir->insert_before(var);
+   list.push_tail(var);
+
+   /* Generate a single comparison condition "mask" for all of the components
+    * in the vector.
+    */
+   ir_rvalue *const cond_deref =
+      compare_index_block(&list, index, 0,
+			  orig_deref->array->type->vector_elements,
+			  mem_ctx);
 
    /* Generate a conditional move of each vector element to the temp. */
    for (i = 0; i < orig_deref->array->type->vector_elements; i++) {
-      deref = new(base_ir) ir_dereference_variable(index);
-      condition = new(base_ir) ir_expression(ir_binop_equal,
-					     glsl_type::bool_type,
-					     deref,
-					     new(base_ir) ir_constant(i));
+      ir_rvalue *condition_swizzle =
+	 new(base_ir) ir_swizzle(cond_deref->clone(ir, NULL), i, 0, 0, 0, 1);
 
       /* Just clone the rest of the deref chain when trying to get at the
        * underlying variable.
        */
-      swizzle = new(base_ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL),
-					i, 0, 0, 0, 1);
+      ir_rvalue *swizzle =
+	 new(base_ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL),
+				 i, 0, 0, 0, 1);
 
       deref = new(base_ir) ir_dereference_variable(var);
-      assign = new(base_ir) ir_assignment(deref, swizzle, condition);
-      base_ir->insert_before(assign);
+      assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle);
+      list.push_tail(assign);
    }
 
+   /* Put all of the new instructions in the IR stream before the old
+    * instruction.
+    */
+   base_ir->insert_before(&list);
+
    this->progress = true;
    return new(base_ir) ir_dereference_variable(var);
 }
@@ -171,42 +182,66 @@
 
    assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT);
 
+   exec_list list;
+
    /* Store the index to a temporary to avoid reusing its tree. */
    index = new(ir) ir_variable(glsl_type::int_type, "vec_index_tmp_i",
 			       ir_var_temporary);
-   ir->insert_before(index);
+   list.push_tail(index);
    deref = new(ir) ir_dereference_variable(index);
    assign = new(ir) ir_assignment(deref, orig_deref->array_index, NULL);
-   ir->insert_before(assign);
+   list.push_tail(assign);
 
    /* Store the RHS to a temporary to avoid reusing its tree. */
    var = new(ir) ir_variable(ir->rhs->type, "vec_index_tmp_v",
 			     ir_var_temporary);
-   ir->insert_before(var);
+   list.push_tail(var);
    deref = new(ir) ir_dereference_variable(var);
    assign = new(ir) ir_assignment(deref, ir->rhs, NULL);
-   ir->insert_before(assign);
+   list.push_tail(assign);
+
+   /* Generate a single comparison condition "mask" for all of the components
+    * in the vector.
+    */
+   ir_rvalue *const cond_deref =
+      compare_index_block(&list, index, 0,
+			  orig_deref->array->type->vector_elements,
+			  mem_ctx);
 
    /* Generate a conditional move of each vector element to the temp. */
    for (i = 0; i < orig_deref->array->type->vector_elements; i++) {
-      ir_rvalue *condition, *swizzle;
+      ir_rvalue *condition_swizzle =
+	 new(ir) ir_swizzle(cond_deref->clone(ir, NULL), i, 0, 0, 0, 1);
 
-      deref = new(ir) ir_dereference_variable(index);
-      condition = new(ir) ir_expression(ir_binop_equal,
-					glsl_type::bool_type,
-					deref,
-					new(ir) ir_constant(i));
 
       /* Just clone the rest of the deref chain when trying to get at the
        * underlying variable.
        */
-      swizzle = new(ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL),
-				   i, 0, 0, 0, 1);
+      ir_rvalue *swizzle =
+	 new(ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL),
+			    i, 0, 0, 0, 1);
 
       deref = new(ir) ir_dereference_variable(var);
-      assign = new(ir) ir_assignment(swizzle, deref, condition);
-      ir->insert_before(assign);
+      assign = new(ir) ir_assignment(swizzle, deref, condition_swizzle);
+      list.push_tail(assign);
    }
+
+   /* If the original assignment has a condition, respect that original
+    * condition!  This is acomplished by wrapping the new conditional
+    * assignments in an if-statement that uses the original condition.
+    */
+   if (ir->condition != NULL) {
+      /* No need to clone the condition because the IR that it hangs on is
+       * going to be removed from the instruction sequence.
+       */
+      ir_if *if_stmt = new(mem_ctx) ir_if(ir->condition);
+
+      list.move_nodes_to(&if_stmt->then_instructions);
+      ir->insert_before(if_stmt);
+   } else {
+      ir->insert_before(&list);
+   }
+
    ir->remove();
 
    this->progress = true;
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 7952bb1..0192137 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -24,85 +24,30 @@
 
 #include "ast.h"
 #include "glsl_parser_extras.h"
-#include "glsl_parser.h"
 #include "ir_optimization.h"
 #include "ir_print_visitor.h"
 #include "program.h"
 #include "loop_analysis.h"
-
-extern "C" struct gl_shader *
-_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
-
-extern "C" void
-_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
-                       struct gl_shader *sh);
-
-/* Copied from shader_api.c for the stand-alone compiler.
- */
-void
-_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
-                       struct gl_shader *sh)
-{
-   *ptr = sh;
-}
-
-struct gl_shader *
-_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
-{
-   struct gl_shader *shader;
-
-   (void) ctx;
-
-   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER);
-   shader = rzalloc(NULL, struct gl_shader);
-   if (shader) {
-      shader->Type = type;
-      shader->Name = name;
-      shader->RefCount = 1;
-   }
-   return shader;
-}
+#include "standalone_scaffolding.h"
 
 static void
 initialize_context(struct gl_context *ctx, gl_api api)
 {
-   memset(ctx, 0, sizeof(*ctx));
-
-   ctx->API = api;
-
-   ctx->Extensions.ARB_ES2_compatibility = GL_TRUE;
-   ctx->Extensions.ARB_draw_buffers = GL_TRUE;
-   ctx->Extensions.ARB_draw_instanced = GL_TRUE;
-   ctx->Extensions.ARB_fragment_coord_conventions = GL_TRUE;
-   ctx->Extensions.EXT_texture_array = GL_TRUE;
-   ctx->Extensions.NV_texture_rectangle = GL_TRUE;
-   ctx->Extensions.EXT_texture3D = GL_TRUE;
+   initialize_context_to_defaults(ctx, api);
 
    /* GLSL 1.30 isn't fully supported, but we need to advertise 1.30 so that
     * the built-in functions for 1.30 can be built.
     */
    ctx->Const.GLSLVersion = 130;
 
-   /* 1.10 minimums. */
-   ctx->Const.MaxLights = 8;
    ctx->Const.MaxClipPlanes = 8;
-   ctx->Const.MaxTextureUnits = 2;
+   ctx->Const.MaxDrawBuffers = 2;
 
    /* More than the 1.10 minimum to appease parser tests taken from
     * apps that (hopefully) already checked the number of coords.
     */
    ctx->Const.MaxTextureCoordUnits = 4;
 
-   ctx->Const.VertexProgram.MaxAttribs = 16;
-   ctx->Const.VertexProgram.MaxUniformComponents = 512;
-   ctx->Const.MaxVarying = 8;
-   ctx->Const.MaxVertexTextureImageUnits = 0;
-   ctx->Const.MaxCombinedTextureImageUnits = 2;
-   ctx->Const.MaxTextureImageUnits = 2;
-   ctx->Const.FragmentProgram.MaxUniformComponents = 64;
-
-   ctx->Const.MaxDrawBuffers = 2;
-
    ctx->Driver.NewShader = _mesa_new_shader;
 }
 
@@ -275,6 +220,7 @@
 
    whole_program = rzalloc (NULL, struct gl_shader_program);
    assert(whole_program != NULL);
+   whole_program->InfoLog = ralloc_strdup(whole_program, "");
 
    for (/* empty */; argc > optind; optind++) {
       whole_program->Shaders =
diff --git a/src/glsl/opt_dead_functions.cpp b/src/glsl/opt_dead_functions.cpp
index 7c64c61..51c77e3 100644
--- a/src/glsl/opt_dead_functions.cpp
+++ b/src/glsl/opt_dead_functions.cpp
@@ -50,7 +50,6 @@
    ir_dead_functions_visitor()
    {
       this->mem_ctx = ralloc_context(NULL);
-      this->seen_another_function_signature = false;
    }
 
    ~ir_dead_functions_visitor()
@@ -65,8 +64,6 @@
 
    bool (*predicate)(ir_instruction *ir);
 
-   bool seen_another_function_signature;
-
    /* List of signature_entry */
    exec_list signature_list;
    void *mem_ctx;
@@ -97,13 +94,7 @@
       entry->used = true;
    }
 
-   /* If this is the first signature to look at, no need to descend to see
-    * if it has calls to another function signature.
-    */
-   if (!this->seen_another_function_signature) {
-      this->seen_another_function_signature = true;
-      return visit_continue_with_parent;
-   }
+
 
    return visit_continue;
 }
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index 1ef940f..22a1749 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -258,6 +258,7 @@
       break;
    case ir_txf:
    case ir_txl:
+   case ir_txs:
       if (do_graft(&ir->lod_info.lod))
 	 return visit_stop;
       break;
diff --git a/src/glsl/program.h b/src/glsl/program.h
index db602fa..437ca14 100644
--- a/src/glsl/program.h
+++ b/src/glsl/program.h
@@ -25,3 +25,11 @@
 
 extern void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
+
+extern void
+linker_error(gl_shader_program *prog, const char *fmt, ...)
+   PRINTFLIKE(2, 3);
+
+extern void
+linker_warning(gl_shader_program *prog, const char *fmt, ...)
+   PRINTFLIKE(2, 3);
diff --git a/src/glsl/ralloc.c b/src/glsl/ralloc.c
index 6a5eac6..fb48a91 100644
--- a/src/glsl/ralloc.c
+++ b/src/glsl/ralloc.c
@@ -28,6 +28,11 @@
 #include <string.h>
 #include <stdint.h>
 
+/* Android defines SIZE_MAX in limits.h, instead of the standard stdint.h */
+#ifdef ANDROID
+#include <limits.h>
+#endif
+
 #include "ralloc.h"
 
 #ifdef __GNUC__
diff --git a/src/glsl/s_expression.cpp b/src/glsl/s_expression.cpp
index a922a50..e704a3b 100644
--- a/src/glsl/s_expression.cpp
+++ b/src/glsl/s_expression.cpp
@@ -25,10 +25,13 @@
 #include <assert.h>
 #include "s_expression.h"
 
-s_symbol::s_symbol(const char *tmp, size_t n)
+s_symbol::s_symbol(const char *str, size_t n)
 {
-   this->str = ralloc_strndup (this, tmp, n);
-   assert(this->str != NULL);
+   /* Assume the given string is already nul-terminated and in memory that
+    * will live as long as this node.
+    */
+   assert(str[n] == '\0');
+   this->str = str;
 }
 
 s_list::s_list()
@@ -36,22 +39,26 @@
 }
 
 static void
-skip_whitespace(const char *& src)
+skip_whitespace(const char *&src, char *&symbol_buffer)
 {
-   src += strspn(src, " \v\t\r\n");
+   size_t n = strspn(src, " \v\t\r\n");
+   src += n;
+   symbol_buffer += n;
    /* Also skip Scheme-style comments: semi-colon 'til end of line */
    if (src[0] == ';') {
-      src += strcspn(src, "\n");
-      skip_whitespace(src);
+      n = strcspn(src, "\n");
+      src += n;
+      symbol_buffer += n;
+      skip_whitespace(src, symbol_buffer);
    }
 }
 
 static s_expression *
-read_atom(void *ctx, const char *& src)
+read_atom(void *ctx, const char *&src, char *&symbol_buffer)
 {
    s_expression *expr = NULL;
 
-   skip_whitespace(src);
+   skip_whitespace(src, symbol_buffer);
 
    size_t n = strcspn(src, "( \v\t\r\n);");
    if (n == 0)
@@ -70,42 +77,63 @@
 	 expr = new(ctx) s_int(i);
    } else {
       // Not a number; return a symbol.
-      expr = new(ctx) s_symbol(src, n);
+      symbol_buffer[n] = '\0';
+      expr = new(ctx) s_symbol(symbol_buffer, n);
    }
 
    src += n;
+   symbol_buffer += n;
 
    return expr;
 }
 
+static s_expression *
+__read_expression(void *ctx, const char *&src, char *&symbol_buffer)
+{
+   s_expression *atom = read_atom(ctx, src, symbol_buffer);
+   if (atom != NULL)
+      return atom;
+
+   skip_whitespace(src, symbol_buffer);
+   if (src[0] == '(') {
+      ++src;
+      ++symbol_buffer;
+
+      s_list *list = new(ctx) s_list;
+      s_expression *expr;
+
+      while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) {
+	 list->subexpressions.push_tail(expr);
+      }
+      skip_whitespace(src, symbol_buffer);
+      if (src[0] != ')') {
+	 printf("Unclosed expression (check your parenthesis).\n");
+	 return NULL;
+      }
+      ++src;
+      ++symbol_buffer;
+      return list;
+   }
+   return NULL;
+}
+
 s_expression *
 s_expression::read_expression(void *ctx, const char *&src)
 {
    assert(src != NULL);
 
-   s_expression *atom = read_atom(ctx, src);
-   if (atom != NULL)
-      return atom;
-
-   skip_whitespace(src);
-   if (src[0] == '(') {
-      ++src;
-
-      s_list *list = new(ctx) s_list;
-      s_expression *expr;
-
-      while ((expr = read_expression(ctx, src)) != NULL) {
-	 list->subexpressions.push_tail(expr);
-      }
-      skip_whitespace(src);
-      if (src[0] != ')') {
-	 printf("Unclosed expression (check your parenthesis).\n");
-	 return NULL;
-      }
-      ++src;
-      return list;
-   }
-   return NULL;
+   /* When we encounter a Symbol, we need to save a nul-terminated copy of
+    * the string.  However, ralloc_strndup'ing every individual Symbol is
+    * extremely expensive.  We could avoid this by simply overwriting the
+    * next character (guaranteed to be whitespace, parens, or semicolon) with
+    * a nul-byte.  But overwriting non-whitespace would mess up parsing.
+    *
+    * So, just copy the whole buffer ahead of time.  Walk both, leaving the
+    * original source string unmodified, and altering the copy to contain the
+    * necessary nul-bytes whenever we encounter a symbol.
+    */
+   char *symbol_buffer = ralloc_strdup(ctx, src);
+   return __read_expression(ctx, src, symbol_buffer);
 }
 
 void s_int::print()
diff --git a/src/glsl/s_expression.h b/src/glsl/s_expression.h
index c9dc676..642af19 100644
--- a/src/glsl/s_expression.h
+++ b/src/glsl/s_expression.h
@@ -129,7 +129,7 @@
    void print();
 
 private:
-   char *str;
+   const char *str;
 };
 
 /* Lists of expressions: (expr1 ... exprN) */
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
new file mode 100644
index 0000000..bbd7bb9
--- /dev/null
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* This file declares stripped-down versions of functions that
+ * normally exist outside of the glsl folder, so that they can be used
+ * when running the GLSL compiler standalone (for unit testing or
+ * compiling builtins).
+ */
+
+#include "standalone_scaffolding.h"
+
+#include <assert.h>
+#include <string.h>
+#include "ralloc.h"
+
+void
+_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+                       struct gl_shader *sh)
+{
+   *ptr = sh;
+}
+
+struct gl_shader *
+_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
+{
+   struct gl_shader *shader;
+
+   (void) ctx;
+
+   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER);
+   shader = rzalloc(NULL, struct gl_shader);
+   if (shader) {
+      shader->Type = type;
+      shader->Name = name;
+      shader->RefCount = 1;
+   }
+   return shader;
+}
+
+void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
+{
+   memset(ctx, 0, sizeof(*ctx));
+
+   ctx->API = api;
+
+   ctx->Extensions.dummy_false = false;
+   ctx->Extensions.dummy_true = true;
+   ctx->Extensions.ARB_ES2_compatibility = true;
+   ctx->Extensions.ARB_draw_buffers = true;
+   ctx->Extensions.ARB_draw_instanced = true;
+   ctx->Extensions.ARB_fragment_coord_conventions = true;
+   ctx->Extensions.EXT_texture_array = true;
+   ctx->Extensions.NV_texture_rectangle = true;
+   ctx->Extensions.EXT_texture3D = true;
+
+   ctx->Const.GLSLVersion = 120;
+
+   /* 1.20 minimums. */
+   ctx->Const.MaxLights = 8;
+   ctx->Const.MaxClipPlanes = 6;
+   ctx->Const.MaxTextureUnits = 2;
+   ctx->Const.MaxTextureCoordUnits = 2;
+   ctx->Const.VertexProgram.MaxAttribs = 16;
+
+   ctx->Const.VertexProgram.MaxUniformComponents = 512;
+   ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */
+   ctx->Const.MaxVertexTextureImageUnits = 0;
+   ctx->Const.MaxCombinedTextureImageUnits = 2;
+   ctx->Const.MaxTextureImageUnits = 2;
+   ctx->Const.FragmentProgram.MaxUniformComponents = 64;
+
+   ctx->Const.MaxDrawBuffers = 1;
+}
diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h
new file mode 100644
index 0000000..8773320
--- /dev/null
+++ b/src/glsl/standalone_scaffolding.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* This file declares stripped-down versions of functions that
+ * normally exist outside of the glsl folder, so that they can be used
+ * when running the GLSL compiler standalone (for unit testing or
+ * compiling builtins).
+ */
+
+#pragma once
+#ifndef STANDALONE_SCAFFOLDING_H
+#define STANDALONE_SCAFFOLDING_H
+
+#include "main/mtypes.h"
+
+extern "C" void
+_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+                       struct gl_shader *sh);
+
+extern "C" struct gl_shader *
+_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
+
+/**
+ * Initialize the given gl_context structure to a reasonable set of
+ * defaults representing the minimum capabilities required by the
+ * OpenGL spec.
+ *
+ * This is used when compiling builtin functions and in testing, when
+ * we don't have a connection to an actual driver.
+ */
+void initialize_context_to_defaults(struct gl_context *ctx, gl_api api);
+
+
+#endif /* STANDALONE_SCAFFOLDING_H */
diff --git a/src/glsl/test.cpp b/src/glsl/test.cpp
new file mode 100644
index 0000000..b1ff92e
--- /dev/null
+++ b/src/glsl/test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file test.cpp
+ *
+ * Standalone tests for the GLSL compiler.
+ *
+ * This file provides a standalone executable which can be used to
+ * test components of the GLSL.
+ *
+ * Each test is a function with the same signature as main().  The
+ * main function interprets its first argument as the name of the test
+ * to run, strips out that argument, and then calls the test function.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "test_optpass.h"
+
+/**
+ * Print proper usage and exit with failure.
+ */
+static void
+usage_fail(const char *name)
+{
+   printf("*** usage: %s <command> <options>\n", name);
+   printf("\n");
+   printf("Possible commands are:\n");
+   printf("  optpass: test an optimization pass in isolation\n");
+   exit(EXIT_FAILURE);
+}
+
+static const char *extract_command_from_argv(int *argc, char **argv)
+{
+   if (*argc < 2) {
+      usage_fail(argv[0]);
+   }
+   const char *command = argv[1];
+   --*argc;
+   memmove(&argv[1], &argv[2], (*argc) * sizeof(argv[1]));
+   return command;
+}
+
+int main(int argc, char **argv)
+{
+   const char *command = extract_command_from_argv(&argc, argv);
+   if (strcmp(command, "optpass") == 0) {
+      return test_optpass(argc, argv);
+   } else {
+      usage_fail(argv[0]);
+   }
+
+   /* Execution should never reach here. */
+   return EXIT_FAILURE;
+}
diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp
new file mode 100644
index 0000000..89b7f83
--- /dev/null
+++ b/src/glsl/test_optpass.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file test_optpass.cpp
+ *
+ * Standalone test for optimization passes.
+ *
+ * This file provides the "optpass" command for the standalone
+ * glsl_test app.  It accepts either GLSL or high-level IR as input,
+ * and performs the optimiation passes specified on the command line.
+ * It outputs the IR, both before and after optimiations.
+ */
+
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <getopt.h>
+
+#include "ast.h"
+#include "ir_optimization.h"
+#include "ir_print_visitor.h"
+#include "program.h"
+#include "ir_reader.h"
+#include "standalone_scaffolding.h"
+
+using namespace std;
+
+static string read_stdin_to_eof()
+{
+   stringbuf sb;
+   cin.get(sb, '\0');
+   return sb.str();
+}
+
+static GLboolean
+do_optimization(struct exec_list *ir, const char *optimization)
+{
+   int int_0;
+   int int_1;
+   int int_2;
+   int int_3;
+   int int_4;
+
+   if (sscanf(optimization, "do_common_optimization ( %d , %d ) ",
+              &int_0, &int_1) == 2) {
+      return do_common_optimization(ir, int_0 != 0, int_1);
+   } else if (strcmp(optimization, "do_algebraic") == 0) {
+      return do_algebraic(ir);
+   } else if (strcmp(optimization, "do_constant_folding") == 0) {
+      return do_constant_folding(ir);
+   } else if (strcmp(optimization, "do_constant_variable") == 0) {
+      return do_constant_variable(ir);
+   } else if (strcmp(optimization, "do_constant_variable_unlinked") == 0) {
+      return do_constant_variable_unlinked(ir);
+   } else if (strcmp(optimization, "do_copy_propagation") == 0) {
+      return do_copy_propagation(ir);
+   } else if (strcmp(optimization, "do_copy_propagation_elements") == 0) {
+      return do_copy_propagation_elements(ir);
+   } else if (strcmp(optimization, "do_constant_propagation") == 0) {
+      return do_constant_propagation(ir);
+   } else if (strcmp(optimization, "do_dead_code") == 0) {
+      return do_dead_code(ir);
+   } else if (strcmp(optimization, "do_dead_code_local") == 0) {
+      return do_dead_code_local(ir);
+   } else if (strcmp(optimization, "do_dead_code_unlinked") == 0) {
+      return do_dead_code_unlinked(ir);
+   } else if (strcmp(optimization, "do_dead_functions") == 0) {
+      return do_dead_functions(ir);
+   } else if (strcmp(optimization, "do_function_inlining") == 0) {
+      return do_function_inlining(ir);
+   } else if (sscanf(optimization,
+                     "do_lower_jumps ( %d , %d , %d , %d , %d ) ",
+                     &int_0, &int_1, &int_2, &int_3, &int_4) == 5) {
+      return do_lower_jumps(ir, int_0 != 0, int_1 != 0, int_2 != 0,
+                            int_3 != 0, int_4 != 0);
+   } else if (strcmp(optimization, "do_lower_texture_projection") == 0) {
+      return do_lower_texture_projection(ir);
+   } else if (strcmp(optimization, "do_if_simplification") == 0) {
+      return do_if_simplification(ir);
+   } else if (strcmp(optimization, "do_discard_simplification") == 0) {
+      return do_discard_simplification(ir);
+   } else if (sscanf(optimization, "lower_if_to_cond_assign ( %d ) ",
+                     &int_0) == 1) {
+      return lower_if_to_cond_assign(ir, int_0);
+   } else if (strcmp(optimization, "do_mat_op_to_vec") == 0) {
+      return do_mat_op_to_vec(ir);
+   } else if (strcmp(optimization, "do_noop_swizzle") == 0) {
+      return do_noop_swizzle(ir);
+   } else if (strcmp(optimization, "do_structure_splitting") == 0) {
+      return do_structure_splitting(ir);
+   } else if (strcmp(optimization, "do_swizzle_swizzle") == 0) {
+      return do_swizzle_swizzle(ir);
+   } else if (strcmp(optimization, "do_tree_grafting") == 0) {
+      return do_tree_grafting(ir);
+   } else if (strcmp(optimization, "do_vec_index_to_cond_assign") == 0) {
+      return do_vec_index_to_cond_assign(ir);
+   } else if (strcmp(optimization, "do_vec_index_to_swizzle") == 0) {
+      return do_vec_index_to_swizzle(ir);
+   } else if (strcmp(optimization, "lower_discard") == 0) {
+      return lower_discard(ir);
+   } else if (sscanf(optimization, "lower_instructions ( %d ) ",
+                     &int_0) == 1) {
+      return lower_instructions(ir, int_0);
+   } else if (strcmp(optimization, "lower_noise") == 0) {
+      return lower_noise(ir);
+   } else if (sscanf(optimization, "lower_variable_index_to_cond_assign "
+                     "( %d , %d , %d , %d ) ", &int_0, &int_1, &int_2,
+                     &int_3) == 4) {
+      return lower_variable_index_to_cond_assign(ir, int_0 != 0, int_1 != 0,
+                                                 int_2 != 0, int_3 != 0);
+   } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ",
+                     &int_0) == 1) {
+      return lower_quadop_vector(ir, int_0 != 0);
+   } else if (strcmp(optimization, "optimize_redundant_jumps") == 0) {
+      return optimize_redundant_jumps(ir);
+   } else {
+      printf("Unrecognized optimization %s\n", optimization);
+      exit(EXIT_FAILURE);
+      return false;
+   }
+}
+
+static GLboolean
+do_optimization_passes(struct exec_list *ir, char **optimizations,
+                       int num_optimizations, bool quiet)
+{
+   GLboolean overall_progress = false;
+
+   for (int i = 0; i < num_optimizations; ++i) {
+      const char *optimization = optimizations[i];
+      if (!quiet) {
+         printf("*** Running optimization %s...", optimization);
+      }
+      GLboolean progress = do_optimization(ir, optimization);
+      if (!quiet) {
+         printf("%s\n", progress ? "progress" : "no progress");
+      }
+      validate_ir_tree(ir);
+
+      overall_progress = overall_progress || progress;
+   }
+
+   return overall_progress;
+}
+
+int test_optpass(int argc, char **argv)
+{
+   int input_format_ir = 0; /* 0=glsl, 1=ir */
+   int loop = 0;
+   int shader_type = GL_VERTEX_SHADER;
+   int quiet = 0;
+
+   const struct option optpass_opts[] = {
+      { "input-ir", no_argument, &input_format_ir, 1 },
+      { "input-glsl", no_argument, &input_format_ir, 0 },
+      { "loop", no_argument, &loop, 1 },
+      { "vertex-shader", no_argument, &shader_type, GL_VERTEX_SHADER },
+      { "fragment-shader", no_argument, &shader_type, GL_FRAGMENT_SHADER },
+      { "quiet", no_argument, &quiet, 1 },
+      { NULL, 0, NULL, 0 }
+   };
+
+   int idx = 0;
+   int c;
+   while ((c = getopt_long(argc, argv, "", optpass_opts, &idx)) != -1) {
+      if (c != 0) {
+         printf("*** usage: %s optpass <optimizations> <options>\n", argv[0]);
+         printf("\n");
+         printf("Possible options are:\n");
+         printf("  --input-ir: input format is IR\n");
+         printf("  --input-glsl: input format is GLSL (the default)\n");
+         printf("  --loop: run optimizations repeatedly until no progress\n");
+         printf("  --vertex-shader: test with a vertex shader (the default)\n");
+         printf("  --fragment-shader: test with a fragment shader\n");
+         exit(EXIT_FAILURE);
+      }
+   }
+
+   struct gl_context local_ctx;
+   struct gl_context *ctx = &local_ctx;
+   initialize_context_to_defaults(ctx, API_OPENGL);
+
+   ctx->Driver.NewShader = _mesa_new_shader;
+
+   struct gl_shader *shader = rzalloc(NULL, struct gl_shader);
+   shader->Type = shader_type;
+
+   string input = read_stdin_to_eof();
+
+   struct _mesa_glsl_parse_state *state
+      = new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
+
+   if (input_format_ir) {
+      shader->ir = new(shader) exec_list;
+      _mesa_glsl_initialize_types(state);
+      _mesa_glsl_read_ir(state, shader->ir, input.c_str(), true);
+   } else {
+      shader->Source = input.c_str();
+      const char *source = shader->Source;
+      state->error = preprocess(state, &source, &state->info_log,
+                                state->extensions, ctx->API) != 0;
+
+      if (!state->error) {
+         _mesa_glsl_lexer_ctor(state, source);
+         _mesa_glsl_parse(state);
+         _mesa_glsl_lexer_dtor(state);
+      }
+
+      shader->ir = new(shader) exec_list;
+      if (!state->error && !state->translation_unit.is_empty())
+         _mesa_ast_to_hir(shader->ir, state);
+   }
+
+   /* Print out the initial IR */
+   if (!state->error && !quiet) {
+      printf("*** pre-optimization IR:\n");
+      _mesa_print_ir(shader->ir, state);
+      printf("\n--\n");
+   }
+
+   /* Optimization passes */
+   if (!state->error) {
+      GLboolean progress;
+      do {
+         progress = do_optimization_passes(shader->ir, &argv[optind],
+                                           argc - optind, quiet != 0);
+      } while (loop && progress);
+   }
+
+   /* Print out the resulting IR */
+   if (!state->error) {
+      if (!quiet) {
+         printf("*** resulting IR:\n");
+      }
+      _mesa_print_ir(shader->ir, state);
+      if (!quiet) {
+         printf("\n--\n");
+      }
+   }
+
+   if (state->error) {
+      printf("*** error(s) occurred:\n");
+      printf("%s\n", state->info_log);
+      printf("--\n");
+   }
+
+   ralloc_free(state);
+   ralloc_free(shader);
+
+   return state->error;
+}
+
diff --git a/src/glsl/test_optpass.h b/src/glsl/test_optpass.h
new file mode 100644
index 0000000..923ccf3
--- /dev/null
+++ b/src/glsl/test_optpass.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef TEST_OPTPASS_H
+#define TEST_OPTPASS_H
+
+int test_optpass(int argc, char **argv);
+
+#endif /* TEST_OPTPASS_H */
diff --git a/src/glsl/tests/compare_ir b/src/glsl/tests/compare_ir
new file mode 100755
index 0000000..a40fc81
--- /dev/null
+++ b/src/glsl/tests/compare_ir
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Compare two files containing IR code.  Ignore formatting differences
+# and declaration order.
+
+import os
+import os.path
+import subprocess
+import sys
+import tempfile
+
+from sexps import *
+
+if len(sys.argv) != 3:
+    print 'Usage: compare_ir <file1> <file2>'
+    exit(1)
+
+with open(sys.argv[1]) as f:
+    ir1 = sort_decls(parse_sexp(f.read()))
+with open(sys.argv[2]) as f:
+    ir2 = sort_decls(parse_sexp(f.read()))
+
+if ir1 == ir2:
+    exit(0)
+else:
+    file1, path1 = tempfile.mkstemp(os.path.basename(sys.argv[1]))
+    file2, path2 = tempfile.mkstemp(os.path.basename(sys.argv[2]))
+    try:
+        os.write(file1, '{0}\n'.format(sexp_to_string(ir1)))
+        os.close(file1)
+        os.write(file2, '{0}\n'.format(sexp_to_string(ir2)))
+        os.close(file2)
+        subprocess.call(['diff', '-u', path1, path2])
+    finally:
+        os.remove(path1)
+        os.remove(path2)
+    exit(1)
diff --git a/src/glsl/tests/lower_jumps/.gitignore b/src/glsl/tests/lower_jumps/.gitignore
new file mode 100644
index 0000000..f47cb20
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/.gitignore
@@ -0,0 +1 @@
+*.out
diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py b/src/glsl/tests/lower_jumps/create_test_cases.py
new file mode 100644
index 0000000..fbc6f0a
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/create_test_cases.py
@@ -0,0 +1,643 @@
+# coding=utf-8
+#
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import os
+import os.path
+import re
+import subprocess
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir
+from sexps import *
+
+def make_test_case(f_name, ret_type, body):
+    """Create a simple optimization test case consisting of a single
+    function with the given name, return type, and body.
+
+    Global declarations are automatically created for any undeclared
+    variables that are referenced by the function.  All undeclared
+    variables are assumed to be floats.
+    """
+    check_sexp(body)
+    declarations = {}
+    def make_declarations(sexp, already_declared = ()):
+        if isinstance(sexp, list):
+            if len(sexp) == 2 and sexp[0] == 'var_ref':
+                if sexp[1] not in already_declared:
+                    declarations[sexp[1]] = [
+                        'declare', ['in'], 'float', sexp[1]]
+            elif len(sexp) == 4 and sexp[0] == 'assign':
+                assert sexp[2][0] == 'var_ref'
+                if sexp[2][1] not in already_declared:
+                    declarations[sexp[2][1]] = [
+                        'declare', ['out'], 'float', sexp[2][1]]
+                make_declarations(sexp[3], already_declared)
+            else:
+                already_declared = set(already_declared)
+                for s in sexp:
+                    if isinstance(s, list) and len(s) >= 4 and \
+                            s[0] == 'declare':
+                        already_declared.add(s[3])
+                    else:
+                        make_declarations(s, already_declared)
+    make_declarations(body)
+    return declarations.values() + \
+        [['function', f_name, ['signature', ret_type, ['parameters'], body]]]
+
+
+# The following functions can be used to build expressions.
+
+def const_float(value):
+    """Create an expression representing the given floating point value."""
+    return ['constant', 'float', ['{0:.6f}'.format(value)]]
+
+def const_bool(value):
+    """Create an expression representing the given boolean value.
+
+    If value is not a boolean, it is converted to a boolean.  So, for
+    instance, const_bool(1) is equivalent to const_bool(True).
+    """
+    return ['constant', 'bool', ['{0}'.format(1 if value else 0)]]
+
+def gt_zero(var_name):
+    """Create Construct the expression var_name > 0"""
+    return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
+
+
+# The following functions can be used to build complex control flow
+# statements.  All of these functions return statement lists (even
+# those which only create a single statement), so that statements can
+# be sequenced together using the '+' operator.
+
+def return_(value = None):
+    """Create a return statement."""
+    if value is not None:
+        return [['return', value]]
+    else:
+        return [['return']]
+
+def break_():
+    """Create a break statement."""
+    return ['break']
+
+def continue_():
+    """Create a continue statement."""
+    return ['continue']
+
+def simple_if(var_name, then_statements, else_statements = None):
+    """Create a statement of the form
+
+    if (var_name > 0.0) {
+       <then_statements>
+    } else {
+       <else_statements>
+    }
+
+    else_statements may be omitted.
+    """
+    if else_statements is None:
+        else_statements = []
+    check_sexp(then_statements)
+    check_sexp(else_statements)
+    return [['if', gt_zero(var_name), then_statements, else_statements]]
+
+def loop(statements):
+    """Create a loop containing the given statements as its loop
+    body.
+    """
+    check_sexp(statements)
+    return [['loop', [], [], [], [], statements]]
+
+def declare_temp(var_type, var_name):
+    """Create a declaration of the form
+
+    (declare (temporary) <var_type> <var_name)
+    """
+    return [['declare', ['temporary'], var_type, var_name]]
+
+def assign_x(var_name, value):
+    """Create a statement that assigns <value> to the variable
+    <var_name>.  The assignment uses the mask (x).
+    """
+    check_sexp(value)
+    return [['assign', ['x'], ['var_ref', var_name], value]]
+
+def complex_if(var_prefix, statements):
+    """Create a statement of the form
+
+    if (<var_prefix>a > 0.0) {
+       if (<var_prefix>b > 0.0) {
+          <statements>
+       }
+    }
+
+    This is useful in testing jump lowering, because if <statements>
+    ends in a jump, lower_jumps.cpp won't try to combine this
+    construct with the code that follows it, as it might do for a
+    simple if.
+
+    All variables used in the if statement are prefixed with
+    var_prefix.  This can be used to ensure uniqueness.
+    """
+    check_sexp(statements)
+    return simple_if(var_prefix + 'a', simple_if(var_prefix + 'b', statements))
+
+def declare_execute_flag():
+    """Create the statements that lower_jumps.cpp uses to declare and
+    initialize the temporary boolean execute_flag.
+    """
+    return declare_temp('bool', 'execute_flag') + \
+        assign_x('execute_flag', const_bool(True))
+
+def declare_return_flag():
+    """Create the statements that lower_jumps.cpp uses to declare and
+    initialize the temporary boolean return_flag.
+    """
+    return declare_temp('bool', 'return_flag') + \
+        assign_x('return_flag', const_bool(False))
+
+def declare_return_value():
+    """Create the statements that lower_jumps.cpp uses to declare and
+    initialize the temporary variable return_value.  Assume that
+    return_value is a float.
+    """
+    return declare_temp('float', 'return_value')
+
+def declare_break_flag():
+    """Create the statements that lower_jumps.cpp uses to declare and
+    initialize the temporary boolean break_flag.
+    """
+    return declare_temp('bool', 'break_flag') + \
+        assign_x('break_flag', const_bool(False))
+
+def lowered_return_simple(value = None):
+    """Create the statements that lower_jumps.cpp lowers a return
+    statement to, in situations where it does not need to clear the
+    execute flag.
+    """
+    if value:
+        result = assign_x('return_value', value)
+    else:
+        result = []
+    return result + assign_x('return_flag', const_bool(True))
+
+def lowered_return(value = None):
+    """Create the statements that lower_jumps.cpp lowers a return
+    statement to, in situations where it needs to clear the execute
+    flag.
+    """
+    return lowered_return_simple(value) + \
+        assign_x('execute_flag', const_bool(False))
+
+def lowered_continue():
+    """Create the statement that lower_jumps.cpp lowers a continue
+    statement to.
+    """
+    return assign_x('execute_flag', const_bool(False))
+
+def lowered_break_simple():
+    """Create the statement that lower_jumps.cpp lowers a break
+    statement to, in situations where it does not need to clear the
+    execute flag.
+    """
+    return assign_x('break_flag', const_bool(True))
+
+def lowered_break():
+    """Create the statement that lower_jumps.cpp lowers a break
+    statement to, in situations where it needs to clear the execute
+    flag.
+    """
+    return lowered_break_simple() + assign_x('execute_flag', const_bool(False))
+
+def if_execute_flag(statements):
+    """Wrap statements in an if test so that they will only execute if
+    execute_flag is True.
+    """
+    check_sexp(statements)
+    return [['if', ['var_ref', 'execute_flag'], statements, []]]
+
+def if_not_return_flag(statements):
+    """Wrap statements in an if test so that they will only execute if
+    return_flag is False.
+    """
+    check_sexp(statements)
+    return [['if', ['var_ref', 'return_flag'], [], statements]]
+
+def final_return():
+    """Create the return statement that lower_jumps.cpp places at the
+    end of a function when lowering returns.
+    """
+    return [['return', ['var_ref', 'return_value']]]
+
+def final_break():
+    """Create the conditional break statement that lower_jumps.cpp
+    places at the end of a function when lowering breaks.
+    """
+    return [['if', ['var_ref', 'break_flag'], break_(), []]]
+
+def bash_quote(*args):
+    """Quote the arguments appropriately so that bash will understand
+    each argument as a single word.
+    """
+    def quote_word(word):
+        for c in word:
+            if not (c.isalpha() or c.isdigit() or c in '@%_-+=:,./'):
+                break
+        else:
+            if not word:
+                return "''"
+            return word
+        return "'{0}'".format(word.replace("'", "'\"'\"'"))
+    return ' '.join(quote_word(word) for word in args)
+
+def create_test_case(doc_string, input_sexp, expected_sexp, test_name,
+                     pull_out_jumps=False, lower_sub_return=False,
+                     lower_main_return=False, lower_continue=False,
+                     lower_break=False):
+    """Create a test case that verifies that do_lower_jumps transforms
+    the given code in the expected way.
+    """
+    doc_lines = [line.strip() for line in doc_string.splitlines()]
+    doc_string = ''.join('# {0}\n'.format(line) for line in doc_lines if line != '')
+    check_sexp(input_sexp)
+    check_sexp(expected_sexp)
+    input_str = sexp_to_string(sort_decls(input_sexp))
+    expected_output = sexp_to_string(sort_decls(expected_sexp))
+
+    optimization = (
+        'do_lower_jumps({0:d}, {1:d}, {2:d}, {3:d}, {4:d})'.format(
+            pull_out_jumps, lower_sub_return, lower_main_return,
+            lower_continue, lower_break))
+    args = ['../../glsl_test', 'optpass', '--quiet', '--input-ir', optimization]
+    test_file = '{0}.opt_test'.format(test_name)
+    with open(test_file, 'w') as f:
+        f.write('#!/bin/bash\n#\n# This file was generated by create_test_cases.py.\n#\n')
+        f.write(doc_string)
+        f.write('{0} <<EOF\n'.format(bash_quote(*args)))
+        f.write('{0}\nEOF\n'.format(input_str))
+    os.chmod(test_file, 0774)
+    expected_file = '{0}.opt_test.expected'.format(test_name)
+    with open(expected_file, 'w') as f:
+        f.write('{0}\n'.format(expected_output))
+
+def test_lower_returns_main():
+    doc_string = """Test that do_lower_jumps respects the lower_main_return
+    flag in deciding whether to lower returns in the main
+    function.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            complex_if('', return_())
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_execute_flag() +
+            declare_return_flag() +
+            complex_if('', lowered_return())
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_main_true',
+                     lower_main_return=True)
+    create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_main_false',
+                     lower_main_return=False)
+
+def test_lower_returns_sub():
+    doc_string = """Test that do_lower_jumps respects the lower_sub_return flag
+    in deciding whether to lower returns in subroutines.
+    """
+    input_sexp = make_test_case('sub', 'void', (
+            complex_if('', return_())
+            ))
+    expected_sexp = make_test_case('sub', 'void', (
+            declare_execute_flag() +
+            declare_return_flag() +
+            complex_if('', lowered_return())
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_sub_true',
+                     lower_sub_return=True)
+    create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_sub_false',
+                     lower_sub_return=False)
+
+def test_lower_returns_1():
+    doc_string = """Test that a void return at the end of a function is
+    eliminated.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            assign_x('a', const_float(1)) +
+            return_()
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            assign_x('a', const_float(1))
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_1',
+                     lower_main_return=True)
+
+def test_lower_returns_2():
+    doc_string = """Test that lowering is not performed on a non-void return at
+    the end of subroutine.
+    """
+    input_sexp = make_test_case('sub', 'float', (
+            assign_x('a', const_float(1)) +
+            return_(const_float(1))
+            ))
+    create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_2',
+                     lower_sub_return=True)
+
+def test_lower_returns_3():
+    doc_string = """Test lowering of returns when there is one nested inside a
+    complex structure of ifs, and one at the end of a function.
+
+    In this case, the latter return needs to be lowered because it
+    will not be at the end of the function once the final return
+    is inserted.
+    """
+    input_sexp = make_test_case('sub', 'float', (
+            complex_if('', return_(const_float(1))) +
+            return_(const_float(2))
+            ))
+    expected_sexp = make_test_case('sub', 'float', (
+            declare_execute_flag() +
+            declare_return_value() +
+            declare_return_flag() +
+            complex_if('', lowered_return(const_float(1))) +
+            if_execute_flag(lowered_return(const_float(2))) +
+            final_return()
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_3',
+                     lower_sub_return=True)
+
+def test_lower_returns_4():
+    doc_string = """Test that returns are properly lowered when they occur in
+    both branches of an if-statement.
+    """
+    input_sexp = make_test_case('sub', 'float', (
+            simple_if('a', return_(const_float(1)),
+                      return_(const_float(2)))
+            ))
+    expected_sexp = make_test_case('sub', 'float', (
+            declare_execute_flag() +
+            declare_return_value() +
+            declare_return_flag() +
+            simple_if('a', lowered_return(const_float(1)),
+                      lowered_return(const_float(2))) +
+            final_return()
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_4',
+                     lower_sub_return=True)
+
+def test_lower_unified_returns():
+    doc_string = """If both branches of an if statement end in a return, and
+    pull_out_jumps is True, then those returns should be lifted
+    outside the if and then properly lowered.
+
+    Verify that this lowering occurs during the same pass as the
+    lowering of other returns by checking that extra temporary
+    variables aren't generated.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            complex_if('a', return_()) +
+            simple_if('b', simple_if('c', return_(), return_()))
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_execute_flag() +
+            declare_return_flag() +
+            complex_if('a', lowered_return()) +
+            if_execute_flag(simple_if('b', (simple_if('c', [], []) +
+                                            lowered_return())))
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_unified_returns',
+                     lower_main_return=True, pull_out_jumps=True)
+
+def test_lower_pulled_out_jump():
+    doc_string = """If one branch of an if ends in a jump, and control cannot
+    fall out the bottom of the other branch, and pull_out_jumps is
+    True, then the jump is lifted outside the if.
+
+    Verify that this lowering occurs during the same pass as the
+    lowering of other jumps by checking that extra temporary
+    variables aren't generated.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            complex_if('a', return_()) +
+            loop(simple_if('b', simple_if('c', break_(), continue_()),
+                           return_())) +
+            assign_x('d', const_float(1))
+            ))
+    # Note: optimization produces two other effects: the break
+    # gets lifted out of the if statements, and the code after the
+    # loop gets guarded so that it only executes if the return
+    # flag is clear.
+    expected_sexp = make_test_case('main', 'void', (
+            declare_execute_flag() +
+            declare_return_flag() +
+            complex_if('a', lowered_return()) +
+            if_execute_flag(
+                loop(simple_if('b', simple_if('c', [], continue_()),
+                               lowered_return_simple()) +
+                     break_()) +
+                if_not_return_flag(assign_x('d', const_float(1))))
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_pulled_out_jump',
+                     lower_main_return=True, pull_out_jumps=True)
+
+def test_lower_breaks_1():
+    doc_string = """If a loop contains an unconditional break at the bottom of
+    it, it should not be lowered."""
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 break_())
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_1', lower_break=True)
+
+def test_lower_breaks_2():
+    doc_string = """If a loop contains a conditional break at the bottom of it,
+    it should not be lowered if it is in the then-clause.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 simple_if('b', break_()))
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_2', lower_break=True)
+
+def test_lower_breaks_3():
+    doc_string = """If a loop contains a conditional break at the bottom of it,
+    it should not be lowered if it is in the then-clause, even if
+    there are statements preceding the break.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 simple_if('b', (assign_x('c', const_float(1)) +
+                                 break_())))
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_3', lower_break=True)
+
+def test_lower_breaks_4():
+    doc_string = """If a loop contains a conditional break at the bottom of it,
+    it should not be lowered if it is in the else-clause.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 simple_if('b', [], break_()))
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_4', lower_break=True)
+
+def test_lower_breaks_5():
+    doc_string = """If a loop contains a conditional break at the bottom of it,
+    it should not be lowered if it is in the else-clause, even if
+    there are statements preceding the break.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 simple_if('b', [], (assign_x('c', const_float(1)) +
+                                     break_())))
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_5', lower_break=True)
+
+def test_lower_breaks_6():
+    doc_string = """If a loop contains conditional breaks and continues, and
+    ends in an unconditional break, then the unconditional break
+    needs to be lowered, because it will no longer be at the end
+    of the loop after the final break is added.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(simple_if('a', (complex_if('b', continue_()) +
+                                 complex_if('c', break_()))) +
+                 break_())
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_break_flag() +
+            loop(declare_execute_flag() +
+                 simple_if(
+                    'a',
+                    (complex_if('b', lowered_continue()) +
+                     if_execute_flag(
+                            complex_if('c', lowered_break())))) +
+                 if_execute_flag(lowered_break_simple()) +
+                 final_break())
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_6',
+                     lower_break=True, lower_continue=True)
+
+def test_lower_guarded_conditional_break():
+    doc_string = """Normally a conditional break at the end of a loop isn't
+    lowered, however if the conditional break gets placed inside
+    an if(execute_flag) because of earlier lowering of continues,
+    then the break needs to be lowered.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(complex_if('a', continue_()) +
+                 simple_if('b', break_()))
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_break_flag() +
+            loop(declare_execute_flag() +
+                 complex_if('a', lowered_continue()) +
+                 if_execute_flag(simple_if('b', lowered_break())) +
+                 final_break())
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_guarded_conditional_break',
+                     lower_break=True, lower_continue=True)
+
+def test_remove_continue_at_end_of_loop():
+    doc_string = """Test that a redundant continue-statement at the end of a
+    loop is removed.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 continue_())
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)))
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'remove_continue_at_end_of_loop')
+
+def test_lower_return_void_at_end_of_loop():
+    doc_string = """Test that a return of void at the end of a loop is properly
+    lowered.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 return_()) +
+            assign_x('b', const_float(2))
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_return_flag() +
+            loop(assign_x('a', const_float(1)) +
+                 lowered_return_simple() +
+                 break_()) +
+            if_not_return_flag(assign_x('b', const_float(2)))
+            ))
+    create_test_case(doc_string, input_sexp, input_sexp, 'return_void_at_end_of_loop_lower_nothing')
+    create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return',
+                     lower_main_return=True)
+    create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return_and_break',
+                     lower_main_return=True, lower_break=True)
+
+def test_lower_return_non_void_at_end_of_loop():
+    doc_string = """Test that a non-void return at the end of a loop is
+    properly lowered.
+    """
+    input_sexp = make_test_case('sub', 'float', (
+            loop(assign_x('a', const_float(1)) +
+                 return_(const_float(2))) +
+            assign_x('b', const_float(3)) +
+            return_(const_float(4))
+            ))
+    expected_sexp = make_test_case('sub', 'float', (
+            declare_execute_flag() +
+            declare_return_value() +
+            declare_return_flag() +
+            loop(assign_x('a', const_float(1)) +
+                 lowered_return_simple(const_float(2)) +
+                 break_()) +
+            if_not_return_flag(assign_x('b', const_float(3)) +
+                               lowered_return(const_float(4))) +
+            final_return()
+            ))
+    create_test_case(doc_string, input_sexp, input_sexp, 'return_non_void_at_end_of_loop_lower_nothing')
+    create_test_case(doc_string, input_sexp, expected_sexp, 'return_non_void_at_end_of_loop_lower_return',
+                     lower_sub_return=True)
+    create_test_case(doc_string, input_sexp, expected_sexp, 'return_non_void_at_end_of_loop_lower_return_and_break',
+                     lower_sub_return=True, lower_break=True)
+
+if __name__ == '__main__':
+    test_lower_returns_main()
+    test_lower_returns_sub()
+    test_lower_returns_1()
+    test_lower_returns_2()
+    test_lower_returns_3()
+    test_lower_returns_4()
+    test_lower_unified_returns()
+    test_lower_pulled_out_jump()
+    test_lower_breaks_1()
+    test_lower_breaks_2()
+    test_lower_breaks_3()
+    test_lower_breaks_4()
+    test_lower_breaks_5()
+    test_lower_breaks_6()
+    test_lower_guarded_conditional_break()
+    test_remove_continue_at_end_of_loop()
+    test_lower_return_void_at_end_of_loop()
+    test_lower_return_non_void_at_end_of_loop()
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test
new file mode 100755
index 0000000..01ad708
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains an unconditional break at the bottom of
+# it, it should not be lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) break))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected
new file mode 100644
index 0000000..d4bb6fc
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected
@@ -0,0 +1,5 @@
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) break))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test
new file mode 100755
index 0000000..0be22f9
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains a conditional break at the bottom of it,
+# it should not be lowered if it is in the then-clause.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (in) float b) (declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) (break)
+       ())))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected
new file mode 100644
index 0000000..a4cb2d6
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected
@@ -0,0 +1,7 @@
+((declare (in) float b) (declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) (break)
+       ())))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test
new file mode 100755
index 0000000..4149360
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains a conditional break at the bottom of it,
+# it should not be lowered if it is in the then-clause, even if
+# there are statements preceding the break.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (in) float b) (declare (out) float a) (declare (out) float c)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref c) (constant float (1.000000))) break)
+       ())))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected
new file mode 100644
index 0000000..325f7b4
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected
@@ -0,0 +1,8 @@
+((declare (in) float b) (declare (out) float a) (declare (out) float c)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref c) (constant float (1.000000))) break)
+       ())))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test
new file mode 100755
index 0000000..70458bb
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains a conditional break at the bottom of it,
+# it should not be lowered if it is in the else-clause.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (in) float b) (declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) ()
+       (break))))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected
new file mode 100644
index 0000000..a773545
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected
@@ -0,0 +1,7 @@
+((declare (in) float b) (declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) ()
+       (break))))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test
new file mode 100755
index 0000000..da9eef1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains a conditional break at the bottom of it,
+# it should not be lowered if it is in the else-clause, even if
+# there are statements preceding the break.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (in) float b) (declare (out) float a) (declare (out) float c)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) ()
+       ((assign (x) (var_ref c) (constant float (1.000000))) break))))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected
new file mode 100644
index 0000000..0dd4a52
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected
@@ -0,0 +1,7 @@
+((declare (in) float b) (declare (out) float a) (declare (out) float c)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) ()
+       ((assign (x) (var_ref c) (constant float (1.000000))) break))))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test
new file mode 100755
index 0000000..9440dfe
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains conditional breaks and continues, and
+# ends in an unconditional break, then the unconditional break
+# needs to be lowered, because it will no longer be at the end
+# of the loop after the final break is added.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 1, 1)' <<EOF
+((declare (in) float a) (declare (in) float ba) (declare (in) float bb)
+ (declare (in) float ca)
+ (declare (in) float cb)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((if (expression bool > (var_ref a) (constant float (0.000000)))
+       ((if (expression bool > (var_ref ba) (constant float (0.000000)))
+         ((if (expression bool > (var_ref bb) (constant float (0.000000)))
+           (continue)
+           ()))
+         ())
+        (if (expression bool > (var_ref ca) (constant float (0.000000)))
+         ((if (expression bool > (var_ref cb) (constant float (0.000000)))
+           (break)
+           ()))
+         ()))
+       ())
+      break))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected
new file mode 100644
index 0000000..8222328
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected
@@ -0,0 +1,29 @@
+((declare (in) float a) (declare (in) float ba) (declare (in) float bb)
+ (declare (in) float ca)
+ (declare (in) float cb)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool break_flag)
+    (assign (x) (var_ref break_flag) (constant bool (0)))
+    (loop () () () ()
+     ((declare (temporary) bool execute_flag)
+      (assign (x) (var_ref execute_flag) (constant bool (1)))
+      (if (expression bool > (var_ref a) (constant float (0.000000)))
+       ((if (expression bool > (var_ref ba) (constant float (0.000000)))
+         ((if (expression bool > (var_ref bb) (constant float (0.000000)))
+           ((assign (x) (var_ref execute_flag) (constant bool (0))))
+           ()))
+         ())
+        (if (var_ref execute_flag)
+         ((if (expression bool > (var_ref ca) (constant float (0.000000)))
+           ((if (expression bool > (var_ref cb) (constant float (0.000000)))
+             ((assign (x) (var_ref break_flag) (constant bool (1)))
+              (assign (x) (var_ref execute_flag) (constant bool (0))))
+             ()))
+           ()))
+         ()))
+       ())
+      (if (var_ref execute_flag)
+       ((assign (x) (var_ref break_flag) (constant bool (1))))
+       ())
+      (if (var_ref break_flag) (break) ())))))))
diff --git a/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test
new file mode 100755
index 0000000..379aa59
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Normally a conditional break at the end of a loop isn't
+# lowered, however if the conditional break gets placed inside
+# an if(execute_flag) because of earlier lowering of continues,
+# then the break needs to be lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 1, 1)' <<EOF
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((if (expression bool > (var_ref aa) (constant float (0.000000)))
+       ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+         (continue)
+         ()))
+       ())
+      (if (expression bool > (var_ref b) (constant float (0.000000))) (break)
+       ())))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected
new file mode 100644
index 0000000..7c6e73f
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected
@@ -0,0 +1,20 @@
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool break_flag)
+    (assign (x) (var_ref break_flag) (constant bool (0)))
+    (loop () () () ()
+     ((declare (temporary) bool execute_flag)
+      (assign (x) (var_ref execute_flag) (constant bool (1)))
+      (if (expression bool > (var_ref aa) (constant float (0.000000)))
+       ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+         ((assign (x) (var_ref execute_flag) (constant bool (0))))
+         ()))
+       ())
+      (if (var_ref execute_flag)
+       ((if (expression bool > (var_ref b) (constant float (0.000000)))
+         ((assign (x) (var_ref break_flag) (constant bool (1)))
+          (assign (x) (var_ref execute_flag) (constant bool (0))))
+         ()))
+       ())
+      (if (var_ref break_flag) (break) ())))))))
diff --git a/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test
new file mode 100755
index 0000000..15f3c41
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If one branch of an if ends in a jump, and control cannot
+# fall out the bottom of the other branch, and pull_out_jumps is
+# True, then the jump is lifted outside the if.
+# Verify that this lowering occurs during the same pass as the
+# lowering of other jumps by checking that extra temporary
+# variables aren't generated.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(1, 0, 1, 0, 0)' <<EOF
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (declare (in) float c)
+ (declare (out) float d)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref aa) (constant float (0.000000)))
+     ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())
+    (loop () () () ()
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((if (expression bool > (var_ref c) (constant float (0.000000))) (break)
+         (continue)))
+       ((return)))))
+    (assign (x) (var_ref d) (constant float (1.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected
new file mode 100644
index 0000000..bf45c2c
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected
@@ -0,0 +1,25 @@
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (declare (in) float c)
+ (declare (out) float d)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref aa) (constant float (0.000000)))
+     ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+       ((assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())
+    (if (var_ref execute_flag)
+     ((loop () () () ()
+       ((if (expression bool > (var_ref b) (constant float (0.000000)))
+         ((if (expression bool > (var_ref c) (constant float (0.000000))) ()
+           (continue)))
+         ((assign (x) (var_ref return_flag) (constant bool (1)))))
+        break))
+      (if (var_ref return_flag) ()
+       ((assign (x) (var_ref d) (constant float (1.000000))))))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_1.opt_test b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test
new file mode 100755
index 0000000..a1f895b
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test
@@ -0,0 +1,12 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a void return at the end of a function is
+# eliminated.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((assign (x) (var_ref a) (constant float (1.000000))) (return)))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected
new file mode 100644
index 0000000..7c3919c
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected
@@ -0,0 +1,4 @@
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((assign (x) (var_ref a) (constant float (1.000000)))))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_2.opt_test b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test
new file mode 100755
index 0000000..61673d4
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that lowering is not performed on a non-void return at
+# the end of subroutine.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (out) float a)
+ (function sub
+  (signature float (parameters)
+   ((assign (x) (var_ref a) (constant float (1.000000)))
+    (return (constant float (1.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected
new file mode 100644
index 0000000..7777927
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected
@@ -0,0 +1,5 @@
+((declare (out) float a)
+ (function sub
+  (signature float (parameters)
+   ((assign (x) (var_ref a) (constant float (1.000000)))
+    (return (constant float (1.000000)))))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_3.opt_test b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test
new file mode 100755
index 0000000..9881e24
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test lowering of returns when there is one nested inside a
+# complex structure of ifs, and one at the end of a function.
+# In this case, the latter return needs to be lowered because it
+# will not be at the end of the function once the final return
+# is inserted.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature float (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return (constant float (1.000000))))
+       ()))
+     ())
+    (return (constant float (2.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected
new file mode 100644
index 0000000..d4835e9
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected
@@ -0,0 +1,21 @@
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature float (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) float return_value)
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref return_value) (constant float (1.000000)))
+        (assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())
+    (if (var_ref execute_flag)
+     ((assign (x) (var_ref return_value) (constant float (2.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0))))
+     ())
+    (return (var_ref return_value))))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_4.opt_test b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test
new file mode 100755
index 0000000..9f54c67
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that returns are properly lowered when they occur in
+# both branches of an if-statement.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (in) float a)
+ (function sub
+  (signature float (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((return (constant float (1.000000))))
+     ((return (constant float (2.000000)))))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected
new file mode 100644
index 0000000..b551a06
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected
@@ -0,0 +1,16 @@
+((declare (in) float a)
+ (function sub
+  (signature float (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) float return_value)
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((assign (x) (var_ref return_value) (constant float (1.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0))))
+     ((assign (x) (var_ref return_value) (constant float (2.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0)))))
+    (return (var_ref return_value))))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test
new file mode 100755
index 0000000..5f97bfd
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that do_lower_jumps respects the lower_main_return
+# flag in deciding whether to lower returns in the main
+# function.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected
new file mode 100644
index 0000000..e8b36f1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected
@@ -0,0 +1,8 @@
+((declare (in) float a) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test
new file mode 100755
index 0000000..59c7ba1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that do_lower_jumps respects the lower_main_return
+# flag in deciding whether to lower returns in the main
+# function.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected
new file mode 100644
index 0000000..e15a97d
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected
@@ -0,0 +1,13 @@
+((declare (in) float a) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test
new file mode 100755
index 0000000..40e784e
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that do_lower_jumps respects the lower_sub_return flag
+# in deciding whether to lower returns in subroutines.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected
new file mode 100644
index 0000000..07db6e7
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected
@@ -0,0 +1,8 @@
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test
new file mode 100755
index 0000000..9fe6b90
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that do_lower_jumps respects the lower_sub_return flag
+# in deciding whether to lower returns in subroutines.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected
new file mode 100644
index 0000000..3110980
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected
@@ -0,0 +1,13 @@
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature void (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test
new file mode 100755
index 0000000..e716813
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If both branches of an if statement end in a return, and
+# pull_out_jumps is True, then those returns should be lifted
+# outside the if and then properly lowered.
+# Verify that this lowering occurs during the same pass as the
+# lowering of other returns by checking that extra temporary
+# variables aren't generated.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(1, 0, 1, 0, 0)' <<EOF
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (declare (in) float c)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref aa) (constant float (0.000000)))
+     ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())
+    (if (expression bool > (var_ref b) (constant float (0.000000)))
+     ((if (expression bool > (var_ref c) (constant float (0.000000)))
+       ((return))
+       ((return))))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected
new file mode 100644
index 0000000..271cd3b
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected
@@ -0,0 +1,21 @@
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (declare (in) float c)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref aa) (constant float (0.000000)))
+     ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+       ((assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())
+    (if (var_ref execute_flag)
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((if (expression bool > (var_ref c) (constant float (0.000000))) () ())
+        (assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test
new file mode 100755
index 0000000..18efc37
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a redundant continue-statement at the end of a
+# loop is removed.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) continue))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected
new file mode 100644
index 0000000..d2a02c6
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected
@@ -0,0 +1,5 @@
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))))))))
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test
new file mode 100755
index 0000000..79c0e82
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a non-void return at the end of a loop is
+# properly lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (return (constant float (2.000000)))))
+    (assign (x) (var_ref b) (constant float (3.000000)))
+    (return (constant float (4.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected
new file mode 100644
index 0000000..2cf117a
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected
@@ -0,0 +1,8 @@
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (return (constant float (2.000000)))))
+    (assign (x) (var_ref b) (constant float (3.000000)))
+    (return (constant float (4.000000)))))))
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test
new file mode 100755
index 0000000..920d2ad
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a non-void return at the end of a loop is
+# properly lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (return (constant float (2.000000)))))
+    (assign (x) (var_ref b) (constant float (3.000000)))
+    (return (constant float (4.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected
new file mode 100644
index 0000000..0bab8f1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected
@@ -0,0 +1,19 @@
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) float return_value)
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (assign (x) (var_ref return_value) (constant float (2.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      break))
+    (if (var_ref return_flag) ()
+     ((assign (x) (var_ref b) (constant float (3.000000)))
+      (assign (x) (var_ref return_value) (constant float (4.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0)))))
+    (return (var_ref return_value))))))
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test
new file mode 100755
index 0000000..99f1f86
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a non-void return at the end of a loop is
+# properly lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 1)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (return (constant float (2.000000)))))
+    (assign (x) (var_ref b) (constant float (3.000000)))
+    (return (constant float (4.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected
new file mode 100644
index 0000000..0bab8f1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected
@@ -0,0 +1,19 @@
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) float return_value)
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (assign (x) (var_ref return_value) (constant float (2.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      break))
+    (if (var_ref return_flag) ()
+     ((assign (x) (var_ref b) (constant float (3.000000)))
+      (assign (x) (var_ref return_value) (constant float (4.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0)))))
+    (return (var_ref return_value))))))
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test
new file mode 100755
index 0000000..63487d3
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a return of void at the end of a loop is properly
+# lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) (return)))
+    (assign (x) (var_ref b) (constant float (2.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected
new file mode 100644
index 0000000..0bd8037
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected
@@ -0,0 +1,6 @@
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) (return)))
+    (assign (x) (var_ref b) (constant float (2.000000)))))))
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test
new file mode 100755
index 0000000..523c92a
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a return of void at the end of a loop is properly
+# lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) (return)))
+    (assign (x) (var_ref b) (constant float (2.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected
new file mode 100644
index 0000000..53814ea
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected
@@ -0,0 +1,11 @@
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      break))
+    (if (var_ref return_flag) ()
+     ((assign (x) (var_ref b) (constant float (2.000000)))))))))
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test
new file mode 100755
index 0000000..22b5581
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a return of void at the end of a loop is properly
+# lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 1)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) (return)))
+    (assign (x) (var_ref b) (constant float (2.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected
new file mode 100644
index 0000000..53814ea
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected
@@ -0,0 +1,11 @@
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      break))
+    (if (var_ref return_flag) ()
+     ((assign (x) (var_ref b) (constant float (2.000000)))))))))
diff --git a/src/glsl/tests/optimization-test b/src/glsl/tests/optimization-test
new file mode 100755
index 0000000..0c130be
--- /dev/null
+++ b/src/glsl/tests/optimization-test
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+total=0
+pass=0
+
+echo "====== Testing optimization passes ======"
+for test in `find . -iname '*.opt_test'`; do
+    echo -n "Testing $test..."
+    (cd `dirname "$test"`; ./`basename "$test"`) > "$test.out" 2>&1
+    total=$((total+1))
+    if ./compare_ir "$test.expected" "$test.out" >/dev/null 2>&1; then
+        echo "PASS"
+        pass=$((pass+1))
+    else
+        echo "FAIL"
+        ./compare_ir "$test.expected" "$test.out"
+    fi
+done
+
+echo ""
+echo "$pass/$total tests returned correct results"
+echo ""
+
+if [[ $pass == $total ]]; then
+    exit 0
+else
+    exit 1
+fi
diff --git a/src/glsl/tests/sexps.py b/src/glsl/tests/sexps.py
new file mode 100644
index 0000000..a714af8
--- /dev/null
+++ b/src/glsl/tests/sexps.py
@@ -0,0 +1,103 @@
+# coding=utf-8
+#
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# This file contains helper functions for manipulating sexps in Python.
+#
+# We represent a sexp in Python using nested lists containing strings.
+# So, for example, the sexp (constant float (1.000000)) is represented
+# as ['constant', 'float', ['1.000000']].
+
+import re
+
+def check_sexp(sexp):
+    """Verify that the argument is a proper sexp.
+
+    That is, raise an exception if the argument is not a string or a
+    list, or if it contains anything that is not a string or a list at
+    any nesting level.
+    """
+    if isinstance(sexp, list):
+        for s in sexp:
+            check_sexp(s)
+    elif not isinstance(sexp, basestring):
+        raise Exception('Not a sexp: {0!r}'.format(sexp))
+
+def parse_sexp(sexp):
+    """Convert a string, of the form that would be output by mesa,
+    into a sexp represented as nested lists containing strings.
+    """
+    sexp_token_regexp = re.compile(
+        '[a-zA-Z_]+(@[0-9]+)?|[0-9]+(\\.[0-9]+)?|[^ \n]')
+    stack = [[]]
+    for match in sexp_token_regexp.finditer(sexp):
+        token = match.group(0)
+        if token == '(':
+            stack.append([])
+        elif token == ')':
+            if len(stack) == 1:
+                raise Exception('Unmatched )')
+            sexp = stack.pop()
+            stack[-1].append(sexp)
+        else:
+            stack[-1].append(token)
+    if len(stack) != 1:
+        raise Exception('Unmatched (')
+    if len(stack[0]) != 1:
+        raise Exception('Multiple sexps')
+    return stack[0][0]
+
+def sexp_to_string(sexp):
+    """Convert a sexp, represented as nested lists containing strings,
+    into a single string of the form parseable by mesa.
+    """
+    if isinstance(sexp, basestring):
+        return sexp
+    assert isinstance(sexp, list)
+    result = ''
+    for s in sexp:
+        sub_result = sexp_to_string(s)
+        if result == '':
+            result = sub_result
+        elif '\n' not in result and '\n' not in sub_result and \
+                len(result) + len(sub_result) + 1 <= 70:
+            result += ' ' + sub_result
+        else:
+            result += '\n' + sub_result
+    return '({0})'.format(result.replace('\n', '\n '))
+
+def sort_decls(sexp):
+    """Sort all toplevel variable declarations in sexp.
+
+    This is used to work around the fact that
+    ir_reader::read_instructions reorders declarations.
+    """
+    assert isinstance(sexp, list)
+    decls = []
+    other_code = []
+    for s in sexp:
+        if isinstance(s, list) and len(s) >= 4 and s[0] == 'declare':
+            decls.append(s)
+        else:
+            other_code.append(s)
+    return sorted(decls) + other_code
+
diff --git a/src/glw/GLwDrawA.c b/src/glw/GLwDrawA.c
deleted file mode 100644
index 30304a4..0000000
--- a/src/glw/GLwDrawA.c
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-
-/*
- *
- * This file has been slightly modified from the original for use with Mesa
- *
- *     Jeroen van der Zijp
- *
- *     jvz@cyberia.cfdrc.com
- *
- */
-#include <X11/IntrinsicP.h>
-#include <X11/StringDefs.h>
-#include <GL/glx.h>
-#include <GL/gl.h>
-#ifdef __GLX_MOTIF
-#include <Xm/PrimitiveP.h>
-#include "GLwMDrawAP.h"
-#else 
-#include "GLwDrawAP.h"
-#endif 
-#include <assert.h>
-#include <stdio.h>
-
-#ifdef __GLX_MOTIF
-#define GLwDrawingAreaWidget             GLwMDrawingAreaWidget
-#define GLwDrawingAreaClassRec           GLwMDrawingAreaClassRec
-#define glwDrawingAreaClassRec           glwMDrawingAreaClassRec
-#define glwDrawingAreaWidgetClass        glwMDrawingAreaWidgetClass
-#define GLwDrawingAreaRec                GLwMDrawingAreaRec
-#endif 
-
-#define ATTRIBLIST_SIZE 32
-
-#define offset(field) XtOffset(GLwDrawingAreaWidget,glwDrawingArea.field)
-
-
-/* forward definitions */
-static void createColormap(GLwDrawingAreaWidget w,int offset,XrmValue *value);
-static void Initialize(GLwDrawingAreaWidget req,GLwDrawingAreaWidget neww,ArgList args,Cardinal *num_args);
-static void Realize(Widget w,Mask *valueMask,XSetWindowAttributes *attributes);
-static void Redraw(GLwDrawingAreaWidget w,XEvent *event,Region region);
-static void Resize(GLwDrawingAreaWidget glw);
-static void Destroy(GLwDrawingAreaWidget glw);
-static void glwInput(GLwDrawingAreaWidget glw,XEvent *event,String *params,Cardinal *numParams);
-
-
-
-static char defaultTranslations[] =
-#ifdef __GLX_MOTIF
-     "<Key>osfHelp:PrimitiveHelp() \n"
-#endif
-    "<KeyDown>:   glwInput() \n\
-     <KeyUp>:     glwInput() \n\
-     <BtnDown>:   glwInput() \n\
-     <BtnUp>:     glwInput() \n\
-     <BtnMotion>: glwInput() ";
-
-
-static XtActionsRec actions[] = {
-  {"glwInput",(XtActionProc)glwInput},                /* key or mouse input */
-  };
-
-
-/*
- * There is a bit of unusual handling of the resources here.
- * Because Xt insists on allocating the colormap resource when it is
- * processing the core resources (even if we redeclare the colormap
- * resource here, we need to do a little trick.  When Xt first allocates
- * the colormap, we allow it to allocate the default one, since we have
- * not yet determined the appropriate visual (which is determined from
- * resources parsed after the colormap).  We also let it allocate colors
- * in that default colormap.
- *
- * In the initialize proc we calculate the actual visual.  Then, we
- * reobtain the colormap resource using XtGetApplicationResources in
- * the initialize proc.  If requested, we also reallocate colors in
- * that colormap using the same method.
- */
-
-static XtResource resources[] = {
-  /* The GLX attributes.  Add any new attributes here */
-
-  {GLwNbufferSize, GLwCBufferSize, XtRInt, sizeof (int),
-       offset(bufferSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNlevel, GLwCLevel, XtRInt, sizeof (int),
-       offset(level), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNrgba, GLwCRgba, XtRBoolean, sizeof (Boolean),
-       offset(rgba), XtRImmediate, (XtPointer) FALSE},
-  
-  {GLwNdoublebuffer, GLwCDoublebuffer, XtRBoolean, sizeof (Boolean),
-       offset(doublebuffer), XtRImmediate, (XtPointer) FALSE},
-  
-  {GLwNstereo, GLwCStereo, XtRBoolean, sizeof (Boolean),
-       offset(stereo), XtRImmediate, (XtPointer) FALSE},
-  
-  {GLwNauxBuffers, GLwCAuxBuffers, XtRInt, sizeof (int),
-       offset(auxBuffers), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNredSize, GLwCColorSize, XtRInt, sizeof (int),
-       offset(redSize), XtRImmediate, (XtPointer) 1},
-  
-  {GLwNgreenSize, GLwCColorSize, XtRInt, sizeof (int),
-       offset(greenSize), XtRImmediate, (XtPointer) 1},
-  
-  {GLwNblueSize, GLwCColorSize, XtRInt, sizeof (int),
-       offset(blueSize), XtRImmediate, (XtPointer) 1},
-  
-  {GLwNalphaSize, GLwCAlphaSize, XtRInt, sizeof (int),
-       offset(alphaSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNdepthSize, GLwCDepthSize, XtRInt, sizeof (int),
-       offset(depthSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNstencilSize, GLwCStencilSize, XtRInt, sizeof (int),
-       offset(stencilSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNaccumRedSize, GLwCAccumColorSize, XtRInt, sizeof (int),
-       offset(accumRedSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNaccumGreenSize, GLwCAccumColorSize, XtRInt, sizeof (int),
-       offset(accumGreenSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNaccumBlueSize, GLwCAccumColorSize, XtRInt, sizeof (int),
-       offset(accumBlueSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNaccumAlphaSize, GLwCAccumAlphaSize, XtRInt, sizeof (int),
-       offset(accumAlphaSize), XtRImmediate, (XtPointer) 0},
-  
-  /* the attribute list */
-  {GLwNattribList, GLwCAttribList, XtRPointer, sizeof(int *),
-       offset(attribList), XtRImmediate, (XtPointer) NULL},
-
-  /* the visual info */
-  {GLwNvisualInfo, GLwCVisualInfo, GLwRVisualInfo, sizeof (XVisualInfo *),
-       offset(visualInfo), XtRImmediate, (XtPointer) NULL},
-
-  /* miscellaneous resources */
-  {GLwNinstallColormap, GLwCInstallColormap, XtRBoolean, sizeof (Boolean),
-       offset(installColormap), XtRImmediate, (XtPointer) TRUE},
-
-  {GLwNallocateBackground, GLwCAllocateColors, XtRBoolean, sizeof (Boolean),
-       offset(allocateBackground), XtRImmediate, (XtPointer) FALSE},
-
-  {GLwNallocateOtherColors, GLwCAllocateColors, XtRBoolean, sizeof (Boolean),
-       offset(allocateOtherColors), XtRImmediate, (XtPointer) FALSE},
-
-  {GLwNinstallBackground, GLwCInstallBackground, XtRBoolean, sizeof (Boolean),
-       offset(installBackground), XtRImmediate, (XtPointer) TRUE},
-
-  {GLwNginitCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList),
-       offset(ginitCallback), XtRImmediate, (XtPointer) NULL},
-
-  {GLwNinputCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList),
-       offset(inputCallback), XtRImmediate, (XtPointer) NULL},
-
-  {GLwNresizeCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList),
-       offset(resizeCallback), XtRImmediate, (XtPointer) NULL},
-
-  {GLwNexposeCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList),
-       offset(exposeCallback), XtRImmediate, (XtPointer) NULL},
-
-  /* Changes to Motif primitive resources */
-#ifdef __GLX_MOTIF
-  {XmNtraversalOn, XmCTraversalOn, XmRBoolean, sizeof (Boolean),
-   XtOffset (GLwDrawingAreaWidget, primitive.traversal_on), XmRImmediate,
-   (XtPointer)FALSE},
-  
-  /* highlighting is normally disabled, as when Motif tries to disable
-   * highlighting, it tries to reset the color back to the parent's
-   * background (usually Motif blue).  Unfortunately, that is in a
-   * different colormap, and doesn't work too well.
-   */
-  {XmNhighlightOnEnter, XmCHighlightOnEnter, XmRBoolean, sizeof (Boolean),
-   XtOffset (GLwDrawingAreaWidget, primitive.highlight_on_enter),
-   XmRImmediate, (XtPointer) FALSE},
-  
-  {XmNhighlightThickness, XmCHighlightThickness, XmRHorizontalDimension,
-   sizeof (Dimension),
-   XtOffset (GLwDrawingAreaWidget, primitive.highlight_thickness),
-   XmRImmediate, (XtPointer) 0},
-#endif 
-  };
-
-
-/*
-** The following resources are reobtained using XtGetApplicationResources
-** in the initialize proc.
-*/
-
-/* The colormap */
-static XtResource initializeResources[] = {
-  /* reobtain the colormap with the new visual */
-  {XtNcolormap, XtCColormap, XtRColormap, sizeof(Colormap),
-   XtOffset(GLwDrawingAreaWidget, core.colormap),
-   XtRCallProc,(XtPointer) createColormap},
-  };
-
-
-/* reallocate any colors we need in the new colormap */
-  
-/* The background is obtained only if the allocateBackground resource is TRUE*/
-static XtResource backgroundResources[] = {
-#ifdef __GLX_MOTIF
-  {XmNbackground, XmCBackground,XmRPixel, 
-   sizeof(Pixel),XtOffset(GLwDrawingAreaWidget,core.background_pixel),
-   XmRString,(XtPointer)"lightgrey"},
-   /*XmRCallProc,(XtPointer)_XmBackgroundColorDefault},*/
-
-  {XmNbackgroundPixmap,XmCPixmap,XmRXmBackgroundPixmap, 
-   sizeof(Pixmap),XtOffset(GLwDrawingAreaWidget,core.background_pixmap),
-   XmRImmediate,(XtPointer)XmUNSPECIFIED_PIXMAP},
-
-#else
-  {XtNbackground,XtCBackground,XtRPixel,sizeof(Pixel),
-   XtOffset(GLwDrawingAreaWidget,core.background_pixel),
-   XtRString,(XtPointer)"lightgrey"},
-   /*XtRString,(XtPointer)"XtDefaultBackground"},*/
-
-  {XtNbackgroundPixmap, XtCPixmap, XtRPixmap, sizeof(Pixmap),
-   XtOffset(GLwDrawingAreaWidget,core.background_pixmap),
-   XtRImmediate,(XtPointer)XtUnspecifiedPixmap},
-#endif  
-  };
-
-
-
-/* The other colors such as the foreground are allocated only if
- * allocateOtherColors are set.  These resources only exist in Motif.
- */
-#ifdef __GLX_MOTIF
-static XtResource otherColorResources[] = {
-  {XmNforeground,XmCForeground,XmRPixel, 
-   sizeof(Pixel),XtOffset(GLwDrawingAreaWidget,primitive.foreground),
-   XmRString,(XtPointer)"lighgrey"},
-   /*XmRCallProc, (XtPointer) _XmForegroundColorDefault},*/
-
-  {XmNhighlightColor,XmCHighlightColor,XmRPixel,sizeof(Pixel),
-   XtOffset(GLwDrawingAreaWidget,primitive.highlight_color),
-   XmRString,(XtPointer)"lightgrey"},
-   /*XmRCallProc,(XtPointer)_XmHighlightColorDefault},*/
-
-  {XmNhighlightPixmap,XmCHighlightPixmap,XmRPrimHighlightPixmap,
-   sizeof(Pixmap),
-   XtOffset(GLwDrawingAreaWidget,primitive.highlight_pixmap),
-   XmRImmediate,(XtPointer)XmUNSPECIFIED_PIXMAP},
-   /*XmRCallProc,(XtPointer)_XmPrimitiveHighlightPixmapDefault},*/
-  };
-#endif
-
-
-#undef offset
-
-
-GLwDrawingAreaClassRec glwDrawingAreaClassRec = {
-  { /* core fields */
-#ifdef __GLX_MOTIF
-    /* superclass                */        (WidgetClass) &xmPrimitiveClassRec,
-    /* class_name                */        "GLwMDrawingArea",
-#else /* not __GLX_MOTIF */
-    /* superclass                */        (WidgetClass) &widgetClassRec,
-    /* class_name                */        "GLwDrawingArea",
-#endif /* __GLX_MOTIF */
-    /* widget_size               */        sizeof(GLwDrawingAreaRec),
-    /* class_initialize          */        NULL,
-    /* class_part_initialize     */        NULL,
-    /* class_inited              */        FALSE,
-    /* initialize                */        (XtInitProc) Initialize,
-    /* initialize_hook           */        NULL,
-    /* realize                   */        Realize,
-    /* actions                   */        actions,
-    /* num_actions               */        XtNumber(actions),
-    /* resources                 */        resources,
-    /* num_resources             */        XtNumber(resources),
-    /* xrm_class                 */        NULLQUARK,
-    /* compress_motion           */        TRUE,
-    /* compress_exposure         */        TRUE,
-    /* compress_enterleave       */        TRUE,
-    /* visible_interest          */        TRUE,
-    /* destroy                   */        (XtWidgetProc) Destroy,
-    /* resize                    */        (XtWidgetProc) Resize,
-    /* expose                    */        (XtExposeProc) Redraw,
-    /* set_values                */        NULL,
-    /* set_values_hook           */        NULL,
-    /* set_values_almost         */        XtInheritSetValuesAlmost,
-    /* get_values_hook           */        NULL,
-    /* accept_focus              */        NULL,
-    /* version                   */        XtVersion,
-    /* callback_private          */        NULL,
-    /* tm_table                  */        defaultTranslations,
-    /* query_geometry            */        XtInheritQueryGeometry,
-    /* display_accelerator       */        XtInheritDisplayAccelerator,
-    /* extension                 */        NULL
-  },
-#ifdef __GLX_MOTIF /* primitive resources */
-  {
-    /* border_highlight          */        XmInheritBorderHighlight,
-    /* border_unhighlight        */        XmInheritBorderUnhighlight,
-    /* translations              */        XtInheritTranslations,
-    /* arm_and_activate          */        NULL,
-    /* get_resources             */        NULL,
-    /* num get_resources         */        0,
-    /* extension                 */        NULL,                                
-  }
-#endif 
-  };
-
-WidgetClass glwDrawingAreaWidgetClass=(WidgetClass)&glwDrawingAreaClassRec;
-
-
-
-static void error(Widget w,char* string){
-  char buf[100];
-#ifdef __GLX_MOTIF
-  sprintf(buf,"GLwMDrawingArea: %s\n",string);
-#else
-  sprintf(buf,"GLwDrawingArea: %s\n",string);
-#endif
-  XtAppError(XtWidgetToApplicationContext(w),buf);
-  }
-
-
-static void warning(Widget w,char* string){
-  char buf[100];
-#ifdef __GLX_MOTIF
-  sprintf (buf, "GLwMDraw: %s\n", string);
-#else
-  sprintf (buf, "GLwDraw: %s\n", string);
-#endif
-  XtAppWarning(XtWidgetToApplicationContext(w), buf);
-  }
-
-
-
-/* Initialize the attribList based on the attributes */
-static void createAttribList(GLwDrawingAreaWidget w){
-  int *ptr;
-  w->glwDrawingArea.attribList = (int*)XtMalloc(ATTRIBLIST_SIZE*sizeof(int));
-  if(!w->glwDrawingArea.attribList){
-    error((Widget)w,"Unable to allocate attribute list");
-    }
-  ptr = w->glwDrawingArea.attribList;
-  *ptr++ = GLX_BUFFER_SIZE;
-  *ptr++ = w->glwDrawingArea.bufferSize;
-  *ptr++ = GLX_LEVEL;
-  *ptr++ = w->glwDrawingArea.level;
-  if(w->glwDrawingArea.rgba) *ptr++ = GLX_RGBA;
-  if(w->glwDrawingArea.doublebuffer) *ptr++ = GLX_DOUBLEBUFFER;
-  if(w->glwDrawingArea.stereo) *ptr++ = GLX_STEREO;
-  *ptr++ = GLX_AUX_BUFFERS;
-  *ptr++ = w->glwDrawingArea.auxBuffers;
-  *ptr++ = GLX_RED_SIZE;
-  *ptr++ = w->glwDrawingArea.redSize;
-  *ptr++ = GLX_GREEN_SIZE;
-  *ptr++ = w->glwDrawingArea.greenSize;
-  *ptr++ = GLX_BLUE_SIZE;
-  *ptr++ = w->glwDrawingArea.blueSize;
-  *ptr++ = GLX_ALPHA_SIZE;
-  *ptr++ = w->glwDrawingArea.alphaSize;
-  *ptr++ = GLX_DEPTH_SIZE;
-  *ptr++ = w->glwDrawingArea.depthSize;
-  *ptr++ = GLX_STENCIL_SIZE;
-  *ptr++ = w->glwDrawingArea.stencilSize;
-  *ptr++ = GLX_ACCUM_RED_SIZE;
-  *ptr++ = w->glwDrawingArea.accumRedSize;
-  *ptr++ = GLX_ACCUM_GREEN_SIZE;
-  *ptr++ = w->glwDrawingArea.accumGreenSize;
-  *ptr++ = GLX_ACCUM_BLUE_SIZE;
-  *ptr++ = w->glwDrawingArea.accumBlueSize;
-  *ptr++ = GLX_ACCUM_ALPHA_SIZE;
-  *ptr++ = w->glwDrawingArea.accumAlphaSize;
-  *ptr++ = None;
-  assert((ptr-w->glwDrawingArea.attribList)<ATTRIBLIST_SIZE);
-  }
-
-
-
-/* Initialize the visualInfo based on the attribute list */
-static void createVisualInfo(GLwDrawingAreaWidget w){
-  assert(w->glwDrawingArea.attribList);
-  w->glwDrawingArea.visualInfo=glXChooseVisual(XtDisplay(w),XScreenNumberOfScreen(XtScreen(w)),w->glwDrawingArea.attribList);
-  if(!w->glwDrawingArea.visualInfo) error((Widget)w,"requested visual not supported");
-  }
-
-
-
-/* Initialize the colormap based on the visual info.
- * This routine maintains a cache of visual-infos to colormaps.  If two
- * widgets share the same visual info, they share the same colormap.
- * This function is called by the callProc of the colormap resource entry.
- */
-static void createColormap(GLwDrawingAreaWidget w,int offset,XrmValue *value){
-  static struct cmapCache { Visual *visual; Colormap cmap; } *cmapCache;
-  static int cacheEntries=0;
-  static int cacheMalloced=0;
-  register int i;
-    
-  assert(w->glwDrawingArea.visualInfo);
-
-  /* see if we can find it in the cache */
-  for(i=0; i<cacheEntries; i++){
-    if(cmapCache[i].visual==w->glwDrawingArea.visualInfo->visual){
-      value->addr=(XtPointer)(&cmapCache[i].cmap);
-      return;
-      }
-    }
-
-  /* not in the cache, create a new entry */
-  if(cacheEntries >= cacheMalloced){
-    /* need to malloc a new one.  Since we are likely to have only a
-     * few colormaps, we allocate one the first time, and double
-     * each subsequent time.
-     */
-    if(cacheMalloced==0){
-      cacheMalloced=1;
-      cmapCache=(struct cmapCache*)XtMalloc(sizeof(struct cmapCache));
-      }
-    else{
-      cacheMalloced<<=1;
-      cmapCache=(struct cmapCache*)XtRealloc((char*)cmapCache,sizeof(struct cmapCache)*cacheMalloced);
-      }
-    }
-       
-  cmapCache[cacheEntries].cmap=XCreateColormap(XtDisplay(w),
-                                               RootWindow(XtDisplay(w),
-                                               w->glwDrawingArea.visualInfo->screen),
-                                               w->glwDrawingArea.visualInfo->visual,
-                                               AllocNone);
-  cmapCache[cacheEntries].visual=w->glwDrawingArea.visualInfo->visual;
-  value->addr=(XtPointer)(&cmapCache[cacheEntries++].cmap);
-  }
-
-
-
-static void Initialize(GLwDrawingAreaWidget req,GLwDrawingAreaWidget neww,ArgList args,Cardinal *num_args){
-
-  /* fix size */
-  if(req->core.width==0) neww->core.width=100;
-  if(req->core.height==0) neww->core.width=100;
-
-  /* create the attribute list if needed */
-  neww->glwDrawingArea.myList=FALSE;
-  if(neww->glwDrawingArea.attribList==NULL){
-    neww->glwDrawingArea.myList=TRUE;
-    createAttribList(neww);
-    }
-
-  /* Gotta have it */
-  assert(neww->glwDrawingArea.attribList);
-
-  /* determine the visual info if needed */
-  neww->glwDrawingArea.myVisual=FALSE;
-  if(neww->glwDrawingArea.visualInfo==NULL){
-    neww->glwDrawingArea.myVisual=TRUE;
-    createVisualInfo(neww);
-    }
-
-  /* Gotta have that too */
-  assert(neww->glwDrawingArea.visualInfo);
-
-  neww->core.depth=neww->glwDrawingArea.visualInfo->depth;
-
-  /* Reobtain the colormap and colors in it using XtGetApplicationResources*/
-  XtGetApplicationResources((Widget)neww,neww,initializeResources,XtNumber(initializeResources),args,*num_args);
-
-  /* obtain the color resources if appropriate */
-  if(req->glwDrawingArea.allocateBackground){
-    XtGetApplicationResources((Widget)neww,neww,backgroundResources,XtNumber(backgroundResources),args,*num_args);
-    }
-
-#ifdef __GLX_MOTIF
-  if(req->glwDrawingArea.allocateOtherColors){
-    XtGetApplicationResources((Widget)neww,neww,otherColorResources,XtNumber(otherColorResources),args,*num_args);
-    }
-#endif 
-  }
-
-
-
-static void Realize(Widget w,Mask *valueMask,XSetWindowAttributes *attributes){
-  register GLwDrawingAreaWidget glw=(GLwDrawingAreaWidget)w;
-  GLwDrawingAreaCallbackStruct cb;
-  Widget parentShell;
-  Status status;
-  Window windows[2],*windowsReturn,*windowList;
-  int countReturn,i;
-   
-  /* if we haven't requested that the background be both installed and
-   * allocated, don't install it.
-   */
-  if(!(glw->glwDrawingArea.installBackground && glw->glwDrawingArea.allocateBackground)){
-    *valueMask&=~CWBackPixel;
-    }
- 
-  XtCreateWindow(w,(unsigned int)InputOutput,glw->glwDrawingArea.visualInfo->visual,*valueMask,attributes);
-
-  /* if appropriate, call XSetWMColormapWindows to install the colormap */
-  if(glw->glwDrawingArea.installColormap){
-
-    /* Get parent shell */
-    for(parentShell=XtParent(w); parentShell&&!XtIsShell(parentShell); parentShell=XtParent(parentShell));
-
-    if(parentShell && XtWindow(parentShell)){
-
-      /* check to see if there is already a property */
-      status=XGetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),&windowsReturn,&countReturn);
-            
-      /* if no property, just create one */
-      if(!status){
-        windows[0]=XtWindow(w);
-        windows[1]=XtWindow(parentShell);
-        XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windows,2);
-        }
-
-      /* there was a property, add myself to the beginning */
-      else{
-        windowList=(Window *)XtMalloc((sizeof(Window))*(countReturn+1));
-        windowList[0]=XtWindow(w);
-        for(i=0; i<countReturn; i++) windowList[i+1]=windowsReturn[i];
-        XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windowList,countReturn+1);
-        XtFree((char*)windowList);
-        XtFree((char*)windowsReturn);
-        }
-      }
-    else{
-      warning(w,"Could not set colormap property on parent shell");
-      }
-    }
-
-  /* Invoke callbacks */
-  cb.reason=GLwCR_GINIT;
-  cb.event=NULL;
-  cb.width=glw->core.width;
-  cb.height=glw->core.height;
-  XtCallCallbackList((Widget)glw,glw->glwDrawingArea.ginitCallback,&cb);
-  }
-
-
-
-static void Redraw(GLwDrawingAreaWidget w,XEvent *event,Region region){
-  GLwDrawingAreaCallbackStruct cb;
-  if(!XtIsRealized((Widget)w)) return;
-  cb.reason=GLwCR_EXPOSE;
-  cb.event=event;
-  cb.width=w->core.width;
-  cb.height=w->core.height;
-  XtCallCallbackList((Widget)w,w->glwDrawingArea.exposeCallback,&cb);
-  }
-
-
-
-static void Resize(GLwDrawingAreaWidget glw){
-  GLwDrawingAreaCallbackStruct cb;
-  if(!XtIsRealized((Widget)glw)) return;
-  cb.reason=GLwCR_RESIZE;
-  cb.event=NULL;
-  cb.width=glw->core.width;
-  cb.height=glw->core.height;
-  XtCallCallbackList((Widget)glw,glw->glwDrawingArea.resizeCallback,&cb);
-  }
-
-
-
-static void Destroy(GLwDrawingAreaWidget glw){
-  Window *windowsReturn;
-  Widget parentShell;
-  Status status;
-  int countReturn;
-  register int i;
-
-  if(glw->glwDrawingArea.myList && glw->glwDrawingArea.attribList){
-    XtFree((XtPointer)glw->glwDrawingArea.attribList);
-    }
-
-  if(glw->glwDrawingArea.myVisual && glw->glwDrawingArea.visualInfo){
-    XtFree((XtPointer)glw->glwDrawingArea.visualInfo);
-    }
-
-  /* if my colormap was installed, remove it */
-  if(glw->glwDrawingArea.installColormap){
-
-    /* Get parent shell */
-    for(parentShell=XtParent(glw); parentShell&&!XtIsShell(parentShell); parentShell=XtParent(parentShell));
-
-    if(parentShell && XtWindow(parentShell)){
-
-      /* make sure there is a property */
-      status=XGetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),&windowsReturn,&countReturn);
-            
-      /* if no property, just return.  If there was a property, continue */
-      if(status){
-
-        /* search for a match */
-        for(i=0; i<countReturn; i++){
-          if(windowsReturn[i]==XtWindow(glw)){
-
-            /* we found a match, now copy the rest down */
-            for(i++; i<countReturn; i++){ windowsReturn[i-1]=windowsReturn[i]; }
-
-            XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windowsReturn,countReturn-1);
-            break; 
-            }
-          }
-        XtFree((char *)windowsReturn);
-        }
-      }
-    }
-  }
-
-
-
-/* Action routine for keyboard and mouse events */
-static void glwInput(GLwDrawingAreaWidget glw,XEvent *event,String *params,Cardinal *numParams){
-  GLwDrawingAreaCallbackStruct cb;
-  cb.reason=GLwCR_INPUT;
-  cb.event=event;
-  cb.width=glw->core.width;
-  cb.height=glw->core.height;
-  XtCallCallbackList((Widget)glw,glw->glwDrawingArea.inputCallback,&cb);
-  }
-
-
-#ifdef __GLX_MOTIF
-
-/* Create routine */
-Widget GLwCreateMDrawingArea(Widget parent, char *name,ArgList arglist,Cardinal argcount){
-  return XtCreateWidget(name,glwMDrawingAreaWidgetClass, parent, arglist,argcount);
-  }
-
-#endif
-
-
-#ifndef __GLX_MOTIF
-
-/* Make context current */
-void GLwDrawingAreaMakeCurrent(Widget w,GLXContext ctx){
-  glXMakeCurrent(XtDisplay(w),XtWindow(w),ctx);
-  }
-
-
-/* Swap buffers convenience function */
-void GLwDrawingAreaSwapBuffers(Widget w){
-  glXSwapBuffers(XtDisplay(w),XtWindow(w));
-  }
-
-#endif
diff --git a/src/glw/GLwDrawA.h b/src/glw/GLwDrawA.h
deleted file mode 100644
index b9711c2..0000000
--- a/src/glw/GLwDrawA.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef _GLwDrawA_h
-#define _GLwDrawA_h
-
-#include <GL/glx.h>
-#include <GL/gl.h>
-
-/****************************************************************
- *
- * GLwDrawingArea widgets
- *
- ****************************************************************/
-
-/* Resources:
-
- Name		     Class		RepType		Default Value
- ----		     -----		-------		-------------
- attribList	     AttribList		int *		NULL
- visualInfo	     VisualInfo		VisualInfo	NULL
- installColormap     InstallColormap	Boolean		TRUE
- allocateBackground  AllocateColors	Boolean		FALSE
- allocateOtherColors AllocateColors	Boolean		FALSE
- installBackground   InstallBackground	Boolean		TRUE
- exposeCallback      Callback		Pointer		NULL
- ginitCallback       Callback		Pointer		NULL
- inputCallback       Callback		Pointer		NULL
- resizeCallback      Callback		Pointer		NULL
-
-*** The following resources all correspond to the GLX configuration
-*** attributes and are used to create the attribList if it is NULL
- bufferSize	     BufferSize		int		0
- level		     Level		int		0
- rgba		     Rgba		Boolean		FALSE
- doublebuffer	     Doublebuffer	Boolean		FALSE
- stereo		     Stereo		Boolean		FALSE
- auxBuffers	     AuxBuffers		int		0
- redSize	     ColorSize		int		1
- greenSize	     ColorSize		int		1
- blueSize	     ColorSize		int		1
- alphaSize	     AlphaSize		int		0
- depthSize	     DepthSize		int		0
- stencilSize	     StencilSize	int		0
- accumRedSize	     AccumColorSize	int		0
- accumGreenSize	     AccumColorSize	int		0
- accumBlueSize	     AccumColorSize	int		0
- accumAlphaSize	     AccumAlphaSize	int		0
-*/
-
-#define GLwNattribList		"attribList"
-#define GLwCAttribList		"AttribList"
-#define GLwNvisualInfo		"visualInfo"
-#define GLwCVisualInfo		"VisualInfo"
-#define GLwRVisualInfo		"VisualInfo"
-
-#define GLwNinstallColormap	"installColormap"
-#define GLwCInstallColormap	"InstallColormap"
-#define GLwNallocateBackground	"allocateBackground"
-#define GLwNallocateOtherColors	"allocateOtherColors"
-#define GLwCAllocateColors	"AllocateColors"
-#define GLwNinstallBackground	"installBackground"
-#define GLwCInstallBackground	"InstallBackground"
-
-#define GLwCCallback		"Callback"
-#define GLwNexposeCallback	"exposeCallback"
-#define GLwNginitCallback	"ginitCallback"
-#define GLwNresizeCallback	"resizeCallback"
-#define GLwNinputCallback	"inputCallback"
-
-#define GLwNbufferSize		"bufferSize"
-#define GLwCBufferSize		"BufferSize"
-#define GLwNlevel		"level"
-#define GLwCLevel		"Level"
-#define GLwNrgba		"rgba"
-#define GLwCRgba		"Rgba"
-#define GLwNdoublebuffer	"doublebuffer"
-#define GLwCDoublebuffer	"Doublebuffer"
-#define GLwNstereo		"stereo"
-#define GLwCStereo		"Stereo"
-#define GLwNauxBuffers		"auxBuffers"
-#define GLwCAuxBuffers		"AuxBuffers"
-#define GLwNredSize		"redSize"
-#define GLwNgreenSize		"greenSize"
-#define GLwNblueSize		"blueSize"
-#define GLwCColorSize		"ColorSize"
-#define GLwNalphaSize		"alphaSize"
-#define GLwCAlphaSize		"AlphaSize"
-#define GLwNdepthSize		"depthSize"
-#define GLwCDepthSize		"DepthSize"
-#define GLwNstencilSize		"stencilSize"
-#define GLwCStencilSize		"StencilSize"
-#define GLwNaccumRedSize	"accumRedSize"
-#define GLwNaccumGreenSize	"accumGreenSize"
-#define GLwNaccumBlueSize	"accumBlueSize"
-#define GLwCAccumColorSize	"AccumColorSize"
-#define GLwNaccumAlphaSize	"accumAlphaSize"
-#define GLwCAccumAlphaSize	"AccumAlphaSize"
-
-#ifdef __GLX_MOTIF
-
-typedef struct _GLwMDrawingAreaClassRec	*GLwMDrawingAreaWidgetClass;
-typedef struct _GLwMDrawingAreaRec	*GLwMDrawingAreaWidget;
-
-GLAPI WidgetClass glwMDrawingAreaWidgetClass;
-
-
-#else 
-
-typedef struct _GLwDrawingAreaClassRec	*GLwDrawingAreaWidgetClass;
-typedef struct _GLwDrawingAreaRec	*GLwDrawingAreaWidget;
-
-GLAPI WidgetClass glwDrawingAreaWidgetClass;
-
-
-#endif
-
-
-/* Callback reasons */
-#ifdef __GLX_MOTIF
-#define GLwCR_EXPOSE	XmCR_EXPOSE
-#define GLwCR_RESIZE	XmCR_RESIZE
-#define GLwCR_INPUT	XmCR_INPUT
-#else 
-/* The same values as Motif, but don't use Motif constants */
-#define GLwCR_EXPOSE	38
-#define GLwCR_RESIZE	39
-#define GLwCR_INPUT	40
-#endif
-
-#define GLwCR_GINIT	32135	/* Arbitrary number that should neverr clash */
-
-typedef struct 
-  {
-  int       reason;
-  XEvent   *event;
-  Dimension width,height;
-  } 
-  GLwDrawingAreaCallbackStruct;
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-/* front ends to glXMakeCurrent and glXSwapBuffers */
-GLAPI void GLwDrawingAreaMakeCurrent(Widget w,GLXContext ctx);
-GLAPI void GLwDrawingAreaSwapBuffers(Widget w);
-
-#ifdef __GLX_MOTIF
-#ifdef _NO_PROTO
-GLAPI Widget GLwCreateMDrawingArea();
-#else
-GLAPI Widget GLwCreateMDrawingArea(Widget parent,char *name,ArgList arglist,Cardinal argcount);
-#endif
-#endif 
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}
-#endif
-
-#endif
diff --git a/src/glw/GLwDrawAP.h b/src/glw/GLwDrawAP.h
deleted file mode 100644
index 4ff21b4..0000000
--- a/src/glw/GLwDrawAP.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef _GLwDrawAP_h
-#define _GLwDrawAP_h
-
-
-/* MOTIF */
-#ifdef __GLX_MOTIF
-#include "GLwMDrawA.h"
-#else
-#include "GLwDrawA.h"
-#endif
-
-typedef struct _GLwDrawingAreaClassPart {
-  caddr_t extension;
-  } GLwDrawingAreaClassPart;
-
-
-#ifdef __GLX_MOTIF
-typedef struct _GLwMDrawingAreaClassRec {
-  CoreClassPart               core_class;
-  XmPrimitiveClassPart        primitive_class;
-  GLwDrawingAreaClassPart     glwDrawingArea_class;
-  } GLwMDrawingAreaClassRec;
-
-
-GLAPI GLwMDrawingAreaClassRec glwMDrawingAreaClassRec;
-
-
-/* XT */
-#else 
-
-typedef struct _GLwDrawingAreaClassRec {
-  CoreClassPart               core_class;
-  GLwDrawingAreaClassPart     glwDrawingArea_class;
-  } GLwDrawingAreaClassRec;
-
-GLAPI GLwDrawingAreaClassRec glwDrawingAreaClassRec;
-
-
-#endif 
-
-
-
-typedef struct {
-  /* resources */
-  int *                attribList;
-  XVisualInfo *        visualInfo;
-  Boolean              myList;                /* TRUE if we malloced the attribList*/
-  Boolean              myVisual;        /* TRUE if we created the visualInfo*/
-  Boolean              installColormap;
-  Boolean              allocateBackground;
-  Boolean              allocateOtherColors;
-  Boolean              installBackground;
-  XtCallbackList       ginitCallback;
-  XtCallbackList       resizeCallback;
-  XtCallbackList       exposeCallback;
-  XtCallbackList       inputCallback;
-  /* specific attributes; add as we get new attributes */
-  int                  bufferSize;
-  int                  level;
-  Boolean              rgba;
-  Boolean              doublebuffer;
-  Boolean              stereo;
-  int                  auxBuffers;
-  int                  redSize;
-  int                  greenSize;
-  int                  blueSize;
-  int                  alphaSize;
-  int                  depthSize;
-  int                  stencilSize;
-  int                  accumRedSize;
-  int                  accumGreenSize;
-  int                  accumBlueSize;
-  int                  accumAlphaSize;
-  } GLwDrawingAreaPart;
-
-#ifdef __GLX_MOTIF
-
-typedef struct _GLwMDrawingAreaRec {
-  CorePart             core;
-  XmPrimitivePart      primitive;
-  GLwDrawingAreaPart   glwDrawingArea;
-  } GLwMDrawingAreaRec;
-
-#else 
-
-typedef struct _GLwDrawingAreaRec {
-  CorePart             core;
-  GLwDrawingAreaPart   glwDrawingArea;
-  } GLwDrawingAreaRec;
-
-#endif 
-
-#endif
diff --git a/src/glw/GLwMDrawA.c b/src/glw/GLwMDrawA.c
deleted file mode 100644
index bdefe92..0000000
--- a/src/glw/GLwMDrawA.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef __GLX_MOTIF
-#define __GLX_MOTIF 1
-#endif
-#include "GLwDrawA.c"
diff --git a/src/glw/GLwMDrawA.h b/src/glw/GLwMDrawA.h
deleted file mode 100644
index 2e24589..0000000
--- a/src/glw/GLwMDrawA.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef __GLX_MOTIF
-#define __GLX_MOTIF 1
-#endif
-#include "GLwDrawA.h"
diff --git a/src/glw/GLwMDrawAP.h b/src/glw/GLwMDrawAP.h
deleted file mode 100644
index a0a689b..0000000
--- a/src/glw/GLwMDrawAP.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef __GLX_MOTIF
-#define __GLX_MOTIF 1
-#endif
-#include "GLwDrawAP.h"
diff --git a/src/glw/Makefile b/src/glw/Makefile
deleted file mode 100644
index 776b1aa..0000000
--- a/src/glw/Makefile
+++ /dev/null
@@ -1,74 +0,0 @@
-# src/glw/Makefile
-
-TOP = ../..
-include $(TOP)/configs/current
-
-MAJOR = 1
-MINOR = 0
-TINY = 0
-
-INCDIRS = -I$(TOP)/include $(MOTIF_CFLAGS) $(X11_INCLUDES)
-
-
-OBJECTS = $(GLW_SOURCES:.c=.o)
-
-
-
-##### RULES #####
-
-.c.o:
-	$(CC) -c $(INCDIRS) $(CFLAGS) $(GLW_CFLAGS) $<
-
-
-
-##### TARGETS #####
-
-default: $(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME)
-
-# GLU pkg-config file
-pcedit = sed \
-	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
-	-e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \
-	-e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \
-	-e 's,@VERSION@,$(MAJOR).$(MINOR).$(TINY),' \
-	-e 's,@GLW_PC_REQ_PRIV@,$(GLW_PC_REQ_PRIV),' \
-	-e 's,@GLW_PC_LIB_PRIV@,$(GLW_PC_LIB_PRIV),' \
-	-e 's,@GLW_PC_CFLAGS@,$(GLW_PC_CFLAGS),' \
-	-e 's,@GLW_LIB@,$(GLW_LIB),'
-glw.pc: glw.pc.in
-	$(pcedit) $< > $@
-
-install: glw.pc
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-	$(INSTALL) -m 644 *.h $(DESTDIR)$(INSTALL_INC_DIR)/GL
-	$(MINSTALL) $(TOP)/$(LIB_DIR)/$(GLW_LIB_GLOB) $(DESTDIR)$(INSTALL_LIB_DIR)
-	$(INSTALL) -m 644 glw.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-
-clean:
-	-rm -f depend depend.bak
-	-rm -f *.o *.pc *~
-
-
-# Make the library
-$(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME): $(OBJECTS)
-	$(MKLIB) -o $(GLW_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
-		-major $(MAJOR) -minor $(MINOR) -patch $(TINY) \
-		$(MKLIB_OPTIONS) -install $(TOP)/$(LIB_DIR) \
-		-id $(INSTALL_LIB_DIR)/lib$(GLW_LIB).$(MAJOR).dylib \
-		$(GLW_LIB_DEPS) $(OBJECTS)
-
-
-#
-# Run 'make depend' to update the dependencies if you change what's included
-# by any source file.
-# 
-depend: $(GLW_SOURCES)
-	rm -f depend
-	touch depend
-	$(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(GLW_SOURCES) \
-		$(X11_INCLUDES) > /dev/null
-
-
--include depend
diff --git a/src/glw/README b/src/glw/README
deleted file mode 100644
index 70f4f7b..0000000
--- a/src/glw/README
+++ /dev/null
@@ -1,56 +0,0 @@
-
-                           widgets README file
-
-
-This directory contains the source code for SGI's OpenGL Xt/Motif widgets,
-slightly modified by Jeroen van der Zijp to work better with Mesa.
-
-To compile the widget code (producing lib/libGLw.a) cd to the widgets/
-directory and type 'make <config>' where <config> is the system configuration
-you used to compile Mesa (like 'make linux').  This hasn't been tested on
-many systems so let us know if you have trouble.
-
-If you want to make a Linux ELF shared lib instead of the non-shared .a
-file see the notes in the Makefile.
-
-If you want to build with Motif support, edit Makefile.X11, looking
-for the "Motif" information.
-
-The SGI copyright is as follows.
-
-
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
diff --git a/src/glw/glw.pc.in b/src/glw/glw.pc.in
deleted file mode 100644
index 19a7c30..0000000
--- a/src/glw/glw.pc.in
+++ /dev/null
@@ -1,13 +0,0 @@
-prefix=@INSTALL_DIR@
-exec_prefix=${prefix}
-libdir=@INSTALL_LIB_DIR@
-includedir=@INSTALL_INC_DIR@
-
-Name: glw
-Description: Mesa OpenGL widget library
-Requires: gl
-Requires.private: @GLW_PC_REQ_PRIV@
-Version: @VERSION@
-Libs: -L${libdir} -l@GLW_LIB@
-Libs.private: @GLW_PC_LIB_PRIV@
-Cflags: -I${includedir} @GLW_PC_CFLAGS@
diff --git a/src/glx/dri2.c b/src/glx/dri2.c
index 229840d..b1b5013 100644
--- a/src/glx/dri2.c
+++ b/src/glx/dri2.c
@@ -190,6 +190,15 @@
 	err->minorCode == X_DRI2DestroyDrawable)
 	return True;
 
+    /* If the server is non-local DRI2Connect will raise BadRequest.
+     * Swallow this so that DRI2Connect can signal this in its return code */
+    if (err->majorCode == codes->major_opcode &&
+        err->minorCode == X_DRI2Connect &&
+        err->errorCode == BadRequest) {
+	*ret_code = False;
+	return True;
+    }
+
     return False;
 }
 
diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 80e4da3..9fa0d5a 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -455,16 +455,20 @@
 static void
 dri2FlushFrontBuffer(__DRIdrawable *driDrawable, void *loaderPrivate)
 {
+   struct glx_display *priv;
+   struct dri2_display *pdp;
+   struct glx_context *gc;
    struct dri2_drawable *pdraw = loaderPrivate;
+
    if (!pdraw)
       return;
 
    if (!pdraw->base.psc)
       return;
 
-   struct glx_display *priv = __glXInitialize(pdraw->base.psc->dpy);
-   struct dri2_display *pdp = (struct dri2_display *)priv->dri2Display;
-   struct glx_context *gc = __glXGetCurrentContext();
+   priv = __glXInitialize(pdraw->base.psc->dpy);
+   pdp = (struct dri2_display *) priv->dri2Display;
+   gc = __glXGetCurrentContext();
 
    /* Old servers don't send invalidate events */
    if (!pdp->invalidateAvailable)
@@ -539,6 +543,11 @@
 	(struct dri2_display *)dpyPriv->dri2Display;
     CARD64 ret = 0;
 
+    /* Old servers can't handle swapbuffers */
+    if (!pdp->swapAvailable) {
+       dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height);
+    } else {
+#ifdef X_DRI2SwapBuffers
 #ifdef __DRI2_FLUSH
     if (psc->f) {
        struct glx_context *gc = __glXGetCurrentContext();
@@ -549,21 +558,15 @@
     }
 #endif
 
+       DRI2SwapBuffers(psc->base.dpy, pdraw->xDrawable,
+		       target_msc, divisor, remainder, &ret);
+#endif
+    }
+
     /* Old servers don't send invalidate events */
     if (!pdp->invalidateAvailable)
        dri2InvalidateBuffers(dpyPriv->dpy, pdraw->xDrawable);
 
-    /* Old servers can't handle swapbuffers */
-    if (!pdp->swapAvailable) {
-       dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height);
-       return 0;
-    }
-
-#ifdef X_DRI2SwapBuffers
-    DRI2SwapBuffers(psc->base.dpy, pdraw->xDrawable, target_msc, divisor,
-		    remainder, &ret);
-#endif
-
     return ret;
 }
 
diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
index bac0c9e..e7dba5a 100644
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -388,7 +388,7 @@
 _X_HIDDEN void
 driReleaseDrawables(struct glx_context *gc)
 {
-   struct glx_display *const priv = __glXInitialize(gc->psc->dpy);
+   const struct glx_display *priv = gc->psc->display;
    __GLXDRIdrawable *pdraw;
 
    if (priv == NULL)
diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index fc0a079..c8ec9c2 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -794,15 +794,17 @@
    gc = __glXGetCurrentContext();
 
 #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
-   __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
+   {
+      __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
 
-   if (pdraw != NULL) {
-      if (gc && drawable == gc->currentDrawable) {
-	 glFlush();
+      if (pdraw != NULL) {
+         if (gc && drawable == gc->currentDrawable) {
+            glFlush();
+         }
+
+         (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0);
+         return;
       }
-
-      (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0);
-      return;
    }
 #endif
 
diff --git a/src/glx/glxext.c b/src/glx/glxext.c
index 8704c48..8254544 100644
--- a/src/glx/glxext.c
+++ b/src/glx/glxext.c
@@ -260,25 +260,20 @@
 static int
 __glXCloseDisplay(Display * dpy, XExtCodes * codes)
 {
-   struct glx_display *priv, **prev, *next;
+   struct glx_display *priv, **prev;
 
    _XLockMutex(_Xglobal_lock);
    prev = &glx_displays;
    for (priv = glx_displays; priv; prev = &priv->next, priv = priv->next) {
       if (priv->dpy == dpy) {
+         *prev = priv->next;
 	 break;
       }
    }
-
-   /* Only remove the display from the list after it's destroyed. The cleanup
-    * code (e.g. driReleaseDrawables()) ends up calling __glXInitialize(),
-    * which would create a new glx_display while we're trying to destroy this
-    * one. */
-   next = priv->next;
-   glx_display_free(priv);
-   *prev = next;
    _XUnlockMutex(_Xglobal_lock);
 
+   glx_display_free(priv);
+
    return 1;
 }
 
diff --git a/src/mapi/Android.mk b/src/mapi/Android.mk
new file mode 100644
index 0000000..0d09ee1
--- /dev/null
+++ b/src/mapi/Android.mk
@@ -0,0 +1,60 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for glapi
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+include $(LOCAL_PATH)/mapi/sources.mak
+LOCAL_SRC_FILES := $(addprefix mapi/, $(MAPI_GLAPI_SOURCES))
+
+LOCAL_CFLAGS := \
+	-DMAPI_MODE_GLAPI \
+	-DMAPI_ABI_HEADER=\"shared-glapi/glapi_mapi_tmp.h\"
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi
+
+LOCAL_MODULE := libglapi
+
+LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+intermediates := $(call local-intermediates-dir)
+mapi_abi_header := $(intermediates)/shared-glapi/glapi_mapi_tmp.h
+LOCAL_GENERATED_SOURCES := $(mapi_abi_header)
+
+mapi_abi_deps := \
+	$(wildcard $(LOCAL_PATH)/glapi/gen/*.py) \
+	$(wildcard $(LOCAL_PATH)/glapi/gen/*.xml) \
+	$(LOCAL_PATH)/mapi/mapi_abi.py
+
+$(mapi_abi_header): PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/mapi/mapi_abi.py
+$(mapi_abi_header): PRIVATE_APIXML := $(LOCAL_PATH)/glapi/gen/gl_and_es_API.xml
+$(mapi_abi_header): $(mapi_abi_deps)
+	@mkdir -p $(dir $@)
+	@echo "Gen GLAPI: $(PRIVATE_MODULE) <= $(notdir $@)"
+	$(hide) $(PRIVATE_SCRIPT) --printer shared-glapi --mode lib $(PRIVATE_APIXML) > $@
+
+include $(MESA_COMMON_MK)
+include $(BUILD_SHARED_LIBRARY)
diff --git a/src/mapi/es1api/.gitignore b/src/mapi/es1api/.gitignore
index b21f1d1..dfe4656 100644
--- a/src/mapi/es1api/.gitignore
+++ b/src/mapi/es1api/.gitignore
@@ -1,4 +1 @@
 glapi_mapi_tmp.h
-glapi-stamp
-glapi
-main
diff --git a/src/mapi/es1api/Makefile b/src/mapi/es1api/Makefile
index aef6948..0a0449b 100644
--- a/src/mapi/es1api/Makefile
+++ b/src/mapi/es1api/Makefile
@@ -34,8 +34,6 @@
 
 GLAPI := $(TOP)/src/mapi/glapi
 MAPI := $(TOP)/src/mapi/mapi
-# directory for generated sources/headers
-GEN := glapi
 
 esapi_CPPFLAGS := \
 	-I$(TOP)/include \
@@ -68,13 +66,11 @@
 $(esapi_OBJECTS): %.o: $(MAPI)/%.c
 	$(CC) -c $(esapi_CPPFLAGS) $(CFLAGS) $< -o $@
 
-$(esapi_SOURCES): | glapi-stamp
+$(esapi_SOURCES): glapi_mapi_tmp.h
 
-.PHONY: glapi-stamp
-glapi-stamp:
-	@# generate sources/headers
-	@$(MAKE) -C $(GLAPI)/gen-es $(ES)
-	@touch $@
+include $(GLAPI)/gen/glapi_gen.mk
+glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
+	$(call glapi_gen_mapi,$<,$(ESAPI))
 
 .PHONY: clean
 clean:
@@ -83,9 +79,7 @@
 	-rm -f lib$(ESAPI).a
 	-rm -f $(esapi_OBJECTS)
 	-rm -f depend depend.bak
-	-rm -f glapi-stamp
-	@# clean generated sources/headers
-	@$(MAKE) -C $(GLAPI)/gen-es clean-$(ES)
+	-rm -f glapi_mapi_tmp.h
 
 pcedit = \
 	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
diff --git a/src/mapi/glapi/SConscript b/src/mapi/glapi/SConscript
index a776474..fdd6579 100644
--- a/src/mapi/glapi/SConscript
+++ b/src/mapi/glapi/SConscript
@@ -74,6 +74,11 @@
         else:
             pass
     
+    if env['toolchain'] == 'crossmingw':
+        # compile these files without -gstabs option
+        glapi_sources = env.compile_without_gstabs(glapi_sources, "glapi_dispatch.c")
+        glapi_sources = env.compile_without_gstabs(glapi_sources, "glapi_getproc.c")
+
     glapi = env.ConvenienceLibrary(
         target = 'glapi',
         source = glapi_sources,
diff --git a/src/mapi/glapi/gen-es/Makefile b/src/mapi/glapi/gen-es/Makefile
deleted file mode 100644
index bf66ec0..0000000
--- a/src/mapi/glapi/gen-es/Makefile
+++ /dev/null
@@ -1,91 +0,0 @@
-TOP = ../../../..
-MAPI = $(TOP)/src/mapi/mapi
-GLAPI = ../gen
-include $(TOP)/configs/current
-
-OUTPUTS :=			\
-	glapi_mapi_tmp.h	\
-	main/dispatch.h		\
-	main/remap_helper.h
-
-COMMON = gl_and_es_API.xml gl_XML.py glX_XML.py license.py typeexpr.py
-COMMON := $(addprefix $(GLAPI)/, $(COMMON))
-
-ES1_APIXML := es1_API.xml
-ES2_APIXML := es2_API.xml
-ES1_OUTPUT_DIR := $(TOP)/src/mapi/es1api
-ES2_OUTPUT_DIR := $(TOP)/src/mapi/es2api
-
-ES1_DEPS = $(ES1_APIXML) base1_API.xml es1_EXT.xml es_EXT.xml \
-	   es1_COMPAT.xml es_COMPAT.xml
-ES2_DEPS = $(ES2_APIXML) base2_API.xml es2_EXT.xml es_EXT.xml \
-	   es2_COMPAT.xml es_COMPAT.xml
-
-ES1_OUTPUTS := $(addprefix $(ES1_OUTPUT_DIR)/, $(OUTPUTS))
-ES2_OUTPUTS := $(addprefix $(ES2_OUTPUT_DIR)/, $(OUTPUTS))
-
-SHARED_GLAPI_APIXML := $(GLAPI)/gl_and_es_API.xml
-SHARED_GLAPI_OUTPUT_DIR := $(TOP)/src/mapi/shared-glapi
-SHARED_GLAPI_DEPS := $(SHARED_GLAPI_APIXML)
-SHARED_GLAPI_OUTPUTS = $(SHARED_GLAPI_OUTPUT_DIR)/glapi_mapi_tmp.h
-
-all: es1 es2 shared-glapi
-
-es1: $(ES1_OUTPUTS)
-es2: $(ES2_OUTPUTS)
-shared-glapi: $(SHARED_GLAPI_OUTPUTS)
-
-$(ES1_OUTPUTS): APIXML := $(ES1_APIXML)
-$(ES1_OUTPUTS): PRINTER := es1api
-$(ES1_OUTPUTS): $(ES1_DEPS)
-
-$(ES2_OUTPUTS): APIXML := $(ES2_APIXML)
-$(ES2_OUTPUTS): PRINTER := es2api
-$(ES2_OUTPUTS): $(ES2_DEPS)
-
-$(SHARED_GLAPI_OUTPUTS): APIXML := $(SHARED_GLAPI_APIXML)
-$(SHARED_GLAPI_OUTPUTS): PRINTER := shared-glapi
-$(SHARED_GLAPI_OUTPUTS): $(SHARED_GLAPI_DEPS)
-
-define gen-glapi
-	@mkdir -p $(dir $@)
-	$(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) $(1) > $@
-endef
-
-%/glapi_mapi_tmp.h: $(MAPI)/mapi_abi.py $(COMMON)
-	@mkdir -p $(dir $@)
-	$(PYTHON2) $(PYTHON_FLAGS) $< \
-		--printer $(PRINTER) --mode lib $(GLAPI)/gl_and_es_API.xml > $@
-
-%/main/dispatch.h: $(GLAPI)/gl_table.py $(COMMON)
-	$(call gen-glapi,-c -m remap_table)
-
-%/main/remap_helper.h: $(GLAPI)/remap_helper.py $(COMMON)
-	$(call gen-glapi)
-
-verify_xml:
-	@if [ ! -f gl.h ]; then \
-		echo "Please copy gl.h and gl2.h to this directory"; \
-		exit 1; \
-	fi
-	@echo "Verifying that es1_API.xml covers OpenGL ES 1.1..."
-	@$(PYTHON2) $(PYTHON_FLAGS) gl_parse_header.py gl.h > tmp.xml
-	@$(PYTHON2) $(PYTHON_FLAGS) gl_compare.py difference tmp.xml es1_API.xml
-	@echo "Verifying that es2_API.xml covers OpenGL ES 2.0..."
-	@$(PYTHON2) $(PYTHON_FLAGS) gl_parse_header.py gl2.h > tmp.xml
-	@$(PYTHON2) $(PYTHON_FLAGS) gl_compare.py difference tmp.xml es2_API.xml
-	@rm -f tmp.xml
-
-clean-es1:
-	-rm -f $(ES1_OUTPUTS)
-	-rm -rf $(ES1_OUTPUT_DIR)/main
-
-clean-es2:
-	-rm -f $(ES2_OUTPUTS)
-	-rm -rf $(ES2_OUTPUT_DIR)/main
-
-clean-shared-glapi:
-	-rm -f $(SHARED_GLAPI_OUTPUTS)
-
-clean: clean-es1 clean-es2 clean-shared-glapi
-	-rm -f *~ *.pyc *.pyo
diff --git a/src/mapi/glapi/gen-es/base1_API.xml b/src/mapi/glapi/gen-es/base1_API.xml
deleted file mode 100644
index 720be25..0000000
--- a/src/mapi/glapi/gen-es/base1_API.xml
+++ /dev/null
@@ -1,744 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL and OpenGL ES 1.x APIs
-     This file defines the base categories that can be shared by all APIs.
-     They are defined in an incremental fashion.
--->
-
-<OpenGLAPI>
-
-<!-- base subset of OpenGL 1.0 -->
-<category name="base1.0">
-    <enum name="FALSE"                                    value="0x0"/>
-    <enum name="TRUE"                                     value="0x1"/>
-    <enum name="ZERO"                                     value="0x0"/>
-    <enum name="ONE"                                      value="0x1"/>
-    <enum name="NO_ERROR"                                 value="0x0"/>
-
-    <enum name="POINTS"                                   value="0x0000"/>
-    <enum name="LINES"                                    value="0x0001"/>
-    <enum name="LINE_LOOP"                                value="0x0002"/>
-    <enum name="LINE_STRIP"                               value="0x0003"/>
-    <enum name="TRIANGLES"                                value="0x0004"/>
-    <enum name="TRIANGLE_STRIP"                           value="0x0005"/>
-    <enum name="TRIANGLE_FAN"                             value="0x0006"/>
-    <enum name="NEVER"                                    value="0x0200"/>
-    <enum name="LESS"                                     value="0x0201"/>
-    <enum name="EQUAL"                                    value="0x0202"/>
-    <enum name="LEQUAL"                                   value="0x0203"/>
-    <enum name="GREATER"                                  value="0x0204"/>
-    <enum name="NOTEQUAL"                                 value="0x0205"/>
-    <enum name="GEQUAL"                                   value="0x0206"/>
-    <enum name="ALWAYS"                                   value="0x0207"/>
-    <enum name="SRC_COLOR"                                value="0x0300"/>
-    <enum name="ONE_MINUS_SRC_COLOR"                      value="0x0301"/>
-    <enum name="SRC_ALPHA"                                value="0x0302"/>
-    <enum name="ONE_MINUS_SRC_ALPHA"                      value="0x0303"/>
-    <enum name="DST_ALPHA"                                value="0x0304"/>
-    <enum name="ONE_MINUS_DST_ALPHA"                      value="0x0305"/>
-    <enum name="DST_COLOR"                                value="0x0306"/>
-    <enum name="ONE_MINUS_DST_COLOR"                      value="0x0307"/>
-    <enum name="SRC_ALPHA_SATURATE"                       value="0x0308"/>
-    <enum name="FRONT"                                    value="0x0404"/>
-    <enum name="BACK"                                     value="0x0405"/>
-    <enum name="FRONT_AND_BACK"                           value="0x0408"/>
-    <enum name="INVALID_ENUM"                             value="0x0500"/>
-    <enum name="INVALID_VALUE"                            value="0x0501"/>
-    <enum name="INVALID_OPERATION"                        value="0x0502"/>
-    <enum name="OUT_OF_MEMORY"                            value="0x0505"/>
-    <enum name="CW"                                       value="0x0900"/>
-    <enum name="CCW"                                      value="0x0901"/>
-    <enum name="CULL_FACE"                     count="1"  value="0x0B44">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_TEST"                    count="1"  value="0x0B71">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_TEST"                  count="1"  value="0x0B90">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DITHER"                        count="1"  value="0x0BD0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND"                         count="1"  value="0x0BE2">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SCISSOR_TEST"                  count="1"  value="0x0C11">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="UNPACK_ALIGNMENT"              count="1"  value="0x0CF5">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="PACK_ALIGNMENT"                count="1"  value="0x0D05">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_TEXTURE_SIZE"              count="1"  value="0x0D33">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_VIEWPORT_DIMS"             count="2"  value="0x0D3A">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SUBPIXEL_BITS"                 count="1"  value="0x0D50">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="RED_BITS"                      count="1"  value="0x0D52">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="GREEN_BITS"                    count="1"  value="0x0D53">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLUE_BITS"                     count="1"  value="0x0D54">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_BITS"                    count="1"  value="0x0D55">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_BITS"                    count="1"  value="0x0D56">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_BITS"                  count="1"  value="0x0D57">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_2D"                    count="1"  value="0x0DE1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DONT_CARE"                                value="0x1100"/>
-    <enum name="FASTEST"                                  value="0x1101"/>
-    <enum name="NICEST"                                   value="0x1102"/>
-    <enum name="BYTE"                          count="1"  value="0x1400">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="UNSIGNED_BYTE"                 count="1"  value="0x1401">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="SHORT"                         count="2"  value="0x1402">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="UNSIGNED_SHORT"                count="2"  value="0x1403">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="FLOAT"                         count="4"  value="0x1406">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="INVERT"                                   value="0x150A"/>
-    <enum name="TEXTURE"                                  value="0x1702"/>
-    <enum name="ALPHA"                                    value="0x1906"/>
-    <enum name="RGB"                                      value="0x1907"/>
-    <enum name="RGBA"                                     value="0x1908"/>
-    <enum name="LUMINANCE"                                value="0x1909"/>
-    <enum name="LUMINANCE_ALPHA"                          value="0x190A"/>
-    <enum name="KEEP"                                     value="0x1E00"/>
-    <enum name="REPLACE"                                  value="0x1E01"/>
-    <enum name="INCR"                                     value="0x1E02"/>
-    <enum name="DECR"                                     value="0x1E03"/>
-    <enum name="VENDOR"                                   value="0x1F00"/>
-    <enum name="RENDERER"                                 value="0x1F01"/>
-    <enum name="VERSION"                                  value="0x1F02"/>
-    <enum name="EXTENSIONS"                               value="0x1F03"/>
-    <enum name="NEAREST"                                  value="0x2600"/>
-    <enum name="LINEAR"                                   value="0x2601"/>
-    <enum name="NEAREST_MIPMAP_NEAREST"                   value="0x2700"/>
-    <enum name="LINEAR_MIPMAP_NEAREST"                    value="0x2701"/>
-    <enum name="NEAREST_MIPMAP_LINEAR"                    value="0x2702"/>
-    <enum name="LINEAR_MIPMAP_LINEAR"                     value="0x2703"/>
-    <enum name="TEXTURE_MAG_FILTER"            count="1"  value="0x2800">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_MIN_FILTER"            count="1"  value="0x2801">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_WRAP_S"                count="1"  value="0x2802">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_WRAP_T"                count="1"  value="0x2803">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-    <enum name="REPEAT"                                   value="0x2901"/>
-
-    <enum name="DEPTH_BUFFER_BIT"                         value="0x00000100"/>
-    <enum name="STENCIL_BUFFER_BIT"                       value="0x00000400"/>
-    <enum name="COLOR_BUFFER_BIT"                         value="0x00004000"/>
-
-    <type name="float"   size="4"  float="true"    glx_name="FLOAT32"/>
-    <type name="clampf"  size="4"  float="true"    glx_name="FLOAT32"/>
-
-    <type name="int"     size="4"                  glx_name="CARD32"/>
-    <type name="uint"    size="4"  unsigned="true" glx_name="CARD32"/>
-    <type name="sizei"   size="4"                  glx_name="CARD32"/>
-    <type name="enum"    size="4"  unsigned="true" glx_name="ENUM"/>
-    <type name="bitfield" size="4" unsigned="true" glx_name="CARD32"/>
-
-    <type name="short"   size="2"                  glx_name="CARD16"/>
-    <type name="ushort"  size="2"  unsigned="true" glx_name="CARD16"/>
-
-    <type name="byte"    size="1"                  glx_name="CARD8"/>
-    <type name="ubyte"   size="1"  unsigned="true" glx_name="CARD8"/>
-    <type name="boolean" size="1"  unsigned="true" glx_name="CARD8"/>
-
-    <type name="void"    size="1"/>
-
-    <function name="BlendFunc" offset="241">
-        <param name="sfactor" type="GLenum"/>
-        <param name="dfactor" type="GLenum"/>
-        <glx rop="160"/>
-    </function>
-
-    <function name="Clear" offset="203">
-        <param name="mask" type="GLbitfield"/>
-        <glx rop="127"/>
-    </function>
-
-    <function name="ClearColor" offset="206">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="130"/>
-    </function>
-
-    <function name="ClearStencil" offset="207">
-        <param name="s" type="GLint"/>
-        <glx rop="131"/>
-    </function>
-
-    <function name="ColorMask" offset="210">
-        <param name="red" type="GLboolean"/>
-        <param name="green" type="GLboolean"/>
-        <param name="blue" type="GLboolean"/>
-        <param name="alpha" type="GLboolean"/>
-        <glx rop="134"/>
-    </function>
-
-    <function name="CullFace" offset="152">
-        <param name="mode" type="GLenum"/>
-        <glx rop="79"/>
-    </function>
-
-    <function name="DepthFunc" offset="245">
-        <param name="func" type="GLenum"/>
-        <glx rop="164"/>
-    </function>
-
-    <function name="DepthMask" offset="211">
-        <param name="flag" type="GLboolean"/>
-        <glx rop="135"/>
-    </function>
-
-    <function name="Disable" offset="214">
-        <param name="cap" type="GLenum"/>
-        <glx rop="138" handcode="client"/>
-    </function>
-
-    <function name="Enable" offset="215">
-        <param name="cap" type="GLenum"/>
-        <glx rop="139" handcode="client"/>
-    </function>
-
-    <function name="Finish" offset="216">
-        <glx sop="108" handcode="true"/>
-    </function>
-
-    <function name="Flush" offset="217">
-        <glx sop="142" handcode="true"/>
-    </function>
-
-    <function name="FrontFace" offset="157">
-        <param name="mode" type="GLenum"/>
-        <glx rop="84"/>
-    </function>
-
-    <function name="GetError" offset="261">
-        <return type="GLenum"/>
-        <glx sop="115" handcode="client"/>
-    </function>
-
-    <function name="GetIntegerv" offset="263">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="117" handcode="client"/>
-    </function>
-
-    <function name="GetString" offset="275">
-        <param name="name" type="GLenum"/>
-        <return type="const GLubyte *"/>
-        <glx sop="129" handcode="true"/>
-    </function>
-
-    <function name="Hint" offset="158">
-        <param name="target" type="GLenum"/>
-        <param name="mode" type="GLenum"/>
-        <glx rop="85"/>
-    </function>
-
-    <function name="LineWidth" offset="168">
-        <param name="width" type="GLfloat"/>
-        <glx rop="95"/>
-    </function>
-
-    <function name="PixelStorei" offset="250">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx sop="110" handcode="client"/>
-    </function>
-
-    <function name="ReadPixels" offset="256">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="GLvoid *" output="true"  img_width="width" img_height="height" img_format="format" img_type="type" img_target="0"/>
-        <glx sop="111"/>
-    </function>
-
-    <function name="Scissor" offset="176">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="103"/>
-    </function>
-
-    <function name="StencilFunc" offset="243">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLint"/>
-        <param name="mask" type="GLuint"/>
-        <glx rop="162"/>
-    </function>
-
-    <function name="StencilMask" offset="209">
-        <param name="mask" type="GLuint"/>
-        <glx rop="133"/>
-    </function>
-
-    <function name="StencilOp" offset="244">
-        <param name="fail" type="GLenum"/>
-        <param name="zfail" type="GLenum"/>
-        <param name="zpass" type="GLenum"/>
-        <glx rop="163"/>
-    </function>
-
-    <function name="TexParameterf" offset="178">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="105"/>
-    </function>
-
-    <function name="Viewport" offset="305">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="191"/>
-    </function>
-
-    <!-- these are not in OpenGL ES 1.0 -->
-    <enum name="LINE_WIDTH"                    count="1"  value="0x0B21">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CULL_FACE_MODE"                count="1"  value="0x0B45">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FRONT_FACE"                    count="1"  value="0x0B46">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_RANGE"                   count="2"  value="0x0B70">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_WRITEMASK"               count="1"  value="0x0B72">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_CLEAR_VALUE"             count="1"  value="0x0B73">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_FUNC"                    count="1"  value="0x0B74">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_CLEAR_VALUE"           count="1"  value="0x0B91">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_FUNC"                  count="1"  value="0x0B92">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_VALUE_MASK"            count="1"  value="0x0B93">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_FAIL"                  count="1"  value="0x0B94">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_PASS_DEPTH_FAIL"       count="1"  value="0x0B95">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_PASS_DEPTH_PASS"       count="1"  value="0x0B96">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_REF"                   count="1"  value="0x0B97">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_WRITEMASK"             count="1"  value="0x0B98">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VIEWPORT"                      count="4"  value="0x0BA2">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SCISSOR_BOX"                   count="4"  value="0x0C10">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_CLEAR_VALUE"             count="4"  value="0x0C22">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_WRITEMASK"               count="4"  value="0x0C23">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="TexParameterfv" offset="179">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="106"/>
-    </function>
-
-    <function name="TexParameteri" offset="180">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="107"/>
-    </function>
-
-    <function name="TexParameteriv" offset="181">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="108"/>
-    </function>
-
-    <function name="GetBooleanv" offset="258">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLboolean *" output="true" variable_param="pname"/>
-        <glx sop="112" handcode="client"/>
-    </function>
-
-    <function name="GetFloatv" offset="262">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="116" handcode="client"/>
-    </function>
-
-    <function name="GetTexParameterfv" offset="282">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="136"/>
-    </function>
-
-    <function name="GetTexParameteriv" offset="283">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="137"/>
-    </function>
-
-    <function name="IsEnabled" offset="286">
-        <param name="cap" type="GLenum"/>
-        <return type="GLboolean"/>
-        <glx sop="140" handcode="client"/>
-    </function>
-</category>
-
-<!-- base subset of OpenGL 1.1 -->
-<category name="base1.1">
-    <enum name="POLYGON_OFFSET_FILL"                      value="0x8037"/>
-
-    <function name="BindTexture" offset="307">
-        <param name="target" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <glx rop="4117"/>
-    </function>
-
-    <function name="CopyTexImage2D" offset="324">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <glx rop="4120"/>
-    </function>
-
-    <function name="CopyTexSubImage2D" offset="326">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4122"/>
-    </function>
-
-    <function name="DeleteTextures" offset="327">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="const GLuint *" count="n"/>
-        <glx sop="144"/>
-    </function>
-
-    <function name="DrawArrays" offset="310">
-        <param name="mode" type="GLenum"/>
-        <param name="first" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <glx rop="193" handcode="true"/>
-    </function>
-
-    <function name="DrawElements" offset="311">
-        <param name="mode" type="GLenum"/>
-        <param name="count" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <param name="indices" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="GenTextures" offset="328">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="GLuint *" output="true" count="n"/>
-        <glx sop="145" always_array="true"/>
-    </function>
-
-    <function name="PolygonOffset" offset="319">
-        <param name="factor" type="GLfloat"/>
-        <param name="units" type="GLfloat"/>
-        <glx rop="192"/>
-    </function>
-
-    <function name="TexSubImage2D" offset="333">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_xoff="xoffset" img_yoff="yoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4100" large="true"/>
-    </function>
-
-    <!-- these are not in OpenGL ES 1.0 -->
-    <enum name="POLYGON_OFFSET_UNITS"          count="1"  value="0x2A00">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POLYGON_OFFSET_FACTOR"         count="1"  value="0x8038">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_BINDING_2D"            count="1"  value="0x8069">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="IsTexture" offset="330">
-        <param name="texture" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx sop="146"/>
-    </function>
-</category>
-
-<!-- base subset of OpenGL 1.2 -->
-<category name="base1.2">
-    <enum name="UNSIGNED_SHORT_4_4_4_4"                   value="0x8033"/>
-    <enum name="UNSIGNED_SHORT_5_5_5_1"                   value="0x8034"/>
-    <enum name="CLAMP_TO_EDGE"                            value="0x812F"/>
-    <enum name="UNSIGNED_SHORT_5_6_5"                     value="0x8363"/>
-    <enum name="ALIASED_POINT_SIZE_RANGE"      count="2"  value="0x846D">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALIASED_LINE_WIDTH_RANGE"      count="2"  value="0x846E">
-        <size name="Get" mode="get"/>
-    </enum>
-</category>
-
-<!-- base subset of OpenGL 1.3 -->
-<category name="base1.3">
-    <enum name="SAMPLE_ALPHA_TO_COVERAGE"      count="1"  value="0x809E">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLE_COVERAGE"               count="1"  value="0x80A0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE0"                                 value="0x84C0"/>
-    <enum name="TEXTURE1"                                 value="0x84C1"/>
-    <enum name="TEXTURE2"                                 value="0x84C2"/>
-    <enum name="TEXTURE3"                                 value="0x84C3"/>
-    <enum name="TEXTURE4"                                 value="0x84C4"/>
-    <enum name="TEXTURE5"                                 value="0x84C5"/>
-    <enum name="TEXTURE6"                                 value="0x84C6"/>
-    <enum name="TEXTURE7"                                 value="0x84C7"/>
-    <enum name="TEXTURE8"                                 value="0x84C8"/>
-    <enum name="TEXTURE9"                                 value="0x84C9"/>
-    <enum name="TEXTURE10"                                value="0x84CA"/>
-    <enum name="TEXTURE11"                                value="0x84CB"/>
-    <enum name="TEXTURE12"                                value="0x84CC"/>
-    <enum name="TEXTURE13"                                value="0x84CD"/>
-    <enum name="TEXTURE14"                                value="0x84CE"/>
-    <enum name="TEXTURE15"                                value="0x84CF"/>
-    <enum name="TEXTURE16"                                value="0x84D0"/>
-    <enum name="TEXTURE17"                                value="0x84D1"/>
-    <enum name="TEXTURE18"                                value="0x84D2"/>
-    <enum name="TEXTURE19"                                value="0x84D3"/>
-    <enum name="TEXTURE20"                                value="0x84D4"/>
-    <enum name="TEXTURE21"                                value="0x84D5"/>
-    <enum name="TEXTURE22"                                value="0x84D6"/>
-    <enum name="TEXTURE23"                                value="0x84D7"/>
-    <enum name="TEXTURE24"                                value="0x84D8"/>
-    <enum name="TEXTURE25"                                value="0x84D9"/>
-    <enum name="TEXTURE26"                                value="0x84DA"/>
-    <enum name="TEXTURE27"                                value="0x84DB"/>
-    <enum name="TEXTURE28"                                value="0x84DC"/>
-    <enum name="TEXTURE29"                                value="0x84DD"/>
-    <enum name="TEXTURE30"                                value="0x84DE"/>
-    <enum name="TEXTURE31"                                value="0x84DF"/>
-    <enum name="NUM_COMPRESSED_TEXTURE_FORMATS" count="1" value="0x86A2">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COMPRESSED_TEXTURE_FORMATS"    count="-1"  value="0x86A3">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="ActiveTexture" offset="374">
-        <param name="texture" type="GLenum"/>
-        <glx rop="197"/>
-    </function>
-
-    <function name="CompressedTexImage2D" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="imageSize" type="GLsizei" counter="true"/>
-        <param name="data" type="const GLvoid *" count="imageSize"/>
-        <glx rop="215" handcode="client"/>
-    </function>
-
-    <function name="CompressedTexSubImage2D" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="imageSize" type="GLsizei" counter="true"/>
-        <param name="data" type="const GLvoid *" count="imageSize"/>
-        <glx rop="218" handcode="client"/>
-    </function>
-
-    <function name="SampleCoverage" offset="assign">
-        <param name="value" type="GLclampf"/>
-        <param name="invert" type="GLboolean"/>
-        <glx rop="229"/>
-    </function>
-
-    <!-- these are not in OpenGL ES 1.0 -->
-    <enum name="SAMPLE_BUFFERS"                count="1"  value="0x80A8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLES"                       count="1"  value="0x80A9">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLE_COVERAGE_VALUE"         count="1"  value="0x80AA">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLE_COVERAGE_INVERT"        count="1"  value="0x80AB">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ACTIVE_TEXTURE"                count="1"  value="0x84E0">
-        <size name="Get" mode="get"/>
-    </enum>
-</category>
-
-<!-- base subset of OpenGL 1.4 -->
-<category name="base1.4">
-    <enum name="GENERATE_MIPMAP_HINT"                     value="0x8192"/>
-</category>
-
-<!-- base subset of OpenGL 1.5 -->
-<category name="base1.5">
-    <enum name="BUFFER_SIZE"                              value="0x8764"/>
-    <enum name="BUFFER_USAGE"                             value="0x8765"/>
-    <enum name="ARRAY_BUFFER"                             value="0x8892"/>
-    <enum name="ELEMENT_ARRAY_BUFFER"                     value="0x8893"/>
-    <enum name="ARRAY_BUFFER_BINDING"                     value="0x8894"/>
-    <enum name="ELEMENT_ARRAY_BUFFER_BINDING"             value="0x8895"/>
-    <enum name="STATIC_DRAW"                              value="0x88E4"/>
-    <enum name="DYNAMIC_DRAW"                             value="0x88E8"/>
-
-    <type name="intptr"   size="4"                  glx_name="CARD32"/>
-    <type name="sizeiptr" size="4"                  glx_name="CARD32"/>
-
-    <function name="BindBuffer" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="buffer" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="BufferData" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="size" type="GLsizeiptr" counter="true"/>
-        <param name="data" type="const GLvoid *" count="size" img_null_flag="true"/>
-        <param name="usage" type="GLenum"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="BufferSubData" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="offset" type="GLintptr"/>
-        <param name="size" type="GLsizeiptr" counter="true"/>
-        <param name="data" type="const GLvoid *" count="size"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DeleteBuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="buffer" type="const GLuint *" count="n"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GenBuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="buffer" type="GLuint *" output="true" count="n"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetBufferParameteriv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="IsBuffer" offset="assign">
-        <param name="buffer" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx ignore="true"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/base2_API.xml b/src/mapi/glapi/gen-es/base2_API.xml
deleted file mode 100644
index b59ef62..0000000
--- a/src/mapi/glapi/gen-es/base2_API.xml
+++ /dev/null
@@ -1,533 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL and OpenGL ES 2.x APIs -->
-
-<OpenGLAPI>
-
-<xi:include href="base1_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- base subset of OpenGL 2.0 -->
-<category name="base2.0">
-    <enum name="BLEND_EQUATION_RGB"            count="1"  value="0x8009"> <!-- same as BLEND_EQUATION -->
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_ENABLED"   count="1"  value="0x8622">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_SIZE"      count="1"  value="0x8623">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_STRIDE"     count="1" value="0x8624">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_TYPE"      count="1"  value="0x8625">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="CURRENT_VERTEX_ATTRIB"         count="1"  value="0x8626">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_POINTER"              value="0x8645"/>
-    <enum name="STENCIL_BACK_FUNC"             count="1"  value="0x8800">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_BACK_FAIL"             count="1"  value="0x8801">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_BACK_PASS_DEPTH_FAIL"  count="1"  value="0x8802">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_BACK_PASS_DEPTH_PASS"  count="1"  value="0x8803">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_EQUATION_ALPHA"          count="1"  value="0x883D">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_VERTEX_ATTRIBS"            count="1"  value="0x8869">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_NORMALIZED"           value="0x886A"/>
-    <enum name="MAX_TEXTURE_IMAGE_UNITS"       count="1"  value="0x8872">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FRAGMENT_SHADER"                          value="0x8B30"/>
-    <enum name="VERTEX_SHADER"                            value="0x8B31"/>
-    <enum name="MAX_VERTEX_TEXTURE_IMAGE_UNITS"           value="0x8B4C"/>
-    <enum name="MAX_COMBINED_TEXTURE_IMAGE_UNITS"         value="0x8B4D"/>
-    <enum name="SHADER_TYPE"                              value="0x8B4F"/>
-    <enum name="FLOAT_VEC2"                               value="0x8B50"/>
-    <enum name="FLOAT_VEC3"                               value="0x8B51"/>
-    <enum name="FLOAT_VEC4"                               value="0x8B52"/>
-    <enum name="INT_VEC2"                                 value="0x8B53"/>
-    <enum name="INT_VEC3"                                 value="0x8B54"/>
-    <enum name="INT_VEC4"                                 value="0x8B55"/>
-    <enum name="BOOL"                                     value="0x8B56"/>
-    <enum name="BOOL_VEC2"                                value="0x8B57"/>
-    <enum name="BOOL_VEC3"                                value="0x8B58"/>
-    <enum name="BOOL_VEC4"                                value="0x8B59"/>
-    <enum name="FLOAT_MAT2"                               value="0x8B5A"/>
-    <enum name="FLOAT_MAT3"                               value="0x8B5B"/>
-    <enum name="FLOAT_MAT4"                               value="0x8B5C"/>
-    <enum name="SAMPLER_2D"                               value="0x8B5E"/>
-    <enum name="SAMPLER_CUBE"                             value="0x8B60"/>
-    <enum name="DELETE_STATUS"                            value="0x8B80"/>
-    <enum name="COMPILE_STATUS"                           value="0x8B81"/>
-    <enum name="LINK_STATUS"                              value="0x8B82"/>
-    <enum name="VALIDATE_STATUS"                          value="0x8B83"/>
-    <enum name="INFO_LOG_LENGTH"                          value="0x8B84"/>
-    <enum name="ATTACHED_SHADERS"                         value="0x8B85"/>
-    <enum name="ACTIVE_UNIFORMS"                          value="0x8B86"/>
-    <enum name="ACTIVE_UNIFORM_MAX_LENGTH"                value="0x8B87"/>
-    <enum name="SHADER_SOURCE_LENGTH"                     value="0x8B88"/>
-    <enum name="ACTIVE_ATTRIBUTES"                        value="0x8B89"/>
-    <enum name="ACTIVE_ATTRIBUTE_MAX_LENGTH"              value="0x8B8A"/>
-    <enum name="SHADING_LANGUAGE_VERSION"                 value="0x8B8C"/>
-    <enum name="CURRENT_PROGRAM"                          value="0x8B8D"/>
-    <enum name="STENCIL_BACK_REF"                         value="0x8CA3"/>
-    <enum name="STENCIL_BACK_VALUE_MASK"                  value="0x8CA4"/>
-    <enum name="STENCIL_BACK_WRITEMASK"                   value="0x8CA5"/>
-
-    <type name="char"    size="1"                  glx_name="CARD8"/>
-
-    <function name="AttachShader" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="shader" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="BindAttribLocation" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="index" type="GLuint"/>
-        <param name="name" type="const GLchar *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="BlendEquationSeparate" offset="assign">
-        <param name="modeRGB" type="GLenum"/>
-        <param name="modeA" type="GLenum"/>
-        <glx rop="4228"/>
-    </function>
-
-    <function name="CompileShader" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="CreateProgram" offset="assign">
-        <return type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="CreateShader" offset="assign">
-        <param name="type" type="GLenum"/>
-        <return type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DeleteProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DeleteShader" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DetachShader" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="shader" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DisableVertexAttribArray" offset="assign">
-        <param name="index" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="EnableVertexAttribArray" offset="assign">
-        <param name="index" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetActiveAttrib" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="index" type="GLuint"/>
-        <param name="bufSize" type="GLsizei "/>
-        <param name="length" type="GLsizei *" output="true"/>
-        <param name="size" type="GLint *" output="true"/>
-        <param name="type" type="GLenum *" output="true"/>
-        <param name="name" type="GLchar *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetActiveUniform" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="index" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *" output="true"/>
-        <param name="size" type="GLint *" output="true"/>
-        <param name="type" type="GLenum *" output="true"/>
-        <param name="name" type="GLchar *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetAttachedShaders" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="maxCount" type="GLsizei"/>
-        <param name="count" type="GLsizei *" output="true"/>
-        <param name="obj" type="GLuint *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetAttribLocation" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="name" type="const GLchar *"/>
-        <return type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetProgramiv" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetProgramInfoLog" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *"/>
-        <param name="infoLog" type="GLchar *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetShaderiv" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetShaderInfoLog" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *"/>
-        <param name="infoLog" type="GLchar *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetShaderSource" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *" output="true"/>
-        <param name="source" type="GLchar *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetUniformfv" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="location" type="GLint"/>
-        <param name="params" type="GLfloat *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetUniformiv" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="location" type="GLint"/>
-        <param name="params" type="GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetUniformLocation" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="name" type="const GLchar *"/>
-        <return type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetVertexAttribfv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetVertexAttribiv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetVertexAttribPointerv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="pointer" type="GLvoid **" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="IsProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="IsShader" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="LinkProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="ShaderSource" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="string" type="const GLchar **"/>
-        <param name="length" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="StencilFuncSeparate" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLint"/>
-        <param name="mask" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="StencilOpSeparate" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="sfail" type="GLenum"/>
-        <param name="zfail" type="GLenum"/>
-        <param name="zpass" type="GLenum"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="StencilMaskSeparate" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="mask" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform1f" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLfloat"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform1fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform1i" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform1iv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform2f" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLfloat"/>
-        <param name="v1" type="GLfloat"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform2fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform2i" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLint"/>
-        <param name="v1" type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform2iv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform3f" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLfloat"/>
-        <param name="v1" type="GLfloat"/>
-        <param name="v2" type="GLfloat"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform3fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform3i" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLint"/>
-        <param name="v1" type="GLint"/>
-        <param name="v2" type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform3iv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform4f" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLfloat"/>
-        <param name="v1" type="GLfloat"/>
-        <param name="v2" type="GLfloat"/>
-        <param name="v3" type="GLfloat"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform4fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform4i" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLint"/>
-        <param name="v1" type="GLint"/>
-        <param name="v2" type="GLint"/>
-        <param name="v3" type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform4iv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="UniformMatrix2fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="transpose" type="GLboolean"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="UniformMatrix3fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="transpose" type="GLboolean"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="UniformMatrix4fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="transpose" type="GLboolean"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="UseProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="ValidateProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="VertexAttrib1f" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="x" type="GLfloat"/>
-    </function>
-
-    <function name="VertexAttrib1fv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="v" type="const GLfloat *"/>
-    </function>
-
-    <function name="VertexAttrib2f" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-    </function>
-
-    <function name="VertexAttrib2fv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="v" type="const GLfloat *"/>
-    </function>
-
-    <function name="VertexAttrib3f" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-    </function>
-
-    <function name="VertexAttrib3fv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="v" type="const GLfloat *"/>
-    </function>
-
-    <function name="VertexAttrib4f" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <param name="w" type="GLfloat"/>
-    </function>
-
-    <function name="VertexAttrib4fv" offset="assign">
-	<param name="index" type="GLuint"/>
-        <param name="v" type="const GLfloat *"/>
-    </function>
-
-    <function name="VertexAttribPointer" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="normalized" type="GLboolean"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es1_API.xml b/src/mapi/glapi/gen-es/es1_API.xml
deleted file mode 100644
index 3428ae5..0000000
--- a/src/mapi/glapi/gen-es/es1_API.xml
+++ /dev/null
@@ -1,1100 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES 1.x API -->
-
-<OpenGLAPI>
-
-<xi:include href="base1_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- core subset of OpenGL 1.3 defined in OpenGL ES 1.0 -->
-<category name="core1.0">
-    <!-- addition to base1.0 -->
-    <enum name="ADD"                                      value="0x0104"/>
-    <enum name="STACK_OVERFLOW"                           value="0x0503"/>
-    <enum name="STACK_UNDERFLOW"                          value="0x0504"/>
-    <enum name="EXP"                                      value="0x0800"/>
-    <enum name="EXP2"                                     value="0x0801"/>
-    <enum name="POINT_SMOOTH"                  count="1"  value="0x0B10">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LINE_SMOOTH"                   count="1"  value="0x0B20">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHTING"                      count="1"  value="0x0B50">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT_MODEL_TWO_SIDE"          count="1"  value="0x0B52">
-        <size name="LightModelfv"/>
-        <size name="LightModeliv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT_MODEL_AMBIENT"           count="4"  value="0x0B53">
-        <size name="LightModelfv"/>
-        <size name="LightModeliv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_MATERIAL"                count="1"  value="0x0B57">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG"                           count="1"  value="0x0B60">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_DENSITY"                   count="1"  value="0x0B62">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_START"                     count="1"  value="0x0B63">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_END"                       count="1"  value="0x0B64">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_MODE"                      count="1"  value="0x0B65">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_COLOR"                     count="4"  value="0x0B66">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMALIZE"                     count="1"  value="0x0BA1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_TEST"                    count="1"  value="0x0BC0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="PERSPECTIVE_CORRECTION_HINT"   count="1"  value="0x0C50">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_SMOOTH_HINT"             count="1"  value="0x0C51">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LINE_SMOOTH_HINT"              count="1"  value="0x0C52">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POLYGON_SMOOTH_HINT"           count="1"  value="0x0C53">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_HINT"                      count="1"  value="0x0C54">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_LIGHTS"                    count="1"  value="0x0D31">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_MODELVIEW_STACK_DEPTH"     count="1"  value="0x0D36">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_PROJECTION_STACK_DEPTH"    count="1"  value="0x0D38">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_TEXTURE_STACK_DEPTH"       count="1"  value="0x0D39">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="AMBIENT"                       count="4"  value="0x1200">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="DIFFUSE"                       count="4"  value="0x1201">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="SPECULAR"                      count="4"  value="0x1202">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="POSITION"                      count="4"  value="0x1203">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="SPOT_DIRECTION"                count="3"  value="0x1204">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="SPOT_EXPONENT"                 count="1"  value="0x1205">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="SPOT_CUTOFF"                   count="1"  value="0x1206">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="CONSTANT_ATTENUATION"          count="1"  value="0x1207">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="LINEAR_ATTENUATION"            count="1"  value="0x1208">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="QUADRATIC_ATTENUATION"         count="1"  value="0x1209">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="CLEAR"                                    value="0x1500"/>
-    <enum name="AND"                                      value="0x1501"/>
-    <enum name="AND_REVERSE"                              value="0x1502"/>
-    <enum name="COPY"                                     value="0x1503"/>
-    <enum name="AND_INVERTED"                             value="0x1504"/>
-    <enum name="NOOP"                                     value="0x1505"/>
-    <enum name="XOR"                                      value="0x1506"/>
-    <enum name="OR"                                       value="0x1507"/>
-    <enum name="NOR"                                      value="0x1508"/>
-    <enum name="EQUIV"                                    value="0x1509"/>
-    <enum name="OR_REVERSE"                               value="0x150B"/>
-    <enum name="COPY_INVERTED"                            value="0x150C"/>
-    <enum name="OR_INVERTED"                              value="0x150D"/>
-    <enum name="NAND"                                     value="0x150E"/>
-    <enum name="SET"                                      value="0x150F"/>
-    <enum name="EMISSION"                      count="4"  value="0x1600">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-    </enum>
-    <enum name="SHININESS"                     count="1"  value="0x1601">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-    </enum>
-    <enum name="AMBIENT_AND_DIFFUSE"           count="4"  value="0x1602">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-    </enum>
-    <enum name="MODELVIEW"                                value="0x1700"/>
-    <enum name="PROJECTION"                               value="0x1701"/>
-    <enum name="FLAT"                                     value="0x1D00"/>
-    <enum name="SMOOTH"                                   value="0x1D01"/>
-    <enum name="MODULATE"                                 value="0x2100"/>
-    <enum name="DECAL"                                    value="0x2101"/>
-    <enum name="TEXTURE_ENV_MODE"              count="1"  value="0x2200">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_ENV_COLOR"             count="4"  value="0x2201">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_ENV"                              value="0x2300"/>
-    <enum name="LIGHT0"                        count="1"  value="0x4000">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT1"                        count="1"  value="0x4001">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT2"                        count="1"  value="0x4002">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT3"                        count="1"  value="0x4003">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT4"                        count="1"  value="0x4004">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT5"                        count="1"  value="0x4005">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT6"                        count="1"  value="0x4006">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT7"                        count="1"  value="0x4007">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="AlphaFunc" offset="240">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampf"/>
-        <glx rop="159"/>
-    </function>
-
-    <function name="Color4f" offset="29" vectorequiv="Color4fv">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-        <param name="alpha" type="GLfloat"/>
-    </function>
-
-    <function name="Fogf" offset="153">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="80"/>
-    </function>
-
-    <function name="Fogfv" offset="154">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="81"/>
-    </function>
-
-    <function name="Lightf" offset="159">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="86"/>
-    </function>
-
-    <function name="Lightfv" offset="160">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="87"/>
-    </function>
-
-    <function name="LightModelf" offset="163">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="90"/>
-    </function>
-
-    <function name="LightModelfv" offset="164">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="91"/>
-    </function>
-
-    <function name="LoadIdentity" offset="290">
-        <glx rop="176"/>
-    </function>
-
-    <function name="LoadMatrixf" offset="291">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="177"/>
-    </function>
-
-    <function name="LogicOp" offset="242">
-        <param name="opcode" type="GLenum"/>
-        <glx rop="161"/>
-    </function>
-
-    <function name="Materialf" offset="169">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="96"/>
-    </function>
-
-    <function name="Materialfv" offset="170">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="97"/>
-    </function>
-
-    <function name="MatrixMode" offset="293">
-        <param name="mode" type="GLenum"/>
-        <glx rop="179"/>
-    </function>
-
-    <function name="MultMatrixf" offset="294">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="180"/>
-    </function>
-
-    <function name="Normal3f" offset="56" vectorequiv="Normal3fv">
-        <param name="nx" type="GLfloat"/>
-        <param name="ny" type="GLfloat"/>
-        <param name="nz" type="GLfloat"/>
-    </function>
-
-    <function name="PointSize" offset="173">
-        <param name="size" type="GLfloat"/>
-        <glx rop="100"/>
-    </function>
-
-    <function name="PopMatrix" offset="297">
-        <glx rop="183"/>
-    </function>
-
-    <function name="PushMatrix" offset="298">
-        <glx rop="184"/>
-    </function>
-
-    <function name="Rotatef" offset="300">
-        <param name="angle" type="GLfloat"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="186"/>
-    </function>
-
-    <function name="Scalef" offset="302">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="188"/>
-    </function>
-
-    <function name="ShadeModel" offset="177">
-        <param name="mode" type="GLenum"/>
-        <glx rop="104"/>
-    </function>
-
-    <function name="TexEnvf" offset="184">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="111"/>
-    </function>
-
-    <function name="TexEnvfv" offset="185">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="112"/>
-    </function>
-
-    <function name="TexImage2D" offset="183">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/>
-        <glx rop="110" large="true"/>
-    </function>
-
-    <function name="Translatef" offset="304">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="190"/>
-    </function>
-
-    <!-- addition to base1.1 -->
-    <enum name="COLOR_LOGIC_OP"                           value="0x0BF2"/>
-    <enum name="VERTEX_ARRAY"                  count="1"  value="0x8074">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMAL_ARRAY"                  count="1"  value="0x8075">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY"                   count="1"  value="0x8076">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY"           count="1"  value="0x8078">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="ColorPointer" offset="308">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="DisableClientState" offset="309">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="EnableClientState" offset="313">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="NormalPointer" offset="318">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="TexCoordPointer" offset="320">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="VertexPointer" offset="321">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!-- addition to base1.2 -->
-    <enum name="SMOOTH_POINT_SIZE_RANGE"       count="2"  value="0x0B12">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SMOOTH_LINE_WIDTH_RANGE"       count="2"  value="0x0B22">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="RESCALE_NORMAL"                count="1"  value="0x803A">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_ELEMENTS_VERTICES"         count="1"  value="0x80E8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_ELEMENTS_INDICES"          count="1"  value="0x80E9">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <!-- addition to base1.3 -->
-    <enum name="MULTISAMPLE"                   count="1"  value="0x809D">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLE_ALPHA_TO_ONE"           count="1"  value="0x809F">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_TEXTURE_UNITS"             count="1"  value="0x84E2">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="ClientActiveTexture" offset="375">
-        <param name="texture" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="MultiTexCoord4f" offset="402" vectorequiv="MultiTexCoord4fv">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function>
-</category>
-
-<!-- core subset of OpenGL 1.5 defined in OpenGL ES 1.1 -->
-<category name="core1.1">
-    <!-- addition to base1.0 -->
-    <enum name="CURRENT_COLOR"                 count="4"  value="0x0B00">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CURRENT_NORMAL"                count="3"  value="0x0B02">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CURRENT_TEXTURE_COORDS"        count="4"  value="0x0B03">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_SIZE"                    count="1"  value="0x0B11">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SHADE_MODEL"                   count="1"  value="0x0B54">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MATRIX_MODE"                   count="1"  value="0x0BA0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MODELVIEW_STACK_DEPTH"         count="1"  value="0x0BA3">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="PROJECTION_STACK_DEPTH"        count="1"  value="0x0BA4">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_STACK_DEPTH"           count="1"  value="0x0BA5">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MODELVIEW_MATRIX"              count="16" value="0x0BA6">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="PROJECTION_MATRIX"             count="16" value="0x0BA7">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_MATRIX"                count="16" value="0x0BA8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_TEST_FUNC"               count="1"  value="0x0BC1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_TEST_REF"                count="1"  value="0x0BC2">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_DST"                     count="1"  value="0x0BE0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC"                     count="1"  value="0x0BE1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LOGIC_OP_MODE"                 count="1"  value="0x0BF0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_SCALE"                   count="1"  value="0x0D1C">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_CLIP_PLANES"               count="1"  value="0x0D32">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE0"                   count="1"  value="0x3000">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE1"                   count="1"  value="0x3001">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE2"                   count="1"  value="0x3002">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE3"                   count="1"  value="0x3003">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE4"                   count="1"  value="0x3004">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE5"                   count="1"  value="0x3005">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="Color4ub" offset="35" vectorequiv="Color4ubv">
-        <param name="red" type="GLubyte"/>
-        <param name="green" type="GLubyte"/>
-        <param name="blue" type="GLubyte"/>
-        <param name="alpha" type="GLubyte"/>
-    </function>
-
-    <function name="GetLightfv" offset="264">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="118"/>
-    </function>
-
-    <function name="GetMaterialfv" offset="269">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="123"/>
-    </function>
-
-    <function name="GetTexEnvfv" offset="276">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="130"/>
-    </function>
-
-    <function name="GetTexEnviv" offset="277">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="131"/>
-    </function>
-
-    <function name="TexEnvi" offset="186">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="113"/>
-    </function>
-
-    <function name="TexEnviv" offset="187">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="114"/>
-    </function>
-
-    <!-- addition to base1.1 -->
-    <enum name="VERTEX_ARRAY_SIZE"             count="1"  value="0x807A">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ARRAY_TYPE"             count="1"  value="0x807B">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ARRAY_STRIDE"           count="1"  value="0x807C">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMAL_ARRAY_TYPE"             count="1"  value="0x807E">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMAL_ARRAY_STRIDE"           count="1"  value="0x807F">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY_SIZE"              count="1"  value="0x8081">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY_TYPE"              count="1"  value="0x8082">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY_STRIDE"            count="1"  value="0x8083">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY_SIZE"      count="1"  value="0x8088">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY_TYPE"      count="1"  value="0x8089">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY_STRIDE"    count="1"  value="0x808A">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ARRAY_POINTER"                     value="0x808E"/>
-    <enum name="NORMAL_ARRAY_POINTER"                     value="0x808F"/>
-    <enum name="COLOR_ARRAY_POINTER"                      value="0x8090"/>
-    <enum name="TEXTURE_COORD_ARRAY_POINTER"              value="0x8092"/>
-
-    <function name="GetPointerv" offset="329">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLvoid **" output="true"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!-- addition to base1.2 -->
-
-    <!-- addition to base1.3 -->
-    <enum name="CLIENT_ACTIVE_TEXTURE"         count="1"  value="0x84E1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SUBTRACT"                                 value="0x84E7"/>
-    <enum name="COMBINE"                                  value="0x8570"/>
-    <enum name="COMBINE_RGB"                   count="1"  value="0x8571">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="COMBINE_ALPHA"                 count="1"  value="0x8572">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="RGB_SCALE"                     count="1"  value="0x8573">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="ADD_SIGNED"                               value="0x8574"/>
-    <enum name="INTERPOLATE"                              value="0x8575"/>
-    <enum name="CONSTANT"                                 value="0x8576"/>
-    <enum name="PRIMARY_COLOR"                            value="0x8577"/>
-    <enum name="PREVIOUS"                                 value="0x8578"/>
-    <enum name="OPERAND0_RGB"                  count="1"  value="0x8590">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND1_RGB"                  count="1"  value="0x8591">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND2_RGB"                  count="1"  value="0x8592">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND0_ALPHA"                count="1"  value="0x8598">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND1_ALPHA"                count="1"  value="0x8599">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND2_ALPHA"                count="1"  value="0x859A">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="DOT3_RGB"                                 value="0x86AE"/>
-    <enum name="DOT3_RGBA"                                value="0x86AF"/>
-
-    <!-- addition to base1.4 -->
-    <enum name="POINT_SIZE_MIN"                count="1"  value="0x8126">
-        <size name="PointParameterfv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_SIZE_MAX"                count="1"  value="0x8127">
-        <size name="PointParameterfv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_FADE_THRESHOLD_SIZE"     count="1"  value="0x8128">
-        <size name="PointParameterfv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_DISTANCE_ATTENUATION"    count="3"  value="0x8129">
-        <size name="PointParameterfv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="GENERATE_MIPMAP"               count="1"  value="0x8191">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-
-    <function name="PointParameterf" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="2065"/>
-    </function>
-
-    <function name="PointParameterfv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="2066"/>
-    </function>
-
-    <!-- addition to base1.5 -->
-    <enum name="SRC0_RGB"                                 value="0x8580"/>
-    <enum name="SRC1_RGB"                                 value="0x8581"/>
-    <enum name="SRC2_RGB"                                 value="0x8582"/>
-    <enum name="SRC0_ALPHA"                               value="0x8588"/>
-    <enum name="SRC1_ALPHA"                               value="0x8589"/>
-    <enum name="SRC2_ALPHA"                               value="0x858A"/>
-    <enum name="VERTEX_ARRAY_BUFFER_BINDING"   count="1"  value="0x8896">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMAL_ARRAY_BUFFER_BINDING"   count="1"  value="0x8897">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY_BUFFER_BINDING"    count="1"  value="0x8898">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY_BUFFER_BINDING" count="1" value="0x889A">
-        <size name="Get" mode="get"/>
-    </enum>
-</category>
-
-<!-- OpenGL ES 1.0 -->
-<category name="es1.0">
-    <!-- addition to core1.0 -->
-
-    <!-- from GL_OES_fixed_point -->
-    <enum name="FIXED"                                    value="0x140C"/>
-
-    <type name="fixed"   size="4"                                    />
-    <type name="clampx"  size="4"                                    />
-
-    <function name="AlphaFuncx" offset="assign">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampx"/>
-    </function>
-
-    <function name="ClearColorx" offset="assign">
-        <param name="red" type="GLclampx"/>
-        <param name="green" type="GLclampx"/>
-        <param name="blue" type="GLclampx"/>
-        <param name="alpha" type="GLclampx"/>
-    </function>
-
-    <function name="ClearDepthx" offset="assign">
-        <param name="depth" type="GLclampx"/>
-    </function>
-
-    <function name="Color4x" offset="assign">
-        <param name="red" type="GLfixed"/>
-        <param name="green" type="GLfixed"/>
-        <param name="blue" type="GLfixed"/>
-        <param name="alpha" type="GLfixed"/>
-    </function>
-
-    <function name="DepthRangex" offset="assign">
-        <param name="zNear" type="GLclampx"/>
-        <param name="zFar" type="GLclampx"/>
-    </function>
-
-    <function name="Fogx" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="Fogxv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="Frustumx" offset="assign">
-        <param name="left" type="GLfixed"/>
-        <param name="right" type="GLfixed"/>
-        <param name="bottom" type="GLfixed"/>
-        <param name="top" type="GLfixed"/>
-        <param name="zNear" type="GLfixed"/>
-        <param name="zFar" type="GLfixed"/>
-    </function>
-
-    <function name="LightModelx" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="LightModelxv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="Lightx" offset="assign">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="Lightxv" offset="assign">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="LineWidthx" offset="assign">
-        <param name="width" type="GLfixed"/>
-    </function>
-
-    <function name="LoadMatrixx" offset="assign">
-        <param name="m" type="const GLfixed *" count="16"/>
-    </function>
-
-    <function name="Materialx" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="Materialxv" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="MultMatrixx" offset="assign">
-        <param name="m" type="const GLfixed *" count="16"/>
-    </function>
-
-    <function name="MultiTexCoord4x" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfixed"/>
-        <param name="t" type="GLfixed"/>
-        <param name="r" type="GLfixed"/>
-        <param name="q" type="GLfixed"/>
-    </function>
-
-    <function name="Normal3x" offset="assign">
-        <param name="nx" type="GLfixed"/>
-        <param name="ny" type="GLfixed"/>
-        <param name="nz" type="GLfixed"/>
-    </function>
-
-    <function name="Orthox" offset="assign">
-        <param name="left" type="GLfixed"/>
-        <param name="right" type="GLfixed"/>
-        <param name="bottom" type="GLfixed"/>
-        <param name="top" type="GLfixed"/>
-        <param name="zNear" type="GLfixed"/>
-        <param name="zFar" type="GLfixed"/>
-    </function>
-
-    <function name="PointSizex" offset="assign">
-        <param name="size" type="GLfixed"/>
-    </function>
-
-    <function name="PolygonOffsetx" offset="assign">
-        <param name="factor" type="GLfixed"/>
-        <param name="units" type="GLfixed"/>
-    </function>
-
-    <function name="Rotatex" offset="assign">
-        <param name="angle" type="GLfixed"/>
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <function name="SampleCoveragex" offset="assign">
-        <param name="value" type="GLclampx"/>
-        <param name="invert" type="GLboolean"/>
-    </function>
-
-    <function name="Scalex" offset="assign">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <function name="TexEnvx" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="TexEnvxv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="TexParameterx" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="Translatex" offset="assign">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <!-- from GL_OES_single_precision -->
-    <function name="ClearDepthf" offset="assign">
-        <param name="depth" type="GLclampf"/>
-    </function>
-
-    <function name="DepthRangef" offset="assign">
-        <param name="zNear" type="GLclampf"/>
-        <param name="zFar" type="GLclampf"/>
-    </function>
-
-    <function name="Frustumf" offset="assign">
-        <param name="left" type="GLfloat"/>
-        <param name="right" type="GLfloat"/>
-        <param name="bottom" type="GLfloat"/>
-        <param name="top" type="GLfloat"/>
-        <param name="zNear" type="GLfloat"/>
-        <param name="zFar" type="GLfloat"/>
-    </function>
-
-    <function name="Orthof" offset="assign">
-        <param name="left" type="GLfloat"/>
-        <param name="right" type="GLfloat"/>
-        <param name="bottom" type="GLfloat"/>
-        <param name="top" type="GLfloat"/>
-        <param name="zNear" type="GLfloat"/>
-        <param name="zFar" type="GLfloat"/>
-    </function>
-</category>
-
-<!-- OpenGL ES 1.1 -->
-<category name="es1.1">
-    <!-- addition to core1.1 -->
-
-    <!-- from GL_OES_fixed_point -->
-    <function name="ClipPlanex" offset="assign">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLfixed *" count="4"/>
-    </function>
-
-    <function name="GetClipPlanex" offset="assign">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLfixed *" output="true" count="4"/>
-    </function>
-
-    <function name="GetFixedv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetLightxv" offset="assign">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetMaterialxv" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetTexEnvxv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetTexParameterxv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="PointParameterx" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="PointParameterxv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *"/>
-    </function>
-
-    <function name="TexParameterxv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <!-- from GL_OES_matrix_get -->
-    <enum name="MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES"   value="0x898D"/>
-    <enum name="PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES"  value="0x898E"/>
-    <enum name="TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES"	  value="0x898F"/>
-
-    <!-- from GL_OES_single_precision -->
-    <function name="ClipPlanef" offset="assign">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLfloat *" count="4"/>
-    </function>
-
-    <function name="GetClipPlanef" offset="assign">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLfloat *" output="true" count="4"/>
-    </function>
-</category>
-
-<xi:include href="es1_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-<xi:include href="es1_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es1_COMPAT.xml b/src/mapi/glapi/gen-es/es1_COMPAT.xml
deleted file mode 100644
index 096cea8..0000000
--- a/src/mapi/glapi/gen-es/es1_COMPAT.xml
+++ /dev/null
@@ -1,135 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<OpenGLAPI>
-
-<!-- This file defines the functions that are needed by Mesa.  It
-     makes sure the generated glapi headers are compatible with Mesa.
-     It mainly consists of missing functions and aliases in OpenGL ES.
--->
-
-<xi:include href="es_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- except for those defined by es_COMPAT.xml, these are also needed -->
-<category name="compat">
-    <!-- OpenGL 1.0 -->
-    <function name="TexGenf" alias="TexGenfOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="117"/>
-    </function>
-
-    <function name="TexGenfv" alias="TexGenfvOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="118"/>
-    </function>
-
-    <function name="TexGeni" alias="TexGeniOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="119"/>
-    </function>
-
-    <function name="TexGeniv" alias="TexGenivOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="120"/>
-    </function>
-
-    <function name="GetTexGenfv" alias="GetTexGenfvOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="133"/>
-    </function>
-
-    <function name="GetTexGeniv" alias="GetTexGenivOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="134"/>
-    </function>
-
-    <!-- OpenGL 1.2 -->
-    <function name="BlendColor" offset="336" static_dispatch="false">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="4096"/>
-    </function>
-
-    <function name="BlendEquation" alias="BlendEquationOES" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4097"/>
-    </function>
-
-    <function name="TexImage3D" offset="371" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/>
-        <glx rop="4114" large="true"/>
-    </function>
-
-    <function name="TexSubImage3D" offset="372" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4115" large="true"/>
-    </function>
-
-    <function name="CopyTexSubImage3D" offset="373" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4123"/>
-    </function>
-
-    <!-- GL_ARB_multitexture -->
-    <function name="ActiveTextureARB" alias="ActiveTexture" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx rop="197"/>
-    </function>
-
-    <function name="ClientActiveTextureARB" alias="ClientActiveTexture" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="MultiTexCoord4fARB" alias="MultiTexCoord4f" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es1_EXT.xml b/src/mapi/glapi/gen-es/es1_EXT.xml
deleted file mode 100644
index c1e8637..0000000
--- a/src/mapi/glapi/gen-es/es1_EXT.xml
+++ /dev/null
@@ -1,699 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES 1.x extensions -->
-
-<OpenGLAPI>
-
-<xi:include href="es_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_blend_equation_separate" number="1">
-    <enum name="BLEND_EQUATION_RGB_OES"        count="1"  value="0x8009">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_EQUATION_ALPHA_OES"      count="1"  value="0x883D">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="BlendEquationSeparateOES" offset="assign">
-        <param name="modeRGB" type="GLenum"/>
-        <param name="modeA" type="GLenum"/>
-        <glx rop="4228"/>
-    </function>
-</category>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_blend_func_separate" number="2">
-    <enum name="BLEND_DST_RGB_OES"             count="1"  value="0x80C8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC_RGB_OES"             count="1"  value="0x80C9">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_DST_ALPHA_OES"           count="1"  value="0x80CA">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC_ALPHA_OES"           count="1"  value="0x80CB">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="BlendFuncSeparateOES" offset="assign">
-        <param name="sfactorRGB" type="GLenum"/>
-        <param name="dfactorRGB" type="GLenum"/>
-        <param name="sfactorAlpha" type="GLenum"/>
-        <param name="dfactorAlpha" type="GLenum"/>
-        <glx rop="4134"/>
-    </function>
-</category>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_blend_subtract" number="3">
-    <enum name="FUNC_ADD_OES"                             value="0x8006"/>
-    <enum name="BLEND_EQUATION_OES"            count="1"  value="0x8009">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FUNC_SUBTRACT_OES"                        value="0x800A"/>
-    <enum name="FUNC_REVERSE_SUBTRACT_OES"                value="0x800B"/>
-
-    <function name="BlendEquationOES" offset="337">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4097"/>
-    </function>
-</category>
-
-<!-- core addition to es1.0 and later -->
-<category name="GL_OES_byte_coordinates" number="4">
-    <enum name="BYTE"                                     value="0x1400"/>
-</category>
-
-<!-- optional for es1.1 -->
-<category name="GL_OES_draw_texture" number="7">
-    <enum name="TEXTURE_CROP_RECT_OES"                    value="0x8B9D"/>
-
-    <function name="DrawTexiOES" offset="assign">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-        <param name="width" type="GLint"/>
-        <param name="height" type="GLint"/>
-    </function>
-
-    <function name="DrawTexivOES" offset="assign">
-        <param name="coords" type="const GLint *" count="5"/>
-    </function>
-
-    <function name="DrawTexfOES" offset="assign">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <param name="width" type="GLfloat"/>
-        <param name="height" type="GLfloat"/>
-    </function>
-
-    <function name="DrawTexfvOES" offset="assign">
-        <param name="coords" type="const GLfloat *" count="5"/>
-    </function>
-
-    <function name="DrawTexsOES" offset="assign">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-        <param name="width" type="GLshort"/>
-        <param name="height" type="GLshort"/>
-    </function>
-
-    <function name="DrawTexsvOES" offset="assign">
-        <param name="coords" type="const GLshort *" count="5"/>
-    </function>
-
-    <function name="DrawTexxOES" offset="assign">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-        <param name="width" type="GLfixed"/>
-        <param name="height" type="GLfixed"/>
-    </function>
-
-    <function name="DrawTexxvOES" offset="assign">
-        <param name="coords" type="const GLfixed *" count="5"/>
-    </function>
-
-    <!-- TexParameter{ifx}v is skipped here -->
-</category>
-
-<!-- core addition to es1.0 and later -->
-<category name="GL_OES_fixed_point" number="9">
-    <enum name="FIXED_OES"                                value="0x140C"/>
-
-    <!-- additon to es1.0 -->
-    <function name="AlphaFuncxOES" alias="AlphaFuncx">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampx"/>
-    </function>
-
-    <function name="ClearColorxOES" alias="ClearColorx">
-        <param name="red" type="GLclampx"/>
-        <param name="green" type="GLclampx"/>
-        <param name="blue" type="GLclampx"/>
-        <param name="alpha" type="GLclampx"/>
-    </function>
-
-    <function name="ClearDepthxOES" alias="ClearDepthx">
-        <param name="depth" type="GLclampx"/>
-    </function>
-
-    <function name="Color4xOES" alias="Color4x">
-        <param name="red" type="GLfixed"/>
-        <param name="green" type="GLfixed"/>
-        <param name="blue" type="GLfixed"/>
-        <param name="alpha" type="GLfixed"/>
-    </function>
-
-    <function name="DepthRangexOES" alias="DepthRangex">
-        <param name="zNear" type="GLclampx"/>
-        <param name="zFar" type="GLclampx"/>
-    </function>
-
-    <function name="FogxOES" alias="Fogx">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="FogxvOES" alias="Fogxv">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="FrustumxOES" alias="Frustumx">
-        <param name="left" type="GLfixed"/>
-        <param name="right" type="GLfixed"/>
-        <param name="bottom" type="GLfixed"/>
-        <param name="top" type="GLfixed"/>
-        <param name="zNear" type="GLfixed"/>
-        <param name="zFar" type="GLfixed"/>
-    </function>
-
-    <function name="LightModelxOES" alias="LightModelx">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="LightModelxvOES" alias="LightModelxv">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="LightxOES" alias="Lightx">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="LightxvOES" alias="Lightxv">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="LineWidthxOES" alias="LineWidthx">
-        <param name="width" type="GLfixed"/>
-    </function>
-
-    <function name="LoadMatrixxOES" alias="LoadMatrixx">
-        <param name="m" type="const GLfixed *" count="16"/>
-    </function>
-
-    <function name="MaterialxOES" alias="Materialx">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="MaterialxvOES" alias="Materialxv">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="MultiTexCoord4xOES" alias="MultiTexCoord4x">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfixed"/>
-        <param name="t" type="GLfixed"/>
-        <param name="r" type="GLfixed"/>
-        <param name="q" type="GLfixed"/>
-    </function>
-
-    <function name="MultMatrixxOES" alias="MultMatrixx">
-        <param name="m" type="const GLfixed *" count="16"/>
-    </function>
-
-    <function name="Normal3xOES" alias="Normal3x">
-        <param name="nx" type="GLfixed"/>
-        <param name="ny" type="GLfixed"/>
-        <param name="nz" type="GLfixed"/>
-    </function>
-
-    <function name="OrthoxOES" alias="Orthox">
-        <param name="left" type="GLfixed"/>
-        <param name="right" type="GLfixed"/>
-        <param name="bottom" type="GLfixed"/>
-        <param name="top" type="GLfixed"/>
-        <param name="zNear" type="GLfixed"/>
-        <param name="zFar" type="GLfixed"/>
-    </function>
-
-    <function name="PointSizexOES" alias="PointSizex">
-        <param name="size" type="GLfixed"/>
-    </function>
-
-    <function name="PolygonOffsetxOES" alias="PolygonOffsetx">
-        <param name="factor" type="GLfixed"/>
-        <param name="units" type="GLfixed"/>
-    </function>
-
-    <function name="RotatexOES" alias="Rotatex">
-        <param name="angle" type="GLfixed"/>
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <function name="SampleCoveragexOES" alias="SampleCoveragex">
-        <param name="value" type="GLclampx"/>
-        <param name="invert" type="GLboolean"/>
-    </function>
-
-    <function name="ScalexOES" alias="Scalex">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <function name="TexEnvxOES" alias="TexEnvx">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="TexEnvxvOES" alias="TexEnvxv">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="TexParameterxOES" alias="TexParameterx">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="TranslatexOES" alias="Translatex">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <!-- additon to es1.1 -->
-    <function name="ClipPlanexOES" alias="ClipPlanex">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLfixed *" count="4"/>
-    </function>
-
-    <function name="GetClipPlanexOES" alias="GetClipPlanex">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLfixed *" output="true" count="4"/>
-    </function>
-
-    <function name="GetFixedvOES" alias="GetFixedv">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetLightxvOES" alias="GetLightxv">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetMaterialxvOES" alias="GetMaterialxv">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetTexEnvxvOES" alias="GetTexEnvxv">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetTexParameterxvOES" alias="GetTexParameterxv">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="PointParameterxOES" alias="PointParameterx">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="PointParameterxvOES" alias="PointParameterxv">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *"/>
-    </function>
-
-    <function name="TexParameterxvOES" alias="TexParameterxv">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-</category>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_framebuffer_object" number="10">
-    <enum name="NONE_OES"                                 value="0"/>
-    <enum name="INVALID_FRAMEBUFFER_OPERATION_OES"        value="0x0506"/>
-    <enum name="RGBA4_OES"                                value="0x8056"/>
-    <enum name="RGB5_A1_OES"                              value="0x8057"/>
-    <enum name="DEPTH_COMPONENT16_OES"                    value="0x81A5"/>
-
-    <enum name="MAX_RENDERBUFFER_SIZE_OES"                value="0x84E8"/>
-    <enum name="FRAMEBUFFER_BINDING_OES"                  value="0x8CA6"/>
-    <enum name="RENDERBUFFER_BINDING_OES"                 value="0x8CA7"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_OES"   value="0x8CD0"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_OES"   value="0x8CD1"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_OES" value="0x8CD2"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE_OES" value="0x8CD3"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_OES" value="0x8CD4"/>
-    <enum name="FRAMEBUFFER_COMPLETE_OES"                 value="0x8CD5"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_ATTACHMENT_OES"    value="0x8CD6"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_OES" value="0x8CD7"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_DIMENSIONS_OES"    value="0x8CD9"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_FORMATS_OES"       value="0x8CDA"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_OES"   value="0x8CDB"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_READ_BUFFER_OES"   value="0x8CDC"/>
-    <enum name="FRAMEBUFFER_UNSUPPORTED_OES"              value="0x8CDD"/>
-    <enum name="COLOR_ATTACHMENT0_OES"                    value="0x8CE0"/>
-    <enum name="DEPTH_ATTACHMENT_OES"                     value="0x8D00"/>
-    <enum name="STENCIL_ATTACHMENT_OES"                   value="0x8D20"/>
-    <enum name="FRAMEBUFFER_OES"                          value="0x8D40"/>
-    <enum name="RENDERBUFFER_OES"                         value="0x8D41"/>
-    <enum name="RENDERBUFFER_WIDTH_OES"                   value="0x8D42"/>
-    <enum name="RENDERBUFFER_HEIGHT_OES"                  value="0x8D43"/>
-    <enum name="RENDERBUFFER_INTERNAL_FORMAT_OES"         value="0x8D44"/>
-    <enum name="STENCIL_INDEX1_OES"                       value="0x8D46"/>
-    <enum name="STENCIL_INDEX4_OES"                       value="0x8D47"/>
-    <enum name="STENCIL_INDEX8_OES"                       value="0x8D48"/>
-    <enum name="RENDERBUFFER_RED_SIZE_OES"                value="0x8D50"/>
-    <enum name="RENDERBUFFER_GREEN_SIZE_OES"              value="0x8D51"/>
-    <enum name="RENDERBUFFER_BLUE_SIZE_OES"               value="0x8D52"/>
-    <enum name="RENDERBUFFER_ALPHA_SIZE_OES"              value="0x8D53"/>
-    <enum name="RENDERBUFFER_DEPTH_SIZE_OES"              value="0x8D54"/>
-    <enum name="RENDERBUFFER_STENCIL_SIZE_OES"            value="0x8D55"/>
-    <enum name="RGB565_OES"                               value="0x8D62"/>
-
-    <function name="BindFramebufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="framebuffer" type="GLuint"/>
-    </function>
-
-    <function name="BindRenderbufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="renderbuffer" type="GLuint"/>
-    </function>
-
-    <function name="CheckFramebufferStatusOES" offset="assign">
-        <param name="target" type="GLenum"/>
-	<return type="GLenum"/>
-    </function>
-
-    <function name="DeleteFramebuffersOES" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="framebuffers" type="const GLuint *" count="n"/>
-    </function>
-
-    <function name="DeleteRenderbuffersOES" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="renderbuffers" type="const GLuint *" count="n"/>
-    </function>
-
-    <function name="FramebufferRenderbufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="renderbuffertarget" type="GLenum"/>
-        <param name="renderbuffer" type="GLuint"/>
-    </function>
-
-    <function name="FramebufferTexture2DOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="textarget" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <param name="level" type="GLint"/>
-    </function>
-
-    <function name="GenerateMipmapOES" offset="assign">
-        <param name="target" type="GLenum"/>
-    </function>
-
-    <function name="GenFramebuffersOES" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="framebuffers" type="GLuint *" count="n" output="true"/>
-    </function>
-
-    <function name="GenRenderbuffersOES" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="renderbuffers" type="GLuint *" count="n" output="true"/>
-    </function>
-
-    <function name="GetFramebufferAttachmentParameterivOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true"/>
-    </function>
-
-    <function name="GetRenderbufferParameterivOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true"/>
-    </function>
-
-    <function name="IsFramebufferOES" offset="assign">
-        <param name="framebuffer" type="GLuint"/>
-	<return type="GLboolean"/>
-    </function>
-
-    <function name="IsRenderbufferOES" offset="assign">
-        <param name="renderbuffer" type="GLuint"/>
-	<return type="GLboolean"/>
-    </function>
-
-    <function name="RenderbufferStorageOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-    </function>
-</category>
-
-<!-- core addition to es1.1 -->
-<category name="GL_OES_matrix_get" number="11">
-    <enum name="MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES"   value="0x898D"/>
-    <enum name="PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES"  value="0x898E"/>
-    <enum name="TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES"	  value="0x898F"/>
-</category>
-
-<!-- optional for es1.1 -->
-<category name="GL_OES_matrix_palette" number="12">
-    <enum name="MAX_VERTEX_UNITS_OES"                     value="0x86A4"/>
-    <enum name="WEIGHT_ARRAY_TYPE_OES"                    value="0x86A9"/>
-    <enum name="WEIGHT_ARRAY_STRIDE_OES"                  value="0x86AA"/>
-    <enum name="WEIGHT_ARRAY_SIZE_OES"                    value="0x86AB"/>
-    <enum name="WEIGHT_ARRAY_POINTER_OES"                 value="0x86AC"/>
-    <enum name="WEIGHT_ARRAY_OES"                         value="0x86AD"/>
-    <enum name="MATRIX_PALETTE_OES"                       value="0x8840"/>
-    <enum name="MAX_PALETTE_MATRICES_OES"                 value="0x8842"/>
-    <enum name="CURRENT_PALETTE_MATRIX_OES"               value="0x8843"/>
-    <enum name="MATRIX_INDEX_ARRAY_OES"                   value="0x8844"/>
-    <enum name="MATRIX_INDEX_ARRAY_SIZE_OES"              value="0x8846"/>
-    <enum name="MATRIX_INDEX_ARRAY_TYPE_OES"              value="0x8847"/>
-    <enum name="MATRIX_INDEX_ARRAY_STRIDE_OES"            value="0x8848"/>
-    <enum name="MATRIX_INDEX_ARRAY_POINTER_OES"           value="0x8849"/>
-    <enum name="WEIGHT_ARRAY_BUFFER_BINDING_OES"          value="0x889E"/>
-    <enum name="MATRIX_INDEX_ARRAY_BUFFER_BINDING_OES"    value="0x8B9E"/>
-
-    <function name="CurrentPaletteMatrixOES">
-        <param name="matrixpaletteindex" type="GLuint"/>
-    </function>
-
-    <function name="LoadPaletteFromModelViewMatrixOES">
-    </function>
-
-    <function name="MatrixIndexPointerOES">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-    </function>
-
-    <function name="WeightPointerOES">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-    </function>
-</category>
-
-<!-- required for es1.1 -->
-<category name="GL_OES_point_size_array" number="14">
-    <enum name="POINT_SIZE_ARRAY_TYPE_OES"                value="0x898A"/>
-    <enum name="POINT_SIZE_ARRAY_STRIDE_OES"	          value="0x898B"/>
-    <enum name="POINT_SIZE_ARRAY_POINTER_OES"	          value="0x898C"/>
-    <enum name="POINT_SIZE_ARRAY_OES"                     value="0x8B9C"/>
-    <enum name="POINT_SIZE_ARRAY_BUFFER_BINDING_OES"	  value="0x8B9F"/>
-
-    <function name="PointSizePointerOES" offset="assign">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-    </function>
-</category>
-
-<!-- required for es1.1 -->
-<category name="GL_OES_point_sprite" number="15">
-    <enum name="POINT_SPRITE_OES"                         value="0x8861"/>
-    <enum name="COORD_REPLACE_OES"	                  value="0x8862"/>
-</category>
-
-<!-- optional for es1.0 -->
-<category name="GL_OES_query_matrix" number="16">
-    <function name="QueryMatrixxOES" offset="assign">
-        <param name="mantissa" type="GLfixed *" count="16" />
-        <param name="exponent" type="GLint *" count="16" />
-	<return type="GLbitfield"/>
-    </function>
-</category>
-
-<!-- required for es1.0 and later -->
-<category name="GL_OES_read_format" number="17">
-    <enum name="IMPLEMENTATION_COLOR_READ_TYPE_OES"       value="0x8B9A"/>
-    <enum name="IMPLEMENTATION_COLOR_READ_FORMAT_OES"     value="0x8B9B"/>
-</category>
-
-<!-- core addition to es1.0 and later -->
-<category name="GL_OES_single_precision" number="18">
-    <!-- additon to es1.0 -->
-    <function name="ClearDepthfOES" alias="ClearDepthf">
-        <param name="depth" type="GLclampf"/>
-    </function>
-
-    <function name="DepthRangefOES" alias="DepthRangef">
-        <param name="zNear" type="GLclampf"/>
-        <param name="zFar" type="GLclampf"/>
-    </function>
-
-    <function name="FrustumfOES" alias="Frustumf">
-        <param name="left" type="GLfloat"/>
-        <param name="right" type="GLfloat"/>
-        <param name="bottom" type="GLfloat"/>
-        <param name="top" type="GLfloat"/>
-        <param name="zNear" type="GLfloat"/>
-        <param name="zFar" type="GLfloat"/>
-    </function>
-
-    <function name="OrthofOES" alias="Orthof">
-        <param name="left" type="GLfloat"/>
-        <param name="right" type="GLfloat"/>
-        <param name="bottom" type="GLfloat"/>
-        <param name="top" type="GLfloat"/>
-        <param name="zNear" type="GLfloat"/>
-        <param name="zFar" type="GLfloat"/>
-    </function>
-
-    <!-- additon to es1.1 -->
-    <function name="ClipPlanefOES" alias="ClipPlanef">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLfloat *" count="4"/>
-    </function>
-
-    <function name="GetClipPlanefOES" alias="GetClipPlanef">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLfloat *" output="true" count="4"/>
-    </function>
-</category>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_texture_cube_map" number="20">
-    <enum name="TEXTURE_GEN_MODE_OES"                  value="0x2500"/>
-    <enum name="NORMAL_MAP_OES"                        value="0x8511"/>
-    <enum name="REFLECTION_MAP_OES"                    value="0x8512"/>
-    <enum name="TEXTURE_CUBE_MAP_OES"                  value="0x8513"/>
-    <enum name="TEXTURE_BINDING_CUBE_MAP_OES"          value="0x8514"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_X_OES"       value="0x8515"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_X_OES"       value="0x8516"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_Y_OES"       value="0x8517"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Y_OES"       value="0x8518"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_Z_OES"       value="0x8519"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Z_OES"       value="0x851A"/>
-    <enum name="MAX_CUBE_MAP_TEXTURE_SIZE_OES"         value="0x851C"/>
-    <enum name="TEXTURE_GEN_STR_OES"                   value="0x8D60"/>
-
-    <function name="GetTexGenfvOES" offset="279">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="133"/>
-    </function>
-
-    <function name="GetTexGenivOES" offset="280">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="134"/>
-    </function>
-
-    <function name="GetTexGenxvOES" offset="assign">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="TexGenfOES" offset="190">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="117"/>
-    </function>
-
-    <function name="TexGenfvOES" offset="191">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="118"/>
-    </function>
-
-    <function name="TexGeniOES" offset="192">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="119"/>
-    </function>
-
-    <function name="TexGenivOES" offset="193">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="120"/>
-    </function>
-
-    <function name="TexGenxOES" offset="assign">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-    </function>
-
-    <function name="TexGenxvOES" offset="assign">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-</category>
-
-<category name="GL_OES_texture_env_crossbar" number="21">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_texture_mirrored_repeat" number="22">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_EXT_texture_lod_bias" number="60">
-    <enum name="TEXTURE_FILTER_CONTROL_EXT"               value="0x8500"/>
-    <enum name="TEXTURE_LOD_BIAS_EXT"                     value="0x8501"/>
-    <enum name="MAX_TEXTURE_LOD_BIAS_EXT"                 value="0x84FD"/>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es2_API.xml b/src/mapi/glapi/gen-es/es2_API.xml
deleted file mode 100644
index f8af63b..0000000
--- a/src/mapi/glapi/gen-es/es2_API.xml
+++ /dev/null
@@ -1,294 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES 2.x API -->
-
-<OpenGLAPI>
-
-<xi:include href="base2_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- core subset of OpenGL 2.0 defined in OpenGL ES 2.0 -->
-<category name="core2.0">
-    <!-- addition to base1.0 -->
-    <enum name="NONE"                                     value="0x0"/>
-    <enum name="INT"                           count="4"  value="0x1404">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="UNSIGNED_INT"                  count="4"  value="0x1405">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="STENCIL_INDEX"                            value="0x1901"/>
-    <enum name="DEPTH_COMPONENT"                          value="0x1902"/>
-
-    <function name="TexImage2D" offset="183">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/> <!-- XXX the actual type is GLenum... -->
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/>
-        <glx rop="110" large="true"/>
-    </function>
-
-    <!-- addition to base1.1 -->
-    <enum name="RGBA4"                                    value="0x8056"/>
-    <enum name="RGB5_A1"                                  value="0x8057"/>
-
-    <!-- addition to base1.2 -->
-    <enum name="CONSTANT_COLOR"                           value="0x8001"/>
-    <enum name="ONE_MINUS_CONSTANT_COLOR"                 value="0x8002"/>
-    <enum name="CONSTANT_ALPHA"                           value="0x8003"/>
-    <enum name="ONE_MINUS_CONSTANT_ALPHA"                 value="0x8004"/>
-    <enum name="BLEND_COLOR"                   count="4"  value="0x8005">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FUNC_ADD"                                 value="0x8006"/>
-    <enum name="BLEND_EQUATION"                count="1"  value="0x8009">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FUNC_SUBTRACT"                            value="0x800A"/>
-    <enum name="FUNC_REVERSE_SUBTRACT"                    value="0x800B"/>
-
-    <function name="BlendColor" offset="336">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="4096"/>
-    </function>
-
-    <function name="BlendEquation" offset="337">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4097"/>
-    </function>
-
-    <!-- addition to base1.3 -->
-    <enum name="TEXTURE_CUBE_MAP"              count="1"  value="0x8513">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_BINDING_CUBE_MAP"      count="1"  value="0x8514">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_X"              value="0x8515"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_X"              value="0x8516"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_Y"              value="0x8517"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Y"              value="0x8518"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_Z"              value="0x8519"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Z"              value="0x851A"/>
-    <enum name="MAX_CUBE_MAP_TEXTURE_SIZE"     count="1"  value="0x851C">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <!-- addition to base1.4 -->
-    <enum name="BLEND_DST_RGB"                 count="1"  value="0x80C8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC_RGB"                 count="1"  value="0x80C9">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_DST_ALPHA"               count="1"  value="0x80CA">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC_ALPHA"               count="1"  value="0x80CB">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_COMPONENT16"                        value="0x81A5"/>
-    <enum name="MIRRORED_REPEAT"                          value="0x8370"/>
-    <enum name="INCR_WRAP"                                value="0x8507"/>
-    <enum name="DECR_WRAP"                                value="0x8508"/>
-
-    <function name="BlendFuncSeparate" offset="assign">
-        <param name="sfactorRGB" type="GLenum"/>
-        <param name="dfactorRGB" type="GLenum"/>
-        <param name="sfactorAlpha" type="GLenum"/>
-        <param name="dfactorAlpha" type="GLenum"/>
-        <glx rop="4134"/>
-    </function>
-
-    <!-- addition to base1.5 -->
-    <enum name="VERTEX_ATTRIB_ARRAY_BUFFER_BINDING" count="1" value="0x889F">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="STREAM_DRAW"                              value="0x88E0"/>
-
-    <!-- addition to base2.0 -->
-    <!-- base2.0 should have everything defined -->
-</category>
-
-<!-- OpenGL ES 2.0 -->
-<category name="es2.0">
-    <!-- addition to core2.0 -->
-    <enum name="LOW_FLOAT"                                    value="0x8DF0"/>
-    <enum name="MEDIUM_FLOAT"                                 value="0x8DF1"/>
-    <enum name="HIGH_FLOAT"                                   value="0x8DF2"/>
-    <enum name="LOW_INT"                                      value="0x8DF3"/>
-    <enum name="MEDIUM_INT"                                   value="0x8DF4"/>
-    <enum name="HIGH_INT"                                     value="0x8DF5"/>
-    <enum name="SHADER_BINARY_FORMATS"                        value="0x8DF8"/>
-    <enum name="NUM_SHADER_BINARY_FORMATS"                    value="0x8DF9"/>
-    <enum name="SHADER_COMPILER"                              value="0x8DFA"/>
-    <enum name="MAX_VERTEX_UNIFORM_VECTORS"                   value="0x8DFB"/>
-    <enum name="MAX_VARYING_VECTORS"                          value="0x8DFC"/>
-    <enum name="MAX_FRAGMENT_UNIFORM_VECTORS"                 value="0x8DFD"/>
-
-    <function name="GetShaderPrecisionFormat" offset="assign">
-        <param name="shadertype" type="GLenum"/>
-        <param name="precisiontype" type="GLenum"/>
-        <param name="range" type="GLint *"/>
-        <param name="precision" type="GLint *"/>
-    </function>
-
-    <function name="ReleaseShaderCompiler" offset="assign">
-    </function>
-
-    <function name="ShaderBinary" offset="assign">
-        <param name="n" type="GLsizei"/>
-        <param name="shaders" type="const GLuint *"/>
-        <param name="binaryformat" type="GLenum"/>
-        <param name="binary" type="const GLvoid *"/>
-        <param name="length" type="GLsizei"/>
-    </function>
-
-    <!-- from GL_OES_fixed_point -->
-    <enum name="FIXED"                                    value="0x140C"/>
-    <type name="fixed"   size="4"                                    />
-
-    <!-- from GL_OES_framebuffer_object -->
-    <enum name="INVALID_FRAMEBUFFER_OPERATION"                value="0x0506"/>
-    <enum name="MAX_RENDERBUFFER_SIZE"                        value="0x84E8"/>
-    <enum name="FRAMEBUFFER_BINDING"                          value="0x8CA6"/>
-    <enum name="RENDERBUFFER_BINDING"                         value="0x8CA7"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE"           value="0x8CD0"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_NAME"           value="0x8CD1"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL"         value="0x8CD2"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE" value="0x8CD3"/>
-    <enum name="FRAMEBUFFER_COMPLETE"                         value="0x8CD5"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_ATTACHMENT"            value="0x8CD6"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT"    value="0x8CD7"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_DIMENSIONS"            value="0x8CD9"/>
-    <enum name="FRAMEBUFFER_UNSUPPORTED"                      value="0x8CDD"/>
-    <enum name="COLOR_ATTACHMENT0"                            value="0x8CE0"/>
-    <enum name="DEPTH_ATTACHMENT"                             value="0x8D00"/>
-    <enum name="STENCIL_ATTACHMENT"                           value="0x8D20"/>
-    <enum name="FRAMEBUFFER"                                  value="0x8D40"/>
-    <enum name="RENDERBUFFER"                                 value="0x8D41"/>
-    <enum name="RENDERBUFFER_WIDTH"                           value="0x8D42"/>
-    <enum name="RENDERBUFFER_HEIGHT"                          value="0x8D43"/>
-    <enum name="RENDERBUFFER_INTERNAL_FORMAT"                 value="0x8D44"/>
-    <enum name="STENCIL_INDEX8"                               value="0x8D48"/>
-    <enum name="RENDERBUFFER_RED_SIZE"                        value="0x8D50"/>
-    <enum name="RENDERBUFFER_GREEN_SIZE"                      value="0x8D51"/>
-    <enum name="RENDERBUFFER_BLUE_SIZE"                       value="0x8D52"/>
-    <enum name="RENDERBUFFER_ALPHA_SIZE"                      value="0x8D53"/>
-    <enum name="RENDERBUFFER_DEPTH_SIZE"                      value="0x8D54"/>
-    <enum name="RENDERBUFFER_STENCIL_SIZE"                    value="0x8D55"/>
-    <enum name="RGB565"                                       value="0x8D62"/>
-
-    <function name="BindFramebuffer" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="framebuffer" type="GLuint"/>
-    </function>
-
-    <function name="BindRenderbuffer" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="renderbuffer" type="GLuint"/>
-    </function>
-
-    <function name="CheckFramebufferStatus" offset="assign">
-        <param name="target" type="GLenum"/>
-	<return type="GLenum"/>
-    </function>
-
-    <function name="DeleteFramebuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="framebuffers" type="const GLuint *" count="n"/>
-    </function>
-
-    <function name="DeleteRenderbuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="renderbuffers" type="const GLuint *" count="n"/>
-    </function>
-
-    <function name="FramebufferRenderbuffer" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="renderbuffertarget" type="GLenum"/>
-        <param name="renderbuffer" type="GLuint"/>
-    </function>
-
-    <function name="FramebufferTexture2D" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="textarget" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <param name="level" type="GLint"/>
-    </function>
-
-    <function name="GenerateMipmap" offset="assign">
-        <param name="target" type="GLenum"/>
-    </function>
-
-    <function name="GenFramebuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="framebuffers" type="GLuint *" count="n" output="true"/>
-    </function>
-
-    <function name="GenRenderbuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="renderbuffers" type="GLuint *" count="n" output="true"/>
-    </function>
-
-    <function name="GetFramebufferAttachmentParameteriv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true"/>
-    </function>
-
-    <function name="GetRenderbufferParameteriv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true"/>
-    </function>
-
-    <function name="IsFramebuffer" offset="assign">
-        <param name="framebuffer" type="GLuint"/>
-	<return type="GLboolean"/>
-    </function>
-
-    <function name="IsRenderbuffer" offset="assign">
-        <param name="renderbuffer" type="GLuint"/>
-	<return type="GLboolean"/>
-    </function>
-
-    <function name="RenderbufferStorage" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-    </function>
-
-    <!-- from GL_OES_read_format -->
-    <enum name="IMPLEMENTATION_COLOR_READ_TYPE"           value="0x8B9A"/>
-    <enum name="IMPLEMENTATION_COLOR_READ_FORMAT"         value="0x8B9B"/>
-
-    <!-- from GL_OES_single_precision -->
-    <function name="ClearDepthf" offset="assign">
-        <param name="depth" type="GLclampf"/>
-    </function>
-
-    <function name="DepthRangef" offset="assign">
-        <param name="zNear" type="GLclampf"/>
-        <param name="zFar" type="GLclampf"/>
-    </function>
-</category>
-
-<xi:include href="es2_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-<xi:include href="es2_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es2_COMPAT.xml b/src/mapi/glapi/gen-es/es2_COMPAT.xml
deleted file mode 100644
index 1bd3569..0000000
--- a/src/mapi/glapi/gen-es/es2_COMPAT.xml
+++ /dev/null
@@ -1,368 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<OpenGLAPI>
-
-<!-- This file defines the functions that are needed by Mesa.  It
-     makes sure the generated glapi headers are compatible with Mesa.
-     It mainly consists of missing functions and aliases in OpenGL ES.
--->
-
-<xi:include href="es_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- except for those defined by es_COMPAT.xml, these are also needed -->
-<category name="compat">
-    <!-- OpenGL 1.0 -->
-    <function name="Color4f" offset="29" vectorequiv="Color4fv" static_dispatch="false">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-        <param name="alpha" type="GLfloat"/>
-    </function>
-
-    <function name="Color4ub" offset="35" vectorequiv="Color4ubv" static_dispatch="false">
-        <param name="red" type="GLubyte"/>
-        <param name="green" type="GLubyte"/>
-        <param name="blue" type="GLubyte"/>
-        <param name="alpha" type="GLubyte"/>
-    </function>
-
-    <function name="Normal3f" offset="56" vectorequiv="Normal3fv" static_dispatch="false">
-        <param name="nx" type="GLfloat"/>
-        <param name="ny" type="GLfloat"/>
-        <param name="nz" type="GLfloat"/>
-    </function>
-
-    <function name="Fogf" offset="153" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="80"/>
-    </function>
-
-    <function name="Fogfv" offset="154" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="81"/>
-    </function>
-
-    <function name="Lightf" offset="159" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="86"/>
-    </function>
-
-    <function name="Lightfv" offset="160" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="87"/>
-    </function>
-
-    <function name="LightModelf" offset="163" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="90"/>
-    </function>
-
-    <function name="LightModelfv" offset="164" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="91"/>
-    </function>
-
-    <function name="Materialf" offset="169" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="96"/>
-    </function>
-
-    <function name="Materialfv" offset="170" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="97"/>
-    </function>
-
-    <function name="PointSize" offset="173" static_dispatch="false">
-        <param name="size" type="GLfloat"/>
-        <glx rop="100"/>
-    </function>
-
-    <function name="ShadeModel" offset="177" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="104"/>
-    </function>
-
-    <function name="TexEnvf" offset="184" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="111"/>
-    </function>
-
-    <function name="TexEnvfv" offset="185" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="112"/>
-    </function>
-
-    <function name="TexEnvi" offset="186" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="113"/>
-    </function>
-
-    <function name="TexEnviv" offset="187" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="114"/>
-    </function>
-
-    <function name="TexGenf" offset="190" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="117"/>
-    </function>
-
-    <function name="TexGenfv" offset="191" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="118"/>
-    </function>
-
-    <function name="TexGeni" offset="192" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="119"/>
-    </function>
-
-    <function name="TexGeniv" offset="193" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="120"/>
-    </function>
-
-    <function name="AlphaFunc" offset="240" static_dispatch="false">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampf"/>
-        <glx rop="159"/>
-    </function>
-
-    <function name="LogicOp" offset="242" static_dispatch="false">
-        <param name="opcode" type="GLenum"/>
-        <glx rop="161"/>
-    </function>
-
-    <function name="GetLightfv" offset="264" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="118"/>
-    </function>
-
-    <function name="GetMaterialfv" offset="269" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="123"/>
-    </function>
-
-    <function name="GetTexEnvfv" offset="276" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="130"/>
-    </function>
-
-    <function name="GetTexEnviv" offset="277" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="131"/>
-    </function>
-
-    <function name="GetTexGenfv" offset="279" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="133"/>
-    </function>
-
-    <function name="GetTexGeniv" offset="280" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="134"/>
-    </function>
-
-    <function name="LoadIdentity" offset="290" static_dispatch="false">
-        <glx rop="176"/>
-    </function>
-
-    <function name="LoadMatrixf" offset="291" static_dispatch="false">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="177"/>
-    </function>
-
-    <function name="MatrixMode" offset="293" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="179"/>
-    </function>
-
-    <function name="MultMatrixf" offset="294" static_dispatch="false">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="180"/>
-    </function>
-
-    <function name="PopMatrix" offset="297" static_dispatch="false">
-        <glx rop="183"/>
-    </function>
-
-    <function name="PushMatrix" offset="298" static_dispatch="false">
-        <glx rop="184"/>
-    </function>
-
-    <function name="Rotatef" offset="300" static_dispatch="false">
-        <param name="angle" type="GLfloat"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="186"/>
-    </function>
-
-    <function name="Scalef" offset="302" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="188"/>
-    </function>
-
-    <function name="Translatef" offset="304" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="190"/>
-    </function>
-
-    <!-- OpenGL 1.1 -->
-    <function name="ColorPointer" offset="308" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="DisableClientState" offset="309" static_dispatch="false">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="EnableClientState" offset="313" static_dispatch="false">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="NormalPointer" offset="318" static_dispatch="false">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="TexCoordPointer" offset="320" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="VertexPointer" offset="321" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="GetPointerv" offset="329" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLvoid **" output="true"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!-- OpenGL 1.2 -->
-    <function name="TexImage3D" alias="TexImage3DOES" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/>
-        <glx rop="4114" large="true"/>
-    </function>
-
-    <function name="TexSubImage3D" alias="TexSubImage3DOES" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4115" large="true"/>
-    </function>
-
-    <function name="CopyTexSubImage3D" alias="CopyTexSubImage3DOES" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4123"/>
-    </function>
-
-    <!-- GL_ARB_multitexture -->
-    <function name="ActiveTextureARB" alias="ActiveTexture" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx rop="197"/>
-    </function>
-
-    <function name="ClientActiveTextureARB" offset="375" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="MultiTexCoord4fARB" offset="402" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es2_EXT.xml b/src/mapi/glapi/gen-es/es2_EXT.xml
deleted file mode 100644
index 4a67952..0000000
--- a/src/mapi/glapi/gen-es/es2_EXT.xml
+++ /dev/null
@@ -1,162 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES 2.x extensions -->
-
-<OpenGLAPI>
-
-<xi:include href="es_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<category name="GL_OES_texture_3D" number="34">
-    <enum name="TEXTURE_BINDING_3D_OES"                   value="0x806A"/>
-    <enum name="TEXTURE_3D_OES"                           value="0x806F"/>
-    <enum name="TEXTURE_WRAP_R_OES"                       value="0x8072"/>
-    <enum name="MAX_3D_TEXTURE_SIZE_OES"                  value="0x8073"/>
-    <enum name="SAMPLER_3D_OES"                           value="0x8B5F"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_OES" value="0x8CD4"/>
-
-    <function name="CompressedTexImage3DOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="imageSize" type="GLsizei" counter="true"/>
-        <param name="data" type="const GLvoid *" count="imageSize"/>
-        <glx rop="216" handcode="client"/>
-    </function>
-
-    <function name="CompressedTexSubImage3DOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="imageSize" type="GLsizei" counter="true"/>
-        <param name="data" type="const GLvoid *" count="imageSize"/>
-        <glx rop="219" handcode="client"/>
-    </function>
-
-    <function name="CopyTexSubImage3DOES" offset="373">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4123"/>
-    </function>
-
-    <function name="FramebufferTexture3DOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="textarget" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <param name="level" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <glx rop="4323"/>
-    </function>
-
-    <function name="TexImage3DOES" offset="371">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/>
-        <glx rop="4114" large="true"/>
-    </function>
-
-    <function name="TexSubImage3DOES" offset="372">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4115" large="true"/>
-    </function>
-</category>
-
-<!-- the other name is OES_texture_float_linear -->
-<category name="OES_texture_half_float_linear" number="35">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<!-- the other name is OES_texture_float -->
-<category name="OES_texture_half_float" number="36">
-    <enum name="HALF_FLOAT_OES"                           value="0x8D61"/>
-</category>
-
-<category name="GL_OES_texture_npot" number="37">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_vertex_half_float" number="38">
-    <enum name="HALF_FLOAT_OES"                           value="0x8D61"/>
-</category>
-
-<category name="GL_EXT_texture_type_2_10_10_10_REV" number="42">
-    <enum name="UNSIGNED_INT_2_10_10_10_REV_EXT"          value="0x8368"/>
-</category>
-
-<category name="GL_OES_packed_depth_stencil" number="43">
-    <enum name="DEPTH_STENCIL_OES"                        value="0x84F9"/>
-    <enum name="UNSIGNED_INT_24_8_OES"                    value="0x84FA"/>
-    <enum name="DEPTH24_STENCIL8_OES"                     value="0x88F0"/>
-</category>
-
-<category name="GL_OES_depth_texture" number="44">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_standard_derivatives" number="45">
-    <enum name="FRAGMENT_SHADER_DERIVATIVE_HINT_OES"      value="0x8B8B"/>
-</category>
-
-<category name="GL_OES_vertex_type_10_10_10_2" number="46">
-    <enum name="UNSIGNED_INT_10_10_10_2_OES"              value="0x8DF6"/>
-    <enum name="INT_10_10_10_2_OES"                       value="0x8DF7"/>
-</category>
-
-<category name="GL_OES_get_program_binary" number="47">
-    <enum name="PROGRAM_BINARY_LENGTH_OES"                value="0x8741"/>
-    <enum name="NUM_PROGRAM_BINARY_FORMATS_OES"           value="0x87FE"/>
-    <enum name="PROGRAM_BINARY_FORMATS_OES"               value="0x87FF"/>
-
-    <function name="GetProgramBinaryOES" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *"/>
-        <param name="binaryFormat" type="GLenum *"/>
-        <param name="binary" type="GLvoid *"/>
-    </function>
-
-    <function name="ProgramBinaryOES" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="binaryFormat" type="GLenum"/>
-        <param name="binary" type="const GLvoid *"/>
-        <param name="length" type="GLint"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es_COMPAT.xml b/src/mapi/glapi/gen-es/es_COMPAT.xml
deleted file mode 100644
index 7c72926..0000000
--- a/src/mapi/glapi/gen-es/es_COMPAT.xml
+++ /dev/null
@@ -1,2646 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<OpenGLAPI>
-
-<!-- This file defines the following categories
-
-         a subset of 1.0
-         a subset of 1.1
-         a subset of 1.2
-         a subset of GL_ARB_multitexture
-         GL_APPLE_vertex_array_object
-
-     to make sure the generated glapi headers are compatible with Mesa.
-     It is included by es1_COMPAT.xml and es2_COMPAT.xml.
--->
-
-<category name="1.0">
-    <type name="double"  size="8"  float="true"    glx_name="FLOAT64"/>
-    <type name="clampd"  size="8"  float="true"    glx_name="FLOAT64"/>
-
-    <type name="float"   size="4"  float="true"    glx_name="FLOAT32"/>
-    <type name="clampf"  size="4"  float="true"    glx_name="FLOAT32"/>
-
-    <type name="int"     size="4"                  glx_name="CARD32"/>
-    <type name="uint"    size="4"  unsigned="true" glx_name="CARD32"/>
-    <type name="sizei"   size="4"  unsigned="true" glx_name="CARD32"/>
-    <type name="enum"    size="4"  unsigned="true" glx_name="ENUM"/>
-    <type name="bitfield" size="4" unsigned="true" glx_name="CARD32"/>
-
-    <type name="short"   size="2"                  glx_name="CARD16"/>
-    <type name="ushort"  size="2"  unsigned="true" glx_name="CARD16"/>
-
-    <type name="byte"    size="1"                  glx_name="CARD8"/>
-    <type name="ubyte"   size="1"  unsigned="true" glx_name="CARD8"/>
-    <type name="boolean" size="1"  unsigned="true" glx_name="CARD8"/>
-
-    <type name="void"    size="1"/>
-
-    <function name="NewList" offset="0" static_dispatch="false">
-        <param name="list" type="GLuint"/>
-        <param name="mode" type="GLenum"/>
-        <glx sop="101"/>
-    </function>
-
-    <function name="EndList" offset="1" static_dispatch="false">
-        <glx sop="102"/>
-    </function>
-
-    <function name="CallList" offset="2" static_dispatch="false">
-        <param name="list" type="GLuint"/>
-        <glx rop="1"/>
-    </function>
-
-    <function name="CallLists" offset="3" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="type" type="GLenum"/>
-        <param name="lists" type="const GLvoid *" variable_param="type" count="n"/>
-        <glx rop="2" large="true"/>
-    </function>
-
-    <function name="DeleteLists" offset="4" static_dispatch="false">
-        <param name="list" type="GLuint"/>
-        <param name="range" type="GLsizei"/>
-        <glx sop="103"/>
-    </function>
-
-    <function name="GenLists" offset="5" static_dispatch="false">
-        <param name="range" type="GLsizei"/>
-        <return type="GLuint"/>
-        <glx sop="104"/>
-    </function>
-
-    <function name="ListBase" offset="6" static_dispatch="false">
-        <param name="base" type="GLuint"/>
-        <glx rop="3"/>
-    </function>
-
-    <function name="Begin" offset="7" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4"/>
-    </function>
-
-    <function name="Bitmap" offset="8" static_dispatch="false">
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="xorig" type="GLfloat"/>
-        <param name="yorig" type="GLfloat"/>
-        <param name="xmove" type="GLfloat"/>
-        <param name="ymove" type="GLfloat"/>
-        <param name="bitmap" type="const GLubyte *" img_width="width" img_height="height" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP" img_target="0" img_pad_dimensions="false"/>
-        <glx rop="5" large="true"/>
-    </function>
-
-    <function name="Color3b" offset="9" vectorequiv="Color3bv" static_dispatch="false">
-        <param name="red" type="GLbyte"/>
-        <param name="green" type="GLbyte"/>
-        <param name="blue" type="GLbyte"/>
-    </function>
-
-    <function name="Color3bv" offset="10" static_dispatch="false">
-        <param name="v" type="const GLbyte *" count="3"/>
-        <glx rop="6"/>
-    </function>
-
-    <function name="Color3d" offset="11" vectorequiv="Color3dv" static_dispatch="false">
-        <param name="red" type="GLdouble"/>
-        <param name="green" type="GLdouble"/>
-        <param name="blue" type="GLdouble"/>
-    </function>
-
-    <function name="Color3dv" offset="12" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="7"/>
-    </function>
-
-    <function name="Color3f" offset="13" vectorequiv="Color3fv" static_dispatch="false">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-    </function>
-
-    <function name="Color3fv" offset="14" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="8"/>
-    </function>
-
-    <function name="Color3i" offset="15" vectorequiv="Color3iv" static_dispatch="false">
-        <param name="red" type="GLint"/>
-        <param name="green" type="GLint"/>
-        <param name="blue" type="GLint"/>
-    </function>
-
-    <function name="Color3iv" offset="16" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="9"/>
-    </function>
-
-    <function name="Color3s" offset="17" vectorequiv="Color3sv" static_dispatch="false">
-        <param name="red" type="GLshort"/>
-        <param name="green" type="GLshort"/>
-        <param name="blue" type="GLshort"/>
-    </function>
-
-    <function name="Color3sv" offset="18" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="10"/>
-    </function>
-
-    <function name="Color3ub" offset="19" vectorequiv="Color3ubv" static_dispatch="false">
-        <param name="red" type="GLubyte"/>
-        <param name="green" type="GLubyte"/>
-        <param name="blue" type="GLubyte"/>
-    </function>
-
-    <function name="Color3ubv" offset="20" static_dispatch="false">
-        <param name="v" type="const GLubyte *" count="3"/>
-        <glx rop="11"/>
-    </function>
-
-    <function name="Color3ui" offset="21" vectorequiv="Color3uiv" static_dispatch="false">
-        <param name="red" type="GLuint"/>
-        <param name="green" type="GLuint"/>
-        <param name="blue" type="GLuint"/>
-    </function>
-
-    <function name="Color3uiv" offset="22" static_dispatch="false">
-        <param name="v" type="const GLuint *" count="3"/>
-        <glx rop="12"/>
-    </function>
-
-    <function name="Color3us" offset="23" vectorequiv="Color3usv" static_dispatch="false">
-        <param name="red" type="GLushort"/>
-        <param name="green" type="GLushort"/>
-        <param name="blue" type="GLushort"/>
-    </function>
-
-    <function name="Color3usv" offset="24" static_dispatch="false">
-        <param name="v" type="const GLushort *" count="3"/>
-        <glx rop="13"/>
-    </function>
-
-    <function name="Color4b" offset="25" vectorequiv="Color4bv" static_dispatch="false">
-        <param name="red" type="GLbyte"/>
-        <param name="green" type="GLbyte"/>
-        <param name="blue" type="GLbyte"/>
-        <param name="alpha" type="GLbyte"/>
-    </function>
-
-    <function name="Color4bv" offset="26" static_dispatch="false">
-        <param name="v" type="const GLbyte *" count="4"/>
-        <glx rop="14"/>
-    </function>
-
-    <function name="Color4d" offset="27" vectorequiv="Color4dv" static_dispatch="false">
-        <param name="red" type="GLdouble"/>
-        <param name="green" type="GLdouble"/>
-        <param name="blue" type="GLdouble"/>
-        <param name="alpha" type="GLdouble"/>
-    </function>
-
-    <function name="Color4dv" offset="28" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="15"/>
-    </function>
-
-    <!--function name="Color4f" offset="29" vectorequiv="Color4fv" static_dispatch="false">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-        <param name="alpha" type="GLfloat"/>
-    </function-->
-
-    <function name="Color4fv" offset="30" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="16"/>
-    </function>
-
-    <function name="Color4i" offset="31" vectorequiv="Color4iv" static_dispatch="false">
-        <param name="red" type="GLint"/>
-        <param name="green" type="GLint"/>
-        <param name="blue" type="GLint"/>
-        <param name="alpha" type="GLint"/>
-    </function>
-
-    <function name="Color4iv" offset="32" static_dispatch="false">
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="17"/>
-    </function>
-
-    <function name="Color4s" offset="33" vectorequiv="Color4sv" static_dispatch="false">
-        <param name="red" type="GLshort"/>
-        <param name="green" type="GLshort"/>
-        <param name="blue" type="GLshort"/>
-        <param name="alpha" type="GLshort"/>
-    </function>
-
-    <function name="Color4sv" offset="34" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="18"/>
-    </function>
-
-    <!--function name="Color4ub" offset="35" vectorequiv="Color4ubv" static_dispatch="false">
-        <param name="red" type="GLubyte"/>
-        <param name="green" type="GLubyte"/>
-        <param name="blue" type="GLubyte"/>
-        <param name="alpha" type="GLubyte"/>
-    </function-->
-
-    <function name="Color4ubv" offset="36" static_dispatch="false">
-        <param name="v" type="const GLubyte *" count="4"/>
-        <glx rop="19"/>
-    </function>
-
-    <function name="Color4ui" offset="37" vectorequiv="Color4uiv" static_dispatch="false">
-        <param name="red" type="GLuint"/>
-        <param name="green" type="GLuint"/>
-        <param name="blue" type="GLuint"/>
-        <param name="alpha" type="GLuint"/>
-    </function>
-
-    <function name="Color4uiv" offset="38" static_dispatch="false">
-        <param name="v" type="const GLuint *" count="4"/>
-        <glx rop="20"/>
-    </function>
-
-    <function name="Color4us" offset="39" vectorequiv="Color4usv" static_dispatch="false">
-        <param name="red" type="GLushort"/>
-        <param name="green" type="GLushort"/>
-        <param name="blue" type="GLushort"/>
-        <param name="alpha" type="GLushort"/>
-    </function>
-
-    <function name="Color4usv" offset="40" static_dispatch="false">
-        <param name="v" type="const GLushort *" count="4"/>
-        <glx rop="21"/>
-    </function>
-
-    <function name="EdgeFlag" offset="41" vectorequiv="EdgeFlagv" static_dispatch="false">
-        <param name="flag" type="GLboolean"/>
-    </function>
-
-    <function name="EdgeFlagv" offset="42" static_dispatch="false">
-        <param name="flag" type="const GLboolean *" count="1"/>
-        <glx rop="22"/>
-    </function>
-
-    <function name="End" offset="43" static_dispatch="false">
-        <glx rop="23"/>
-    </function>
-
-    <function name="Indexd" offset="44" vectorequiv="Indexdv" static_dispatch="false">
-        <param name="c" type="GLdouble"/>
-    </function>
-
-    <function name="Indexdv" offset="45" static_dispatch="false">
-        <param name="c" type="const GLdouble *" count="1"/>
-        <glx rop="24"/>
-    </function>
-
-    <function name="Indexf" offset="46" vectorequiv="Indexfv" static_dispatch="false">
-        <param name="c" type="GLfloat"/>
-    </function>
-
-    <function name="Indexfv" offset="47" static_dispatch="false">
-        <param name="c" type="const GLfloat *" count="1"/>
-        <glx rop="25"/>
-    </function>
-
-    <function name="Indexi" offset="48" vectorequiv="Indexiv" static_dispatch="false">
-        <param name="c" type="GLint"/>
-    </function>
-
-    <function name="Indexiv" offset="49" static_dispatch="false">
-        <param name="c" type="const GLint *" count="1"/>
-        <glx rop="26"/>
-    </function>
-
-    <function name="Indexs" offset="50" vectorequiv="Indexsv" static_dispatch="false">
-        <param name="c" type="GLshort"/>
-    </function>
-
-    <function name="Indexsv" offset="51" static_dispatch="false">
-        <param name="c" type="const GLshort *" count="1"/>
-        <glx rop="27"/>
-    </function>
-
-    <function name="Normal3b" offset="52" vectorequiv="Normal3bv" static_dispatch="false">
-        <param name="nx" type="GLbyte"/>
-        <param name="ny" type="GLbyte"/>
-        <param name="nz" type="GLbyte"/>
-    </function>
-
-    <function name="Normal3bv" offset="53" static_dispatch="false">
-        <param name="v" type="const GLbyte *" count="3"/>
-        <glx rop="28"/>
-    </function>
-
-    <function name="Normal3d" offset="54" vectorequiv="Normal3dv" static_dispatch="false">
-        <param name="nx" type="GLdouble"/>
-        <param name="ny" type="GLdouble"/>
-        <param name="nz" type="GLdouble"/>
-    </function>
-
-    <function name="Normal3dv" offset="55" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="29"/>
-    </function>
-
-    <!--function name="Normal3f" offset="56" vectorequiv="Normal3fv" static_dispatch="false">
-        <param name="nx" type="GLfloat"/>
-        <param name="ny" type="GLfloat"/>
-        <param name="nz" type="GLfloat"/>
-    </function-->
-
-    <function name="Normal3fv" offset="57" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="30"/>
-    </function>
-
-    <function name="Normal3i" offset="58" vectorequiv="Normal3iv" static_dispatch="false">
-        <param name="nx" type="GLint"/>
-        <param name="ny" type="GLint"/>
-        <param name="nz" type="GLint"/>
-    </function>
-
-    <function name="Normal3iv" offset="59" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="31"/>
-    </function>
-
-    <function name="Normal3s" offset="60" vectorequiv="Normal3sv" static_dispatch="false">
-        <param name="nx" type="GLshort"/>
-        <param name="ny" type="GLshort"/>
-        <param name="nz" type="GLshort"/>
-    </function>
-
-    <function name="Normal3sv" offset="61" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="32"/>
-    </function>
-
-    <function name="RasterPos2d" offset="62" vectorequiv="RasterPos2dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-    </function>
-
-    <function name="RasterPos2dv" offset="63" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="2"/>
-        <glx rop="33"/>
-    </function>
-
-    <function name="RasterPos2f" offset="64" vectorequiv="RasterPos2fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-    </function>
-
-    <function name="RasterPos2fv" offset="65" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="2"/>
-        <glx rop="34"/>
-    </function>
-
-    <function name="RasterPos2i" offset="66" vectorequiv="RasterPos2iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-    </function>
-
-    <function name="RasterPos2iv" offset="67" static_dispatch="false">
-        <param name="v" type="const GLint *" count="2"/>
-        <glx rop="35"/>
-    </function>
-
-    <function name="RasterPos2s" offset="68" vectorequiv="RasterPos2sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-    </function>
-
-    <function name="RasterPos2sv" offset="69" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="2"/>
-        <glx rop="36"/>
-    </function>
-
-    <function name="RasterPos3d" offset="70" vectorequiv="RasterPos3dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-    </function>
-
-    <function name="RasterPos3dv" offset="71" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="37"/>
-    </function>
-
-    <function name="RasterPos3f" offset="72" vectorequiv="RasterPos3fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-    </function>
-
-    <function name="RasterPos3fv" offset="73" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="38"/>
-    </function>
-
-    <function name="RasterPos3i" offset="74" vectorequiv="RasterPos3iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-    </function>
-
-    <function name="RasterPos3iv" offset="75" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="39"/>
-    </function>
-
-    <function name="RasterPos3s" offset="76" vectorequiv="RasterPos3sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-    </function>
-
-    <function name="RasterPos3sv" offset="77" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="40"/>
-    </function>
-
-    <function name="RasterPos4d" offset="78" vectorequiv="RasterPos4dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <param name="w" type="GLdouble"/>
-    </function>
-
-    <function name="RasterPos4dv" offset="79" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="41"/>
-    </function>
-
-    <function name="RasterPos4f" offset="80" vectorequiv="RasterPos4fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <param name="w" type="GLfloat"/>
-    </function>
-
-    <function name="RasterPos4fv" offset="81" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="42"/>
-    </function>
-
-    <function name="RasterPos4i" offset="82" vectorequiv="RasterPos4iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-        <param name="w" type="GLint"/>
-    </function>
-
-    <function name="RasterPos4iv" offset="83" static_dispatch="false">
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="43"/>
-    </function>
-
-    <function name="RasterPos4s" offset="84" vectorequiv="RasterPos4sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-        <param name="w" type="GLshort"/>
-    </function>
-
-    <function name="RasterPos4sv" offset="85" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="44"/>
-    </function>
-
-    <function name="Rectd" offset="86" vectorequiv="Rectdv" static_dispatch="false">
-        <param name="x1" type="GLdouble"/>
-        <param name="y1" type="GLdouble"/>
-        <param name="x2" type="GLdouble"/>
-        <param name="y2" type="GLdouble"/>
-    </function>
-
-    <function name="Rectdv" offset="87" static_dispatch="false">
-        <param name="v1" type="const GLdouble *" count="2"/>
-        <param name="v2" type="const GLdouble *" count="2"/>
-        <glx rop="45"/>
-    </function>
-
-    <function name="Rectf" offset="88" vectorequiv="Rectfv" static_dispatch="false">
-        <param name="x1" type="GLfloat"/>
-        <param name="y1" type="GLfloat"/>
-        <param name="x2" type="GLfloat"/>
-        <param name="y2" type="GLfloat"/>
-    </function>
-
-    <function name="Rectfv" offset="89" static_dispatch="false">
-        <param name="v1" type="const GLfloat *" count="2"/>
-        <param name="v2" type="const GLfloat *" count="2"/>
-        <glx rop="46"/>
-    </function>
-
-    <function name="Recti" offset="90" vectorequiv="Rectiv" static_dispatch="false">
-        <param name="x1" type="GLint"/>
-        <param name="y1" type="GLint"/>
-        <param name="x2" type="GLint"/>
-        <param name="y2" type="GLint"/>
-    </function>
-
-    <function name="Rectiv" offset="91" static_dispatch="false">
-        <param name="v1" type="const GLint *" count="2"/>
-        <param name="v2" type="const GLint *" count="2"/>
-        <glx rop="47"/>
-    </function>
-
-    <function name="Rects" offset="92" vectorequiv="Rectsv" static_dispatch="false">
-        <param name="x1" type="GLshort"/>
-        <param name="y1" type="GLshort"/>
-        <param name="x2" type="GLshort"/>
-        <param name="y2" type="GLshort"/>
-    </function>
-
-    <function name="Rectsv" offset="93" static_dispatch="false">
-        <param name="v1" type="const GLshort *" count="2"/>
-        <param name="v2" type="const GLshort *" count="2"/>
-        <glx rop="48"/>
-    </function>
-
-    <function name="TexCoord1d" offset="94" vectorequiv="TexCoord1dv" static_dispatch="false">
-        <param name="s" type="GLdouble"/>
-    </function>
-
-    <function name="TexCoord1dv" offset="95" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="1"/>
-        <glx rop="49"/>
-    </function>
-
-    <function name="TexCoord1f" offset="96" vectorequiv="TexCoord1fv" static_dispatch="false">
-        <param name="s" type="GLfloat"/>
-    </function>
-
-    <function name="TexCoord1fv" offset="97" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="1"/>
-        <glx rop="50"/>
-    </function>
-
-    <function name="TexCoord1i" offset="98" vectorequiv="TexCoord1iv" static_dispatch="false">
-        <param name="s" type="GLint"/>
-    </function>
-
-    <function name="TexCoord1iv" offset="99" static_dispatch="false">
-        <param name="v" type="const GLint *" count="1"/>
-        <glx rop="51"/>
-    </function>
-
-    <function name="TexCoord1s" offset="100" vectorequiv="TexCoord1sv" static_dispatch="false">
-        <param name="s" type="GLshort"/>
-    </function>
-
-    <function name="TexCoord1sv" offset="101" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="1"/>
-        <glx rop="52"/>
-    </function>
-
-    <function name="TexCoord2d" offset="102" vectorequiv="TexCoord2dv" static_dispatch="false">
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-    </function>
-
-    <function name="TexCoord2dv" offset="103" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="2"/>
-        <glx rop="53"/>
-    </function>
-
-    <function name="TexCoord2f" offset="104" vectorequiv="TexCoord2fv" static_dispatch="false">
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-    </function>
-
-    <function name="TexCoord2fv" offset="105" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="2"/>
-        <glx rop="54"/>
-    </function>
-
-    <function name="TexCoord2i" offset="106" vectorequiv="TexCoord2iv" static_dispatch="false">
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-    </function>
-
-    <function name="TexCoord2iv" offset="107" static_dispatch="false">
-        <param name="v" type="const GLint *" count="2"/>
-        <glx rop="55"/>
-    </function>
-
-    <function name="TexCoord2s" offset="108" vectorequiv="TexCoord2sv" static_dispatch="false">
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-    </function>
-
-    <function name="TexCoord2sv" offset="109" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="2"/>
-        <glx rop="56"/>
-    </function>
-
-    <function name="TexCoord3d" offset="110" vectorequiv="TexCoord3dv" static_dispatch="false">
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-        <param name="r" type="GLdouble"/>
-    </function>
-
-    <function name="TexCoord3dv" offset="111" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="57"/>
-    </function>
-
-    <function name="TexCoord3f" offset="112" vectorequiv="TexCoord3fv" static_dispatch="false">
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-    </function>
-
-    <function name="TexCoord3fv" offset="113" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="58"/>
-    </function>
-
-    <function name="TexCoord3i" offset="114" vectorequiv="TexCoord3iv" static_dispatch="false">
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-        <param name="r" type="GLint"/>
-    </function>
-
-    <function name="TexCoord3iv" offset="115" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="59"/>
-    </function>
-
-    <function name="TexCoord3s" offset="116" vectorequiv="TexCoord3sv" static_dispatch="false">
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-        <param name="r" type="GLshort"/>
-    </function>
-
-    <function name="TexCoord3sv" offset="117" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="60"/>
-    </function>
-
-    <function name="TexCoord4d" offset="118" vectorequiv="TexCoord4dv" static_dispatch="false">
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-        <param name="r" type="GLdouble"/>
-        <param name="q" type="GLdouble"/>
-    </function>
-
-    <function name="TexCoord4dv" offset="119" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="61"/>
-    </function>
-
-    <function name="TexCoord4f" offset="120" vectorequiv="TexCoord4fv" static_dispatch="false">
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function>
-
-    <function name="TexCoord4fv" offset="121" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="62"/>
-    </function>
-
-    <function name="TexCoord4i" offset="122" vectorequiv="TexCoord4iv" static_dispatch="false">
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-        <param name="r" type="GLint"/>
-        <param name="q" type="GLint"/>
-    </function>
-
-    <function name="TexCoord4iv" offset="123" static_dispatch="false">
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="63"/>
-    </function>
-
-    <function name="TexCoord4s" offset="124" vectorequiv="TexCoord4sv" static_dispatch="false">
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-        <param name="r" type="GLshort"/>
-        <param name="q" type="GLshort"/>
-    </function>
-
-    <function name="TexCoord4sv" offset="125" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="64"/>
-    </function>
-
-    <function name="Vertex2d" offset="126" vectorequiv="Vertex2dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-    </function>
-
-    <function name="Vertex2dv" offset="127" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="2"/>
-        <glx rop="65"/>
-    </function>
-
-    <function name="Vertex2f" offset="128" vectorequiv="Vertex2fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-    </function>
-
-    <function name="Vertex2fv" offset="129" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="2"/>
-        <glx rop="66"/>
-    </function>
-
-    <function name="Vertex2i" offset="130" vectorequiv="Vertex2iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-    </function>
-
-    <function name="Vertex2iv" offset="131" static_dispatch="false">
-        <param name="v" type="const GLint *" count="2"/>
-        <glx rop="67"/>
-    </function>
-
-    <function name="Vertex2s" offset="132" vectorequiv="Vertex2sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-    </function>
-
-    <function name="Vertex2sv" offset="133" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="2"/>
-        <glx rop="68"/>
-    </function>
-
-    <function name="Vertex3d" offset="134" vectorequiv="Vertex3dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-    </function>
-
-    <function name="Vertex3dv" offset="135" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="69"/>
-    </function>
-
-    <function name="Vertex3f" offset="136" vectorequiv="Vertex3fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-    </function>
-
-    <function name="Vertex3fv" offset="137" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="70"/>
-    </function>
-
-    <function name="Vertex3i" offset="138" vectorequiv="Vertex3iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-    </function>
-
-    <function name="Vertex3iv" offset="139" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="71"/>
-    </function>
-
-    <function name="Vertex3s" offset="140" vectorequiv="Vertex3sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-    </function>
-
-    <function name="Vertex3sv" offset="141" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="72"/>
-    </function>
-
-    <function name="Vertex4d" offset="142" vectorequiv="Vertex4dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <param name="w" type="GLdouble"/>
-    </function>
-
-    <function name="Vertex4dv" offset="143" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="73"/>
-    </function>
-
-    <function name="Vertex4f" offset="144" vectorequiv="Vertex4fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <param name="w" type="GLfloat"/>
-    </function>
-
-    <function name="Vertex4fv" offset="145" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="74"/>
-    </function>
-
-    <function name="Vertex4i" offset="146" vectorequiv="Vertex4iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-        <param name="w" type="GLint"/>
-    </function>
-
-    <function name="Vertex4iv" offset="147" static_dispatch="false">
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="75"/>
-    </function>
-
-    <function name="Vertex4s" offset="148" vectorequiv="Vertex4sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-        <param name="w" type="GLshort"/>
-    </function>
-
-    <function name="Vertex4sv" offset="149" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="76"/>
-    </function>
-
-    <function name="ClipPlane" offset="150" static_dispatch="false">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLdouble *" count="4"/>
-        <glx rop="77"/>
-    </function>
-
-    <function name="ColorMaterial" offset="151" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="mode" type="GLenum"/>
-        <glx rop="78"/>
-    </function>
-
-    <!--function name="CullFace" offset="152" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="79"/>
-    </function>
-
-    <function name="Fogf" offset="153" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="80"/>
-    </function>
-
-    <function name="Fogfv" offset="154" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="81"/>
-    </function-->
-
-    <function name="Fogi" offset="155" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="82"/>
-    </function>
-
-    <function name="Fogiv" offset="156" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="83"/>
-    </function>
-
-    <!--function name="FrontFace" offset="157" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="84"/>
-    </function>
-
-    <function name="Hint" offset="158" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="mode" type="GLenum"/>
-        <glx rop="85"/>
-    </function>
-
-    <function name="Lightf" offset="159" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="86"/>
-    </function>
-
-    <function name="Lightfv" offset="160" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="87"/>
-    </function-->
-
-    <function name="Lighti" offset="161" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="88"/>
-    </function>
-
-    <function name="Lightiv" offset="162" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="89"/>
-    </function>
-
-    <!--function name="LightModelf" offset="163" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="90"/>
-    </function>
-
-    <function name="LightModelfv" offset="164" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="91"/>
-    </function-->
-
-    <function name="LightModeli" offset="165" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="92"/>
-    </function>
-
-    <function name="LightModeliv" offset="166" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="93"/>
-    </function>
-
-    <function name="LineStipple" offset="167" static_dispatch="false">
-        <param name="factor" type="GLint"/>
-        <param name="pattern" type="GLushort"/>
-        <glx rop="94"/>
-    </function>
-
-    <!--function name="LineWidth" offset="168" static_dispatch="false">
-        <param name="width" type="GLfloat"/>
-        <glx rop="95"/>
-    </function>
-
-    <function name="Materialf" offset="169" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="96"/>
-    </function>
-
-    <function name="Materialfv" offset="170" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="97"/>
-    </function-->
-
-    <function name="Materiali" offset="171" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="98"/>
-    </function>
-
-    <function name="Materialiv" offset="172" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="99"/>
-    </function>
-
-    <!--function name="PointSize" offset="173" static_dispatch="false">
-        <param name="size" type="GLfloat"/>
-        <glx rop="100"/>
-    </function-->
-
-    <function name="PolygonMode" offset="174" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="mode" type="GLenum"/>
-        <glx rop="101"/>
-    </function>
-
-    <function name="PolygonStipple" offset="175" static_dispatch="false">
-        <param name="mask" type="const GLubyte *" img_width="32" img_height="32" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP" img_target="0" img_pad_dimensions="false"/>
-        <glx rop="102"/>
-    </function>
-
-    <!--function name="Scissor" offset="176" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="103"/>
-    </function>
-
-    <function name="ShadeModel" offset="177" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="104"/>
-    </function>
-
-    <function name="TexParameterf" offset="178" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="105"/>
-    </function>
-
-    <function name="TexParameterfv" offset="179" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="106"/>
-    </function>
-
-    <function name="TexParameteri" offset="180" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="107"/>
-    </function>
-
-    <function name="TexParameteriv" offset="181" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="108"/>
-    </function-->
-
-    <function name="TexImage1D" offset="182" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/>
-        <glx rop="109" large="true"/>
-    </function>
-
-    <!--function name="TexImage2D" offset="183" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/>
-        <glx rop="110" large="true"/>
-    </function>
-
-    <function name="TexEnvf" offset="184" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="111"/>
-    </function>
-
-    <function name="TexEnvfv" offset="185" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="112"/>
-    </function>
-
-    <function name="TexEnvi" offset="186" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="113"/>
-    </function>
-
-    <function name="TexEnviv" offset="187" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="114"/>
-    </function-->
-
-    <function name="TexGend" offset="188" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLdouble"/>
-        <glx rop="115"/>
-    </function>
-
-    <function name="TexGendv" offset="189" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLdouble *" variable_param="pname"/>
-        <glx rop="116"/>
-    </function>
-
-    <!--function name="TexGenf" offset="190" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="117"/>
-    </function>
-
-    <function name="TexGenfv" offset="191" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="118"/>
-    </function>
-
-    <function name="TexGeni" offset="192" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="119"/>
-    </function>
-
-    <function name="TexGeniv" offset="193" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="120"/>
-    </function-->
-
-    <function name="FeedbackBuffer" offset="194" static_dispatch="false">
-        <param name="size" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <param name="buffer" type="GLfloat *" output="true"/>
-        <glx sop="105" handcode="true"/>
-    </function>
-
-    <function name="SelectBuffer" offset="195" static_dispatch="false">
-        <param name="size" type="GLsizei"/>
-        <param name="buffer" type="GLuint *" output="true"/>
-        <glx sop="106" handcode="true"/>
-    </function>
-
-    <function name="RenderMode" offset="196" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <return type="GLint"/>
-        <glx sop="107" handcode="true"/>
-    </function>
-
-    <function name="InitNames" offset="197" static_dispatch="false">
-        <glx rop="121"/>
-    </function>
-
-    <function name="LoadName" offset="198" static_dispatch="false">
-        <param name="name" type="GLuint"/>
-        <glx rop="122"/>
-    </function>
-
-    <function name="PassThrough" offset="199" static_dispatch="false">
-        <param name="token" type="GLfloat"/>
-        <glx rop="123"/>
-    </function>
-
-    <function name="PopName" offset="200" static_dispatch="false">
-        <glx rop="124"/>
-    </function>
-
-    <function name="PushName" offset="201" static_dispatch="false">
-        <param name="name" type="GLuint"/>
-        <glx rop="125"/>
-    </function>
-
-    <function name="DrawBuffer" offset="202" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="126"/>
-    </function>
-
-    <!--function name="Clear" offset="203" static_dispatch="false">
-        <param name="mask" type="GLbitfield"/>
-        <glx rop="127"/>
-    </function-->
-
-    <function name="ClearAccum" offset="204" static_dispatch="false">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-        <param name="alpha" type="GLfloat"/>
-        <glx rop="128"/>
-    </function>
-
-    <function name="ClearIndex" offset="205" static_dispatch="false">
-        <param name="c" type="GLfloat"/>
-        <glx rop="129"/>
-    </function>
-
-    <!--function name="ClearColor" offset="206" static_dispatch="false">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="130"/>
-    </function>
-
-    <function name="ClearStencil" offset="207" static_dispatch="false">
-        <param name="s" type="GLint"/>
-        <glx rop="131"/>
-    </function-->
-
-    <function name="ClearDepth" offset="208" static_dispatch="false">
-        <param name="depth" type="GLclampd"/>
-        <glx rop="132"/>
-    </function>
-
-    <!--function name="StencilMask" offset="209" static_dispatch="false">
-        <param name="mask" type="GLuint"/>
-        <glx rop="133"/>
-    </function>
-
-    <function name="ColorMask" offset="210" static_dispatch="false">
-        <param name="red" type="GLboolean"/>
-        <param name="green" type="GLboolean"/>
-        <param name="blue" type="GLboolean"/>
-        <param name="alpha" type="GLboolean"/>
-        <glx rop="134"/>
-    </function>
-
-    <function name="DepthMask" offset="211" static_dispatch="false">
-        <param name="flag" type="GLboolean"/>
-        <glx rop="135"/>
-    </function-->
-
-    <function name="IndexMask" offset="212" static_dispatch="false">
-        <param name="mask" type="GLuint"/>
-        <glx rop="136"/>
-    </function>
-
-    <function name="Accum" offset="213" static_dispatch="false">
-        <param name="op" type="GLenum"/>
-        <param name="value" type="GLfloat"/>
-        <glx rop="137"/>
-    </function>
-
-    <!--function name="Disable" offset="214" static_dispatch="false">
-        <param name="cap" type="GLenum"/>
-        <glx rop="138" handcode="client"/>
-    </function>
-
-    <function name="Enable" offset="215" static_dispatch="false">
-        <param name="cap" type="GLenum"/>
-        <glx rop="139" handcode="client"/>
-    </function>
-
-    <function name="Finish" offset="216" static_dispatch="false">
-        <glx sop="108" handcode="true"/>
-    </function>
-
-    <function name="Flush" offset="217" static_dispatch="false">
-        <glx sop="142" handcode="true"/>
-    </function-->
-
-    <function name="PopAttrib" offset="218" static_dispatch="false">
-        <glx rop="141"/>
-    </function>
-
-    <function name="PushAttrib" offset="219" static_dispatch="false">
-        <param name="mask" type="GLbitfield"/>
-        <glx rop="142"/>
-    </function>
-
-    <function name="Map1d" offset="220" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="u1" type="GLdouble"/>
-        <param name="u2" type="GLdouble"/>
-        <param name="stride" type="GLint" client_only="true"/>
-        <param name="order" type="GLint"/>
-        <param name="points" type="const GLdouble *" variable_param="order"/>
-        <glx rop="143" handcode="true"/>
-    </function>
-
-    <function name="Map1f" offset="221" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="u1" type="GLfloat"/>
-        <param name="u2" type="GLfloat"/>
-        <param name="stride" type="GLint" client_only="true"/>
-        <param name="order" type="GLint"/>
-        <param name="points" type="const GLfloat *" variable_param="order"/>
-        <glx rop="144" handcode="true"/>
-    </function>
-
-    <function name="Map2d" offset="222" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="u1" type="GLdouble"/>
-        <param name="u2" type="GLdouble"/>
-        <param name="ustride" type="GLint" client_only="true"/>
-        <param name="uorder" type="GLint"/>
-        <param name="v1" type="GLdouble"/>
-        <param name="v2" type="GLdouble"/>
-        <param name="vstride" type="GLint" client_only="true"/>
-        <param name="vorder" type="GLint"/>
-        <param name="points" type="const GLdouble *" variable_param="uorder"/>
-        <glx rop="145" handcode="true"/>
-    </function>
-
-    <function name="Map2f" offset="223" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="u1" type="GLfloat"/>
-        <param name="u2" type="GLfloat"/>
-        <param name="ustride" type="GLint" client_only="true"/>
-        <param name="uorder" type="GLint"/>
-        <param name="v1" type="GLfloat"/>
-        <param name="v2" type="GLfloat"/>
-        <param name="vstride" type="GLint" client_only="true"/>
-        <param name="vorder" type="GLint"/>
-        <param name="points" type="const GLfloat *" variable_param="uorder"/>
-        <glx rop="146" handcode="true"/>
-    </function>
-
-    <function name="MapGrid1d" offset="224" static_dispatch="false">
-        <param name="un" type="GLint"/>
-        <param name="u1" type="GLdouble"/>
-        <param name="u2" type="GLdouble"/>
-        <glx rop="147"/>
-    </function>
-
-    <function name="MapGrid1f" offset="225" static_dispatch="false">
-        <param name="un" type="GLint"/>
-        <param name="u1" type="GLfloat"/>
-        <param name="u2" type="GLfloat"/>
-        <glx rop="148"/>
-    </function>
-
-    <function name="MapGrid2d" offset="226" static_dispatch="false">
-        <param name="un" type="GLint"/>
-        <param name="u1" type="GLdouble"/>
-        <param name="u2" type="GLdouble"/>
-        <param name="vn" type="GLint"/>
-        <param name="v1" type="GLdouble"/>
-        <param name="v2" type="GLdouble"/>
-        <glx rop="149"/>
-    </function>
-
-    <function name="MapGrid2f" offset="227" static_dispatch="false">
-        <param name="un" type="GLint"/>
-        <param name="u1" type="GLfloat"/>
-        <param name="u2" type="GLfloat"/>
-        <param name="vn" type="GLint"/>
-        <param name="v1" type="GLfloat"/>
-        <param name="v2" type="GLfloat"/>
-        <glx rop="150"/>
-    </function>
-
-    <function name="EvalCoord1d" offset="228" vectorequiv="EvalCoord1dv" static_dispatch="false">
-        <param name="u" type="GLdouble"/>
-    </function>
-
-    <function name="EvalCoord1dv" offset="229" static_dispatch="false">
-        <param name="u" type="const GLdouble *" count="1"/>
-        <glx rop="151"/>
-    </function>
-
-    <function name="EvalCoord1f" offset="230" vectorequiv="EvalCoord1fv" static_dispatch="false">
-        <param name="u" type="GLfloat"/>
-    </function>
-
-    <function name="EvalCoord1fv" offset="231" static_dispatch="false">
-        <param name="u" type="const GLfloat *" count="1"/>
-        <glx rop="152"/>
-    </function>
-
-    <function name="EvalCoord2d" offset="232" vectorequiv="EvalCoord2dv" static_dispatch="false">
-        <param name="u" type="GLdouble"/>
-        <param name="v" type="GLdouble"/>
-    </function>
-
-    <function name="EvalCoord2dv" offset="233" static_dispatch="false">
-        <param name="u" type="const GLdouble *" count="2"/>
-        <glx rop="153"/>
-    </function>
-
-    <function name="EvalCoord2f" offset="234" vectorequiv="EvalCoord2fv" static_dispatch="false">
-        <param name="u" type="GLfloat"/>
-        <param name="v" type="GLfloat"/>
-    </function>
-
-    <function name="EvalCoord2fv" offset="235" static_dispatch="false">
-        <param name="u" type="const GLfloat *" count="2"/>
-        <glx rop="154"/>
-    </function>
-
-    <function name="EvalMesh1" offset="236" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="i1" type="GLint"/>
-        <param name="i2" type="GLint"/>
-        <glx rop="155"/>
-    </function>
-
-    <function name="EvalPoint1" offset="237" static_dispatch="false">
-        <param name="i" type="GLint"/>
-        <glx rop="156"/>
-    </function>
-
-    <function name="EvalMesh2" offset="238" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="i1" type="GLint"/>
-        <param name="i2" type="GLint"/>
-        <param name="j1" type="GLint"/>
-        <param name="j2" type="GLint"/>
-        <glx rop="157"/>
-    </function>
-
-    <function name="EvalPoint2" offset="239" static_dispatch="false">
-        <param name="i" type="GLint"/>
-        <param name="j" type="GLint"/>
-        <glx rop="158"/>
-    </function>
-
-    <!--function name="AlphaFunc" offset="240" static_dispatch="false">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampf"/>
-        <glx rop="159"/>
-    </function>
-
-    <function name="BlendFunc" offset="241" static_dispatch="false">
-        <param name="sfactor" type="GLenum"/>
-        <param name="dfactor" type="GLenum"/>
-        <glx rop="160"/>
-    </function>
-
-    <function name="LogicOp" offset="242" static_dispatch="false">
-        <param name="opcode" type="GLenum"/>
-        <glx rop="161"/>
-    </function>
-
-    <function name="StencilFunc" offset="243" static_dispatch="false">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLint"/>
-        <param name="mask" type="GLuint"/>
-        <glx rop="162"/>
-    </function>
-
-    <function name="StencilOp" offset="244" static_dispatch="false">
-        <param name="fail" type="GLenum"/>
-        <param name="zfail" type="GLenum"/>
-        <param name="zpass" type="GLenum"/>
-        <glx rop="163"/>
-    </function>
-
-    <function name="DepthFunc" offset="245" static_dispatch="false">
-        <param name="func" type="GLenum"/>
-        <glx rop="164"/>
-    </function-->
-
-    <function name="PixelZoom" offset="246" static_dispatch="false">
-        <param name="xfactor" type="GLfloat"/>
-        <param name="yfactor" type="GLfloat"/>
-        <glx rop="165"/>
-    </function>
-
-    <function name="PixelTransferf" offset="247" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="166"/>
-    </function>
-
-    <function name="PixelTransferi" offset="248" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="167"/>
-    </function>
-
-    <function name="PixelStoref" offset="249" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx sop="109" handcode="client"/>
-    </function>
-
-    <!--function name="PixelStorei" offset="250" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx sop="110" handcode="client"/>
-    </function-->
-
-    <function name="PixelMapfv" offset="251" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="mapsize" type="GLsizei" counter="true"/>
-        <param name="values" type="const GLfloat *" count="mapsize"/>
-        <glx rop="168" large="true"/>
-    </function>
-
-    <function name="PixelMapuiv" offset="252" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="mapsize" type="GLsizei" counter="true"/>
-        <param name="values" type="const GLuint *" count="mapsize"/>
-        <glx rop="169" large="true"/>
-    </function>
-
-    <function name="PixelMapusv" offset="253" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="mapsize" type="GLsizei" counter="true"/>
-        <param name="values" type="const GLushort *" count="mapsize"/>
-        <glx rop="170" large="true"/>
-    </function>
-
-    <function name="ReadBuffer" offset="254" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="171"/>
-    </function>
-
-    <function name="CopyPixels" offset="255" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <glx rop="172"/>
-    </function>
-
-    <!--function name="ReadPixels" offset="256" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="GLvoid *" output="true"  img_width="width" img_height="height" img_format="format" img_type="type" img_target="0"/>
-        <glx sop="111"/>
-    </function-->
-
-    <function name="DrawPixels" offset="257" static_dispatch="false">
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="0" img_pad_dimensions="false"/>
-        <glx rop="173" large="true"/>
-    </function>
-
-    <!--function name="GetBooleanv" offset="258" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLboolean *" output="true" variable_param="pname"/>
-        <glx sop="112" handcode="client"/>
-    </function-->
-
-    <function name="GetClipPlane" offset="259" static_dispatch="false">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLdouble *" output="true" count="4"/>
-        <glx sop="113" always_array="true"/>
-    </function>
-
-    <function name="GetDoublev" offset="260" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLdouble *" output="true" variable_param="pname"/>
-        <glx sop="114" handcode="client"/>
-    </function>
-
-    <!--function name="GetError" offset="261" static_dispatch="false">
-        <return type="GLenum"/>
-        <glx sop="115" handcode="client"/>
-    </function>
-
-    <function name="GetFloatv" offset="262" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="116" handcode="client"/>
-    </function>
-
-    <function name="GetIntegerv" offset="263" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="117" handcode="client"/>
-    </function>
-
-    <function name="GetLightfv" offset="264" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="118"/>
-    </function-->
-
-    <function name="GetLightiv" offset="265" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="119"/>
-    </function>
-
-    <function name="GetMapdv" offset="266" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="query" type="GLenum"/>
-        <param name="v" type="GLdouble *" output="true" variable_param="target query"/>
-        <glx sop="120"/>
-    </function>
-
-    <function name="GetMapfv" offset="267" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="query" type="GLenum"/>
-        <param name="v" type="GLfloat *" output="true" variable_param="target query"/>
-        <glx sop="121"/>
-    </function>
-
-    <function name="GetMapiv" offset="268" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="query" type="GLenum"/>
-        <param name="v" type="GLint *" output="true" variable_param="target query"/>
-        <glx sop="122"/>
-    </function>
-
-    <!--function name="GetMaterialfv" offset="269" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="123"/>
-    </function-->
-
-    <function name="GetMaterialiv" offset="270" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="124"/>
-    </function>
-
-    <function name="GetPixelMapfv" offset="271" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="values" type="GLfloat *" output="true" variable_param="map"/>
-        <glx sop="125"/>
-    </function>
-
-    <function name="GetPixelMapuiv" offset="272" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="values" type="GLuint *" output="true" variable_param="map"/>
-        <glx sop="126"/>
-    </function>
-
-    <function name="GetPixelMapusv" offset="273" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="values" type="GLushort *" output="true" variable_param="map"/>
-        <glx sop="127"/>
-    </function>
-
-    <function name="GetPolygonStipple" offset="274" static_dispatch="false">
-        <param name="mask" type="GLubyte *" output="true" img_width="32" img_height="32" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP"/>
-        <glx sop="128"/>
-    </function>
-
-    <!--function name="GetString" offset="275" static_dispatch="false">
-        <param name="name" type="GLenum"/>
-        <return type="const GLubyte *"/>
-        <glx sop="129" handcode="true"/>
-    </function>
-
-    <function name="GetTexEnvfv" offset="276" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="130"/>
-    </function>
-
-    <function name="GetTexEnviv" offset="277" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="131"/>
-    </function-->
-
-    <function name="GetTexGendv" offset="278" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLdouble *" output="true" variable_param="pname"/>
-        <glx sop="132"/>
-    </function>
-
-    <!--function name="GetTexGenfv" offset="279" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="133"/>
-    </function>
-
-    <function name="GetTexGeniv" offset="280" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="134"/>
-    </function-->
-
-    <function name="GetTexImage" offset="281" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="GLvoid *" output="true" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type"/>
-        <glx sop="135" dimensions_in_reply="true"/>
-    </function>
-
-    <!--function name="GetTexParameterfv" offset="282" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="136"/>
-    </function>
-
-    <function name="GetTexParameteriv" offset="283" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="137"/>
-    </function-->
-
-    <function name="GetTexLevelParameterfv" offset="284" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="138"/>
-    </function>
-
-    <function name="GetTexLevelParameteriv" offset="285" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="139"/>
-    </function>
-
-    <!--function name="IsEnabled" offset="286" static_dispatch="false">
-        <param name="cap" type="GLenum"/>
-        <return type="GLboolean"/>
-        <glx sop="140" handcode="client"/>
-    </function-->
-
-    <function name="IsList" offset="287" static_dispatch="false">
-        <param name="list" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx sop="141"/>
-    </function>
-
-    <function name="DepthRange" offset="288" static_dispatch="false">
-        <param name="zNear" type="GLclampd"/>
-        <param name="zFar" type="GLclampd"/>
-        <glx rop="174"/>
-    </function>
-
-    <function name="Frustum" offset="289" static_dispatch="false">
-        <param name="left" type="GLdouble"/>
-        <param name="right" type="GLdouble"/>
-        <param name="bottom" type="GLdouble"/>
-        <param name="top" type="GLdouble"/>
-        <param name="zNear" type="GLdouble"/>
-        <param name="zFar" type="GLdouble"/>
-        <glx rop="175"/>
-    </function>
-
-    <!--function name="LoadIdentity" offset="290" static_dispatch="false">
-        <glx rop="176"/>
-    </function>
-
-    <function name="LoadMatrixf" offset="291" static_dispatch="false">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="177"/>
-    </function-->
-
-    <function name="LoadMatrixd" offset="292" static_dispatch="false">
-        <param name="m" type="const GLdouble *" count="16"/>
-        <glx rop="178"/>
-    </function>
-
-    <!--function name="MatrixMode" offset="293" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="179"/>
-    </function>
-
-    <function name="MultMatrixf" offset="294" static_dispatch="false">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="180"/>
-    </function-->
-
-    <function name="MultMatrixd" offset="295" static_dispatch="false">
-        <param name="m" type="const GLdouble *" count="16"/>
-        <glx rop="181"/>
-    </function>
-
-    <function name="Ortho" offset="296" static_dispatch="false">
-        <param name="left" type="GLdouble"/>
-        <param name="right" type="GLdouble"/>
-        <param name="bottom" type="GLdouble"/>
-        <param name="top" type="GLdouble"/>
-        <param name="zNear" type="GLdouble"/>
-        <param name="zFar" type="GLdouble"/>
-        <glx rop="182"/>
-    </function>
-
-    <!--function name="PopMatrix" offset="297" static_dispatch="false">
-        <glx rop="183"/>
-    </function>
-
-    <function name="PushMatrix" offset="298" static_dispatch="false">
-        <glx rop="184"/>
-    </function-->
-
-    <function name="Rotated" offset="299" static_dispatch="false">
-        <param name="angle" type="GLdouble"/>
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <glx rop="185"/>
-    </function>
-
-    <!--function name="Rotatef" offset="300" static_dispatch="false">
-        <param name="angle" type="GLfloat"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="186"/>
-    </function-->
-
-    <function name="Scaled" offset="301" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <glx rop="187"/>
-    </function>
-
-    <!--function name="Scalef" offset="302" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="188"/>
-    </function-->
-
-    <function name="Translated" offset="303" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <glx rop="189"/>
-    </function>
-
-    <!--function name="Translatef" offset="304" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="190"/>
-    </function>
-
-    <function name="Viewport" offset="305" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="191"/>
-    </function-->
-</category>
-
-<category name="1.1">
-    <function name="ArrayElement" offset="306" static_dispatch="false">
-        <param name="i" type="GLint"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!--function name="ColorPointer" offset="308" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="DisableClientState" offset="309" static_dispatch="false">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="DrawArrays" offset="310" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="first" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <glx rop="193" handcode="true"/>
-    </function>
-
-    <function name="DrawElements" offset="311" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="count" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <param name="indices" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function-->
-
-    <function name="EdgeFlagPointer" offset="312" static_dispatch="false">
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!--function name="EnableClientState" offset="313" static_dispatch="false">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="GetPointerv" offset="329" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLvoid **" output="true"/>
-        <glx handcode="true"/>
-    </function-->
-
-    <function name="IndexPointer" offset="314" static_dispatch="false">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="InterleavedArrays" offset="317" static_dispatch="false">
-        <param name="format" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!--function name="NormalPointer" offset="318" static_dispatch="false">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="TexCoordPointer" offset="320" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="VertexPointer" offset="321" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="PolygonOffset" offset="319" static_dispatch="false">
-        <param name="factor" type="GLfloat"/>
-        <param name="units" type="GLfloat"/>
-        <glx rop="192"/>
-    </function-->
-
-    <function name="CopyTexImage1D" offset="323" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <glx rop="4119"/>
-    </function>
-
-    <!--function name="CopyTexImage2D" offset="324" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <glx rop="4120"/>
-    </function-->
-
-    <function name="CopyTexSubImage1D" offset="325" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <glx rop="4121"/>
-    </function>
-
-    <!--function name="CopyTexSubImage2D" offset="326" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4122"/>
-    </function-->
-
-    <function name="TexSubImage1D" offset="332" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_xoff="xoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4099" large="true"/>
-    </function>
-
-    <!--function name="TexSubImage2D" offset="333" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_xoff="xoffset" img_yoff="yoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4100" large="true"/>
-    </function-->
-
-    <function name="AreTexturesResident" offset="322" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="const GLuint *" count="n"/>
-        <param name="residences" type="GLboolean *" output="true" count="n"/>
-        <return type="GLboolean"/>
-        <glx sop="143" handcode="client" always_array="true"/>
-    </function>
-
-    <!--function name="BindTexture" offset="307" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <glx rop="4117"/>
-    </function>
-
-    <function name="DeleteTextures" offset="327" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="const GLuint *" count="n"/>
-        <glx sop="144"/>
-    </function>
-
-    <function name="GenTextures" offset="328" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="GLuint *" output="true" count="n"/>
-        <glx sop="145" always_array="true"/>
-    </function>
-
-    <function name="IsTexture" offset="330" static_dispatch="false">
-        <param name="texture" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx sop="146"/>
-    </function-->
-
-    <function name="PrioritizeTextures" offset="331" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="const GLuint *" count="n"/>
-        <param name="priorities" type="const GLclampf *" count="n"/>
-        <glx rop="4118"/>
-    </function>
-
-    <function name="Indexub" offset="315" vectorequiv="Indexubv" static_dispatch="false">
-        <param name="c" type="GLubyte"/>
-    </function>
-
-    <function name="Indexubv" offset="316" static_dispatch="false">
-        <param name="c" type="const GLubyte *" count="1"/>
-        <glx rop="194"/>
-    </function>
-
-    <function name="PopClientAttrib" offset="334" static_dispatch="false">
-        <glx handcode="true"/>
-    </function>
-
-    <function name="PushClientAttrib" offset="335" static_dispatch="false">
-        <param name="mask" type="GLbitfield"/>
-        <glx handcode="true"/>
-    </function>
-</category>
-
-<category name="1.2">
-    <!--function name="BlendColor" offset="336" static_dispatch="false">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="4096"/>
-    </function>
-
-    <function name="BlendEquation" offset="337" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4097"/>
-    </function-->
-
-    <function name="DrawRangeElements" offset="338" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="start" type="GLuint"/>
-        <param name="end" type="GLuint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <param name="indices" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="ColorTable" offset="339" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="table" type="const GLvoid *" img_width="width" img_pad_dimensions="false" img_format="format" img_type="type" img_target="target"/>
-        <glx rop="2053" large="true"/>
-    </function>
-
-    <function name="ColorTableParameterfv" offset="340" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="2054"/>
-    </function>
-
-    <function name="ColorTableParameteriv" offset="341" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="2055"/>
-    </function>
-
-    <function name="CopyColorTable" offset="342" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <glx rop="2056"/>
-    </function>
-
-    <function name="GetColorTable" offset="343" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="table" type="GLvoid *" output="true" img_width="width" img_format="format" img_type="type"/>
-        <glx sop="147" dimensions_in_reply="true"/>
-    </function>
-
-    <function name="GetColorTableParameterfv" offset="344" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="148"/>
-    </function>
-
-    <function name="GetColorTableParameteriv" offset="345" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="149"/>
-    </function>
-
-    <function name="ColorSubTable" offset="346" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="start" type="GLsizei"/>
-        <param name="count" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="data" type="const GLvoid *" img_width="count" img_pad_dimensions="false" img_format="format" img_type="type" img_target="target"/>
-        <glx rop="195" large="true"/>
-    </function>
-
-    <function name="CopyColorSubTable" offset="347" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="start" type="GLsizei"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <glx rop="196"/>
-    </function>
-
-    <function name="ConvolutionFilter1D" offset="348" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="image" type="const GLvoid *" img_width="width" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4101" large="true"/>
-    </function>
-
-    <function name="ConvolutionFilter2D" offset="349" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="image" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4102" large="true"/>
-    </function>
-
-    <function name="ConvolutionParameterf" offset="350" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat"/>
-        <glx rop="4103"/>
-    </function>
-
-    <function name="ConvolutionParameterfv" offset="351" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="4104"/>
-    </function>
-
-    <function name="ConvolutionParameteri" offset="352" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint"/>
-        <glx rop="4105"/>
-    </function>
-
-    <function name="ConvolutionParameteriv" offset="353" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="4106"/>
-    </function>
-
-    <function name="CopyConvolutionFilter1D" offset="354" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <glx rop="4107"/>
-    </function>
-
-    <function name="CopyConvolutionFilter2D" offset="355" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4108"/>
-    </function>
-
-    <function name="GetConvolutionFilter" offset="356" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="image" type="GLvoid *" output="true" img_width="width" img_height="height" img_format="format" img_type="type"/>
-        <glx sop="150" dimensions_in_reply="true"/>
-    </function>
-
-    <function name="GetConvolutionParameterfv" offset="357" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="151"/>
-    </function>
-
-    <function name="GetConvolutionParameteriv" offset="358" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="152"/>
-    </function>
-
-    <function name="GetSeparableFilter" offset="359" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="row" type="GLvoid *" output="true"/>
-        <param name="column" type="GLvoid *" output="true"/>
-        <param name="span" type="GLvoid *" output="true"/>
-        <glx sop="153" handcode="true"/>
-    </function>
-
-    <function name="SeparableFilter2D" offset="360" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="row" type="const GLvoid *"/>
-        <param name="column" type="const GLvoid *"/>
-        <glx rop="4109" handcode="true"/>
-    </function>
-
-    <function name="GetHistogram" offset="361" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="reset" type="GLboolean"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="values" type="GLvoid *" output="true" img_width="width" img_format="format" img_type="type"/>
-        <glx sop="154" dimensions_in_reply="true" img_reset="reset"/>
-    </function>
-
-    <function name="GetHistogramParameterfv" offset="362" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="155"/>
-    </function>
-
-    <function name="GetHistogramParameteriv" offset="363" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="156"/>
-    </function>
-
-    <function name="GetMinmax" offset="364" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="reset" type="GLboolean"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="values" type="GLvoid *" output="true" img_width="2" img_format="format" img_type="type"/>
-        <glx sop="157" img_reset="reset"/>
-    </function>
-
-    <function name="GetMinmaxParameterfv" offset="365" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="158"/>
-    </function>
-
-    <function name="GetMinmaxParameteriv" offset="366" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="159"/>
-    </function>
-
-    <function name="Histogram" offset="367" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="sink" type="GLboolean"/>
-        <glx rop="4110"/>
-    </function>
-
-    <function name="Minmax" offset="368" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="sink" type="GLboolean"/>
-        <glx rop="4111"/>
-    </function>
-
-    <function name="ResetHistogram" offset="369" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <glx rop="4112"/>
-    </function>
-
-    <function name="ResetMinmax" offset="370" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <glx rop="4113"/>
-    </function>
-
-    <!--function name="TexImage3D" offset="371" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/>
-        <glx rop="4114" large="true"/>
-    </function>
-
-    <function name="TexSubImage3D" offset="372" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4115" large="true"/>
-    </function>
-
-    <function name="CopyTexSubImage3D" offset="373" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4123"/>
-    </function-->
-</category>
-
-<category name="GL_ARB_multitexture" number="1">
-    <!--function name="ActiveTextureARB" offset="374" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx rop="197"/>
-    </function>
-
-    <function name="ClientActiveTextureARB" offset="375" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx handcode="true"/>
-    </function-->
-
-    <function name="MultiTexCoord1dARB" offset="376" vectorequiv="MultiTexCoord1dvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLdouble"/>
-    </function>
-
-    <function name="MultiTexCoord1dvARB" offset="377" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLdouble *" count="1"/>
-        <glx rop="198"/>
-    </function>
-
-    <function name="MultiTexCoord1fARB" offset="378" vectorequiv="MultiTexCoord1fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-    </function>
-
-    <function name="MultiTexCoord1fvARB" offset="379" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLfloat *" count="1"/>
-        <glx rop="199"/>
-    </function>
-
-    <function name="MultiTexCoord1iARB" offset="380" vectorequiv="MultiTexCoord1ivARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLint"/>
-    </function>
-
-    <function name="MultiTexCoord1ivARB" offset="381" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLint *" count="1"/>
-        <glx rop="200"/>
-    </function>
-
-    <function name="MultiTexCoord1sARB" offset="382" vectorequiv="MultiTexCoord1svARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLshort"/>
-    </function>
-
-    <function name="MultiTexCoord1svARB" offset="383" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLshort *" count="1"/>
-        <glx rop="201"/>
-    </function>
-
-    <function name="MultiTexCoord2dARB" offset="384" vectorequiv="MultiTexCoord2dvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-    </function>
-
-    <function name="MultiTexCoord2dvARB" offset="385" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLdouble *" count="2"/>
-        <glx rop="202"/>
-    </function>
-
-    <function name="MultiTexCoord2fARB" offset="386" vectorequiv="MultiTexCoord2fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-    </function>
-
-    <function name="MultiTexCoord2fvARB" offset="387" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLfloat *" count="2"/>
-        <glx rop="203"/>
-    </function>
-
-    <function name="MultiTexCoord2iARB" offset="388" vectorequiv="MultiTexCoord2ivARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-    </function>
-
-    <function name="MultiTexCoord2ivARB" offset="389" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLint *" count="2"/>
-        <glx rop="204"/>
-    </function>
-
-    <function name="MultiTexCoord2sARB" offset="390" vectorequiv="MultiTexCoord2svARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-    </function>
-
-    <function name="MultiTexCoord2svARB" offset="391" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLshort *" count="2"/>
-        <glx rop="205"/>
-    </function>
-
-    <function name="MultiTexCoord3dARB" offset="392" vectorequiv="MultiTexCoord3dvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-        <param name="r" type="GLdouble"/>
-    </function>
-
-    <function name="MultiTexCoord3dvARB" offset="393" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="206"/>
-    </function>
-
-    <function name="MultiTexCoord3fARB" offset="394" vectorequiv="MultiTexCoord3fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-    </function>
-
-    <function name="MultiTexCoord3fvARB" offset="395" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="207"/>
-    </function>
-
-    <function name="MultiTexCoord3iARB" offset="396" vectorequiv="MultiTexCoord3ivARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-        <param name="r" type="GLint"/>
-    </function>
-
-    <function name="MultiTexCoord3ivARB" offset="397" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="208"/>
-    </function>
-
-    <function name="MultiTexCoord3sARB" offset="398" vectorequiv="MultiTexCoord3svARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-        <param name="r" type="GLshort"/>
-    </function>
-
-    <function name="MultiTexCoord3svARB" offset="399" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="209"/>
-    </function>
-
-    <function name="MultiTexCoord4dARB" offset="400" vectorequiv="MultiTexCoord4dvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-        <param name="r" type="GLdouble"/>
-        <param name="q" type="GLdouble"/>
-    </function>
-
-    <function name="MultiTexCoord4dvARB" offset="401" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="210"/>
-    </function>
-
-    <!--function name="MultiTexCoord4fARB" offset="402" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function-->
-
-    <function name="MultiTexCoord4fvARB" offset="403" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="211"/>
-    </function>
-
-    <function name="MultiTexCoord4iARB" offset="404" vectorequiv="MultiTexCoord4ivARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-        <param name="r" type="GLint"/>
-        <param name="q" type="GLint"/>
-    </function>
-
-    <function name="MultiTexCoord4ivARB" offset="405" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="212"/>
-    </function>
-
-    <function name="MultiTexCoord4sARB" offset="406" vectorequiv="MultiTexCoord4svARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-        <param name="r" type="GLshort"/>
-        <param name="q" type="GLshort"/>
-    </function>
-
-    <function name="MultiTexCoord4svARB" offset="407" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="213"/>
-    </function>
-</category>
-
-<xi:include href="../gen/APPLE_vertex_array_object.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es_EXT.xml b/src/mapi/glapi/gen-es/es_EXT.xml
deleted file mode 100644
index 0013df8..0000000
--- a/src/mapi/glapi/gen-es/es_EXT.xml
+++ /dev/null
@@ -1,125 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES extensions -->
-
-<OpenGLAPI>
-
-<category name="GL_OES_compressed_paletted_texture" number="6">
-    <enum name="PALETTE4_RGB8_OES"                        value="0x8B90"/>
-    <enum name="PALETTE4_RGBA8_OES"                       value="0x8B91"/>
-    <enum name="PALETTE4_R5_G6_B5_OES"                    value="0x8B92"/>
-    <enum name="PALETTE4_RGBA4_OES"                       value="0x8B93"/>
-    <enum name="PALETTE4_RGB5_A1_OES"                     value="0x8B94"/>
-    <enum name="PALETTE8_RGB8_OES"                        value="0x8B95"/>
-    <enum name="PALETTE8_RGBA8_OES"                       value="0x8B96"/>
-    <enum name="PALETTE8_R5_G6_B5_OES"                    value="0x8B97"/>
-    <enum name="PALETTE8_RGBA4_OES"                       value="0x8B98"/>
-    <enum name="PALETTE8_RGB5_A1_OES"                     value="0x8B99"/>
-</category>
-
-<!-- 23. GL_OES_EGL_image -->
-<xi:include href="../gen/OES_EGL_image.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<category name="GL_OES_depth24" number="24">
-    <enum name="DEPTH_COMPONENT24_OES"                    value="0x81A6"/>
-</category>
-
-<category name="GL_OES_depth32" number="25">
-    <enum name="DEPTH_COMPONENT32_OES"                    value="0x81A7"/>
-</category>
-
-<category name="GL_OES_element_index_uint" number="26">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_fbo_render_mipmap" number="27">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_mapbuffer" number="29">
-    <enum name="WRITE_ONLY_OES"                           value="0x88B9"/>
-    <enum name="BUFFER_ACCESS_OES"                        value="0x88BB"/>
-    <enum name="BUFFER_MAPPED_OES"                        value="0x88BC"/>
-    <enum name="BUFFER_MAP_POINTER_OES"                   value="0x88BD"/>
-
-    <function name="GetBufferPointervOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLvoid **"/>
-    </function>
-
-    <function name="MapBufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="access" type="GLenum"/>
-	<return type="GLvoid *"/>
-    </function>
-
-    <function name="UnmapBufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-	<return type="GLboolean"/>
-    </function>
-</category>
-
-<category name="GL_OES_rgb8_rgba8" number="30">
-    <enum name="RGB8_OES"                                 value="0x8051"/>
-    <enum name="RGBA8_OES"                                value="0x8058"/>
-</category>
-
-<category name="GL_OES_stencil1" number="31">
-    <enum name="STENCIL_INDEX1_OES"                       value="0x8D46"/>
-</category>
-
-<category name="GL_OES_stencil4" number="32">
-    <enum name="STENCIL_INDEX4_OES"                       value="0x8D47"/>
-</category>
-
-<category name="GL_OES_stencil8" number="33">
-    <enum name="STENCIL_INDEX8_OES"                       value="0x8D48"/>
-</category>
-
-<category name="GL_EXT_texture_filter_anisotropic" number="41">
-    <enum name="TEXTURE_MAX_ANISOTROPY_EXT"               value="0x84FE"/>
-    <enum name="MAX_TEXTURE_MAX_ANISOTROPY_EXT"           value="0x84FF"/>
-</category>
-
-<category name="GL_EXT_texture_compression_dxt1" number="49">
-    <enum name="COMPRESSED_RGB_S3TC_DXT1_EXT"             value="0x83F0"/>
-    <enum name="COMPRESSED_RGBA_S3TC_DXT1_EXT"            value="0x83F1"/>
-</category>
-
-<category name="GL_EXT_texture_format_BGRA8888" number="51">
-    <enum name="BGRA_EXT"                              value="0x80E1"/>
-</category>
-
-<category name="GL_EXT_blend_minmax" number="65">
-    <enum name="MIN_EXT"                               value="0x8007"/>
-    <enum name="MAX_EXT"                               value="0x8008"/>
-</category>
-
-<category name="GL_EXT_read_format_bgra" number="66">
-    <enum name="BGRA_EXT"                              value="0x80E1"/>
-    <enum name="UNSIGNED_SHORT_4_4_4_4_REV_EXT"        value="0x8365"/>
-    <enum name="UNSIGNED_SHORT_1_5_5_5_REV_EXT"        value="0x8366"/>
-</category>
-
-<category name="GL_EXT_multi_draw_arrays" number="69">
-    <function name="MultiDrawArraysEXT" offset="assign">
-        <param name="mode" type="GLenum"/>
-        <param name="first" type="const GLint *"/>
-        <param name="count" type="const GLsizei *"/>
-        <param name="primcount" type="GLsizei"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="MultiDrawElementsEXT" offset="assign">
-        <param name="mode" type="GLenum"/>
-        <param name="count" type="const GLsizei *"/>
-        <param name="type" type="GLenum"/>
-        <param name="indices" type="const GLvoid **"/>
-        <param name="primcount" type="GLsizei"/>
-        <glx handcode="true"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/gl_compare.py b/src/mapi/glapi/gen-es/gl_compare.py
deleted file mode 100644
index 6b5e43b..0000000
--- a/src/mapi/glapi/gen-es/gl_compare.py
+++ /dev/null
@@ -1,354 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (C) 2009 Chia-I Wu <olv@0xlab.org>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# on the rights to use, copy, modify, merge, publish, distribute, sub
-# license, and/or sell copies of the Software, and to permit persons to whom
-# the Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
-# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-import sys
-import os.path
-import getopt
-
-GLAPI = "../../glapi/gen"
-sys.path.append(GLAPI)
-
-import gl_XML
-import glX_XML
-
-class ApiSet(object):
-    def __init__(self, api, elts=["enum", "type", "function"]):
-        self.api = api
-        self.elts = elts
-
-    def _check_enum(self, e1, e2, strict=True):
-        if e1.name != e2.name:
-            raise ValueError("%s: name mismatch" % e1.name)
-        if e1.value != e2.value:
-            raise ValueError("%s: value 0x%04x != 0x%04x"
-                    % (e1.name, e1.value, e2.value))
-
-    def _check_type(self, t1, t2, strict=True):
-        if t1.name != t2.name:
-            raise ValueError("%s: name mismatch" % t1.name)
-        if t1.type_expr.string() != t2.type_expr.string():
-            raise ValueError("%s: type %s != %s"
-                    % (t1.name, t1.type_expr.string(), t2.type_expr.string()))
-
-    def _check_function(self, f1, f2, strict=True):
-        if f1.name != f2.name:
-            raise ValueError("%s: name mismatch" % f1.name)
-        if f1.return_type != f2.return_type:
-            raise ValueError("%s: return type %s != %s"
-                    % (f1.name, f1.return_type, f2.return_type))
-        # there might be padded parameters
-        if strict and len(f1.parameters) != len(f2.parameters):
-            raise ValueError("%s: parameter length %d != %d"
-                    % (f1.name, len(f1.parameters), len(f2.parameters)))
-        if f1.assign_offset != f2.assign_offset:
-            if ((f1.assign_offset and f2.offset < 0) or
-                (f2.assign_offset and f1.offset < 0)):
-                raise ValueError("%s: assign offset %d != %d"
-                        % (f1.name, f1.assign_offset, f2.assign_offset))
-        elif not f1.assign_offset:
-            if f1.offset != f2.offset:
-                raise ValueError("%s: offset %d != %d"
-                        % (f1.name, f1.offset, f2.offset))
-
-        if strict:
-            l1 = f1.entry_points
-            l2 = f2.entry_points
-            l1.sort()
-            l2.sort()
-            if l1 != l2:
-                raise ValueError("%s: entry points %s != %s"
-                        % (f1.name, l1, l2))
-
-            l1 = f1.static_entry_points
-            l2 = f2.static_entry_points
-            l1.sort()
-            l2.sort()
-            if l1 != l2:
-                raise ValueError("%s: static entry points %s != %s"
-                        % (f1.name, l1, l2))
-
-        pad = 0
-        for i in xrange(len(f1.parameters)):
-            p1 = f1.parameters[i]
-            p2 = f2.parameters[i + pad]
-
-            if not strict and p1.is_padding != p2.is_padding:
-                if p1.is_padding:
-                    pad -= 1
-                    continue
-                else:
-                    pad += 1
-                    p2 = f2.parameters[i + pad]
-
-            if strict and p1.name != p2.name:
-                raise ValueError("%s: parameter %d name %s != %s"
-                        % (f1.name, i, p1.name, p2.name))
-            if p1.type_expr.string() != p2.type_expr.string():
-                if (strict or
-                    # special case
-                    f1.name == "TexImage2D" and p1.name != "internalformat"):
-                    raise ValueError("%s: parameter %s type %s != %s"
-                            % (f1.name, p1.name, p1.type_expr.string(),
-                               p2.type_expr.string()))
-
-    def union(self, other):
-        union = gl_XML.gl_api(None)
-
-        if "enum" in self.elts:
-            union.enums_by_name = other.enums_by_name.copy()
-            for key, val in self.api.enums_by_name.iteritems():
-                if key not in union.enums_by_name:
-                    union.enums_by_name[key] = val
-                else:
-                    self._check_enum(val, other.enums_by_name[key])
-
-        if "type" in self.elts:
-            union.types_by_name = other.types_by_name.copy()
-            for key, val in self.api.types_by_name.iteritems():
-                if key not in union.types_by_name:
-                    union.types_by_name[key] = val
-                else:
-                    self._check_type(val, other.types_by_name[key])
-
-        if "function" in self.elts:
-            union.functions_by_name = other.functions_by_name.copy()
-            for key, val in self.api.functions_by_name.iteritems():
-                if key not in union.functions_by_name:
-                    union.functions_by_name[key] = val
-                else:
-                    self._check_function(val, other.functions_by_name[key])
-
-        return union
-
-    def intersection(self, other):
-        intersection = gl_XML.gl_api(None)
-
-        if "enum" in self.elts:
-            for key, val in self.api.enums_by_name.iteritems():
-                if key in other.enums_by_name:
-                    self._check_enum(val, other.enums_by_name[key])
-                    intersection.enums_by_name[key] = val
-
-        if "type" in self.elts:
-            for key, val in self.api.types_by_name.iteritems():
-                if key in other.types_by_name:
-                    self._check_type(val, other.types_by_name[key])
-                    intersection.types_by_name[key] = val
-
-        if "function" in self.elts:
-            for key, val in self.api.functions_by_name.iteritems():
-                if key in other.functions_by_name:
-                    self._check_function(val, other.functions_by_name[key])
-                    intersection.functions_by_name[key] = val
-
-        return intersection
-
-    def difference(self, other):
-        difference = gl_XML.gl_api(None)
-
-        if "enum" in self.elts:
-            for key, val in self.api.enums_by_name.iteritems():
-                if key not in other.enums_by_name:
-                    difference.enums_by_name[key] = val
-                else:
-                    self._check_enum(val, other.enums_by_name[key])
-
-        if "type" in self.elts:
-            for key, val in self.api.types_by_name.iteritems():
-                if key not in other.types_by_name:
-                    difference.types_by_name[key] = val
-                else:
-                    self._check_type(val, other.types_by_name[key])
-
-        if "function" in self.elts:
-            for key, val in self.api.functions_by_name.iteritems():
-                if key not in other.functions_by_name:
-                    difference.functions_by_name[key] = val
-                else:
-                    self._check_function(val, other.functions_by_name[key], False)
-
-        return difference
-
-def cmp_enum(e1, e2):
-    if e1.value < e2.value:
-        return -1
-    elif e1.value > e2.value:
-        return 1
-    else:
-        return 0
-
-def cmp_type(t1, t2):
-    return t1.size - t2.size
-
-def cmp_function(f1, f2):
-    if f1.name > f2.name:
-        return 1
-    elif f1.name < f2.name:
-        return -1
-    else:
-        return 0
-
-def spaces(n, str=""):
-    spaces = n - len(str)
-    if spaces < 1:
-        spaces = 1
-    return " " * spaces
-
-def output_enum(e, indent=0):
-    attrs = 'name="%s"' % e.name
-    if e.default_count > 0:
-        tab = spaces(37, attrs)
-        attrs += '%scount="%d"' % (tab, e.default_count)
-    tab = spaces(48, attrs)
-    val = "%04x" % e.value
-    val = "0x" + val.upper()
-    attrs += '%svalue="%s"' % (tab, val)
-
-    # no child
-    if not e.functions:
-        print '%s<enum %s/>' % (spaces(indent), attrs)
-        return
-
-    print '%s<enum %s>' % (spaces(indent), attrs)
-    for key, val in e.functions.iteritems():
-        attrs = 'name="%s"' % key
-        if val[0] != e.default_count:
-            attrs += ' count="%d"' % val[0]
-        if not val[1]:
-            attrs += ' mode="get"'
-
-        print '%s<size %s/>' % (spaces(indent * 2), attrs)
-
-    print '%s</enum>' % spaces(indent)
-
-def output_type(t, indent=0):
-    tab = spaces(16, t.name)
-    attrs = 'name="%s"%ssize="%d"' % (t.name, tab, t.size)
-    ctype = t.type_expr.string()
-    if ctype.find("unsigned") != -1:
-        attrs += ' unsigned="true"'
-    elif ctype.find("signed") == -1:
-        attrs += ' float="true"'
-    print '%s<type %s/>' % (spaces(indent), attrs)
-
-def output_function(f, indent=0):
-    attrs = 'name="%s"' % f.name
-    if f.offset > 0:
-        if f.assign_offset:
-            attrs += ' offset="assign"'
-        else:
-            attrs += ' offset="%d"' % f.offset
-    print '%s<function %s>' % (spaces(indent), attrs)
-
-    for p in f.parameters:
-        attrs = 'name="%s" type="%s"' \
-                % (p.name, p.type_expr.original_string)
-        print '%s<param %s/>' % (spaces(indent * 2), attrs)
-    if f.return_type != "void":
-        attrs = 'type="%s"' % f.return_type
-        print '%s<return %s/>' % (spaces(indent * 2), attrs)
-
-    print '%s</function>' % spaces(indent)
-
-def output_category(api, indent=0):
-    enums = api.enums_by_name.values()
-    enums.sort(cmp_enum)
-    types = api.types_by_name.values()
-    types.sort(cmp_type)
-    functions = api.functions_by_name.values()
-    functions.sort(cmp_function)
-
-    for e in enums:
-        output_enum(e, indent)
-    if enums and types:
-        print
-    for t in types:
-        output_type(t, indent)
-    if enums or types:
-        print
-    for f in functions:
-        output_function(f, indent)
-        if f != functions[-1]:
-            print
-
-def is_api_empty(api):
-    return bool(not api.enums_by_name and
-                not api.types_by_name and
-                not api.functions_by_name)
-
-def show_usage(ops):
-    print "Usage: %s [-k elts] <%s> <file1> <file2>" % (sys.argv[0], "|".join(ops))
-    print "    -k elts   A comma separated string of types of elements to"
-    print "              skip.  Possible types are enum, type, and function."
-    sys.exit(1)
-
-def main():
-    ops = ["union", "intersection", "difference"]
-    elts = ["enum", "type", "function"]
-
-    try:
-        options, args = getopt.getopt(sys.argv[1:], "k:")
-    except Exception, e:
-        show_usage(ops)
-
-    if len(args) != 3:
-        show_usage(ops)
-    op, file1, file2 = args
-    if op not in ops:
-        show_usage(ops)
-
-    skips = []
-    for opt, val in options:
-        if opt == "-k":
-            skips = val.split(",")
-
-    for elt in skips:
-        try:
-            elts.remove(elt)
-        except ValueError:
-            show_usage(ops)
-
-    api1 = gl_XML.parse_GL_API(file1, glX_XML.glx_item_factory())
-    api2 = gl_XML.parse_GL_API(file2, glX_XML.glx_item_factory())
-
-    set = ApiSet(api1, elts)
-    func = getattr(set, op)
-    result = func(api2)
-
-    if not is_api_empty(result):
-        cat_name = "%s_of_%s_and_%s" \
-                % (op, os.path.basename(file1), os.path.basename(file2))
-
-        print '<?xml version="1.0"?>'
-        print '<!DOCTYPE OpenGLAPI SYSTEM "%s/gl_API.dtd">' % GLAPI
-        print
-        print '<OpenGLAPI>'
-        print
-        print '<category name="%s">' % (cat_name)
-        output_category(result, 4)
-        print '</category>'
-        print
-        print '</OpenGLAPI>'
-
-if __name__ == "__main__":
-    main()
diff --git a/src/mapi/glapi/gen-es/gl_parse_header.py b/src/mapi/glapi/gen-es/gl_parse_header.py
deleted file mode 100644
index 5382eba..0000000
--- a/src/mapi/glapi/gen-es/gl_parse_header.py
+++ /dev/null
@@ -1,450 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (C) 2009 Chia-I Wu <olv@0xlab.org>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# on the rights to use, copy, modify, merge, publish, distribute, sub
-# license, and/or sell copies of the Software, and to permit persons to whom
-# the Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
-# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-import sys
-import os.path
-import getopt
-import re
-
-GLAPI = "../../glapi/gen"
-sys.path.append(GLAPI)
-
-class HeaderParser(object):
-    """Parser for GL header files."""
-
-    def __init__(self, verbose=0):
-        # match #if and #ifdef
-        self.IFDEF = re.compile('#\s*if(n?def\s+(?P<ifdef>\w+)|\s+(?P<if>.+))')
-        # match #endif
-        self.ENDIF = re.compile('#\s*endif')
-        # match typedef abc def;
-        self.TYPEDEF = re.compile('typedef\s+(?P<from>[\w ]+)\s+(?P<to>\w+);')
-        # match #define XYZ VAL
-        self.DEFINE = re.compile('#\s*define\s+(?P<key>\w+)(?P<value>\s+[\w"]*)?')
-        # match GLAPI
-        self.GLAPI = re.compile('^GL_?API(CALL)?\s+(?P<return>[\w\s*]+[\w*])\s+(GL)?_?APIENTRY\s+(?P<name>\w+)\s*\((?P<params>[\w\s(,*\[\])]+)\)\s*;')
-
-        self.split_params = re.compile('\s*,\s*')
-        self.split_ctype = re.compile('(\W)')
-        # ignore GL_VERSION_X_Y
-        self.ignore_enum = re.compile('GL(_ES)?_VERSION(_ES_C[ML])?_\d_\d')
-
-        self.verbose = verbose
-        self._reset()
-
-    def _reset(self):
-        """Reset to initial state."""
-        self.ifdef_levels = []
-        self.need_char = False
-
-    # use typeexpr?
-    def _format_ctype(self, ctype, fix=True):
-        """Format a ctype string, optionally fix it."""
-        # split the type string
-        tmp = self.split_ctype.split(ctype)
-        tmp = [s for s in tmp if s and s != " "]
-
-        pretty = ""
-        for i in xrange(len(tmp)):
-            # add missing GL prefix
-            if (fix and tmp[i] != "const" and tmp[i] != "*" and
-                not tmp[i].startswith("GL")):
-                tmp[i] = "GL" + tmp[i]
-
-            if i == 0:
-                pretty = tmp[i]
-            else:
-                sep = " "
-                if tmp[i - 1] == "*":
-                    sep = ""
-                pretty += sep + tmp[i]
-        return pretty
-
-    # use typeexpr?
-    def _get_ctype_attrs(self, ctype):
-        """Get the attributes of a ctype."""
-        is_float = (ctype.find("float") != -1 or ctype.find("double") != -1)
-        is_signed = not (ctype.find("unsigned")  != -1)
-
-        size = 0
-        if ctype.find("char") != -1:
-            size = 1
-        elif ctype.find("short") != -1:
-            size = 2
-        elif ctype.find("int") != -1:
-            size = 4
-        elif is_float:
-            if ctype.find("float") != -1:
-                size = 4
-            else:
-                size = 8
-
-        return (size, is_float, is_signed)
-
-    def _parse_define(self, line):
-        """Parse a #define line for an <enum>."""
-        m = self.DEFINE.search(line)
-        if not m:
-            if self.verbose and line.find("#define") >= 0:
-                print "ignore %s" % (line)
-            return None
-
-        key = m.group("key").strip()
-        val = m.group("value").strip()
-
-        # enum must begin with GL_ and be all uppercase
-        if ((not (key.startswith("GL_") and key.isupper())) or
-            (self.ignore_enum.match(key) and val == "1")):
-            if self.verbose:
-                print "ignore enum %s" % (key)
-            return None
-
-        return (key, val)
-
-    def _parse_typedef(self, line):
-        """Parse a typedef line for a <type>."""
-        m = self.TYPEDEF.search(line)
-        if not m:
-            if self.verbose and line.find("typedef") >= 0:
-                print "ignore %s" % (line)
-            return None
-
-        f = m.group("from").strip()
-        t = m.group("to").strip()
-        if not t.startswith("GL"):
-            if self.verbose:
-                print "ignore type %s" % (t)
-            return None
-        attrs = self._get_ctype_attrs(f)
-
-        return (f, t, attrs)
-
-    def _parse_gl_api(self, line):
-        """Parse a GLAPI line for a <function>."""
-        m = self.GLAPI.search(line)
-        if not m:
-            if self.verbose and line.find("APIENTRY") >= 0:
-                print "ignore %s" % (line)
-            return None
-
-        rettype = m.group("return")
-        rettype = self._format_ctype(rettype)
-        if rettype == "GLvoid":
-            rettype = ""
-
-        name = m.group("name")
-
-        param_str = m.group("params")
-        chunks = self.split_params.split(param_str)
-        chunks = [s.strip() for s in chunks]
-        if len(chunks) == 1 and (chunks[0] == "void" or chunks[0] == "GLvoid"):
-            chunks = []
-
-        params = []
-        for c in chunks:
-            # split type and variable name
-            idx = c.rfind("*")
-            if idx < 0:
-                idx = c.rfind(" ")
-            if idx >= 0:
-                idx += 1
-                ctype = c[:idx]
-                var = c[idx:]
-            else:
-                ctype = c
-                var = "unnamed"
-
-            # convert array to pointer
-            idx = var.find("[")
-            if idx >= 0:
-                var = var[:idx]
-                ctype += "*"
-
-            ctype = self._format_ctype(ctype)
-            var = var.strip()
-
-            if not self.need_char and ctype.find("GLchar") >= 0:
-                self.need_char = True
-
-            params.append((ctype, var))
-
-        return (rettype, name, params)
-
-    def _change_level(self, line):
-        """Parse a #ifdef line and change level."""
-        m = self.IFDEF.search(line)
-        if m:
-            ifdef = m.group("ifdef")
-            if not ifdef:
-                ifdef = m.group("if")
-            self.ifdef_levels.append(ifdef)
-            return True
-        m = self.ENDIF.search(line)
-        if m:
-            self.ifdef_levels.pop()
-            return True
-        return False
-
-    def _read_header(self, header):
-        """Open a header file and read its contents."""
-        lines = []
-        try:
-            fp = open(header, "rb")
-            lines = fp.readlines()
-            fp.close()
-        except IOError, e:
-            print "failed to read %s: %s" % (header, e)
-        return lines
-
-    def _cmp_enum(self, enum1, enum2):
-        """Compare two enums."""
-        # sort by length of the values as strings
-        val1 = enum1[1]
-        val2 = enum2[1]
-        ret = len(val1) - len(val2)
-        # sort by the values
-        if not ret:
-            val1 = int(val1, 16)
-            val2 = int(val2, 16)
-            ret = val1 - val2
-            # in case int cannot hold the result
-            if ret > 0:
-                ret = 1
-            elif ret < 0:
-                ret = -1
-        # sort by the names
-        if not ret:
-            if enum1[0] < enum2[0]:
-                ret = -1
-            elif enum1[0] > enum2[0]:
-                ret = 1
-        return ret
-
-    def _cmp_type(self, type1, type2):
-        """Compare two types."""
-        attrs1 = type1[2]
-        attrs2 = type2[2]
-        # sort by type size
-        ret = attrs1[0] - attrs2[0]
-        # float is larger
-        if not ret:
-            ret = attrs1[1] - attrs2[1]
-        # signed is larger
-        if not ret:
-            ret = attrs1[2] - attrs2[2]
-        # reverse
-        ret = -ret
-        return ret
-
-    def _cmp_function(self, func1, func2):
-        """Compare two functions."""
-        name1 = func1[1]
-        name2 = func2[1]
-        ret = 0
-        # sort by the names
-        if name1 < name2:
-            ret = -1
-        elif name1 > name2:
-            ret = 1
-        return ret
-
-    def _postprocess_dict(self, hdict):
-        """Post-process a header dict and return an ordered list."""
-        hlist = []
-        largest = 0
-        for key, cat in hdict.iteritems():
-            size = len(cat["enums"]) + len(cat["types"]) + len(cat["functions"])
-            # ignore empty category
-            if not size:
-                continue
-
-            cat["enums"].sort(self._cmp_enum)
-            # remove duplicates
-            dup = []
-            for i in xrange(1, len(cat["enums"])):
-                if cat["enums"][i] == cat["enums"][i - 1]:
-                    dup.insert(0, i)
-            for i in dup:
-                e = cat["enums"].pop(i)
-                if self.verbose:
-                    print "remove duplicate enum %s" % e[0]
-
-            cat["types"].sort(self._cmp_type)
-            cat["functions"].sort(self._cmp_function)
-
-            # largest category comes first
-            if size > largest:
-                hlist.insert(0, (key, cat))
-                largest = size
-            else:
-                hlist.append((key, cat))
-        return hlist
-
-    def parse(self, header):
-        """Parse a header file."""
-        self._reset()
-
-        if self.verbose:
-            print "Parsing %s" % (header)
-
-        hdict = {}
-        lines = self._read_header(header)
-        for line in lines:
-            if self._change_level(line):
-                continue
-
-            # skip until the first ifdef (i.e. __gl_h_)
-            if not self.ifdef_levels:
-                continue
-
-            cat_name = os.path.basename(header)
-            # check if we are in an extension
-            if (len(self.ifdef_levels) > 1 and
-                self.ifdef_levels[-1].startswith("GL_")):
-                cat_name = self.ifdef_levels[-1]
-
-            try:
-                cat = hdict[cat_name]
-            except KeyError:
-                cat = {
-                        "enums": [],
-                        "types": [],
-                        "functions": []
-                }
-                hdict[cat_name] = cat
-
-            key = "enums"
-            elem = self._parse_define(line)
-            if not elem:
-                key = "types"
-                elem = self._parse_typedef(line)
-            if not elem:
-                key = "functions"
-                elem = self._parse_gl_api(line)
-
-            if elem:
-                cat[key].append(elem)
-
-        if self.need_char:
-            if self.verbose:
-                print "define GLchar"
-            elem = self._parse_typedef("typedef char GLchar;")
-            cat["types"].append(elem)
-        return self._postprocess_dict(hdict)
-
-def spaces(n, str=""):
-    spaces = n - len(str)
-    if spaces < 1:
-        spaces = 1
-    return " " * spaces
-
-def output_xml(name, hlist):
-    """Output a parsed header in OpenGLAPI XML."""
-
-    for i in xrange(len(hlist)):
-        cat_name, cat = hlist[i]
-
-        print '<category name="%s">' % (cat_name)
-        indent = 4
-
-        for enum in cat["enums"]:
-            name = enum[0][3:]
-            value = enum[1]
-            tab = spaces(41, name)
-            attrs = 'name="%s"%svalue="%s"' % (name, tab, value)
-            print '%s<enum %s/>' % (spaces(indent), attrs)
-
-        if cat["enums"] and cat["types"]:
-            print
-
-        for type in cat["types"]:
-            ctype = type[0]
-            size, is_float, is_signed = type[2]
-
-            attrs = 'name="%s"' % (type[1][2:])
-            attrs += spaces(16, attrs) + 'size="%d"' % (size)
-            if is_float:
-                attrs += ' float="true"'
-            elif not is_signed:
-                attrs += ' unsigned="true"'
-
-            print '%s<type %s/>' % (spaces(indent), attrs)
-
-        for func in cat["functions"]:
-            print
-            ret = func[0]
-            name = func[1][2:]
-            params = func[2]
-
-            attrs = 'name="%s" offset="assign"' % name
-            print '%s<function %s>' % (spaces(indent), attrs)
-
-            for param in params:
-                attrs = 'name="%s" type="%s"' % (param[1], param[0])
-                print '%s<param %s/>' % (spaces(indent * 2), attrs)
-            if ret:
-                attrs = 'type="%s"' % ret
-                print '%s<return %s/>' % (spaces(indent * 2), attrs)
-
-            print '%s</function>' % spaces(indent)
-
-        print '</category>'
-        print
-
-def show_usage():
-    print "Usage: %s [-v] <header> ..." % sys.argv[0]
-    sys.exit(1)
-
-def main():
-    try:
-        args, headers = getopt.getopt(sys.argv[1:], "v")
-    except Exception, e:
-        show_usage()
-    if not headers:
-        show_usage()
-
-    verbose = 0
-    for arg in args:
-        if arg[0] == "-v":
-            verbose += 1
-
-    need_xml_header = True
-    parser = HeaderParser(verbose)
-    for h in headers:
-        h = os.path.abspath(h)
-        hlist = parser.parse(h)
-
-        if need_xml_header:
-            print '<?xml version="1.0"?>'
-            print '<!DOCTYPE OpenGLAPI SYSTEM "%s/gl_API.dtd">' % GLAPI
-            need_xml_header = False
-
-        print
-        print '<!-- %s -->' % (h)
-        print '<OpenGLAPI>'
-        print
-        output_xml(h, hlist)
-        print '</OpenGLAPI>'
-
-if __name__ == '__main__':
-    main()
diff --git a/src/mapi/glapi/gen/Makefile b/src/mapi/glapi/gen/Makefile
index 3e101f3..c386b87 100644
--- a/src/mapi/glapi/gen/Makefile
+++ b/src/mapi/glapi/gen/Makefile
@@ -180,10 +180,8 @@
 
 ######################################################################
 
-$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON) $(ES_API)
-	$(PYTHON2) $(PYTHON_FLAGS) $< -f gl_API.xml \
-		-f $(MESA_GLAPI_DIR)/gen-es/es1_API.xml \
-		-f $(MESA_GLAPI_DIR)/gen-es/es2_API.xml > $@
+$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON_ES)
+	$(PYTHON2) $(PYTHON_FLAGS) $< -f gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
 	$(PYTHON2) $(PYTHON_FLAGS) $< -m remap_table > $@
diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py
index 4d414e8..4dc2e8f 100644
--- a/src/mapi/glapi/gen/gl_XML.py
+++ b/src/mapi/glapi/gen/gl_XML.py
@@ -618,7 +618,7 @@
 		# for each entry-point.  Otherwise, they may generate code
 		# that won't compile.
 
-		self.parameter_strings = {}
+		self.entry_point_parameters = {}
 
 		self.process_element( element )
 
@@ -703,12 +703,34 @@
 
 		if element.children:
 			self.initialized = 1
-			self.parameter_strings[name] = create_parameter_string(parameters, 1)
+			self.entry_point_parameters[name] = parameters
 		else:
-			self.parameter_strings[name] = None
+			self.entry_point_parameters[name] = []
 
 		return
 
+	def filter_entry_points(self, entry_point_list):
+		"""Filter out entry points not in entry_point_list."""
+		if not self.initialized:
+			raise RuntimeError('%s is not initialized yet' % self.name)
+
+		entry_points = []
+		for ent in self.entry_points:
+			if ent not in entry_point_list:
+				if ent in self.static_entry_points:
+					self.static_entry_points.remove(ent)
+				self.entry_point_parameters.pop(ent)
+			else:
+				entry_points.append(ent)
+
+		if not entry_points:
+			raise RuntimeError('%s has no entry point after filtering' % self.name)
+
+		self.entry_points = entry_points
+		if self.name not in entry_points:
+			# use the first remaining entry point
+			self.name = entry_points[0]
+			self.parameters = self.entry_point_parameters[entry_points[0]]
 
 	def get_images(self):
 		"""Return potentially empty list of input images."""
@@ -721,11 +743,11 @@
 
 	def get_parameter_string(self, entrypoint = None):
 		if entrypoint:
-			s = self.parameter_strings[ entrypoint ]
-			if s:
-				return s
+			params = self.entry_point_parameters[ entrypoint ]
+		else:
+			params = self.parameters
 		
-		return create_parameter_string( self.parameters, 1 )
+		return create_parameter_string( params, 1 )
 
 	def get_called_parameter_string(self):
 		p_string = ""
@@ -791,6 +813,16 @@
 		typeexpr.create_initial_types()
 		return
 
+	def filter_functions(self, entry_point_list):
+		"""Filter out entry points not in entry_point_list."""
+		functions_by_name = {}
+		for func in self.functions_by_name.itervalues():
+			entry_points = [ent for ent in func.entry_points if ent in entry_point_list]
+			if entry_points:
+				func.filter_entry_points(entry_points)
+				functions_by_name[func.name] = func
+
+		self.functions_by_name = functions_by_name
 
 	def process_element(self, doc):
 		element = doc.children
diff --git a/src/mapi/glapi/gen/gl_and_es_API.xml b/src/mapi/glapi/gen/gl_and_es_API.xml
index ac7d43c..1313da0 100644
--- a/src/mapi/glapi/gen/gl_and_es_API.xml
+++ b/src/mapi/glapi/gen/gl_and_es_API.xml
@@ -3,6 +3,11 @@
 
 <!-- OpenGL + OpenGL ES -->
 
+<!-- IMPORTANT
+     Remember to update gles_api.py when new OpenGL ES specific entry points
+     are added.  Otherwise, they will be filtered out.
+-->
+
 <OpenGLAPI>
 
 <xi:include href="gl_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py
index 05979e3..2cbbd97 100644
--- a/src/mapi/glapi/gen/gl_table.py
+++ b/src/mapi/glapi/gen/gl_table.py
@@ -211,28 +211,28 @@
 
 
 def show_usage():
-	print "Usage: %s [-f input_file_name] [-m mode] [-c]" % sys.argv[0]
+	print "Usage: %s [-f input_file_name] [-m mode] [-c ver]" % sys.argv[0]
 	print "    -m mode   Mode can be 'table' or 'remap_table'."
-	print "    -c        Enable compatibility with OpenGL ES."
+	print "    -c ver    Version can be 'es1' or 'es2'."
 	sys.exit(1)
 
 if __name__ == '__main__':
 	file_name = "gl_API.xml"
     
 	try:
-		(args, trail) = getopt.getopt(sys.argv[1:], "f:m:c")
+		(args, trail) = getopt.getopt(sys.argv[1:], "f:m:c:")
 	except Exception,e:
 		show_usage()
 
 	mode = "table"
-	es = False
+	es = None
 	for (arg,val) in args:
 		if arg == "-f":
 			file_name = val
 		elif arg == "-m":
 			mode = val
 		elif arg == "-c":
-			es = True
+			es = val
 
 	if mode == "table":
 		printer = PrintGlTable(es)
@@ -243,4 +243,14 @@
 
 	api = gl_XML.parse_GL_API( file_name )
 
+	if es is not None:
+		import gles_api
+
+		api_map = {
+			'es1': gles_api.es1_api,
+			'es2': gles_api.es2_api,
+		}
+
+		api.filter_functions(api_map[es])
+
 	printer.Print( api )
diff --git a/src/mapi/glapi/gen/glapi_gen.mk b/src/mapi/glapi/gen/glapi_gen.mk
new file mode 100644
index 0000000..c7fa7c0
--- /dev/null
+++ b/src/mapi/glapi/gen/glapi_gen.mk
@@ -0,0 +1,44 @@
+# Helpers for glapi header generation
+
+ifndef TOP
+$(error TOP must be defined.)
+endif
+
+glapi_gen_common_deps := \
+	$(wildcard $(TOP)/src/mapi/glapi/gen/*.xml) \
+	$(wildcard $(TOP)/src/mapi/glapi/gen/*.py)
+
+glapi_gen_mapi_script := $(TOP)/src/mapi/mapi/mapi_abi.py
+glapi_gen_mapi_deps := \
+	$(glapi_gen_mapi_script) \
+	$(glapi_gen_common_deps)
+
+# $(1): path to an XML file
+# $(2): name of the printer
+define glapi_gen_mapi
+@mkdir -p $(dir $@)
+$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_mapi_script) \
+	--mode lib --printer $(2) $(1) > $@
+endef
+
+glapi_gen_dispatch_script := $(TOP)/src/mapi/glapi/gen/gl_table.py
+glapi_gen_dispatch_deps := $(glapi_gen_common_deps)
+
+# $(1): path to an XML file
+# $(2): empty, es1, or es2 for entry point filtering
+define glapi_gen_dispatch
+@mkdir -p $(dir $@)
+$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_dispatch_script) \
+	-f $(1) -m remap_table $(if $(2),-c $(2),) > $@
+endef
+
+glapi_gen_remap_script := $(TOP)/src/mapi/glapi/gen/remap_helper.py
+glapi_gen_remap_deps := $(glapi_gen_common_deps)
+
+# $(1): path to an XML file
+# $(2): empty, es1, or es2 for entry point filtering
+define glapi_gen_remap
+@mkdir -p $(dir $@)
+$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_remap_script) \
+	-f $(1) $(if $(2),-c $(2),) > $@
+endef
diff --git a/src/mapi/glapi/gen/gles_api.py b/src/mapi/glapi/gen/gles_api.py
new file mode 100644
index 0000000..4cde9e5
--- /dev/null
+++ b/src/mapi/glapi/gen/gles_api.py
@@ -0,0 +1,452 @@
+#!/usr/bin/env python
+
+# Mesa 3-D graphics library
+# Version:  7.12
+#
+# Copyright (C) 2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+# Authors:
+#    Chia-I Wu <olv@lunarg.com>
+
+# These info should be part of GLAPI XML.  Until that is possible, scripts have
+# to use tables here to filter gl_api.
+
+es1_core = (
+        # OpenGL ES 1.1
+        'ActiveTexture',
+        'AlphaFunc',
+        'AlphaFuncx',
+        'BindBuffer',
+        'BindTexture',
+        'BlendFunc',
+        'BufferData',
+        'BufferSubData',
+        'Clear',
+        'ClearColor',
+        'ClearColorx',
+        'ClearDepthf',
+        'ClearDepthx',
+        'ClearStencil',
+        'ClientActiveTexture',
+        'ClipPlanef',
+        'ClipPlanex',
+        'Color4f',
+        'Color4ub',
+        'Color4x',
+        'ColorMask',
+        'ColorPointer',
+        'CompressedTexImage2D',
+        'CompressedTexSubImage2D',
+        'CopyTexImage2D',
+        'CopyTexSubImage2D',
+        'CullFace',
+        'DeleteBuffers',
+        'DeleteTextures',
+        'DepthFunc',
+        'DepthMask',
+        'DepthRangef',
+        'DepthRangex',
+        'Disable',
+        'DisableClientState',
+        'DrawArrays',
+        'DrawElements',
+        'Enable',
+        'EnableClientState',
+        'Finish',
+        'Flush',
+        'Fogf',
+        'Fogfv',
+        'Fogx',
+        'Fogxv',
+        'FrontFace',
+        'Frustumf',
+        'Frustumx',
+        'GenBuffers',
+        'GenTextures',
+        'GetBooleanv',
+        'GetBufferParameteriv',
+        'GetClipPlanef',
+        'GetClipPlanex',
+        'GetError',
+        'GetFixedv',
+        'GetFloatv',
+        'GetIntegerv',
+        'GetLightfv',
+        'GetLightxv',
+        'GetMaterialfv',
+        'GetMaterialxv',
+        'GetPointerv',
+        'GetString',
+        'GetTexEnvfv',
+        'GetTexEnviv',
+        'GetTexEnvxv',
+        'GetTexParameterfv',
+        'GetTexParameteriv',
+        'GetTexParameterxv',
+        'Hint',
+        'IsBuffer',
+        'IsEnabled',
+        'IsTexture',
+        'Lightf',
+        'Lightfv',
+        'LightModelf',
+        'LightModelfv',
+        'LightModelx',
+        'LightModelxv',
+        'Lightx',
+        'Lightxv',
+        'LineWidth',
+        'LineWidthx',
+        'LoadIdentity',
+        'LoadMatrixf',
+        'LoadMatrixx',
+        'LogicOp',
+        'Materialf',
+        'Materialfv',
+        'Materialx',
+        'Materialxv',
+        'MatrixMode',
+        'MultiTexCoord4f',
+        'MultiTexCoord4x',
+        'MultMatrixf',
+        'MultMatrixx',
+        'Normal3f',
+        'Normal3x',
+        'NormalPointer',
+        'Orthof',
+        'Orthox',
+        'PixelStorei',
+        'PointParameterf',
+        'PointParameterfv',
+        'PointParameterx',
+        'PointParameterxv',
+        'PointSize',
+        'PointSizex',
+        'PolygonOffset',
+        'PolygonOffsetx',
+        'PopMatrix',
+        'PushMatrix',
+        'ReadPixels',
+        'Rotatef',
+        'Rotatex',
+        'SampleCoverage',
+        'SampleCoveragex',
+        'Scalef',
+        'Scalex',
+        'Scissor',
+        'ShadeModel',
+        'StencilFunc',
+        'StencilMask',
+        'StencilOp',
+        'TexCoordPointer',
+        'TexEnvf',
+        'TexEnvfv',
+        'TexEnvi',
+        'TexEnviv',
+        'TexEnvx',
+        'TexEnvxv',
+        'TexImage2D',
+        'TexParameterf',
+        'TexParameterfv',
+        'TexParameteri',
+        'TexParameteriv',
+        'TexParameterx',
+        'TexParameterxv',
+        'TexSubImage2D',
+        'Translatef',
+        'Translatex',
+        'VertexPointer',
+        'Viewport',
+)
+
+es1_api = es1_core + (
+        # GL_OES_EGL_image
+        'EGLImageTargetTexture2DOES',
+        'EGLImageTargetRenderbufferStorageOES',
+        # GL_OES_mapbuffer
+        'GetBufferPointervOES',
+        'MapBufferOES',
+        'UnmapBufferOES',
+        # GL_EXT_multi_draw_arrays
+        'MultiDrawArraysEXT',
+        'MultiDrawElementsEXT',
+        # GL_OES_blend_equation_separate
+        'BlendEquationSeparateOES',
+        # GL_OES_blend_func_separate
+        'BlendFuncSeparateOES',
+        # GL_OES_blend_subtract
+        'BlendEquationOES',
+        # GL_OES_draw_texture
+        'DrawTexiOES',
+        'DrawTexivOES',
+        'DrawTexfOES',
+        'DrawTexfvOES',
+        'DrawTexsOES',
+        'DrawTexsvOES',
+        'DrawTexxOES',
+        'DrawTexxvOES',
+        # GL_OES_fixed_point
+        'AlphaFuncxOES',
+        'ClearColorxOES',
+        'ClearDepthxOES',
+        'Color4xOES',
+        'DepthRangexOES',
+        'FogxOES',
+        'FogxvOES',
+        'FrustumxOES',
+        'LightModelxOES',
+        'LightModelxvOES',
+        'LightxOES',
+        'LightxvOES',
+        'LineWidthxOES',
+        'LoadMatrixxOES',
+        'MaterialxOES',
+        'MaterialxvOES',
+        'MultiTexCoord4xOES',
+        'MultMatrixxOES',
+        'Normal3xOES',
+        'OrthoxOES',
+        'PointSizexOES',
+        'PolygonOffsetxOES',
+        'RotatexOES',
+        'SampleCoveragexOES',
+        'ScalexOES',
+        'TexEnvxOES',
+        'TexEnvxvOES',
+        'TexParameterxOES',
+        'TranslatexOES',
+        'ClipPlanexOES',
+        'GetClipPlanexOES',
+        'GetFixedvOES',
+        'GetLightxvOES',
+        'GetMaterialxvOES',
+        'GetTexEnvxvOES',
+        'GetTexParameterxvOES',
+        'PointParameterxOES',
+        'PointParameterxvOES',
+        'TexParameterxvOES',
+        # GL_OES_framebuffer_object
+        'BindFramebufferOES',
+        'BindRenderbufferOES',
+        'CheckFramebufferStatusOES',
+        'DeleteFramebuffersOES',
+        'DeleteRenderbuffersOES',
+        'FramebufferRenderbufferOES',
+        'FramebufferTexture2DOES',
+        'GenerateMipmapOES',
+        'GenFramebuffersOES',
+        'GenRenderbuffersOES',
+        'GetFramebufferAttachmentParameterivOES',
+        'GetRenderbufferParameterivOES',
+        'IsFramebufferOES',
+        'IsRenderbufferOES',
+        'RenderbufferStorageOES',
+        # GL_OES_point_size_array
+        'PointSizePointerOES',
+        # GL_OES_query_matrix
+        'QueryMatrixxOES',
+        # GL_OES_single_precision
+        'ClearDepthfOES',
+        'DepthRangefOES',
+        'FrustumfOES',
+        'OrthofOES',
+        'ClipPlanefOES',
+        'GetClipPlanefOES',
+        # GL_OES_texture_cube_map
+        'GetTexGenfvOES',
+        'GetTexGenivOES',
+        'GetTexGenxvOES',
+        'TexGenfOES',
+        'TexGenfvOES',
+        'TexGeniOES',
+        'TexGenivOES',
+        'TexGenxOES',
+        'TexGenxvOES',
+)
+
+es2_core = (
+        # OpenGL ES 2.0
+        "ActiveTexture",
+        "AttachShader",
+        "BindAttribLocation",
+        "BindBuffer",
+        "BindFramebuffer",
+        "BindRenderbuffer",
+        "BindTexture",
+        "BlendColor",
+        "BlendEquation",
+        "BlendEquationSeparate",
+        "BlendFunc",
+        "BlendFuncSeparate",
+        "BufferData",
+        "BufferSubData",
+        "CheckFramebufferStatus",
+        "Clear",
+        "ClearColor",
+        "ClearDepthf",
+        "ClearStencil",
+        "ColorMask",
+        "CompileShader",
+        "CompressedTexImage2D",
+        "CompressedTexSubImage2D",
+        "CopyTexImage2D",
+        "CopyTexSubImage2D",
+        "CreateProgram",
+        "CreateShader",
+        "CullFace",
+        "DeleteBuffers",
+        "DeleteFramebuffers",
+        "DeleteProgram",
+        "DeleteRenderbuffers",
+        "DeleteShader",
+        "DeleteTextures",
+        "DepthFunc",
+        "DepthMask",
+        "DepthRangef",
+        "DetachShader",
+        "Disable",
+        "DisableVertexAttribArray",
+        "DrawArrays",
+        "DrawElements",
+        "Enable",
+        "EnableVertexAttribArray",
+        "Finish",
+        "Flush",
+        "FramebufferRenderbuffer",
+        "FramebufferTexture2D",
+        "FrontFace",
+        "GenBuffers",
+        "GenerateMipmap",
+        "GenFramebuffers",
+        "GenRenderbuffers",
+        "GenTextures",
+        "GetActiveAttrib",
+        "GetActiveUniform",
+        "GetAttachedShaders",
+        "GetAttribLocation",
+        "GetBooleanv",
+        "GetBufferParameteriv",
+        "GetError",
+        "GetFloatv",
+        "GetFramebufferAttachmentParameteriv",
+        "GetIntegerv",
+        "GetProgramInfoLog",
+        "GetProgramiv",
+        "GetRenderbufferParameteriv",
+        "GetShaderInfoLog",
+        "GetShaderiv",
+        "GetShaderPrecisionFormat",
+        "GetShaderSource",
+        "GetString",
+        "GetTexParameterfv",
+        "GetTexParameteriv",
+        "GetUniformfv",
+        "GetUniformiv",
+        "GetUniformLocation",
+        "GetVertexAttribfv",
+        "GetVertexAttribiv",
+        "GetVertexAttribPointerv",
+        "Hint",
+        "IsBuffer",
+        "IsEnabled",
+        "IsFramebuffer",
+        "IsProgram",
+        "IsRenderbuffer",
+        "IsShader",
+        "IsTexture",
+        "LineWidth",
+        "LinkProgram",
+        "PixelStorei",
+        "PolygonOffset",
+        "ReadPixels",
+        "ReleaseShaderCompiler",
+        "RenderbufferStorage",
+        "SampleCoverage",
+        "Scissor",
+        "ShaderBinary",
+        "ShaderSource",
+        "StencilFunc",
+        "StencilFuncSeparate",
+        "StencilMask",
+        "StencilMaskSeparate",
+        "StencilOp",
+        "StencilOpSeparate",
+        "TexImage2D",
+        "TexParameterf",
+        "TexParameterfv",
+        "TexParameteri",
+        "TexParameteriv",
+        "TexSubImage2D",
+        "Uniform1f",
+        "Uniform1fv",
+        "Uniform1i",
+        "Uniform1iv",
+        "Uniform2f",
+        "Uniform2fv",
+        "Uniform2i",
+        "Uniform2iv",
+        "Uniform3f",
+        "Uniform3fv",
+        "Uniform3i",
+        "Uniform3iv",
+        "Uniform4f",
+        "Uniform4fv",
+        "Uniform4i",
+        "Uniform4iv",
+        "UniformMatrix2fv",
+        "UniformMatrix3fv",
+        "UniformMatrix4fv",
+        "UseProgram",
+        "ValidateProgram",
+        "VertexAttrib1f",
+        "VertexAttrib1fv",
+        "VertexAttrib2f",
+        "VertexAttrib2fv",
+        "VertexAttrib3f",
+        "VertexAttrib3fv",
+        "VertexAttrib4f",
+        "VertexAttrib4fv",
+        "VertexAttribPointer",
+        "Viewport",
+)
+
+es2_api = es2_core + (
+        # GL_OES_EGL_image
+        'EGLImageTargetTexture2DOES',
+        'EGLImageTargetRenderbufferStorageOES',
+        # GL_OES_mapbuffer
+        'GetBufferPointervOES',
+        'MapBufferOES',
+        'UnmapBufferOES',
+        # GL_EXT_multi_draw_arrays
+        'MultiDrawArraysEXT',
+        'MultiDrawElementsEXT',
+        # GL_OES_texture_3D
+        'CompressedTexImage3DOES',
+        'CompressedTexSubImage3DOES',
+        'CopyTexSubImage3DOES',
+        'FramebufferTexture3DOES',
+        'TexImage3DOES',
+        'TexSubImage3DOES',
+        # GL_OES_get_program_binary
+        'GetProgramBinaryOES',
+        'ProgramBinaryOES',
+)
diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py
index 69b8e5e..367ae24 100644
--- a/src/mapi/glapi/gen/remap_helper.py
+++ b/src/mapi/glapi/gen/remap_helper.py
@@ -197,22 +197,36 @@
 
 
 def show_usage():
-	print "Usage: %s [-f input_file_name]" % sys.argv[0]
+	print "Usage: %s [-f input_file_name] [-c ver]" % sys.argv[0]
+	print "    -c ver    Version can be 'es1' or 'es2'."
 	sys.exit(1)
 
 if __name__ == '__main__':
 	file_name = "gl_API.xml"
 
 	try:
-		(args, trail) = getopt.getopt(sys.argv[1:], "f:")
+		(args, trail) = getopt.getopt(sys.argv[1:], "f:c:")
 	except Exception,e:
 		show_usage()
 
+	es = None
 	for (arg,val) in args:
 		if arg == "-f":
 			file_name = val
+		elif arg == "-c":
+			es = val
 
 	api = gl_XML.parse_GL_API( file_name )
 
+	if es is not None:
+		import gles_api
+
+		api_map = {
+			'es1': gles_api.es1_api,
+			'es2': gles_api.es2_api,
+		}
+
+		api.filter_functions(api_map[es])
+
 	printer = PrintGlRemap()
 	printer.Print( api )
diff --git a/src/mapi/mapi/mapi_abi.py b/src/mapi/mapi/mapi_abi.py
index cb9fc0e..e3d3f65 100644
--- a/src/mapi/mapi/mapi_abi.py
+++ b/src/mapi/mapi/mapi_abi.py
@@ -27,6 +27,11 @@
 #    Chia-I Wu <olv@lunarg.com>
 
 import sys
+# make it possible to import glapi
+import os
+GLAPI = "./%s/../glapi/gen" % (os.path.dirname(sys.argv[0]))
+sys.path.append(GLAPI)
+
 import re
 from optparse import OptionParser
 
@@ -128,9 +133,6 @@
 
 def abi_parse_xml(xml):
     """Parse a GLAPI XML file for ABI entries."""
-    import os
-    GLAPI = "./%s/../glapi/gen" % (os.path.dirname(sys.argv[0]))
-    sys.path.append(GLAPI)
     import gl_XML, glX_XML
 
     api = gl_XML.parse_GL_API(xml, glX_XML.glx_item_factory())
@@ -749,255 +751,7 @@
     """OpenGL ES 1.x API Printer"""
 
     def __init__(self, entries):
-        es1_api = [
-                # OpenGL ES 1.1
-                'ActiveTexture',
-                'AlphaFunc',
-                'AlphaFuncx',
-                'BindBuffer',
-                'BindTexture',
-                'BlendFunc',
-                'BufferData',
-                'BufferSubData',
-                'Clear',
-                'ClearColor',
-                'ClearColorx',
-                'ClearDepthf',
-                'ClearDepthx',
-                'ClearStencil',
-                'ClientActiveTexture',
-                'ClipPlanef',
-                'ClipPlanex',
-                'Color4f',
-                'Color4ub',
-                'Color4x',
-                'ColorMask',
-                'ColorPointer',
-                'CompressedTexImage2D',
-                'CompressedTexSubImage2D',
-                'CopyTexImage2D',
-                'CopyTexSubImage2D',
-                'CullFace',
-                'DeleteBuffers',
-                'DeleteTextures',
-                'DepthFunc',
-                'DepthMask',
-                'DepthRangef',
-                'DepthRangex',
-                'Disable',
-                'DisableClientState',
-                'DrawArrays',
-                'DrawElements',
-                'Enable',
-                'EnableClientState',
-                'Finish',
-                'Flush',
-                'Fogf',
-                'Fogfv',
-                'Fogx',
-                'Fogxv',
-                'FrontFace',
-                'Frustumf',
-                'Frustumx',
-                'GenBuffers',
-                'GenTextures',
-                'GetBooleanv',
-                'GetBufferParameteriv',
-                'GetClipPlanef',
-                'GetClipPlanex',
-                'GetError',
-                'GetFixedv',
-                'GetFloatv',
-                'GetIntegerv',
-                'GetLightfv',
-                'GetLightxv',
-                'GetMaterialfv',
-                'GetMaterialxv',
-                'GetPointerv',
-                'GetString',
-                'GetTexEnvfv',
-                'GetTexEnviv',
-                'GetTexEnvxv',
-                'GetTexParameterfv',
-                'GetTexParameteriv',
-                'GetTexParameterxv',
-                'Hint',
-                'IsBuffer',
-                'IsEnabled',
-                'IsTexture',
-                'Lightf',
-                'Lightfv',
-                'LightModelf',
-                'LightModelfv',
-                'LightModelx',
-                'LightModelxv',
-                'Lightx',
-                'Lightxv',
-                'LineWidth',
-                'LineWidthx',
-                'LoadIdentity',
-                'LoadMatrixf',
-                'LoadMatrixx',
-                'LogicOp',
-                'Materialf',
-                'Materialfv',
-                'Materialx',
-                'Materialxv',
-                'MatrixMode',
-                'MultiTexCoord4f',
-                'MultiTexCoord4x',
-                'MultMatrixf',
-                'MultMatrixx',
-                'Normal3f',
-                'Normal3x',
-                'NormalPointer',
-                'Orthof',
-                'Orthox',
-                'PixelStorei',
-                'PointParameterf',
-                'PointParameterfv',
-                'PointParameterx',
-                'PointParameterxv',
-                'PointSize',
-                'PointSizex',
-                'PolygonOffset',
-                'PolygonOffsetx',
-                'PopMatrix',
-                'PushMatrix',
-                'ReadPixels',
-                'Rotatef',
-                'Rotatex',
-                'SampleCoverage',
-                'SampleCoveragex',
-                'Scalef',
-                'Scalex',
-                'Scissor',
-                'ShadeModel',
-                'StencilFunc',
-                'StencilMask',
-                'StencilOp',
-                'TexCoordPointer',
-                'TexEnvf',
-                'TexEnvfv',
-                'TexEnvi',
-                'TexEnviv',
-                'TexEnvx',
-                'TexEnvxv',
-                'TexImage2D',
-                'TexParameterf',
-                'TexParameterfv',
-                'TexParameteri',
-                'TexParameteriv',
-                'TexParameterx',
-                'TexParameterxv',
-                'TexSubImage2D',
-                'Translatef',
-                'Translatex',
-                'VertexPointer',
-                'Viewport',
-                # GL_OES_EGL_image
-                'EGLImageTargetTexture2DOES',
-                'EGLImageTargetRenderbufferStorageOES',
-                # GL_OES_mapbuffer
-                'GetBufferPointervOES',
-                'MapBufferOES',
-                'UnmapBufferOES',
-                # GL_EXT_multi_draw_arrays
-                'MultiDrawArraysEXT',
-                'MultiDrawElementsEXT',
-                # GL_OES_blend_equation_separate
-                'BlendEquationSeparateOES',
-                # GL_OES_blend_func_separate
-                'BlendFuncSeparateOES',
-                # GL_OES_blend_subtract
-                'BlendEquationOES',
-                # GL_OES_draw_texture
-                'DrawTexiOES',
-                'DrawTexivOES',
-                'DrawTexfOES',
-                'DrawTexfvOES',
-                'DrawTexsOES',
-                'DrawTexsvOES',
-                'DrawTexxOES',
-                'DrawTexxvOES',
-                # GL_OES_fixed_point
-                'AlphaFuncxOES',
-                'ClearColorxOES',
-                'ClearDepthxOES',
-                'Color4xOES',
-                'DepthRangexOES',
-                'FogxOES',
-                'FogxvOES',
-                'FrustumxOES',
-                'LightModelxOES',
-                'LightModelxvOES',
-                'LightxOES',
-                'LightxvOES',
-                'LineWidthxOES',
-                'LoadMatrixxOES',
-                'MaterialxOES',
-                'MaterialxvOES',
-                'MultiTexCoord4xOES',
-                'MultMatrixxOES',
-                'Normal3xOES',
-                'OrthoxOES',
-                'PointSizexOES',
-                'PolygonOffsetxOES',
-                'RotatexOES',
-                'SampleCoveragexOES',
-                'ScalexOES',
-                'TexEnvxOES',
-                'TexEnvxvOES',
-                'TexParameterxOES',
-                'TranslatexOES',
-                'ClipPlanexOES',
-                'GetClipPlanexOES',
-                'GetFixedvOES',
-                'GetLightxvOES',
-                'GetMaterialxvOES',
-                'GetTexEnvxvOES',
-                'GetTexParameterxvOES',
-                'PointParameterxOES',
-                'PointParameterxvOES',
-                'TexParameterxvOES',
-                # GL_OES_framebuffer_object
-                'BindFramebufferOES',
-                'BindRenderbufferOES',
-                'CheckFramebufferStatusOES',
-                'DeleteFramebuffersOES',
-                'DeleteRenderbuffersOES',
-                'FramebufferRenderbufferOES',
-                'FramebufferTexture2DOES',
-                'GenerateMipmapOES',
-                'GenFramebuffersOES',
-                'GenRenderbuffersOES',
-                'GetFramebufferAttachmentParameterivOES',
-                'GetRenderbufferParameterivOES',
-                'IsFramebufferOES',
-                'IsRenderbufferOES',
-                'RenderbufferStorageOES',
-                # GL_OES_point_size_array
-                'PointSizePointerOES',
-                # GL_OES_query_matrix
-                'QueryMatrixxOES',
-                # GL_OES_single_precision
-                'ClearDepthfOES',
-                'DepthRangefOES',
-                'FrustumfOES',
-                'OrthofOES',
-                'ClipPlanefOES',
-                'GetClipPlanefOES',
-                # GL_OES_texture_cube_map
-                'GetTexGenfvOES',
-                'GetTexGenivOES',
-                'GetTexGenxvOES',
-                'TexGenfOES',
-                'TexGenfvOES',
-                'TexGeniOES',
-                'TexGenivOES',
-                'TexGenxOES',
-                'TexGenxvOES',
-        ]
+        from gles_api import es1_api
 
         super(ES1APIPrinter, self).__init__(entries, es1_api)
         self.prefix_lib = 'gl'
@@ -1016,171 +770,7 @@
     """OpenGL ES 2.x API Printer"""
 
     def __init__(self, entries):
-        es2_api = [
-                # OpenGL ES 2.0
-                "ActiveTexture",
-                "AttachShader",
-                "BindAttribLocation",
-                "BindBuffer",
-                "BindFramebuffer",
-                "BindRenderbuffer",
-                "BindTexture",
-                "BlendColor",
-                "BlendEquation",
-                "BlendEquationSeparate",
-                "BlendFunc",
-                "BlendFuncSeparate",
-                "BufferData",
-                "BufferSubData",
-                "CheckFramebufferStatus",
-                "Clear",
-                "ClearColor",
-                "ClearDepthf",
-                "ClearStencil",
-                "ColorMask",
-                "CompileShader",
-                "CompressedTexImage2D",
-                "CompressedTexSubImage2D",
-                "CopyTexImage2D",
-                "CopyTexSubImage2D",
-                "CreateProgram",
-                "CreateShader",
-                "CullFace",
-                "DeleteBuffers",
-                "DeleteFramebuffers",
-                "DeleteProgram",
-                "DeleteRenderbuffers",
-                "DeleteShader",
-                "DeleteTextures",
-                "DepthFunc",
-                "DepthMask",
-                "DepthRangef",
-                "DetachShader",
-                "Disable",
-                "DisableVertexAttribArray",
-                "DrawArrays",
-                "DrawElements",
-                "Enable",
-                "EnableVertexAttribArray",
-                "Finish",
-                "Flush",
-                "FramebufferRenderbuffer",
-                "FramebufferTexture2D",
-                "FrontFace",
-                "GenBuffers",
-                "GenerateMipmap",
-                "GenFramebuffers",
-                "GenRenderbuffers",
-                "GenTextures",
-                "GetActiveAttrib",
-                "GetActiveUniform",
-                "GetAttachedShaders",
-                "GetAttribLocation",
-                "GetBooleanv",
-                "GetBufferParameteriv",
-                "GetError",
-                "GetFloatv",
-                "GetFramebufferAttachmentParameteriv",
-                "GetIntegerv",
-                "GetProgramInfoLog",
-                "GetProgramiv",
-                "GetRenderbufferParameteriv",
-                "GetShaderInfoLog",
-                "GetShaderiv",
-                "GetShaderPrecisionFormat",
-                "GetShaderSource",
-                "GetString",
-                "GetTexParameterfv",
-                "GetTexParameteriv",
-                "GetUniformfv",
-                "GetUniformiv",
-                "GetUniformLocation",
-                "GetVertexAttribfv",
-                "GetVertexAttribiv",
-                "GetVertexAttribPointerv",
-                "Hint",
-                "IsBuffer",
-                "IsEnabled",
-                "IsFramebuffer",
-                "IsProgram",
-                "IsRenderbuffer",
-                "IsShader",
-                "IsTexture",
-                "LineWidth",
-                "LinkProgram",
-                "PixelStorei",
-                "PolygonOffset",
-                "ReadPixels",
-                "ReleaseShaderCompiler",
-                "RenderbufferStorage",
-                "SampleCoverage",
-                "Scissor",
-                "ShaderBinary",
-                "ShaderSource",
-                "StencilFunc",
-                "StencilFuncSeparate",
-                "StencilMask",
-                "StencilMaskSeparate",
-                "StencilOp",
-                "StencilOpSeparate",
-                "TexImage2D",
-                "TexParameterf",
-                "TexParameterfv",
-                "TexParameteri",
-                "TexParameteriv",
-                "TexSubImage2D",
-                "Uniform1f",
-                "Uniform1fv",
-                "Uniform1i",
-                "Uniform1iv",
-                "Uniform2f",
-                "Uniform2fv",
-                "Uniform2i",
-                "Uniform2iv",
-                "Uniform3f",
-                "Uniform3fv",
-                "Uniform3i",
-                "Uniform3iv",
-                "Uniform4f",
-                "Uniform4fv",
-                "Uniform4i",
-                "Uniform4iv",
-                "UniformMatrix2fv",
-                "UniformMatrix3fv",
-                "UniformMatrix4fv",
-                "UseProgram",
-                "ValidateProgram",
-                "VertexAttrib1f",
-                "VertexAttrib1fv",
-                "VertexAttrib2f",
-                "VertexAttrib2fv",
-                "VertexAttrib3f",
-                "VertexAttrib3fv",
-                "VertexAttrib4f",
-                "VertexAttrib4fv",
-                "VertexAttribPointer",
-                "Viewport",
-                # GL_OES_EGL_image
-                'EGLImageTargetTexture2DOES',
-                'EGLImageTargetRenderbufferStorageOES',
-                # GL_OES_mapbuffer
-                'GetBufferPointervOES',
-                'MapBufferOES',
-                'UnmapBufferOES',
-                # GL_EXT_multi_draw_arrays
-                'MultiDrawArraysEXT',
-                'MultiDrawElementsEXT',
-                # GL_OES_texture_3D
-                'CompressedTexImage3DOES',
-                'CompressedTexSubImage3DOES',
-                'CopyTexSubImage3DOES',
-                'FramebufferTexture3DOES',
-                'TexImage3DOES',
-                'TexSubImage3DOES',
-                # GL_OES_get_program_binary
-                'GetProgramBinaryOES',
-                'ProgramBinaryOES',
-        ]
+        from gles_api import es2_api
 
         super(ES2APIPrinter, self).__init__(entries, es2_api)
         self.prefix_lib = 'gl'
diff --git a/src/mapi/shared-glapi/Makefile b/src/mapi/shared-glapi/Makefile
index c928f82..3de864d 100644
--- a/src/mapi/shared-glapi/Makefile
+++ b/src/mapi/shared-glapi/Makefile
@@ -34,17 +34,16 @@
 
 $(glapi_SOURCES): glapi_mapi_tmp.h
 
-.PHONY: glapi_mapi_tmp.h
-glapi_mapi_tmp.h:
-	@$(MAKE) -C $(GLAPI)/gen-es shared-glapi
+include $(GLAPI)/gen/glapi_gen.mk
+glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
+	$(call glapi_gen_mapi,$<,shared-glapi)
 
 .PHONY: clean
 clean:
 	-rm -f $(TOP)/$(LIB_DIR)/$(GLAPI_LIB_NAME)
 	-rm -f $(glapi_OBJECTS)
 	-rm -f depend depend.bak
-	@# clean generated sources/headers
-	@$(MAKE) -C $(GLAPI)/gen-es clean-shared-glapi
+	-rm -f glapi_mapi_tmp.h
 
 install:
 	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk
new file mode 100644
index 0000000..2a08184
--- /dev/null
+++ b/src/mesa/Android.gen.mk
@@ -0,0 +1,131 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# included by core mesa Android.mk for source generation
+
+ifeq ($(LOCAL_MODULE_CLASS),)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+endif
+
+intermediates := $(call local-intermediates-dir)
+
+sources := \
+	main/api_exec_es1.c \
+	main/api_exec_es1_dispatch.h \
+	main/api_exec_es1_remap_helper.h \
+	main/api_exec_es2.c \
+	main/api_exec_es2_dispatch.h \
+	main/api_exec_es2_remap_helper.h \
+	program/lex.yy.c \
+	program/program_parse.tab.c
+
+LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+
+LOCAL_C_INCLUDES += $(intermediates)/main
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+sources += x86/matypes.h
+LOCAL_C_INCLUDES += $(intermediates)/x86
+endif
+endif
+
+sources += main/git_sha1.h
+
+sources := $(addprefix $(intermediates)/, $(sources))
+LOCAL_GENERATED_SOURCES += $(sources)
+
+glapi := $(MESA_TOP)/src/mapi/glapi/gen
+
+es_src_deps := \
+	$(LOCAL_PATH)/main/APIspec.xml \
+	$(LOCAL_PATH)/main/es_generator.py \
+	$(LOCAL_PATH)/main/APIspecutil.py \
+	$(LOCAL_PATH)/main/APIspec.py
+
+es_hdr_deps := \
+	$(wildcard $(glapi)/*.py) \
+	$(wildcard $(glapi)/*.xml)
+
+define es-gen
+	@mkdir -p $(dir $@)
+	@echo "Gen ES: $(PRIVATE_MODULE) <= $(notdir $(@))"
+	$(hide) $(PRIVATE_SCRIPT) $(1) $(PRIVATE_XML) > $@
+endef
+
+define local-l-to-c
+	@mkdir -p $(dir $@)
+	@echo "Mesa Lex: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(LEX) -o$@ $<
+endef
+
+define local-y-to-c-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -o $@ $<
+endef
+
+$(intermediates)/main/api_exec_%.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/main/es_generator.py
+$(intermediates)/main/api_exec_%.c: PRIVATE_XML := -S $(LOCAL_PATH)/main/APIspec.xml
+$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/gl_table.py
+$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
+$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/remap_helper.py
+$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
+
+$(intermediates)/main/api_exec_es1.c: $(es_src_deps)
+	$(call es-gen,-V GLES1.1)
+
+$(intermediates)/main/api_exec_es2.c: $(es_src_deps)
+	$(call es-gen,-V GLES2.0)
+
+$(intermediates)/main/api_exec_%_dispatch.h: $(es_hdr_deps)
+	$(call es-gen, -c $* -m remap_table)
+
+$(intermediates)/main/api_exec_%_remap_helper.h: $(es_hdr_deps)
+	$(call es-gen, -c $*)
+
+$(intermediates)/program/program_parse.tab.c: $(LOCAL_PATH)/program/program_parse.y
+	$(local-y-to-c-and-h)
+
+$(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program/program_lexer.l
+	$(local-l-to-c)
+
+$(intermediates)/main/git_sha1.h:
+	@mkdir -p $(dir $@)
+	@echo "GIT-SHA1: $(PRIVATE_MODULE) <= git"
+	$(hide) touch $@
+	$(hide) if which git > /dev/null; then \
+			git --git-dir $(PRIVATE_PATH)/../../.git log -n 1 --oneline | \
+			sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
+			> $@; \
+		fi
+
+matypes_deps := \
+	$(BUILD_OUT_EXECUTABLES)/mesa_gen_matypes$(BUILD_EXECUTABLE_SUFFIX) \
+	$(LOCAL_PATH)/main/mtypes.h \
+	$(LOCAL_PATH)/tnl/t_context.h
+
+$(intermediates)/x86/matypes.h: $(matypes_deps) 
+	@mkdir -p $(dir $@)
+	@echo "MATYPES: $(PRIVATE_MODULE) <= $(notdir $@)"
+	$(hide) $< > $@
diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk
new file mode 100644
index 0000000..67808d4
--- /dev/null
+++ b/src/mesa/Android.mk
@@ -0,0 +1,115 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for core mesa
+
+LOCAL_PATH := $(call my-dir)
+
+include $(LOCAL_PATH)/sources.mak
+
+common_CFLAGS := \
+	-DFEATURE_ES1=1 \
+	-DFEATURE_ES2=1
+
+common_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/glsl
+
+common_ASM :=
+
+# ---------------------------------------
+# Build mesa_gen_matypes for host
+# ---------------------------------------
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+common_ASM += $(X86_SOURCES)
+
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := x86/gen_matypes.c
+LOCAL_CFLAGS := $(common_CFLAGS)
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_MODULE := mesa_gen_matypes
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_EXECUTABLE)
+
+endif # x86
+endif # MESA_ENABLE_ASM
+
+# ---------------------------------------
+# Build libmesa_st_mesa
+# ---------------------------------------
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(MESA_GALLIUM_SOURCES) \
+	$(MESA_GALLIUM_CXX_SOURCES) \
+	$(common_ASM)
+
+LOCAL_CFLAGS := $(common_CFLAGS)
+
+LOCAL_C_INCLUDES := \
+	$(common_C_INCLUDES) \
+	$(MESA_TOP)/src/gallium/include \
+	$(MESA_TOP)/src/gallium/auxiliary
+
+LOCAL_MODULE := libmesa_st_mesa
+
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif # MESA_BUILD_GALLIUM
+
+# ---------------------------------------
+# Build libmesa_glsl_utils
+#
+# It is used to avoid circular dependency between core mesa and glsl.
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	program/hash_table.c \
+	program/symbol_table.c
+
+LOCAL_MODULE := libmesa_glsl_utils
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
+# ---------------------------------------
+# Build libmesa_glsl_utils for host
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	program/hash_table.c \
+	program/symbol_table.c
+
+LOCAL_MODULE := libmesa_glsl_utils
+
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index a903a26..0e15d61 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -12,11 +12,10 @@
 include sources.mak
 
 # adjust object dirs
+DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
 MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS))
 MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS))
 
-DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
-
 # define preprocessor flags
 MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES)
 
@@ -68,6 +67,26 @@
 # then convenience libs (.a) and finally the device drivers:
 default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs
 
+# include glapi_gen.mk for generating glapi headers for GLES
+GLAPI := $(TOP)/src/mapi/glapi/gen
+include $(GLAPI)/glapi_gen.mk
+
+main/api_exec_es1_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+	$(call glapi_gen_dispatch,$<,es1)
+
+main/api_exec_es1_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps)
+	$(call glapi_gen_remap,$<,es1)
+
+main/api_exec_es1.o: main/api_exec_es1_dispatch.h main/api_exec_es1_remap_helper.h 
+
+main/api_exec_es2_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+	$(call glapi_gen_dispatch,$<,es2)
+
+main/api_exec_es2_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps)
+	$(call glapi_gen_remap,$<,es2)
+
+main/api_exec_es2.o: main/api_exec_es2_dispatch.h main/api_exec_es2_remap_helper.h 
+
 main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
 	$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@
 
@@ -124,6 +143,8 @@
 	@ touch depend
 	@$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \
 		$(ALL_SOURCES) > /dev/null 2>/dev/null
+	@$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \
+		$(ALL_SOURCES) > /dev/null 2>/dev/null
 
 ######################################################################
 # Installation rules
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 24e2155..b0c3334 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -264,6 +264,7 @@
     'state_tracker/st_draw_feedback.c',
     'state_tracker/st_extensions.c',
     'state_tracker/st_format.c',
+    'state_tracker/st_glsl_to_tgsi.cpp',
     'state_tracker/st_gen_mipmap.c',
     'state_tracker/st_manager.c',
     'state_tracker/st_mesa_to_tgsi.c',
@@ -292,6 +293,7 @@
     'program/prog_instruction.c',
     'program/prog_noise.c',
     'program/prog_optimize.c',
+    'program/prog_opt_constant_fold.c',
     'program/prog_parameter.c',
     'program/prog_parameter_layout.c',
     'program/prog_print.c',
@@ -346,28 +348,28 @@
     GLAPI = '#src/mapi/glapi/'
     gles_headers = []
     gles_headers += env.CodeGenerate(
-        target = 'es1api/main/dispatch.h',
+        target = 'main/api_exec_es1_dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
-        source = GLAPI + 'gen-es/es1_API.xml',
-        command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es1api/main/remap_helper.h',
+        target = 'main/api_exec_es1_remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
-        source = GLAPI + 'gen-es/es1_API.xml',
-        command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es2api/main/dispatch.h',
+        target = 'main/api_exec_es2_dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
-        source = GLAPI + 'gen-es/es2_API.xml',
-        command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es2api/main/remap_helper.h',
+        target = 'main/api_exec_es2_remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
-        source = GLAPI + 'gen-es/es2_API.xml',
-        command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET',
     )
 
     env.Depends(gles_sources, gles_headers)
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 8ab129d..a6174ee 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -95,8 +95,6 @@
    driver->TexSubImage2D = _mesa_store_texsubimage2d;
    driver->TexSubImage3D = _mesa_store_texsubimage3d;
    driver->GetTexImage = _mesa_get_teximage;
-   driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D;
-   driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D;
    driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D;
    driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D;
    driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D;
@@ -250,10 +248,10 @@
       GLuint i;
       for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
          ctx->Driver.ColorMaskIndexed(ctx, i,
-                                      ctx->Color.ColorMask[0][RCOMP],
-                                      ctx->Color.ColorMask[0][GCOMP],
-                                      ctx->Color.ColorMask[0][BCOMP],
-                                      ctx->Color.ColorMask[0][ACOMP]);
+                                      ctx->Color.ColorMask[i][RCOMP],
+                                      ctx->Color.ColorMask[i][GCOMP],
+                                      ctx->Color.ColorMask[i][BCOMP],
+                                      ctx->Color.ColorMask[i][ACOMP]);
       }
    }
    else {
@@ -288,7 +286,10 @@
    ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE);
 
    ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
-   ctx->Driver.Fogfv(ctx, GL_FOG_MODE, 0);
+   {
+      GLfloat mode = (GLfloat) ctx->Fog.Mode;
+      ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode);
+   }
    ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
    ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
    ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 0e58aec..291d912 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -62,6 +62,7 @@
 #include "main/teximage.h"
 #include "main/texparam.h"
 #include "main/texstate.h"
+#include "main/uniforms.h"
 #include "main/varray.h"
 #include "main/viewport.h"
 #include "program/program.h"
@@ -72,63 +73,36 @@
 /** Return offset in bytes of the field within a vertex struct */
 #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
 
-
-/**
- * Flags passed to _mesa_meta_begin().
- */
-/*@{*/
-#define META_ALL              ~0x0
-#define META_ALPHA_TEST        0x1
-#define META_BLEND             0x2  /**< includes logicop */
-#define META_COLOR_MASK        0x4
-#define META_DEPTH_TEST        0x8
-#define META_FOG              0x10
-#define META_PIXEL_STORE      0x20
-#define META_PIXEL_TRANSFER   0x40
-#define META_RASTERIZATION    0x80
-#define META_SCISSOR         0x100
-#define META_SHADER          0x200
-#define META_STENCIL_TEST    0x400
-#define META_TRANSFORM       0x800 /**< modelview, projection, clip planes */
-#define META_TEXTURE        0x1000
-#define META_VERTEX         0x2000
-#define META_VIEWPORT       0x4000
-#define META_CLAMP_FRAGMENT_COLOR 0x8000
-#define META_CLAMP_VERTEX_COLOR 0x10000
-#define META_CONDITIONAL_RENDER 0x20000
-/*@}*/
-
-
 /**
  * State which we may save/restore across meta ops.
  * XXX this may be incomplete...
  */
 struct save_state
 {
-   GLbitfield SavedState;  /**< bitmask of META_* flags */
+   GLbitfield SavedState;  /**< bitmask of MESA_META_* flags */
 
-   /** META_ALPHA_TEST */
+   /** MESA_META_ALPHA_TEST */
    GLboolean AlphaEnabled;
    GLenum AlphaFunc;
    GLclampf AlphaRef;
 
-   /** META_BLEND */
+   /** MESA_META_BLEND */
    GLbitfield BlendEnabled;
    GLboolean ColorLogicOpEnabled;
 
-   /** META_COLOR_MASK */
+   /** MESA_META_COLOR_MASK */
    GLubyte ColorMask[MAX_DRAW_BUFFERS][4];
 
-   /** META_DEPTH_TEST */
+   /** MESA_META_DEPTH_TEST */
    struct gl_depthbuffer_attrib Depth;
 
-   /** META_FOG */
+   /** MESA_META_FOG */
    GLboolean Fog;
 
-   /** META_PIXEL_STORE */
+   /** MESA_META_PIXEL_STORE */
    struct gl_pixelstore_attrib Pack, Unpack;
 
-   /** META_PIXEL_TRANSFER */
+   /** MESA_META_PIXEL_TRANSFER */
    GLfloat RedBias, RedScale;
    GLfloat GreenBias, GreenScale;
    GLfloat BlueBias, BlueScale;
@@ -136,17 +110,17 @@
    GLfloat DepthBias, DepthScale;
    GLboolean MapColorFlag;
 
-   /** META_RASTERIZATION */
+   /** MESA_META_RASTERIZATION */
    GLenum FrontPolygonMode, BackPolygonMode;
    GLboolean PolygonOffset;
    GLboolean PolygonSmooth;
    GLboolean PolygonStipple;
    GLboolean PolygonCull;
 
-   /** META_SCISSOR */
+   /** MESA_META_SCISSOR */
    struct gl_scissor_attrib Scissor;
 
-   /** META_SHADER */
+   /** MESA_META_SHADER */
    GLboolean VertexProgramEnabled;
    struct gl_vertex_program *VertexProgram;
    GLboolean FragmentProgramEnabled;
@@ -156,17 +130,19 @@
    struct gl_shader_program *FragmentShader;
    struct gl_shader_program *ActiveShader;
 
-   /** META_STENCIL_TEST */
+   /** MESA_META_STENCIL_TEST */
    struct gl_stencil_attrib Stencil;
 
-   /** META_TRANSFORM */
+   /** MESA_META_TRANSFORM */
    GLenum MatrixMode;
    GLfloat ModelviewMatrix[16];
    GLfloat ProjectionMatrix[16];
    GLfloat TextureMatrix[16];
+
+   /** MESA_META_CLIP */
    GLbitfield ClipPlanesEnabled;
 
-   /** META_TEXTURE */
+   /** MESA_META_TEXTURE */
    GLuint ActiveUnit;
    GLuint ClientActiveUnit;
    /** for unit[0] only */
@@ -176,21 +152,21 @@
    GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS];
    GLuint EnvMode;  /* unit[0] only */
 
-   /** META_VERTEX */
+   /** MESA_META_VERTEX */
    struct gl_array_object *ArrayObj;
    struct gl_buffer_object *ArrayBufferObj;
 
-   /** META_VIEWPORT */
+   /** MESA_META_VIEWPORT */
    GLint ViewportX, ViewportY, ViewportW, ViewportH;
    GLclampd DepthNear, DepthFar;
 
-   /** META_CLAMP_FRAGMENT_COLOR */
+   /** MESA_META_CLAMP_FRAGMENT_COLOR */
    GLenum ClampFragmentColor;
 
-   /** META_CLAMP_VERTEX_COLOR */
+   /** MESA_META_CLAMP_VERTEX_COLOR */
    GLenum ClampVertexColor;
 
-   /** META_CONDITIONAL_RENDER */
+   /** MESA_META_CONDITIONAL_RENDER */
    struct gl_query_object *CondRenderQuery;
    GLenum CondRenderMode;
 
@@ -235,6 +211,8 @@
 {
    GLuint ArrayObj;
    GLuint VBO;
+   GLuint ShaderProg;
+   GLint ColorLocation;
 };
 
 
@@ -336,10 +314,10 @@
  * Enter meta state.  This is like a light-weight version of glPushAttrib
  * but it also resets most GL state back to default values.
  *
- * \param state  bitmask of META_* flags indicating which attribute groups
+ * \param state  bitmask of MESA_META_* flags indicating which attribute groups
  *               to save and reset to their defaults
  */
-static void
+void
 _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
 {
    struct save_state *save;
@@ -351,7 +329,7 @@
    memset(save, 0, sizeof(*save));
    save->SavedState = state;
 
-   if (state & META_ALPHA_TEST) {
+   if (state & MESA_META_ALPHA_TEST) {
       save->AlphaEnabled = ctx->Color.AlphaEnabled;
       save->AlphaFunc = ctx->Color.AlphaFunc;
       save->AlphaRef = ctx->Color.AlphaRef;
@@ -359,7 +337,7 @@
          _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE);
    }
 
-   if (state & META_BLEND) {
+   if (state & MESA_META_BLEND) {
       save->BlendEnabled = ctx->Color.BlendEnabled;
       if (ctx->Color.BlendEnabled) {
          if (ctx->Extensions.EXT_draw_buffers2) {
@@ -377,7 +355,7 @@
          _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE);
    }
 
-   if (state & META_COLOR_MASK) {
+   if (state & MESA_META_COLOR_MASK) {
       memcpy(save->ColorMask, ctx->Color.ColorMask,
              sizeof(ctx->Color.ColorMask));
       if (!ctx->Color.ColorMask[0][0] ||
@@ -387,26 +365,26 @@
          _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
    }
 
-   if (state & META_DEPTH_TEST) {
+   if (state & MESA_META_DEPTH_TEST) {
       save->Depth = ctx->Depth; /* struct copy */
       if (ctx->Depth.Test)
          _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
    }
 
-   if (state & META_FOG) {
+   if (state & MESA_META_FOG) {
       save->Fog = ctx->Fog.Enabled;
       if (ctx->Fog.Enabled)
          _mesa_set_enable(ctx, GL_FOG, GL_FALSE);
    }
 
-   if (state & META_PIXEL_STORE) {
+   if (state & MESA_META_PIXEL_STORE) {
       save->Pack = ctx->Pack;
       save->Unpack = ctx->Unpack;
       ctx->Pack = ctx->DefaultPacking;
       ctx->Unpack = ctx->DefaultPacking;
    }
 
-   if (state & META_PIXEL_TRANSFER) {
+   if (state & MESA_META_PIXEL_TRANSFER) {
       save->RedScale = ctx->Pixel.RedScale;
       save->RedBias = ctx->Pixel.RedBias;
       save->GreenScale = ctx->Pixel.GreenScale;
@@ -429,7 +407,7 @@
       ctx->NewState |=_NEW_PIXEL;
    }
 
-   if (state & META_RASTERIZATION) {
+   if (state & MESA_META_RASTERIZATION) {
       save->FrontPolygonMode = ctx->Polygon.FrontMode;
       save->BackPolygonMode = ctx->Polygon.BackMode;
       save->PolygonOffset = ctx->Polygon.OffsetFill;
@@ -443,12 +421,12 @@
       _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE);
    }
 
-   if (state & META_SCISSOR) {
+   if (state & MESA_META_SCISSOR) {
       save->Scissor = ctx->Scissor; /* struct copy */
       _mesa_set_enable(ctx, GL_SCISSOR_TEST, GL_FALSE);
    }
 
-   if (state & META_SHADER) {
+   if (state & MESA_META_SHADER) {
       if (ctx->Extensions.ARB_vertex_program) {
          save->VertexProgramEnabled = ctx->VertexProgram.Enabled;
          _mesa_reference_vertprog(ctx, &save->VertexProgram,
@@ -477,14 +455,14 @@
       }
    }
 
-   if (state & META_STENCIL_TEST) {
+   if (state & MESA_META_STENCIL_TEST) {
       save->Stencil = ctx->Stencil; /* struct copy */
       if (ctx->Stencil.Enabled)
          _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);
       /* NOTE: other stencil state not reset */
    }
 
-   if (state & META_TEXTURE) {
+   if (state & MESA_META_TEXTURE) {
       GLuint u, tgt;
 
       save->ActiveUnit = ctx->Texture.CurrentUnit;
@@ -523,7 +501,7 @@
       _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
    }
 
-   if (state & META_TRANSFORM) {
+   if (state & MESA_META_TRANSFORM) {
       GLuint activeTexture = ctx->Texture.CurrentUnit;
       memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m,
              16 * sizeof(GLfloat));
@@ -544,6 +522,9 @@
       _mesa_Ortho(0.0, ctx->DrawBuffer->Width,
                   0.0, ctx->DrawBuffer->Height,
                   -1.0, 1.0);
+   }
+
+   if (state & MESA_META_CLIP) {
       save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
       if (ctx->Transform.ClipPlanesEnabled) {
          GLuint i;
@@ -553,7 +534,7 @@
       }
    }
 
-   if (state & META_VERTEX) {
+   if (state & MESA_META_VERTEX) {
       /* save vertex array object state */
       _mesa_reference_array_object(ctx, &save->ArrayObj,
                                    ctx->Array.ArrayObj);
@@ -562,7 +543,7 @@
       /* set some default state? */
    }
 
-   if (state & META_VIEWPORT) {
+   if (state & MESA_META_VIEWPORT) {
       /* save viewport state */
       save->ViewportX = ctx->Viewport.X;
       save->ViewportY = ctx->Viewport.Y;
@@ -583,7 +564,7 @@
       _mesa_DepthRange(0.0, 1.0);
    }
 
-   if (state & META_CLAMP_FRAGMENT_COLOR) {
+   if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
       save->ClampFragmentColor = ctx->Color.ClampFragmentColor;
 
       /* Generally in here we want to do clamping according to whether
@@ -594,7 +575,7 @@
 	 _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
    }
 
-   if (state & META_CLAMP_VERTEX_COLOR) {
+   if (state & MESA_META_CLAMP_VERTEX_COLOR) {
       save->ClampVertexColor = ctx->Light.ClampVertexColor;
 
       /* Generally in here we never want vertex color clamping --
@@ -603,7 +584,7 @@
       _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, GL_FALSE);
    }
 
-   if (state & META_CONDITIONAL_RENDER) {
+   if (state & MESA_META_CONDITIONAL_RENDER) {
       save->CondRenderQuery = ctx->Query.CondRenderQuery;
       save->CondRenderMode = ctx->Query.CondRenderMode;
 
@@ -623,19 +604,19 @@
 /**
  * Leave meta state.  This is like a light-weight version of glPopAttrib().
  */
-static void
+void
 _mesa_meta_end(struct gl_context *ctx)
 {
    struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth];
    const GLbitfield state = save->SavedState;
 
-   if (state & META_ALPHA_TEST) {
+   if (state & MESA_META_ALPHA_TEST) {
       if (ctx->Color.AlphaEnabled != save->AlphaEnabled)
          _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled);
       _mesa_AlphaFunc(save->AlphaFunc, save->AlphaRef);
    }
 
-   if (state & META_BLEND) {
+   if (state & MESA_META_BLEND) {
       if (ctx->Color.BlendEnabled != save->BlendEnabled) {
          if (ctx->Extensions.EXT_draw_buffers2) {
             GLuint i;
@@ -651,7 +632,7 @@
          _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled);
    }
 
-   if (state & META_COLOR_MASK) {
+   if (state & MESA_META_COLOR_MASK) {
       GLuint i;
       for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
          if (!TEST_EQ_4V(ctx->Color.ColorMask[i], save->ColorMask[i])) {
@@ -670,23 +651,23 @@
       }
    }
 
-   if (state & META_DEPTH_TEST) {
+   if (state & MESA_META_DEPTH_TEST) {
       if (ctx->Depth.Test != save->Depth.Test)
          _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test);
       _mesa_DepthFunc(save->Depth.Func);
       _mesa_DepthMask(save->Depth.Mask);
    }
 
-   if (state & META_FOG) {
+   if (state & MESA_META_FOG) {
       _mesa_set_enable(ctx, GL_FOG, save->Fog);
    }
 
-   if (state & META_PIXEL_STORE) {
+   if (state & MESA_META_PIXEL_STORE) {
       ctx->Pack = save->Pack;
       ctx->Unpack = save->Unpack;
    }
 
-   if (state & META_PIXEL_TRANSFER) {
+   if (state & MESA_META_PIXEL_TRANSFER) {
       ctx->Pixel.RedScale = save->RedScale;
       ctx->Pixel.RedBias = save->RedBias;
       ctx->Pixel.GreenScale = save->GreenScale;
@@ -700,7 +681,7 @@
       ctx->NewState |=_NEW_PIXEL;
    }
 
-   if (state & META_RASTERIZATION) {
+   if (state & MESA_META_RASTERIZATION) {
       _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode);
       _mesa_PolygonMode(GL_BACK, save->BackPolygonMode);
       _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple);
@@ -709,13 +690,13 @@
       _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull);
    }
 
-   if (state & META_SCISSOR) {
+   if (state & MESA_META_SCISSOR) {
       _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled);
       _mesa_Scissor(save->Scissor.X, save->Scissor.Y,
                     save->Scissor.Width, save->Scissor.Height);
    }
 
-   if (state & META_SHADER) {
+   if (state & MESA_META_SHADER) {
       if (ctx->Extensions.ARB_vertex_program) {
          _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB,
                           save->VertexProgramEnabled);
@@ -747,7 +728,7 @@
 				     save->ActiveShader);
    }
 
-   if (state & META_STENCIL_TEST) {
+   if (state & MESA_META_STENCIL_TEST) {
       const struct gl_stencil_attrib *stencil = &save->Stencil;
 
       _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled);
@@ -778,7 +759,7 @@
                               stencil->ZPassFunc[1]);
    }
 
-   if (state & META_TEXTURE) {
+   if (state & MESA_META_TEXTURE) {
       GLuint u, tgt;
 
       ASSERT(ctx->Texture.CurrentUnit == 0);
@@ -829,7 +810,7 @@
       _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit);
    }
 
-   if (state & META_TRANSFORM) {
+   if (state & MESA_META_TRANSFORM) {
       GLuint activeTexture = ctx->Texture.CurrentUnit;
       _mesa_ActiveTextureARB(GL_TEXTURE0);
       _mesa_MatrixMode(GL_TEXTURE);
@@ -843,7 +824,9 @@
       _mesa_LoadMatrixf(save->ProjectionMatrix);
 
       _mesa_MatrixMode(save->MatrixMode);
+   }
 
+   if (state & MESA_META_CLIP) {
       if (save->ClipPlanesEnabled) {
          GLuint i;
          for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
@@ -854,7 +837,7 @@
       }
    }
 
-   if (state & META_VERTEX) {
+   if (state & MESA_META_VERTEX) {
       /* restore vertex buffer object */
       _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name);
       _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL);
@@ -864,7 +847,7 @@
       _mesa_reference_array_object(ctx, &save->ArrayObj, NULL);
    }
 
-   if (state & META_VIEWPORT) {
+   if (state & MESA_META_VIEWPORT) {
       if (save->ViewportX != ctx->Viewport.X ||
           save->ViewportY != ctx->Viewport.Y ||
           save->ViewportW != ctx->Viewport.Width ||
@@ -875,15 +858,15 @@
       _mesa_DepthRange(save->DepthNear, save->DepthFar);
    }
 
-   if (state & META_CLAMP_FRAGMENT_COLOR) {
+   if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
       _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, save->ClampFragmentColor);
    }
 
-   if (state & META_CLAMP_VERTEX_COLOR) {
+   if (state & MESA_META_CLAMP_VERTEX_COLOR) {
       _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, save->ClampVertexColor);
    }
 
-   if (state & META_CONDITIONAL_RENDER) {
+   if (state & MESA_META_CONDITIONAL_RENDER) {
       if (save->CondRenderQuery)
 	 _mesa_BeginConditionalRender(save->CondRenderQuery->Id,
 				      save->CondRenderMode);
@@ -1349,7 +1332,7 @@
    }
 
    /* only scissor effects blit so save/clear all other relevant state */
-   _mesa_meta_begin(ctx, ~META_SCISSOR);
+   _mesa_meta_begin(ctx, ~MESA_META_SCISSOR);
 
    if (blit->ArrayObj == 0) {
       /* one-time setup */
@@ -1478,15 +1461,15 @@
    };
    struct vertex verts[4];
    /* save all state but scissor, pixel pack/unpack */
-   GLbitfield metaSave = (META_ALL -
-			  META_SCISSOR -
-			  META_PIXEL_STORE -
-			  META_CONDITIONAL_RENDER);
+   GLbitfield metaSave = (MESA_META_ALL -
+			  MESA_META_SCISSOR -
+			  MESA_META_PIXEL_STORE -
+			  MESA_META_CONDITIONAL_RENDER);
    const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
 
    if (buffers & BUFFER_BITS_COLOR) {
       /* if clearing color buffers, don't save/restore colormask */
-      metaSave -= META_COLOR_MASK;
+      metaSave -= MESA_META_COLOR_MASK;
    }
 
    _mesa_meta_begin(ctx, metaSave);
@@ -1521,7 +1504,7 @@
       _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
    }
    else {
-      ASSERT(metaSave & META_COLOR_MASK);
+      ASSERT(metaSave & MESA_META_COLOR_MASK);
       _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
    }
 
@@ -1589,10 +1572,166 @@
    _mesa_meta_end(ctx);
 }
 
+static void
+meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear)
+{
+   const char *vs_source =
+      "attribute vec4 position;\n"
+      "void main()\n"
+      "{\n"
+      "   gl_Position = position;\n"
+      "}\n";
+   const char *fs_source =
+      "uniform vec4 color;\n"
+      "void main()\n"
+      "{\n"
+      "   gl_FragColor = color;\n"
+      "}\n";
+   GLuint vs, fs;
+
+   if (clear->ArrayObj != 0)
+      return;
+
+   /* create vertex array object */
+   _mesa_GenVertexArrays(1, &clear->ArrayObj);
+   _mesa_BindVertexArray(clear->ArrayObj);
+
+   /* create vertex array buffer */
+   _mesa_GenBuffersARB(1, &clear->VBO);
+   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+   /* setup vertex arrays */
+   _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0);
+   _mesa_EnableVertexAttribArrayARB(0);
+
+   vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER);
+   _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL);
+   _mesa_CompileShaderARB(vs);
+
+   fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER);
+   _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL);
+   _mesa_CompileShaderARB(fs);
+
+   clear->ShaderProg = _mesa_CreateProgramObjectARB();
+   _mesa_AttachShader(clear->ShaderProg, fs);
+   _mesa_AttachShader(clear->ShaderProg, vs);
+   _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position");
+   _mesa_LinkProgramARB(clear->ShaderProg);
+
+   clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg,
+						      "color");
+}
+
+/**
+ * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering.
+ */
+void
+_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
+{
+   struct clear_state *clear = &ctx->Meta->Clear;
+   GLbitfield metaSave;
+   const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const float x0 = ((float)fb->_Xmin / fb->Width)  * 2.0f - 1.0f;
+   const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f;
+   const float x1 = ((float)fb->_Xmax / fb->Width)  * 2.0f - 1.0f;
+   const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f;
+   const float z = -invert_z(ctx->Depth.Clear);
+   struct vertex {
+      GLfloat x, y, z;
+   } verts[4];
+
+   metaSave = (MESA_META_ALPHA_TEST |
+	       MESA_META_BLEND |
+	       MESA_META_DEPTH_TEST |
+	       MESA_META_RASTERIZATION |
+	       MESA_META_SHADER |
+	       MESA_META_STENCIL_TEST |
+	       MESA_META_VERTEX |
+	       MESA_META_VIEWPORT |
+	       MESA_META_CLIP |
+	       MESA_META_CLAMP_FRAGMENT_COLOR);
+
+   if (!(buffers & BUFFER_BITS_COLOR)) {
+      /* We'll use colormask to disable color writes.  Otherwise,
+       * respect color mask
+       */
+      metaSave |= MESA_META_COLOR_MASK;
+   }
+
+   _mesa_meta_begin(ctx, metaSave);
+
+   meta_glsl_clear_init(ctx, clear);
+
+   _mesa_UseProgramObjectARB(clear->ShaderProg);
+   _mesa_Uniform4fvARB(clear->ColorLocation, 1,
+		       ctx->Color.ClearColorUnclamped);
+
+   _mesa_BindVertexArray(clear->ArrayObj);
+   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+   /* GL_COLOR_BUFFER_BIT */
+   if (buffers & BUFFER_BITS_COLOR) {
+      /* leave colormask, glDrawBuffer state as-is */
+
+      /* Clears never have the color clamped. */
+      _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
+   }
+   else {
+      ASSERT(metaSave & MESA_META_COLOR_MASK);
+      _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+   }
+
+   /* GL_DEPTH_BUFFER_BIT */
+   if (buffers & BUFFER_BIT_DEPTH) {
+      _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+      _mesa_DepthFunc(GL_ALWAYS);
+      _mesa_DepthMask(GL_TRUE);
+   }
+   else {
+      assert(!ctx->Depth.Test);
+   }
+
+   /* GL_STENCIL_BUFFER_BIT */
+   if (buffers & BUFFER_BIT_STENCIL) {
+      _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE);
+      _mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
+                              GL_REPLACE, GL_REPLACE, GL_REPLACE);
+      _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
+                                ctx->Stencil.Clear & stencilMax,
+                                ctx->Stencil.WriteMask[0]);
+   }
+   else {
+      assert(!ctx->Stencil.Enabled);
+   }
+
+   /* vertex positions */
+   verts[0].x = x0;
+   verts[0].y = y0;
+   verts[0].z = z;
+   verts[1].x = x1;
+   verts[1].y = y0;
+   verts[1].z = z;
+   verts[2].x = x1;
+   verts[2].y = y1;
+   verts[2].z = z;
+   verts[3].x = x0;
+   verts[3].y = y1;
+   verts[3].z = z;
+
+   /* upload new vertex data */
+   _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts,
+		       GL_DYNAMIC_DRAW_ARB);
+
+   /* draw quad */
+   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+   _mesa_meta_end(ctx);
+}
 
 /**
  * Meta implementation of ctx->Driver.CopyPixels() in terms
- * of texture mapping and polygon rendering.
+ * of texture mapping and polygon rendering and GLSL shaders.
  */
 void
 _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
@@ -1621,12 +1760,13 @@
    /* Most GL state applies to glCopyPixels, but a there's a few things
     * we need to override:
     */
-   _mesa_meta_begin(ctx, (META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_VERTEX |
-                          META_VIEWPORT));
+   _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT));
 
    if (copypix->ArrayObj == 0) {
       /* one-time setup */
@@ -1901,10 +2041,10 @@
           * in [0,1].
           */
          texIntFormat = GL_ALPHA;
-         metaExtraSave = (META_COLOR_MASK |
-                          META_DEPTH_TEST |
-                          META_SHADER |
-                          META_STENCIL_TEST);
+         metaExtraSave = (MESA_META_COLOR_MASK |
+                          MESA_META_DEPTH_TEST |
+                          MESA_META_SHADER |
+                          MESA_META_STENCIL_TEST);
       }
       else {
          fallback = GL_TRUE;
@@ -1914,7 +2054,7 @@
       if (ctx->Extensions.ARB_depth_texture &&
           ctx->Extensions.ARB_fragment_program) {
          texIntFormat = GL_DEPTH_COMPONENT;
-         metaExtraSave = (META_SHADER);
+         metaExtraSave = (MESA_META_SHADER);
       }
       else {
          fallback = GL_TRUE;
@@ -1942,13 +2082,14 @@
    /* Most GL state applies to glDrawPixels (like blending, stencil, etc),
     * but a there's a few things we need to override:
     */
-   _mesa_meta_begin(ctx, (META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_VERTEX |
-                          META_VIEWPORT |
-			  META_CLAMP_FRAGMENT_COLOR |
+   _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT |
+			  MESA_META_CLAMP_FRAGMENT_COLOR |
                           metaExtraSave));
 
    newTex = alloc_texture(tex, width, height, texIntFormat);
@@ -2149,14 +2290,15 @@
    /* Most GL state applies to glBitmap (like blending, stencil, etc),
     * but a there's a few things we need to override:
     */
-   _mesa_meta_begin(ctx, (META_ALPHA_TEST |
-                          META_PIXEL_STORE |
-                          META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_VERTEX |
-                          META_VIEWPORT));
+   _mesa_meta_begin(ctx, (MESA_META_ALPHA_TEST |
+                          MESA_META_PIXEL_STORE |
+                          MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT));
 
    if (bitmap->ArrayObj == 0) {
       /* one-time setup */
@@ -2282,7 +2424,9 @@
 
    /* check for fallbacks */
    if (!ctx->Extensions.EXT_framebuffer_object ||
-       target == GL_TEXTURE_3D) {
+       target == GL_TEXTURE_3D ||
+       target == GL_TEXTURE_1D_ARRAY ||
+       target == GL_TEXTURE_2D_ARRAY) {
       return GL_TRUE;
    }
 
@@ -2334,7 +2478,8 @@
 
 /**
  * Called via ctx->Driver.GenerateMipmap()
- * Note: texture borders and 3D texture support not yet complete.
+ * Note: We don't yet support 3D textures, 1D/2D array textures or texture
+ * borders.
  */
 void
 _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
@@ -2374,7 +2519,7 @@
       faceTarget = target;
    }
 
-   _mesa_meta_begin(ctx, META_ALL);
+   _mesa_meta_begin(ctx, MESA_META_ALL);
 
    if (original_active_unit != 0)
       _mesa_BindTexture(target, texObj->Name);
@@ -2678,119 +2823,6 @@
 
 
 /**
- * Helper for _mesa_meta_CopyTexImage1/2D() functions.
- * Have to be careful with locking and meta state for pixel transfer.
- */
-static void
-copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
-               GLenum internalFormat, GLint x, GLint y,
-               GLsizei width, GLsizei height, GLint border)
-{
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-   GLenum format, type;
-   GLint bpp;
-   void *buf;
-   struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer;
-
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   texImage = _mesa_get_tex_image(ctx, texObj, target, level);
-
-   /* Choose format/type for temporary image buffer */
-   format = _mesa_base_tex_format(ctx, internalFormat);
-
-   if (format == GL_LUMINANCE &&
-       _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) {
-      /* The glReadPixels() path will convert RGB to luminance by
-       * summing R+G+B.  glCopyTexImage() is supposed to behave as
-       * glCopyPixels, which doesn't do that change, and instead
-       * leaves it up to glTexImage which converts RGB to luminance by
-       * just taking the R channel.  To avoid glReadPixels() trashing
-       * our data, use RGBA for our temporary image.
-       */
-      format = GL_RGBA;
-   }
-
-   type = get_temp_image_type(ctx, format);
-   bpp = _mesa_bytes_per_pixel(format, type);
-   if (bpp <= 0) {
-      _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()");
-      return;
-   }
-
-   /*
-    * Alloc image buffer (XXX could use a PBO)
-    */
-   buf = malloc(width * height * bpp);
-   if (!buf) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
-      return;
-   }
-
-   _mesa_unlock_texture(ctx, texObj); /* need to unlock first */
-
-   /*
-    * Read image from framebuffer (disable pixel transfer ops)
-    */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
-   ctx->Driver.ReadPixels(ctx, x, y, width, height,
-			  format, type, &ctx->Pack, buf);
-   _mesa_meta_end(ctx);
-
-   if (texImage->Data) {
-      ctx->Driver.FreeTexImageData(ctx, texImage);
-   }
-
-   /* The texture's format was already chosen in _mesa_CopyTexImage() */
-   ASSERT(texImage->TexFormat != MESA_FORMAT_NONE);
-
-   /*
-    * Store texture data (with pixel transfer ops)
-    */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE);
-
-   _mesa_update_state(ctx); /* to update pixel transfer state */
-
-   if (target == GL_TEXTURE_1D) {
-      ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                             width, border, format, type,
-                             buf, &ctx->Unpack, texObj, texImage);
-   }
-   else {
-      ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                             width, height, border, format, type,
-                             buf, &ctx->Unpack, texObj, texImage);
-   }
-   _mesa_meta_end(ctx);
-
-   _mesa_lock_texture(ctx, texObj); /* re-lock */
-
-   free(buf);
-}
-
-
-void
-_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLint border)
-{
-   copy_tex_image(ctx, 1, target, level, internalFormat, x, y,
-                  width, 1, border);
-}
-
-
-void
-_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLsizei height, GLint border)
-{
-   copy_tex_image(ctx, 2, target, level, internalFormat, x, y,
-                  width, height, border);
-}
-
-
-
-/**
  * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions.
  * Have to be careful with locking and meta state for pixel transfer.
  */
@@ -2812,6 +2844,16 @@
 
    /* Choose format/type for temporary image buffer */
    format = _mesa_get_format_base_format(texImage->TexFormat);
+   if (format == GL_LUMINANCE ||
+       format == GL_LUMINANCE_ALPHA ||
+       format == GL_INTENSITY) {
+      /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the
+       * temp image buffer because glReadPixels will do L=R+G+B which is
+       * not what we want (should be L=R).
+       */
+      format = GL_RGBA;
+   }
+
    type = get_temp_image_type(ctx, format);
    bpp = _mesa_bytes_per_pixel(format, type);
    if (bpp <= 0) {
@@ -2833,7 +2875,7 @@
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, height,
 			  format, type, &ctx->Pack, buf);
    _mesa_meta_end(ctx);
@@ -2843,7 +2885,7 @@
    /*
     * Store texture data (with pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE);
    if (target == GL_TEXTURE_1D) {
       ctx->Driver.TexSubImage1D(ctx, target, level, xoffset,
                                 width, format, type, buf,
@@ -2915,7 +2957,7 @@
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, 1,
                           GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
 
@@ -2942,7 +2984,7 @@
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, 1,
                           GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
 
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index b0797d3..ac20e37 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -26,6 +26,33 @@
 #ifndef META_H
 #define META_H
 
+/**
+ * \name Flags for meta operations
+ * \{
+ *
+ * These flags are passed to _mesa_meta_begin().
+ */
+#define MESA_META_ALL                      ~0x0
+#define MESA_META_ALPHA_TEST                0x1
+#define MESA_META_BLEND                     0x2  /**< includes logicop */
+#define MESA_META_COLOR_MASK                0x4
+#define MESA_META_DEPTH_TEST                0x8
+#define MESA_META_FOG                      0x10
+#define MESA_META_PIXEL_STORE              0x20
+#define MESA_META_PIXEL_TRANSFER           0x40
+#define MESA_META_RASTERIZATION            0x80
+#define MESA_META_SCISSOR                 0x100
+#define MESA_META_SHADER                  0x200
+#define MESA_META_STENCIL_TEST            0x400
+#define MESA_META_TRANSFORM               0x800 /**< modelview/projection matrix state */
+#define MESA_META_TEXTURE                0x1000
+#define MESA_META_VERTEX                 0x2000
+#define MESA_META_VIEWPORT               0x4000
+#define MESA_META_CLAMP_FRAGMENT_COLOR   0x8000
+#define MESA_META_CLAMP_VERTEX_COLOR    0x10000
+#define MESA_META_CONDITIONAL_RENDER    0x20000
+#define MESA_META_CLIP                  0x40000
+/**\}*/
 
 extern void
 _mesa_meta_init(struct gl_context *ctx);
@@ -34,6 +61,12 @@
 _mesa_meta_free(struct gl_context *ctx);
 
 extern void
+_mesa_meta_begin(struct gl_context *ctx, GLbitfield state);
+
+extern void
+_mesa_meta_end(struct gl_context *ctx);
+
+extern void
 _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
                            GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                            GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
@@ -43,6 +76,9 @@
 _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers);
 
 extern void
+_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers);
+
+extern void
 _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
                       GLsizei width, GLsizei height,
                       GLint dstx, GLint dsty, GLenum type);
@@ -69,16 +105,6 @@
                           struct gl_texture_object *texObj);
 
 extern void
-_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLint border);
-
-extern void
-_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLsizei height, GLint border);
-
-extern void
 _mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level,
                              GLint xoffset,
                              GLint x, GLint y, GLsizei width);
diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index 77967ac..12dd31b 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -567,7 +567,7 @@
     } else
 	defaultVal = attrVal[OA_DEFAULT];
     if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal))
-	XML_FATAL ("illegal default value: %s.", defaultVal);
+	XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal);
 
     if (attrVal[OA_VALID]) {
 	if (cache->info[opt].type == DRI_BOOL)
diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h
index 587517e..ffea430 100644
--- a/src/mesa/drivers/dri/common/xmlpool.h
+++ b/src/mesa/drivers/dri/common/xmlpool.h
@@ -60,7 +60,7 @@
 #define DRI_CONF_OPT_BEGIN(name,type,def) \
 "<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n"
 
-/** \brief Begin an option definition with qouted default value */
+/** \brief Begin an option definition with quoted default value */
 #define DRI_CONF_OPT_BEGIN_Q(name,type,def) \
 "<option name=\""#name"\" type=\""#type"\" default="#def">\n"
 
diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h
index d765955..1e584ba 100644
--- a/src/mesa/drivers/dri/common/xmlpool/options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/options.h
@@ -425,6 +425,66 @@
         DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(de,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(es,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(nl,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(fr,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(sv,"A post-processing filter to cel-shade the output") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the red channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the green channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the blue channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+DRI_CONF_OPT_END
+
 #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
 DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
         DRI_CONF_DESC(en,"Number of texture units used") \
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 5fd6ec6..2427aa7 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -191,6 +191,36 @@
         DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps")) \
+DRI_CONF_OPT_END
+
 #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
 DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
         DRI_CONF_DESC(en,gettext("Number of texture units used")) \
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index 6d43726..ed5286f 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -881,6 +881,12 @@
       i830_update_provoking_vertex(&intel->ctx);
 }
 
+static bool
+i830_is_hiz_depth_format(struct intel_context *intel, gl_format format)
+{
+   return false;
+}
+
 void
 i830InitVtbl(struct i830_context *i830)
 {
@@ -898,4 +904,5 @@
    i830->intel.vtbl.finish_batch = intel_finish_vb;
    i830->intel.vtbl.invalidate_state = i830_invalidate_state;
    i830->intel.vtbl.render_target_supported = i830_render_target_supported;
+   i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format;
 }
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 6e1d709..d155b85 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -175,10 +175,8 @@
    case PROGRAM_STATE_VAR:
    case PROGRAM_NAMED_PARAM:
    case PROGRAM_UNIFORM:
-      src =
-         i915_emit_param4fv(p,
-                            program->Base.Parameters->ParameterValues[source->
-                                                                      Index]);
+      src = i915_emit_param4fv(p,
+	 &program->Base.Parameters->ParameterValues[source->Index][0].f);
       break;
 
    default:
@@ -303,7 +301,7 @@
 /* 
  * TODO: consider moving this into core 
  */
-static void calc_live_regs( struct i915_fragment_program *p )
+static bool calc_live_regs( struct i915_fragment_program *p )
 {
     const struct gl_fragment_program *program = &p->FragProg;
     GLuint regsUsed = 0xffff0000;
@@ -317,6 +315,9 @@
 
         /* Register is written to: unmark as live for this and preceeding ops */ 
         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+	    if (inst->DstReg.Index > 16)
+	       return false;
+
             live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
             if (live_components[inst->DstReg.Index] == 0)
                 regsUsed &= ~(1 << inst->DstReg.Index);
@@ -327,6 +328,9 @@
             if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
                 unsigned c;
 
+		if (inst->SrcReg[a].Index > 16)
+		   return false;
+
                 regsUsed |= 1 << inst->SrcReg[a].Index;
 
                 for (c = 0; c < 4; c++) {
@@ -340,6 +344,8 @@
 
         p->usedRegs[i] = regsUsed;
     }
+
+    return true;
 }
 
 static GLuint get_live_regs( struct i915_fragment_program *p, 
@@ -394,7 +400,10 @@
 
    /* Not always needed:
     */
-   calc_live_regs(p);
+   if (!calc_live_regs(p)) {
+      i915_program_error(p, "Could not allocate registers");
+      return;
+   }
 
    while (1) {
       GLuint src0, src1, src2, flags;
diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c
index ca1949b..0a600d3 100644
--- a/src/mesa/drivers/dri/i915/i915_program.c
+++ b/src/mesa/drivers/dri/i915/i915_program.c
@@ -442,14 +442,16 @@
 void
 i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
 {
-   va_list args;
+   if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) {
+      va_list args;
 
-   fprintf(stderr, "i915_program_error: ");
-   va_start(args, fmt);
-   vfprintf(stderr, fmt, args);
-   va_end(args);
+      fprintf(stderr, "i915_program_error: ");
+      va_start(args, fmt);
+      vfprintf(stderr, fmt, args);
+      va_end(args);
 
-   fprintf(stderr, "\n");
+      fprintf(stderr, "\n");
+   }
    p->error = 1;
 }
 
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 44f28cd..d9c885d 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -124,7 +124,11 @@
 	brw_fs_reg_allocate.cpp \
 	brw_fs_schedule_instructions.cpp \
 	brw_fs_vector_splitting.cpp \
-	brw_shader.cpp
+	brw_shader.cpp \
+	brw_vec4.cpp \
+	brw_vec4_emit.cpp \
+	brw_vec4_reg_allocate.cpp \
+	brw_vec4_visitor.cpp
 
 ASM_SOURCES = 
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 471015c..df63fe1 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -212,6 +212,7 @@
    AUB_TRACE_BINDING_TABLE =		0x101,
    AUB_TRACE_SURFACE_STATE =		0x102,
    AUB_TRACE_VS_CONSTANTS =		0x103,
+   AUB_TRACE_WM_CONSTANTS =		0x104,
 };
 
 /** Subclass of Mesa vertex program */
@@ -247,6 +248,7 @@
    PARAM_CONVERT_F2I,
    PARAM_CONVERT_F2U,
    PARAM_CONVERT_F2B,
+   PARAM_CONVERT_ZERO,
 };
 
 /* Data about a particular attempt to compile a program.  Note that
@@ -310,12 +312,20 @@
    GLuint total_grf;
    GLbitfield64 outputs_written;
    GLuint nr_params;       /**< number of float params/constants */
+   GLuint total_scratch;
 
    GLuint inputs_read;
 
    /* Used for calculating urb partitions:
     */
    GLuint urb_entry_size;
+
+   const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+   enum param_conversion param_convert[MAX_UNIFORMS * 4];
+   const float *pull_param[MAX_UNIFORMS * 4];
+   enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
+
+   bool uses_new_param_layout;
 };
 
 
@@ -528,7 +538,7 @@
        * the CURBE, the depth buffer, and a query BO.
        */
       drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
-      int validated_bo_count;
+      unsigned int validated_bo_count;
    } state;
 
    struct brw_cache cache;
@@ -662,6 +672,7 @@
       struct brw_vs_prog_data *prog_data;
       int8_t *constant_map; /* variable array following prog_data */
 
+      drm_intel_bo *scratch_bo;
       drm_intel_bo *const_bo;
       /** Offset in the program cache to the VS program */
       uint32_t prog_offset;
@@ -674,6 +685,23 @@
 
       uint32_t push_const_offset; /* Offset in the batchbuffer */
       int push_const_size; /* in 256-bit register increments */
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /**
+       * Array of the ra classes for the unaligned contiguous register
+       * block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+      */
+      uint8_t *ra_reg_to_grf;
+      /** @} */
    } vs;
 
    struct {
@@ -726,7 +754,6 @@
       GLuint render_surf;
       GLuint nr_surfaces;      
 
-      GLuint max_threads;
       drm_intel_bo *scratch_bo;
 
       GLuint sampler_count;
@@ -747,6 +774,29 @@
        * Pre-gen6, push constants live in the CURBE.
        */
       uint32_t push_const_offset;
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /** Array of the ra classes for the unaligned contiguous
+       * register block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+      */
+      uint8_t *ra_reg_to_grf;
+
+      /**
+       * ra class for the aligned pairs we use for PLN, which doesn't
+       * appear in *classes.
+       */
+      int aligned_pairs_class;
+
+      /** @} */
    } wm;
 
 
@@ -827,6 +877,10 @@
  */
 void brwInitFragProgFuncs( struct dd_function_table *functions );
 
+int brw_get_scratch_size(int size);
+void brw_get_scratch_bo(struct intel_context *intel,
+			drm_intel_bo **scratch_bo, int size);
+
 
 /* brw_urb.c
  */
@@ -874,7 +928,7 @@
 }
 
 static inline
-float convert_param(enum param_conversion conversion, float param)
+float convert_param(enum param_conversion conversion, const float *param)
 {
    union {
       float f;
@@ -884,21 +938,23 @@
 
    switch (conversion) {
    case PARAM_NO_CONVERT:
-      return param;
+      return *param;
    case PARAM_CONVERT_F2I:
-      fi.i = param;
+      fi.i = *param;
       return fi.f;
    case PARAM_CONVERT_F2U:
-      fi.u = param;
+      fi.u = *param;
       return fi.f;
    case PARAM_CONVERT_F2B:
-      if (param != 0.0)
+      if (*param != 0.0)
 	 fi.i = 1;
       else
 	 fi.i = 0;
       return fi.f;
+   case PARAM_CONVERT_ZERO:
+      return 0.0;
    default:
-      return param;
+      return *param;
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index ae11c48..960be10 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -203,7 +203,7 @@
       /* copy float constants */
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
-					 *brw->wm.prog_data->param[i]);
+					 brw->wm.prog_data->param[i]);
       }
    }
 
@@ -244,15 +244,22 @@
       GLuint offset = brw->curbe.vs_start * 16;
       GLuint nr = brw->vs.prog_data->nr_params / 4;
 
-      /* Load the subset of push constants that will get used when
-       * we also have a pull constant buffer.
-       */
-      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
-	 if (brw->vs.constant_map[i] != -1) {
-	    assert(brw->vs.constant_map[i] <= nr);
-	    memcpy(buf + offset + brw->vs.constant_map[i] * 4,
-		   vp->program.Base.Parameters->ParameterValues[i],
-		   4 * sizeof(float));
+      if (brw->vs.prog_data->uses_new_param_layout) {
+	 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+	    buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i],
+					    brw->vs.prog_data->param[i]);
+	 }
+      } else {
+	 /* Load the subset of push constants that will get used when
+	  * we also have a pull constant buffer.
+	  */
+	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+	    if (brw->vs.constant_map[i] != -1) {
+	       assert(brw->vs.constant_map[i] <= nr);
+	       memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+		      vp->program.Base.Parameters->ParameterValues[i],
+		      4 * sizeof(float));
+	    }
 	 }
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0a3027d..d1799c0 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -557,58 +557,93 @@
 #define BRW_WE_ALL		1
 /** @} */
 
-#define BRW_OPCODE_MOV        1
-#define BRW_OPCODE_SEL        2
-#define BRW_OPCODE_NOT        4
-#define BRW_OPCODE_AND        5
-#define BRW_OPCODE_OR         6
-#define BRW_OPCODE_XOR        7
-#define BRW_OPCODE_SHR        8
-#define BRW_OPCODE_SHL        9
-#define BRW_OPCODE_RSR        10
-#define BRW_OPCODE_RSL        11
-#define BRW_OPCODE_ASR        12
-#define BRW_OPCODE_CMP        16
-#define BRW_OPCODE_CMPN       17
-#define BRW_OPCODE_JMPI       32
-#define BRW_OPCODE_IF         34
-#define BRW_OPCODE_IFF        35
-#define BRW_OPCODE_ELSE       36
-#define BRW_OPCODE_ENDIF      37
-#define BRW_OPCODE_DO         38
-#define BRW_OPCODE_WHILE      39
-#define BRW_OPCODE_BREAK      40
-#define BRW_OPCODE_CONTINUE   41
-#define BRW_OPCODE_HALT       42
-#define BRW_OPCODE_MSAVE      44
-#define BRW_OPCODE_MRESTORE   45
-#define BRW_OPCODE_PUSH       46
-#define BRW_OPCODE_POP        47
-#define BRW_OPCODE_WAIT       48
-#define BRW_OPCODE_SEND       49
-#define BRW_OPCODE_SENDC      50
-#define BRW_OPCODE_MATH       56
-#define BRW_OPCODE_ADD        64
-#define BRW_OPCODE_MUL        65
-#define BRW_OPCODE_AVG        66
-#define BRW_OPCODE_FRC        67
-#define BRW_OPCODE_RNDU       68
-#define BRW_OPCODE_RNDD       69
-#define BRW_OPCODE_RNDE       70
-#define BRW_OPCODE_RNDZ       71
-#define BRW_OPCODE_MAC        72
-#define BRW_OPCODE_MACH       73
-#define BRW_OPCODE_LZD        74
-#define BRW_OPCODE_SAD2       80
-#define BRW_OPCODE_SADA2      81
-#define BRW_OPCODE_DP4        84
-#define BRW_OPCODE_DPH        85
-#define BRW_OPCODE_DP3        86
-#define BRW_OPCODE_DP2        87
-#define BRW_OPCODE_DPA2       88
-#define BRW_OPCODE_LINE       89
-#define BRW_OPCODE_PLN        90
-#define BRW_OPCODE_NOP        126
+enum opcode {
+   /* These are the actual hardware opcodes. */
+   BRW_OPCODE_MOV =	1,
+   BRW_OPCODE_SEL =	2,
+   BRW_OPCODE_NOT =	4,
+   BRW_OPCODE_AND =	5,
+   BRW_OPCODE_OR =	6,
+   BRW_OPCODE_XOR =	7,
+   BRW_OPCODE_SHR =	8,
+   BRW_OPCODE_SHL =	9,
+   BRW_OPCODE_RSR =	10,
+   BRW_OPCODE_RSL =	11,
+   BRW_OPCODE_ASR =	12,
+   BRW_OPCODE_CMP =	16,
+   BRW_OPCODE_CMPN =	17,
+   BRW_OPCODE_JMPI =	32,
+   BRW_OPCODE_IF =	34,
+   BRW_OPCODE_IFF =	35,
+   BRW_OPCODE_ELSE =	36,
+   BRW_OPCODE_ENDIF =	37,
+   BRW_OPCODE_DO =	38,
+   BRW_OPCODE_WHILE =	39,
+   BRW_OPCODE_BREAK =	40,
+   BRW_OPCODE_CONTINUE = 41,
+   BRW_OPCODE_HALT =	42,
+   BRW_OPCODE_MSAVE =	44,
+   BRW_OPCODE_MRESTORE = 45,
+   BRW_OPCODE_PUSH =	46,
+   BRW_OPCODE_POP =	47,
+   BRW_OPCODE_WAIT =	48,
+   BRW_OPCODE_SEND =	49,
+   BRW_OPCODE_SENDC =	50,
+   BRW_OPCODE_MATH =	56,
+   BRW_OPCODE_ADD =	64,
+   BRW_OPCODE_MUL =	65,
+   BRW_OPCODE_AVG =	66,
+   BRW_OPCODE_FRC =	67,
+   BRW_OPCODE_RNDU =	68,
+   BRW_OPCODE_RNDD =	69,
+   BRW_OPCODE_RNDE =	70,
+   BRW_OPCODE_RNDZ =	71,
+   BRW_OPCODE_MAC =	72,
+   BRW_OPCODE_MACH =	73,
+   BRW_OPCODE_LZD =	74,
+   BRW_OPCODE_SAD2 =	80,
+   BRW_OPCODE_SADA2 =	81,
+   BRW_OPCODE_DP4 =	84,
+   BRW_OPCODE_DPH =	85,
+   BRW_OPCODE_DP3 =	86,
+   BRW_OPCODE_DP2 =	87,
+   BRW_OPCODE_DPA2 =	88,
+   BRW_OPCODE_LINE =	89,
+   BRW_OPCODE_PLN =	90,
+   BRW_OPCODE_NOP =	126,
+
+   /* These are compiler backend opcodes that get translated into other
+    * instructions.
+    */
+   FS_OPCODE_FB_WRITE = 128,
+   SHADER_OPCODE_RCP,
+   SHADER_OPCODE_RSQ,
+   SHADER_OPCODE_SQRT,
+   SHADER_OPCODE_EXP2,
+   SHADER_OPCODE_LOG2,
+   SHADER_OPCODE_POW,
+   SHADER_OPCODE_SIN,
+   SHADER_OPCODE_COS,
+   FS_OPCODE_DDX,
+   FS_OPCODE_DDY,
+   FS_OPCODE_PIXEL_X,
+   FS_OPCODE_PIXEL_Y,
+   FS_OPCODE_CINTERP,
+   FS_OPCODE_LINTERP,
+   FS_OPCODE_TEX,
+   FS_OPCODE_TXB,
+   FS_OPCODE_TXD,
+   FS_OPCODE_TXL,
+   FS_OPCODE_TXS,
+   FS_OPCODE_DISCARD,
+   FS_OPCODE_SPILL,
+   FS_OPCODE_UNSPILL,
+   FS_OPCODE_PULL_CONSTANT_LOAD,
+
+   VS_OPCODE_URB_WRITE,
+   VS_OPCODE_SCRATCH_READ,
+   VS_OPCODE_SCRATCH_WRITE,
+};
 
 #define BRW_PREDICATE_NONE             0
 #define BRW_PREDICATE_NORMAL           1
@@ -734,7 +769,6 @@
 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
-#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
 #define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
@@ -747,6 +781,7 @@
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
 
 /* for GEN5 only */
 #define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index af41c84..927b0b4 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -309,6 +309,35 @@
     [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
 };
 
+char *target_function_gen6[16] = {
+    [BRW_MESSAGE_TARGET_NULL] = "null",
+    [BRW_MESSAGE_TARGET_MATH] = "math",
+    [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+    [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+    [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler",
+    [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render",
+    [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const",
+    [BRW_MESSAGE_TARGET_URB] = "urb",
+    [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *dp_rc_msg_type_gen6[16] = {
+    [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+    [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+    [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+    [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
 char *math_function[16] = {
     [BRW_MATH_FUNCTION_INV] = "inv",
     [BRW_MATH_FUNCTION_LOG] = "log",
@@ -927,8 +956,14 @@
 	newline (file);
 	pad (file, 16);
 	space = 0;
-	err |= control (file, "target function", target_function,
-			target, &space);
+
+	if (gen >= 6) {
+	   err |= control (file, "target function", target_function_gen6,
+			   target, &space);
+	} else {
+	   err |= control (file, "target function", target_function,
+			   target, &space);
+	}
 
 	switch (target) {
 	case BRW_MESSAGE_TARGET_MATH:
@@ -985,9 +1020,16 @@
 			inst->bits3.dp_read.msg_type);
 	    }
 	    break;
+
 	case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
 	    if (gen >= 6) {
-		format (file, " (%d, %d, %d, %d, %d, %d)",
+		format (file, " (");
+
+		err |= control (file, "DP rc message type",
+				dp_rc_msg_type_gen6,
+				inst->bits3.gen6_dp.msg_type, &space);
+
+		format (file, ", %d, %d, %d, %d, %d, %d)",
 			inst->bits3.gen6_dp.binding_table_index,
 			inst->bits3.gen6_dp.msg_control,
 			inst->bits3.gen6_dp.msg_type,
@@ -1003,6 +1045,7 @@
 			inst->bits3.dp_write.send_commit_msg);
 	    }
 	    break;
+
 	case BRW_MESSAGE_TARGET_URB:
 	    if (gen >= 5) {
 		format (file, " %d", inst->bits3.urb_gen5.offset);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 56a46ce..7bc69c6 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -689,17 +689,17 @@
        * rebase it into a temporary.
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
-           GLubyte *map = ctx->Driver.MapBuffer(ctx,
-                                                GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                                GL_DYNAMIC_DRAW_ARB,
-                                                bufferobj);
-           map += offset;
+           GLubyte *map = ctx->Driver.MapBufferRange(ctx,
+						     offset,
+						     ib_size,
+						     GL_MAP_WRITE_BIT,
+						     bufferobj);
 
 	   intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
 			     &bo, &offset);
 	   brw->ib.start_vertex_offset = offset / ib_type_size;
 
-           ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+           ctx->Driver.UnmapBuffer(ctx, bufferobj);
        } else {
 	  /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
 	   * the index buffer state when we're just moving the start index
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 72d50ea..af50305 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -44,6 +44,9 @@
 #define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
 #define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
 #define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
 
 
@@ -798,6 +801,12 @@
 		      void *mem_ctx);
 const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
 
+struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+		  struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+		  struct brw_reg reg);
+
 
 /* Helpers for regular instructions:
  */
@@ -852,6 +861,27 @@
 
 /* Helpers for SEND instruction:
  */
+void brw_set_dp_read_message(struct brw_compile *p,
+			     struct brw_instruction *insn,
+			     GLuint binding_table_index,
+			     GLuint msg_control,
+			     GLuint msg_type,
+			     GLuint target_cache,
+			     GLuint msg_length,
+			     GLuint response_length);
+
+void brw_set_dp_write_message(struct brw_compile *p,
+			      struct brw_instruction *insn,
+			      GLuint binding_table_index,
+			      GLuint msg_control,
+			      GLuint msg_type,
+			      GLuint msg_length,
+			      GLboolean header_present,
+			      GLuint pixel_scoreboard_clear,
+			      GLuint response_length,
+			      GLuint end_of_thread,
+			      GLuint send_commit_msg);
+
 void brw_urb_WRITE(struct brw_compile *p,
 		   struct brw_reg dest,
 		   GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index e7370f3..c5013de 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -89,9 +89,9 @@
 }
 
 
-static void brw_set_dest(struct brw_compile *p,
-			 struct brw_instruction *insn,
-			 struct brw_reg dest)
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+	     struct brw_reg dest)
 {
    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
        dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -221,9 +221,9 @@
    /* 10. Check destination issues. */
 }
 
-static void brw_set_src0(struct brw_compile *p,
-			 struct brw_instruction *insn,
-			 struct brw_reg reg)
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+	     struct brw_reg reg)
 {
    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
@@ -504,17 +504,18 @@
     }
 }
 
-static void brw_set_dp_write_message( struct brw_compile *p,
-				      struct brw_instruction *insn,
-				      GLuint binding_table_index,
-				      GLuint msg_control,
-				      GLuint msg_type,
-				      GLuint msg_length,
-				      GLboolean header_present,
-				      GLuint pixel_scoreboard_clear,
-				      GLuint response_length,
-				      GLuint end_of_thread,
-				      GLuint send_commit_msg)
+void
+brw_set_dp_write_message(struct brw_compile *p,
+			 struct brw_instruction *insn,
+			 GLuint binding_table_index,
+			 GLuint msg_control,
+			 GLuint msg_type,
+			 GLuint msg_length,
+			 GLboolean header_present,
+			 GLuint pixel_scoreboard_clear,
+			 GLuint response_length,
+			 GLuint end_of_thread,
+			 GLuint send_commit_msg)
 {
    struct brw_context *brw = p->brw;
    struct intel_context *intel = &brw->intel;
@@ -570,7 +571,7 @@
    }
 }
 
-static void
+void
 brw_set_dp_read_message(struct brw_compile *p,
 			struct brw_instruction *insn,
 			GLuint binding_table_index,
@@ -709,9 +710,9 @@
 }
 
 
-
-static struct brw_instruction *next_insn( struct brw_compile *p, 
-					  GLuint opcode )
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, GLuint opcode)
 {
    struct brw_instruction *insn;
 
@@ -732,7 +733,6 @@
    return insn;
 }
 
-
 static struct brw_instruction *brw_alu1( struct brw_compile *p,
 					 GLuint opcode,
 					 struct brw_reg dest,
@@ -1341,8 +1341,7 @@
       brw_set_src1(p, insn, brw_imm_ud(0));
       insn->bits3.break_cont.jip = br * (do_insn - insn);
 
-      insn->header.execution_size = do_insn->header.execution_size;
-      assert(insn->header.execution_size == BRW_EXECUTE_8);
+      insn->header.execution_size = BRW_EXECUTE_8;
    } else if (intel->gen == 6) {
       insn = next_insn(p, BRW_OPCODE_WHILE);
 
@@ -1351,8 +1350,7 @@
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
 
-      insn->header.execution_size = do_insn->header.execution_size;
-      assert(insn->header.execution_size == BRW_EXECUTE_8);
+      insn->header.execution_size = BRW_EXECUTE_8;
    } else {
       if (p->single_program_flow) {
 	 insn = next_insn(p, BRW_OPCODE_ADD);
@@ -2246,10 +2244,13 @@
 
    if (intel->gen == 7) {
       /* Enable Channel Masks in the URB_WRITE_HWORD message header */
+      brw_push_insn_state(p);
+      brw_set_access_mode(p, BRW_ALIGN_1);
       brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
 		       BRW_REGISTER_TYPE_UD),
 	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
 		brw_imm_ud(0xff00));
+      brw_pop_insn_state(p);
    }
 
    insn = next_insn(p, BRW_OPCODE_SEND);
@@ -2311,7 +2312,7 @@
       if (insn->header.opcode == BRW_OPCODE_WHILE) {
 	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
 				   : insn->bits3.break_cont.jip;
-	 if (ip + jip / br < start)
+	 if (ip + jip / br <= start)
 	    return ip;
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b5ea943..0b0445e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -143,20 +143,21 @@
       return 0;
 
    switch (inst->opcode) {
-   case FS_OPCODE_RCP:
-   case FS_OPCODE_RSQ:
-   case FS_OPCODE_SQRT:
-   case FS_OPCODE_EXP2:
-   case FS_OPCODE_LOG2:
-   case FS_OPCODE_SIN:
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
       return 1 * c->dispatch_width / 8;
-   case FS_OPCODE_POW:
+   case SHADER_OPCODE_POW:
       return 2 * c->dispatch_width / 8;
    case FS_OPCODE_TEX:
    case FS_OPCODE_TXB:
    case FS_OPCODE_TXD:
    case FS_OPCODE_TXL:
+   case FS_OPCODE_TXS:
       return 1;
    case FS_OPCODE_FB_WRITE:
       return 2;
@@ -181,29 +182,26 @@
 	 virtual_grf_array_size *= 2;
       virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
 				   virtual_grf_array_size);
-
-      /* This slot is always unused. */
-      virtual_grf_sizes[0] = 0;
    }
    virtual_grf_sizes[virtual_grf_next] = size;
    return virtual_grf_next++;
 }
 
 /** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg)
+fs_reg::fs_reg(enum register_file file, int reg)
 {
    init();
    this->file = file;
-   this->hw_reg = hw_reg;
+   this->reg = reg;
    this->type = BRW_REGISTER_TYPE_F;
 }
 
 /** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
+fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
 {
    init();
    this->file = file;
-   this->hw_reg = hw_reg;
+   this->reg = reg;
    this->type = type;
 }
 
@@ -242,11 +240,12 @@
  * This brings in those uniform definitions
  */
 void
-fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+fs_visitor::import_uniforms(fs_visitor *v)
 {
-   hash_table_call_foreach(src_variable_ht,
+   hash_table_call_foreach(v->variable_ht,
 			   import_uniforms_callback,
 			   variable_ht);
+   this->params_remap = v->params_remap;
 }
 
 /* Our support for uniforms is piggy-backed on the struct
@@ -281,23 +280,27 @@
 
 	 assert(param < ARRAY_SIZE(c->prog_data.param));
 
-	 switch (type->base_type) {
-	 case GLSL_TYPE_FLOAT:
+	 if (ctx->Const.NativeIntegers) {
 	    c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
-	    break;
-	 case GLSL_TYPE_UINT:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
-	    break;
-	 case GLSL_TYPE_INT:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
-	    break;
-	 case GLSL_TYPE_BOOL:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
-	    break;
-	 default:
-	    assert(!"not reached");
-	    c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
-	    break;
+	 } else {
+	    switch (type->base_type) {
+	    case GLSL_TYPE_FLOAT:
+	       c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+	       break;
+	    case GLSL_TYPE_UINT:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+	       break;
+	    case GLSL_TYPE_INT:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+	       break;
+	    case GLSL_TYPE_BOOL:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+	       break;
+	    default:
+	       assert(!"not reached");
+	       c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+	       break;
+	    }
 	 }
 	 this->param_index[param] = loc;
 	 this->param_offset[param] = i;
@@ -463,9 +466,21 @@
 	 } else {
 	    /* Perspective interpolation case. */
 	    for (unsigned int k = 0; k < type->vector_elements; k++) {
-	       struct brw_reg interp = interp_reg(location, k);
-	       emit(FS_OPCODE_LINTERP, attr,
-		    this->delta_x, this->delta_y, fs_reg(interp));
+	       /* FINISHME: At some point we probably want to push
+		* this farther by giving similar treatment to the
+		* other potentially constant components of the
+		* attribute, as well as making brw_vs_constval.c
+		* handle varyings other than gl_TexCoord.
+		*/
+	       if (location >= FRAG_ATTRIB_TEX0 &&
+		   location <= FRAG_ATTRIB_TEX7 &&
+		   k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
+		  emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
+	       } else {
+		  struct brw_reg interp = interp_reg(location, k);
+		  emit(FS_OPCODE_LINTERP, attr,
+		       this->delta_x, this->delta_y, fs_reg(interp));
+	       }
 	       attr.reg_offset++;
 	    }
 
@@ -512,16 +527,16 @@
 }
 
 fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
 {
    switch (opcode) {
-   case FS_OPCODE_RCP:
-   case FS_OPCODE_RSQ:
-   case FS_OPCODE_SQRT:
-   case FS_OPCODE_EXP2:
-   case FS_OPCODE_LOG2:
-   case FS_OPCODE_SIN:
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
       break;
    default:
       assert(!"not reached: bad math opcode");
@@ -555,12 +570,12 @@
 }
 
 fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
 {
    int base_mrf = 2;
    fs_inst *inst;
 
-   assert(opcode == FS_OPCODE_POW);
+   assert(opcode == SHADER_OPCODE_POW);
 
    if (intel->gen >= 6) {
       /* Can't do hstride == 0 args to gen6 math, so expand it out.
@@ -605,7 +620,7 @@
    /* Set up the pointers to ParamValues now that that array is finalized. */
    for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
       c->prog_data.param[i] =
-	 fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+	 (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] +
 	 this->param_offset[i];
    }
 }
@@ -621,12 +636,12 @@
    }
 
    /* Map the offsets in the UNIFORM file to fixed HW regs. */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == UNIFORM) {
-	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	    int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 	    struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
 						  constant_nr / 8,
 						  constant_nr % 8);
@@ -684,8 +699,8 @@
    /* Offset all the urb_setup[] index by the actual position of the
     * setup regs, now that the location of the constants has been chosen.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == FS_OPCODE_LINTERP) {
 	 assert(inst->src[2].file == FIXED_HW_REG);
@@ -739,8 +754,8 @@
       split_grf[this->delta_x.reg] = false;
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       /* Texturing produces 4 contiguous registers, so no splitting. */
       if (inst->is_tex()) {
@@ -763,8 +778,8 @@
       }
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->dst.file == GRF &&
 	  split_grf[inst->dst.reg] &&
@@ -786,6 +801,86 @@
    this->live_intervals_valid = false;
 }
 
+bool
+fs_visitor::remove_dead_constants()
+{
+   if (c->dispatch_width == 8) {
+      this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
+	 this->params_remap[i] = -1;
+
+      /* Find which params are still in use. */
+      foreach_list(node, &this->instructions) {
+	 fs_inst *inst = (fs_inst *)node;
+
+	 for (int i = 0; i < 3; i++) {
+	    int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+	    if (inst->src[i].file != UNIFORM)
+	       continue;
+
+	    assert(constant_nr < (int)c->prog_data.nr_params);
+
+	    /* For now, set this to non-negative.  We'll give it the
+	     * actual new number in a moment, in order to keep the
+	     * register numbers nicely ordered.
+	     */
+	    this->params_remap[constant_nr] = 0;
+	 }
+      }
+
+      /* Figure out what the new numbers for the params will be.  At some
+       * point when we're doing uniform array access, we're going to want
+       * to keep the distinction between .reg and .reg_offset, but for
+       * now we don't care.
+       */
+      unsigned int new_nr_params = 0;
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+	 if (this->params_remap[i] != -1) {
+	    this->params_remap[i] = new_nr_params++;
+	 }
+      }
+
+      /* Update the list of params to be uploaded to match our new numbering. */
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+	 int remapped = this->params_remap[i];
+
+	 if (remapped == -1)
+	    continue;
+
+	 /* We've already done setup_paramvalues_refs() so no need to worry
+	  * about param_index and param_offset.
+	  */
+	 c->prog_data.param[remapped] = c->prog_data.param[i];
+	 c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i];
+      }
+
+      c->prog_data.nr_params = new_nr_params;
+   } else {
+      /* This should have been generated in the 8-wide pass already. */
+      assert(this->params_remap);
+   }
+
+   /* Now do the renumbering of the shader to remove unused params. */
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      for (int i = 0; i < 3; i++) {
+	 int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+	 if (inst->src[i].file != UNIFORM)
+	    continue;
+
+	 assert(this->params_remap[constant_nr] != -1);
+	 inst->src[i].reg = this->params_remap[constant_nr];
+	 inst->src[i].reg_offset = 0;
+      }
+   }
+
+   return true;
+}
+
 /**
  * Choose accesses from the UNIFORM file to demote to using the pull
  * constant buffer.
@@ -815,14 +910,14 @@
    int pull_uniform_base = max_uniform_components;
    int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (int i = 0; i < 3; i++) {
 	 if (inst->src[i].file != UNIFORM)
 	    continue;
 
-	 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	 int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
 	 if (uniform_nr < pull_uniform_base)
 	    continue;
 
@@ -871,8 +966,8 @@
    }
 
    int ip = 0;
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == BRW_OPCODE_DO) {
 	 if (loop_depth++ == 0)
@@ -892,7 +987,7 @@
 	 }
       } else {
 	 for (unsigned int i = 0; i < 3; i++) {
-	    if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
+	    if (inst->src[i].file == GRF) {
 	       int reg = inst->src[i].reg;
 
 	       if (!loop_depth) {
@@ -908,7 +1003,7 @@
 	       }
 	    }
 	 }
-	 if (inst->dst.file == GRF && inst->dst.reg != 0) {
+	 if (inst->dst.file == GRF) {
 	    int reg = inst->dst.reg;
 
 	    if (!loop_depth) {
@@ -945,8 +1040,8 @@
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode != BRW_OPCODE_MOV ||
 	  inst->predicated ||
@@ -965,11 +1060,9 @@
       /* Found a move of a constant to a GRF.  Find anything else using the GRF
        * before it's written, and replace it with the constant if we can.
        */
-      exec_list_iterator scan_iter = iter;
-      scan_iter.next();
-      for (; scan_iter.has_next(); scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+      for (fs_inst *scan_inst = (fs_inst *)inst->next;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 if (scan_inst->opcode == BRW_OPCODE_DO ||
 	     scan_inst->opcode == BRW_OPCODE_WHILE ||
 	     scan_inst->opcode == BRW_OPCODE_ELSE ||
@@ -1046,6 +1139,24 @@
 		  progress = true;
 	       }
 	       break;
+
+	    case SHADER_OPCODE_RCP:
+	       /* The hardware doesn't do math on immediate values
+		* (because why are you doing that, seriously?), but
+		* the correct answer is to just constant fold it
+		* anyway.
+		*/
+	       assert(i == 0);
+	       if (inst->src[0].imm.f != 0.0f) {
+		  scan_inst->opcode = BRW_OPCODE_MOV;
+		  scan_inst->src[0] = inst->src[0];
+		  scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
+		  progress = true;
+	       }
+	       break;
+
+	    default:
+	       break;
 	    }
 	 }
 
@@ -1063,6 +1174,49 @@
 
    return progress;
 }
+
+
+/**
+ * Attempts to move immediate constants into the immediate
+ * constant slot of following instructions.
+ *
+ * Immediate constants are a bit tricky -- they have to be in the last
+ * operand slot, you can't do abs/negate on them,
+ */
+
+bool
+fs_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   calculate_live_intervals();
+
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MUL:
+	 if (inst->src[1].file != IMM)
+	    continue;
+
+	 /* a * 1.0 = a */
+	 if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
+	     inst->src[1].imm.f == 1.0) {
+	    inst->opcode = BRW_OPCODE_MOV;
+	    inst->src[1] = reg_undef;
+	    progress = true;
+	    break;
+	 }
+
+	 break;
+      default:
+	 break;
+      }
+   }
+
+   return progress;
+}
+
 /**
  * Must be called after calculate_live_intervales() to remove unused
  * writes to registers -- register allocation will fail otherwise
@@ -1077,8 +1231,8 @@
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
 	 inst->remove();
@@ -1101,8 +1255,8 @@
    int if_depth = 0;
    int loop_depth = 0;
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       /* Make sure that we dominate the instructions we're going to
        * scan for interfering with our coalescing, or we won't have
@@ -1123,6 +1277,8 @@
       case BRW_OPCODE_ENDIF:
 	 if_depth--;
 	 break;
+      default:
+	 break;
       }
       if (loop_depth || if_depth)
 	 continue;
@@ -1130,7 +1286,8 @@
       if (inst->opcode != BRW_OPCODE_MOV ||
 	  inst->predicated ||
 	  inst->saturate ||
-	  inst->dst.file != GRF || inst->src[0].file != GRF ||
+	  inst->dst.file != GRF || (inst->src[0].file != GRF &&
+				    inst->src[0].file != UNIFORM)||
 	  inst->dst.type != inst->src[0].type)
 	 continue;
 
@@ -1141,11 +1298,10 @@
        * program.
        */
       bool interfered = false;
-      exec_list_iterator scan_iter = iter;
-      scan_iter.next();
-      for (; scan_iter.has_next(); scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
 
+      for (fs_inst *scan_inst = (fs_inst *)inst->next;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 if (scan_inst->dst.file == GRF) {
 	    if (scan_inst->dst.reg == inst->dst.reg &&
 		(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
@@ -1153,7 +1309,8 @@
 	       interfered = true;
 	       break;
 	    }
-	    if (scan_inst->dst.reg == inst->src[0].reg &&
+	    if (inst->src[0].file == GRF &&
+		scan_inst->dst.reg == inst->src[0].reg &&
 		(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
 		 scan_inst->is_tex())) {
 	       interfered = true;
@@ -1161,10 +1318,13 @@
 	    }
 	 }
 
-	 /* The gen6 MATH instruction can't handle source modifiers, so avoid
-	  * coalescing those for now.  We should do something more specific.
+	 /* The gen6 MATH instruction can't handle source modifiers or
+	  * unusual register regions, so avoid coalescing those for
+	  * now.  We should do something more specific.
 	  */
-	 if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
+	 if (intel->gen >= 6 &&
+	     scan_inst->is_math() &&
+	     (has_source_modifiers || inst->src[0].file == UNIFORM)) {
 	    interfered = true;
 	    break;
 	 }
@@ -1176,19 +1336,17 @@
       /* Rewrite the later usage to point at the source of the move to
        * be removed.
        */
-      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
-	   scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+      for (fs_inst *scan_inst = inst;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 for (int i = 0; i < 3; i++) {
 	    if (scan_inst->src[i].file == GRF &&
 		scan_inst->src[i].reg == inst->dst.reg &&
 		scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
-	       scan_inst->src[i].reg = inst->src[0].reg;
-	       scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
-	       scan_inst->src[i].abs |= inst->src[0].abs;
-	       scan_inst->src[i].negate ^= inst->src[0].negate;
-	       scan_inst->src[i].smear = inst->src[0].smear;
+	       fs_reg new_src = inst->src[0];
+	       new_src.negate ^= scan_inst->src[i].negate;
+	       new_src.abs |= scan_inst->src[i].abs;
+	       scan_inst->src[i] = new_src;
 	    }
 	 }
       }
@@ -1212,8 +1370,8 @@
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       int ip = next_ip;
       next_ip++;
@@ -1228,9 +1386,9 @@
       /* Work out which hardware MRF registers are written by this
        * instruction.
        */
-      int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+      int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
       int mrf_high;
-      if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+      if (inst->dst.reg & BRW_MRF_COMPR4) {
 	 mrf_high = mrf_low + 4;
       } else if (c->dispatch_width == 16 &&
 		 (!inst->force_uncompressed && !inst->force_sechalf)) {
@@ -1297,7 +1455,7 @@
 	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
 	       /* Found the creator of our MRF's source value. */
 	       scan_inst->dst.file = MRF;
-	       scan_inst->dst.hw_reg = inst->dst.hw_reg;
+	       scan_inst->dst.reg = inst->dst.reg;
 	       scan_inst->saturate |= inst->saturate;
 	       inst->remove();
 	       progress = true;
@@ -1334,10 +1492,10 @@
 	    /* If somebody else writes our MRF here, we can't
 	     * compute-to-MRF before that.
 	     */
-	    int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	    int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
 	    int scan_mrf_high;
 
-	    if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+	    if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
 	       scan_mrf_high = scan_mrf_low + 4;
 	    } else if (c->dispatch_width == 16 &&
 		       (!scan_inst->force_uncompressed &&
@@ -1392,8 +1550,8 @@
 
    memset(last_mrf_move, 0, sizeof(last_mrf_move));
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       switch (inst->opcode) {
       case BRW_OPCODE_DO:
@@ -1409,7 +1567,7 @@
 
       if (inst->opcode == BRW_OPCODE_MOV &&
 	  inst->dst.file == MRF) {
-	 fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+	 fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
 	 if (prev_inst && inst->equals(prev_inst)) {
 	    inst->remove();
 	    progress = true;
@@ -1419,7 +1577,7 @@
 
       /* Clear out the last-write records for MRFs that were overwritten. */
       if (inst->dst.file == MRF) {
-	 last_mrf_move[inst->dst.hw_reg] = NULL;
+	 last_mrf_move[inst->dst.reg] = NULL;
       }
 
       if (inst->mlen > 0) {
@@ -1445,7 +1603,7 @@
 	  inst->dst.file == MRF &&
 	  inst->src[0].file == GRF &&
 	  !inst->predicated) {
-	 last_mrf_move[inst->dst.hw_reg] = inst;
+	 last_mrf_move[inst->dst.reg] = inst;
       }
    }
 
@@ -1527,8 +1685,8 @@
       /* Generate FS IR for main().  (the visitor only descends into
        * functions called "main").
        */
-      foreach_iter(exec_list_iterator, iter, *shader->ir) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &*shader->ir) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);
@@ -1550,11 +1708,14 @@
 	 progress = remove_duplicate_mrf_writes() || progress;
 
 	 progress = propagate_constants() || progress;
+	 progress = opt_algebraic() || progress;
 	 progress = register_coalesce() || progress;
 	 progress = compute_to_mrf() || progress;
 	 progress = dead_code_eliminate() || progress;
       } while (progress);
 
+      remove_dead_constants();
+
       schedule_instructions();
 
       assign_curb_setup();
@@ -1563,7 +1724,7 @@
       if (0) {
 	 /* Debug of register spilling: Go spill everything. */
 	 int virtual_grf_count = virtual_grf_next;
-	 for (int i = 1; i < virtual_grf_count; i++) {
+	 for (int i = 0; i < virtual_grf_count; i++) {
 	    spill_reg(i);
 	 }
       }
@@ -1625,7 +1786,7 @@
    fs_visitor v(c, prog, shader);
    if (!v.run()) {
       prog->LinkStatus = GL_FALSE;
-      prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+      ralloc_strcat(&prog->InfoLog, v.fail_msg);
 
       return false;
    }
@@ -1633,7 +1794,7 @@
    if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
       c->dispatch_width = 16;
       fs_visitor v2(c, prog, shader);
-      v2.import_uniforms(v.variable_ht);
+      v2.import_uniforms(&v);
       v2.run();
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 2bf850e..10f45f3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -25,6 +25,8 @@
  *
  */
 
+#include "brw_shader.h"
+
 extern "C" {
 
 #include <sys/types.h>
@@ -51,37 +53,10 @@
    MRF = BRW_MESSAGE_REGISTER_FILE,
    IMM = BRW_IMMEDIATE_VALUE,
    FIXED_HW_REG, /* a struct brw_reg */
-   UNIFORM, /* prog_data->params[hw_reg] */
+   UNIFORM, /* prog_data->params[reg] */
    BAD_FILE
 };
 
-enum fs_opcodes {
-   FS_OPCODE_FB_WRITE = 256,
-   FS_OPCODE_RCP,
-   FS_OPCODE_RSQ,
-   FS_OPCODE_SQRT,
-   FS_OPCODE_EXP2,
-   FS_OPCODE_LOG2,
-   FS_OPCODE_POW,
-   FS_OPCODE_SIN,
-   FS_OPCODE_COS,
-   FS_OPCODE_DDX,
-   FS_OPCODE_DDY,
-   FS_OPCODE_PIXEL_X,
-   FS_OPCODE_PIXEL_Y,
-   FS_OPCODE_CINTERP,
-   FS_OPCODE_LINTERP,
-   FS_OPCODE_TEX,
-   FS_OPCODE_TXB,
-   FS_OPCODE_TXD,
-   FS_OPCODE_TXL,
-   FS_OPCODE_DISCARD,
-   FS_OPCODE_SPILL,
-   FS_OPCODE_UNSPILL,
-   FS_OPCODE_PULL_CONSTANT_LOAD,
-};
-
-
 class fs_reg {
 public:
    /* Callers of this ralloc-based new need not call delete. It's
@@ -99,7 +74,6 @@
    void init()
    {
       memset(this, 0, sizeof(*this));
-      this->hw_reg = -1;
       this->smear = -1;
    }
 
@@ -146,8 +120,8 @@
       this->type = fixed_hw_reg.type;
    }
 
-   fs_reg(enum register_file file, int hw_reg);
-   fs_reg(enum register_file file, int hw_reg, uint32_t type);
+   fs_reg(enum register_file file, int reg);
+   fs_reg(enum register_file file, int reg, uint32_t type);
    fs_reg(class fs_visitor *v, const struct glsl_type *type);
 
    bool equals(fs_reg *r)
@@ -155,7 +129,6 @@
       return (file == r->file &&
 	      reg == r->reg &&
 	      reg_offset == r->reg_offset &&
-	      hw_reg == r->hw_reg &&
 	      type == r->type &&
 	      negate == r->negate &&
 	      abs == r->abs &&
@@ -167,12 +140,17 @@
 
    /** Register file: ARF, GRF, MRF, IMM. */
    enum register_file file;
-   /** virtual register number.  0 = fixed hw reg */
+   /**
+    * Register number.  For ARF/MRF, it's the hardware register.  For
+    * GRF, it's a virtual register number until register allocation
+    */
    int reg;
-   /** Offset within the virtual register. */
+   /**
+    * For virtual registers, this is a hardware register offset from
+    * the start of the register block (for example, a constant index
+    * in an array access).
+    */
    int reg_offset;
-   /** HW register number.  Generally unset until register allocation. */
-   int hw_reg;
    /** Register type.  BRW_REGISTER_TYPE_* */
    int type;
    bool negate;
@@ -224,13 +202,13 @@
       init();
    }
 
-   fs_inst(int opcode)
+   fs_inst(enum opcode opcode)
    {
       init();
       this->opcode = opcode;
    }
 
-   fs_inst(int opcode, fs_reg dst)
+   fs_inst(enum opcode opcode, fs_reg dst)
    {
       init();
       this->opcode = opcode;
@@ -240,7 +218,7 @@
 	 assert(dst.reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0)
    {
       init();
       this->opcode = opcode;
@@ -253,7 +231,7 @@
 	 assert(src[0].reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    {
       init();
       this->opcode = opcode;
@@ -269,7 +247,7 @@
 	 assert(src[1].reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
    {
       init();
       this->opcode = opcode;
@@ -313,22 +291,23 @@
       return (opcode == FS_OPCODE_TEX ||
 	      opcode == FS_OPCODE_TXB ||
 	      opcode == FS_OPCODE_TXD ||
-	      opcode == FS_OPCODE_TXL);
+	      opcode == FS_OPCODE_TXL ||
+	      opcode == FS_OPCODE_TXS);
    }
 
    bool is_math()
    {
-      return (opcode == FS_OPCODE_RCP ||
-	      opcode == FS_OPCODE_RSQ ||
-	      opcode == FS_OPCODE_SQRT ||
-	      opcode == FS_OPCODE_EXP2 ||
-	      opcode == FS_OPCODE_LOG2 ||
-	      opcode == FS_OPCODE_SIN ||
-	      opcode == FS_OPCODE_COS ||
-	      opcode == FS_OPCODE_POW);
+      return (opcode == SHADER_OPCODE_RCP ||
+	      opcode == SHADER_OPCODE_RSQ ||
+	      opcode == SHADER_OPCODE_SQRT ||
+	      opcode == SHADER_OPCODE_EXP2 ||
+	      opcode == SHADER_OPCODE_LOG2 ||
+	      opcode == SHADER_OPCODE_SIN ||
+	      opcode == SHADER_OPCODE_COS ||
+	      opcode == SHADER_OPCODE_POW);
    }
 
-   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+   enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
    fs_reg dst;
    fs_reg src[3];
    bool saturate;
@@ -402,7 +381,7 @@
       this->base_ir = NULL;
 
       this->virtual_grf_sizes = NULL;
-      this->virtual_grf_next = 1;
+      this->virtual_grf_next = 0;
       this->virtual_grf_array_size = 0;
       this->virtual_grf_def = NULL;
       this->virtual_grf_use = NULL;
@@ -421,7 +400,7 @@
 
    fs_reg *variable_storage(ir_variable *var);
    int virtual_grf_alloc(int size);
-   void import_uniforms(struct hash_table *src_variable_ht);
+   void import_uniforms(fs_visitor *v);
 
    void visit(ir_variable *ir);
    void visit(ir_assignment *ir);
@@ -445,27 +424,28 @@
 
    fs_inst *emit(fs_inst inst);
 
-   fs_inst *emit(int opcode)
+   fs_inst *emit(enum opcode opcode)
    {
       return emit(fs_inst(opcode));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst)
+   fs_inst *emit(enum opcode opcode, fs_reg dst)
    {
       return emit(fs_inst(opcode, dst));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0)
+   fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0)
    {
       return emit(fs_inst(opcode, dst, src0));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+   fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    {
       return emit(fs_inst(opcode, dst, src0, src1));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+   fs_inst *emit(enum opcode opcode, fs_reg dst,
+		 fs_reg src0, fs_reg src1, fs_reg src2)
    {
       return emit(fs_inst(opcode, dst, src0, src1, src2));
    }
@@ -485,9 +465,11 @@
    void setup_pull_constants();
    void calculate_live_intervals();
    bool propagate_constants();
+   bool opt_algebraic();
    bool register_coalesce();
    bool compute_to_mrf();
    bool dead_code_eliminate();
+   bool remove_dead_constants();
    bool remove_duplicate_mrf_writes();
    bool virtual_grf_interferes(int a, int b);
    void schedule_instructions();
@@ -524,8 +506,8 @@
 			      int sampler);
    fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 			      int sampler);
-   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
-   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
+   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
+   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
    bool try_emit_saturate(ir_expression *ir);
    void emit_bool_to_cond_code(ir_rvalue *condition);
    void emit_if_gen6(ir_if *ir);
@@ -565,6 +547,13 @@
    int *virtual_grf_use;
    bool live_intervals_valid;
 
+   /* This is the map from UNIFORM hw_reg + reg_offset as generated by
+    * the visitor to the packed uniform number after
+    * remove_dead_constants() that represents the actual uploaded
+    * uniform index.
+    */
+   int *params_remap;
+
    struct hash_table *variable_ht;
    ir_variable *frag_color, *frag_data, *frag_depth;
    int first_non_payload_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 1d89b8f..28efbd3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -59,7 +59,8 @@
 
 	 if (inst->target > 0) {
 	    /* Set the render target index for choosing BLEND_STATE. */
-	    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
+	    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+					   inst->base_mrf, 2),
 			      BRW_REGISTER_TYPE_UD),
 		    brw_imm_ud(inst->target));
 	 }
@@ -145,43 +146,12 @@
 fs_visitor::generate_math(fs_inst *inst,
 			  struct brw_reg dst, struct brw_reg *src)
 {
-   int op;
-
-   switch (inst->opcode) {
-   case FS_OPCODE_RCP:
-      op = BRW_MATH_FUNCTION_INV;
-      break;
-   case FS_OPCODE_RSQ:
-      op = BRW_MATH_FUNCTION_RSQ;
-      break;
-   case FS_OPCODE_SQRT:
-      op = BRW_MATH_FUNCTION_SQRT;
-      break;
-   case FS_OPCODE_EXP2:
-      op = BRW_MATH_FUNCTION_EXP;
-      break;
-   case FS_OPCODE_LOG2:
-      op = BRW_MATH_FUNCTION_LOG;
-      break;
-   case FS_OPCODE_POW:
-      op = BRW_MATH_FUNCTION_POW;
-      break;
-   case FS_OPCODE_SIN:
-      op = BRW_MATH_FUNCTION_SIN;
-      break;
-   case FS_OPCODE_COS:
-      op = BRW_MATH_FUNCTION_COS;
-      break;
-   default:
-      assert(!"not reached: unknown math function");
-      op = 0;
-      break;
-   }
+   int op = brw_math_function(inst->opcode);
 
    if (intel->gen >= 6) {
       assert(inst->mlen == 0);
 
-      if (inst->opcode == FS_OPCODE_POW) {
+      if (inst->opcode == SHADER_OPCODE_POW) {
 	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 	 brw_math2(p, dst, op, src[0], src[1]);
 
@@ -272,10 +242,16 @@
 	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
 	 }
 	 break;
+      case FS_OPCODE_TXS:
+	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
+	 break;
       case FS_OPCODE_TXD:
 	 /* There is no sample_d_c message; comparisons are done manually */
 	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
 	 break;
+      default:
+	 assert(!"not reached");
+	 break;
       }
    } else {
       switch (inst->opcode) {
@@ -316,6 +292,14 @@
 	 assert(inst->mlen == 7 || inst->mlen == 10);
 	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
 	 break;
+      case FS_OPCODE_TXS:
+	 assert(inst->mlen == 3);
+	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
+	 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+	 break;
+      default:
+	 assert(!"not reached");
+	 break;
       }
    }
    assert(msg_type != -1);
@@ -537,11 +521,9 @@
    case ARF:
    case MRF:
       if (reg->smear == -1) {
-	 brw_reg = brw_vec8_reg(reg->file,
-				reg->hw_reg, 0);
+	 brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
       } else {
-	 brw_reg = brw_vec1_reg(reg->file,
-				reg->hw_reg, reg->smear);
+	 brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
       }
       brw_reg = retype(brw_reg, reg->type);
       if (reg->sechalf)
@@ -608,8 +590,8 @@
 	     prog->Name, c->dispatch_width);
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
       struct brw_reg src[3], dst;
 
       if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
@@ -656,6 +638,11 @@
       case BRW_OPCODE_MUL:
 	 brw_MUL(p, dst, src[0], src[1]);
 	 break;
+      case BRW_OPCODE_MACH:
+	 brw_set_acc_write_control(p, 1);
+	 brw_MACH(p, dst, src[0], src[1]);
+	 brw_set_acc_write_control(p, 0);
+	 break;
 
       case BRW_OPCODE_FRC:
 	 brw_FRC(p, dst, src[0]);
@@ -770,14 +757,14 @@
       }
 	 break;
 
-      case FS_OPCODE_RCP:
-      case FS_OPCODE_RSQ:
-      case FS_OPCODE_SQRT:
-      case FS_OPCODE_EXP2:
-      case FS_OPCODE_LOG2:
-      case FS_OPCODE_POW:
-      case FS_OPCODE_SIN:
-      case FS_OPCODE_COS:
+      case SHADER_OPCODE_RCP:
+      case SHADER_OPCODE_RSQ:
+      case SHADER_OPCODE_SQRT:
+      case SHADER_OPCODE_EXP2:
+      case SHADER_OPCODE_LOG2:
+      case SHADER_OPCODE_POW:
+      case SHADER_OPCODE_SIN:
+      case SHADER_OPCODE_COS:
 	 generate_math(inst, dst, src);
 	 break;
       case FS_OPCODE_PIXEL_X:
@@ -796,6 +783,7 @@
       case FS_OPCODE_TXB:
       case FS_OPCODE_TXD:
       case FS_OPCODE_TXL:
+      case FS_OPCODE_TXS:
 	 generate_tex(inst, dst, src[0]);
 	 break;
       case FS_OPCODE_DISCARD:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b4689d2..7c5414a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -25,23 +25,6 @@
  *
  */
 
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
 #include "brw_fs.h"
 #include "../glsl/glsl_types.h"
 #include "../glsl/ir_optimization.h"
@@ -50,45 +33,115 @@
 static void
 assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
 {
-   if (reg->file == GRF && reg->reg != 0) {
+   if (reg->file == GRF) {
       assert(reg->reg_offset >= 0);
-      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
-      reg->reg = 0;
+      reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
+      reg->reg_offset = 0;
    }
 }
 
 void
 fs_visitor::assign_regs_trivial()
 {
-   int last_grf = 0;
-   int hw_reg_mapping[this->virtual_grf_next];
+   int hw_reg_mapping[this->virtual_grf_next + 1];
    int i;
    int reg_width = c->dispatch_width / 8;
 
-   hw_reg_mapping[0] = 0;
    /* Note that compressed instructions require alignment to 2 registers. */
-   hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width);
-   for (i = 2; i < this->virtual_grf_next; i++) {
+   hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
+   for (i = 1; i <= this->virtual_grf_next; i++) {
       hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
 			   this->virtual_grf_sizes[i - 1] * reg_width);
    }
-   last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
-				       reg_width);
+   this->grf_used = hw_reg_mapping[this->virtual_grf_next];
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       assign_reg(hw_reg_mapping, &inst->dst, reg_width);
       assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
       assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
    }
 
-   if (last_grf >= BRW_MAX_GRF) {
+   if (this->grf_used >= BRW_MAX_GRF) {
       fail("Ran out of regs on trivial allocator (%d/%d)\n",
-	   last_grf, BRW_MAX_GRF);
+	   this->grf_used, BRW_MAX_GRF);
    }
 
-   this->grf_used = last_grf + reg_width;
+}
+
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+			      int *class_sizes,
+			      int class_count,
+			      int reg_width,
+			      int base_reg_count)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Compute the total number of registers across all classes. */
+   int ra_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+   }
+
+   ralloc_free(brw->wm.ra_reg_to_grf);
+   brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->wm.regs);
+   brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->wm.classes);
+   brw->wm.classes = ralloc_array(brw, int, class_count + 1);
+
+   brw->wm.aligned_pairs_class = -1;
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0;
+   int pairs_base_reg = 0;
+   int pairs_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+      brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
+
+      /* Save this off for the aligned pair class at the end. */
+      if (class_sizes[i] == 2) {
+	 pairs_base_reg = reg;
+	 pairs_reg_count = class_reg_count;
+      }
+
+      for (int j = 0; j < class_reg_count; j++) {
+	 ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
+
+	 brw->wm.ra_reg_to_grf[reg] = j;
+
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
+	 }
+
+	 reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
+   /* Add a special class for aligned pairs, which we'll put delta_x/y
+    * in on gen5 so that we can do PLN.
+    */
+   if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
+      brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
+
+      for (int i = 0; i < pairs_reg_count; i++) {
+	 if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+	    ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
+			     pairs_base_reg + i);
+	 }
+      }
+      class_count++;
+   }
+
+   ra_set_finalize(brw->wm.regs);
 }
 
 bool
@@ -101,12 +154,11 @@
     * for reg_width == 2.
     */
    int reg_width = c->dispatch_width / 8;
-   int hw_reg_mapping[this->virtual_grf_next + 1];
+   int hw_reg_mapping[this->virtual_grf_next];
    int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
    int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
    int class_sizes[base_reg_count];
    int class_count = 0;
-   int aligned_pair_class = -1;
 
    calculate_live_intervals();
 
@@ -125,7 +177,7 @@
        */
       class_sizes[class_count++] = 2;
    }
-   for (int r = 1; r < this->virtual_grf_next; r++) {
+   for (int r = 0; r < this->virtual_grf_next; r++) {
       int i;
 
       for (i = 0; i < class_count; i++) {
@@ -141,94 +193,26 @@
       }
    }
 
-   int ra_reg_count = 0;
-   int class_base_reg[class_count];
-   int class_reg_count[class_count];
-   int classes[class_count + 1];
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
+				 reg_width, base_reg_count);
 
-   for (int i = 0; i < class_count; i++) {
-      class_base_reg[i] = ra_reg_count;
-      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
-      ra_reg_count += class_reg_count[i];
-   }
-
-   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
-   for (int i = 0; i < class_count; i++) {
-      classes[i] = ra_alloc_reg_class(regs);
-
-      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
-	 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
-      }
-
-      /* Add conflicts between our contiguous registers aliasing
-       * base regs and other register classes' contiguous registers
-       * that alias base regs, or the base regs themselves for classes[0].
-       */
-      for (int c = 0; c <= i; c++) {
-	 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
-	    for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
-		 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
-		 c_r++) {
-
-	       if (0) {
-		  printf("%d/%d conflicts %d/%d\n",
-			 class_sizes[i], first_assigned_grf + i_r,
-			 class_sizes[c], first_assigned_grf + c_r);
-	       }
-
-	       ra_add_reg_conflict(regs,
-				   class_base_reg[i] + i_r,
-				   class_base_reg[c] + c_r);
-	    }
-	 }
-      }
-   }
-
-   /* Add a special class for aligned pairs, which we'll put delta_x/y
-    * in on gen5 so that we can do PLN.
-    */
-   if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
-      int reg_count = (base_reg_count - 1) / 2;
-      int unaligned_pair_class = 1;
-      assert(class_sizes[unaligned_pair_class] == 2);
-
-      aligned_pair_class = class_count;
-      classes[aligned_pair_class] = ra_alloc_reg_class(regs);
-      class_sizes[aligned_pair_class] = 2;
-      class_base_reg[aligned_pair_class] = 0;
-      class_reg_count[aligned_pair_class] = 0;
-      int start = (first_assigned_grf & 1) ? 1 : 0;
-
-      for (int i = 0; i < reg_count; i++) {
-	 ra_class_add_reg(regs, classes[aligned_pair_class],
-			  class_base_reg[unaligned_pair_class] + i * 2 + start);
-      }
-      class_count++;
-   }
-
-   ra_set_finalize(regs);
-
-   struct ra_graph *g = ra_alloc_interference_graph(regs,
+   struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
 						    this->virtual_grf_next);
-   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
-    * with nodes.
-    */
-   ra_set_node_class(g, 0, classes[0]);
 
-   for (int i = 1; i < this->virtual_grf_next; i++) {
+   for (int i = 0; i < this->virtual_grf_next; i++) {
       for (int c = 0; c < class_count; c++) {
 	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
-	    if (aligned_pair_class >= 0 &&
+	    if (brw->wm.aligned_pairs_class >= 0 &&
 		this->delta_x.reg == i) {
-	       ra_set_node_class(g, i, classes[aligned_pair_class]);
+	       ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
 	    } else {
-	       ra_set_node_class(g, i, classes[c]);
+	       ra_set_node_class(g, i, brw->wm.classes[c]);
 	    }
 	    break;
 	 }
       }
 
-      for (int j = 1; j < i; j++) {
+      for (int j = 0; j < i; j++) {
 	 if (virtual_grf_interferes(i, j)) {
 	    ra_add_node_interference(g, i, j);
 	 }
@@ -253,7 +237,6 @@
 
 
       ralloc_free(g);
-      ralloc_free(regs);
 
       return false;
    }
@@ -263,28 +246,18 @@
     * numbers.
     */
    this->grf_used = first_assigned_grf;
-   hw_reg_mapping[0] = 0; /* unused */
-   for (int i = 1; i < this->virtual_grf_next; i++) {
+   for (int i = 0; i < this->virtual_grf_next; i++) {
       int reg = ra_get_node_reg(g, i);
-      int hw_reg = -1;
 
-      for (int c = 0; c < class_count; c++) {
-	 if (reg >= class_base_reg[c] &&
-	     reg < class_base_reg[c] + class_reg_count[c]) {
-	    hw_reg = reg - class_base_reg[c];
-	    break;
-	 }
-      }
-
-      assert(hw_reg >= 0);
-      hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width;
+      hw_reg_mapping[i] = (first_assigned_grf +
+			   brw->wm.ra_reg_to_grf[reg] * reg_width);
       this->grf_used = MAX2(this->grf_used,
 			    hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
 			    reg_width);
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       assign_reg(hw_reg_mapping, &inst->dst, reg_width);
       assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
@@ -292,7 +265,6 @@
    }
 
    ralloc_free(g);
-   ralloc_free(regs);
 
    return true;
 }
@@ -336,8 +308,8 @@
     * spill/unspill we'll have to do, and guess that the insides of
     * loops run 10 times.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF) {
@@ -370,6 +342,9 @@
 	 if (inst->dst.file == GRF)
 	    no_spill[inst->dst.reg] = true;
 	 break;
+
+      default:
+	 break;
       }
    }
 
@@ -394,8 +369,8 @@
     * virtual grf of the same size.  For most instructions, though, we
     * could just spill/unspill the GRF being accessed.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF &&
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index d8218c2..0ea4e5c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -25,21 +25,6 @@
  *
  */
 
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
 #include "brw_fs.h"
 #include "../glsl/glsl_types.h"
 #include "../glsl/ir_optimization.h"
@@ -84,26 +69,26 @@
       int math_latency = 22;
 
       switch (inst->opcode) {
-      case FS_OPCODE_RCP:
+      case SHADER_OPCODE_RCP:
 	 this->latency = 1 * chans * math_latency;
 	 break;
-      case FS_OPCODE_RSQ:
+      case SHADER_OPCODE_RSQ:
 	 this->latency = 2 * chans * math_latency;
 	 break;
-      case FS_OPCODE_SQRT:
-      case FS_OPCODE_LOG2:
+      case SHADER_OPCODE_SQRT:
+      case SHADER_OPCODE_LOG2:
 	 /* full precision log.  partial is 2. */
 	 this->latency = 3 * chans * math_latency;
 	 break;
-      case FS_OPCODE_EXP2:
+      case SHADER_OPCODE_EXP2:
 	 /* full precision.  partial is 3, same throughput. */
 	 this->latency = 4 * chans * math_latency;
 	 break;
-      case FS_OPCODE_POW:
+      case SHADER_OPCODE_POW:
 	 this->latency = 8 * chans * math_latency;
 	 break;
-      case FS_OPCODE_SIN:
-      case FS_OPCODE_COS:
+      case SHADER_OPCODE_SIN:
+      case SHADER_OPCODE_COS:
 	 /* minimum latency, max is 12 rounds. */
 	 this->latency = 5 * chans * math_latency;
 	 break;
@@ -283,8 +268,8 @@
    memset(last_mrf_write, 0, sizeof(last_mrf_write));
 
    /* top-to-bottom dependencies: RAW and WAW. */
-   foreach_iter(exec_list_iterator, iter, instructions) {
-      schedule_node *n = (schedule_node *)iter.get();
+   foreach_list(node, &instructions) {
+      schedule_node *n = (schedule_node *)node;
       fs_inst *inst = n->inst;
 
       /* read-after-write deps. */
@@ -321,12 +306,12 @@
 	 add_dep(last_grf_write[inst->dst.reg], n);
 	 last_grf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == MRF) {
-	 int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	 int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
 	 add_dep(last_mrf_write[reg], n);
 	 last_mrf_write[reg] = n;
 	 if (is_compressed(inst)) {
-	    if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+	    if (inst->dst.reg & BRW_MRF_COMPR4)
 	       reg += 4;
 	    else
 	       reg++;
@@ -401,12 +386,12 @@
       if (inst->dst.file == GRF) {
 	 last_grf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == MRF) {
-	 int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	 int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
 	 last_mrf_write[reg] = n;
 
 	 if (is_compressed(inst)) {
-	    if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+	    if (inst->dst.reg & BRW_MRF_COMPR4)
 	       reg += 4;
 	    else
 	       reg++;
@@ -437,8 +422,8 @@
    int time = 0;
 
    /* Remove non-DAG heads from the list. */
-   foreach_iter(exec_list_iterator, iter, instructions) {
-      schedule_node *n = (schedule_node *)iter.get();
+   foreach_list_safe(node, &instructions) {
+      schedule_node *n = (schedule_node *)node;
       if (n->parent_count != 0)
 	 n->remove();
    }
@@ -447,8 +432,8 @@
       schedule_node *chosen = NULL;
       int chosen_time = 0;
 
-      foreach_iter(exec_list_iterator, iter, instructions) {
-	 schedule_node *n = (schedule_node *)iter.get();
+      foreach_list(node, &instructions) {
+	 schedule_node *n = (schedule_node *)node;
 
 	 if (!chosen || n->unblocked_time < chosen_time) {
 	    chosen = n;
@@ -490,8 +475,8 @@
        * progress until the first is done.
        */
       if (chosen->inst->is_math()) {
-	 foreach_iter(exec_list_iterator, iter, instructions) {
-	    schedule_node *n = (schedule_node *)iter.get();
+	 foreach_list(node, &instructions) {
+	    schedule_node *n = (schedule_node *)node;
 
 	    if (n->inst->is_math())
 	       n->unblocked_time = MAX2(n->unblocked_time,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 530ffa2..a9a60c2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -122,8 +122,8 @@
       break;
    }
 
-   foreach_iter(exec_list_iterator, iter, this->variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &this->variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       if (entry->var == var)
 	 return entry;
    }
@@ -222,8 +222,8 @@
    if (!var->type->is_vector())
       return NULL;
 
-   foreach_iter(exec_list_iterator, iter, *this->variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &*this->variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       if (entry->var == var) {
 	 return entry;
       }
@@ -341,8 +341,8 @@
    visit_list_elements(&refs, instructions);
 
    /* Trim out variables we can't split. */
-   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list_safe(node, &refs.variable_list) {
+      variable_entry *entry = (variable_entry *)node;
 
       if (debug) {
 	 printf("vector %s@%p: decl %d, whole_access %d\n",
@@ -363,8 +363,8 @@
    /* Replace the decls of the vectors to be split with their split
     * components.
     */
-   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &refs.variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       const struct glsl_type *type;
       type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cbe5cf4..cdaf543 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -142,9 +142,7 @@
    this->result.type = brw_type_for_base_type(ir->type);
 
    if (index) {
-      assert(this->result.file == UNIFORM ||
-	     (this->result.file == GRF &&
-	      this->result.reg != 0));
+      assert(this->result.file == UNIFORM || this->result.file == GRF);
       this->result.reg_offset += index->value.i[0] * element_size;
    } else {
       assert(!"FINISHME: non-constant array element");
@@ -252,14 +250,14 @@
 
       break;
    case ir_unop_rcp:
-      emit_math(FS_OPCODE_RCP, this->result, op[0]);
+      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
       break;
 
    case ir_unop_exp2:
-      emit_math(FS_OPCODE_EXP2, this->result, op[0]);
+      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
       break;
    case ir_unop_log2:
-      emit_math(FS_OPCODE_LOG2, this->result, op[0]);
+      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
       break;
    case ir_unop_exp:
    case ir_unop_log:
@@ -267,11 +265,11 @@
       break;
    case ir_unop_sin:
    case ir_unop_sin_reduced:
-      emit_math(FS_OPCODE_SIN, this->result, op[0]);
+      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
       break;
    case ir_unop_cos:
    case ir_unop_cos_reduced:
-      emit_math(FS_OPCODE_COS, this->result, op[0]);
+      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
       break;
 
    case ir_unop_dFdx:
@@ -289,7 +287,23 @@
       break;
 
    case ir_binop_mul:
-      emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+      if (ir->type->is_integer()) {
+	 /* For integer multiplication, the MUL uses the low 16 bits
+	  * of one of the operands (src0 on gen6, src1 on gen7).  The
+	  * MACH accumulates in the contribution of the upper 16 bits
+	  * of that operand.
+	  *
+	  * FINISHME: Emit just the MUL if we know an operand is small
+	  * enough.
+	  */
+	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+	 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+	 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]);
+	 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc));
+      } else {
+	 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+      }
       break;
    case ir_binop_div:
       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
@@ -342,11 +356,11 @@
       break;
 
    case ir_unop_sqrt:
-      emit_math(FS_OPCODE_SQRT, this->result, op[0]);
+      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
       break;
 
    case ir_unop_rsq:
-      emit_math(FS_OPCODE_RSQ, this->result, op[0]);
+      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
       break;
 
    case ir_unop_i2u:
@@ -425,7 +439,7 @@
       break;
 
    case ir_binop_pow:
-      emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
+      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
       break;
 
    case ir_unop_bit_not:
@@ -496,7 +510,7 @@
 void
 fs_visitor::visit(ir_assignment *ir)
 {
-   struct fs_reg l, r;
+   fs_reg l, r;
    fs_inst *inst;
 
    /* FINISHME: arrays on the lhs */
@@ -603,9 +617,11 @@
       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
       mlen += 3;
    } else if (ir->op == ir_txd) {
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -620,6 +636,8 @@
        * dPdx = dudx, dvdx, drdx
        * dPdy = dudy, dvdy, drdy
        *
+       * 1-arg: Does not exist.
+       *
        * 2-arg: dudx   dvdx   dudy   dvdy
        *        dPdx.x dPdx.y dPdy.x dPdy.y
        *        m4     m5     m6     m7
@@ -631,18 +649,26 @@
       for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
 	 dPdx.reg_offset++;
-	 mlen++;
       }
+      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
 
       for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
 	 dPdy.reg_offset++;
-	 mlen++;
       }
+      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
+   } else if (ir->op == ir_txs) {
+      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
+      simd16 = true;
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += 2;
    } else {
       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
        * instructions.  We'll need to do SIMD16 here.
        */
+      simd16 = true;
       assert(ir->op == ir_txb || ir->op == ir_txl);
 
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
@@ -671,16 +697,19 @@
 
       /* The unused upper half. */
       mlen++;
+   }
 
+   if (simd16) {
       /* Now, since we're doing simd16, the return is 2 interleaved
        * vec4s where the odd-indexed ones are junk. We'll need to move
        * this weirdness around to the expected layout.
        */
-      simd16 = true;
       orig_dst = dst;
-      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
-						       2));
-      dst.type = BRW_REGISTER_TYPE_F;
+      const glsl_type *vec_type =
+	 glsl_type::get_instance(ir->type->base_type, 4, 1);
+      dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
+      dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
+			       : BRW_REGISTER_TYPE_F;
    }
 
    fs_inst *inst = NULL;
@@ -697,6 +726,9 @@
    case ir_txd:
       inst = emit(FS_OPCODE_TXD, dst);
       break;
+   case ir_txs:
+      inst = emit(FS_OPCODE_TXS, dst);
+      break;
    case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
@@ -732,6 +764,8 @@
    int base_mrf = 2;
    int reg_width = c->dispatch_width / 8;
    bool header_present = false;
+   const int vector_elements =
+      ir->coordinate ? ir->coordinate->type->vector_elements : 0;
 
    if (ir->offset) {
       /* The offsets set up by the ir_texture visitor are in the
@@ -742,7 +776,7 @@
       base_mrf--;
    }
 
-   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+   for (int i = 0; i < vector_elements; i++) {
       fs_inst *inst = emit(BRW_OPCODE_MOV,
 			   fs_reg(MRF, base_mrf + mlen + i * reg_width),
 			   coordinate);
@@ -750,7 +784,7 @@
 	 inst->saturate = true;
       coordinate.reg_offset++;
    }
-   mlen += ir->coordinate->type->vector_elements * reg_width;
+   mlen += vector_elements * reg_width;
 
    if (ir->shadow_comparitor && ir->op != ir_txd) {
       mlen = MAX2(mlen, header_present + 4 * reg_width);
@@ -786,9 +820,11 @@
       inst = emit(FS_OPCODE_TXL, dst);
       break;
    case ir_txd: {
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -816,6 +852,13 @@
       inst = emit(FS_OPCODE_TXD, dst);
       break;
    }
+   case ir_txs:
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += reg_width;
+      inst = emit(FS_OPCODE_TXS, dst);
+      break;
    case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
@@ -850,6 +893,7 @@
    }
 
    if (ir->shadow_comparitor && ir->op != ir_txd) {
+      this->result = reg_undef;
       ir->shadow_comparitor->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
@@ -860,11 +904,13 @@
    case ir_tex:
       break;
    case ir_txb:
+      this->result = reg_undef;
       ir->lod_info.bias->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
       break;
    case ir_txl:
+      this->result = reg_undef;
       ir->lod_info.lod->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
@@ -873,9 +919,11 @@
       if (c->dispatch_width == 16)
 	 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -900,13 +948,19 @@
       }
       break;
    }
+   case ir_txs:
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += reg_width;
+      break;
    case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
 
    /* Set up the coordinate (except for TXD where it was done earlier) */
-   if (ir->op != ir_txd) {
+   if (ir->op != ir_txd && ir->op != ir_txs) {
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
 	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
 			      coordinate);
@@ -924,7 +978,8 @@
    case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
    case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
    case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
-   case ir_txf: assert(!"TXF unsupported.");
+   case ir_txf: assert(!"TXF unsupported."); break;
+   case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break;
    }
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
@@ -959,7 +1014,8 @@
    }
 
    this->result = reg_undef;
-   ir->coordinate->accept(this);
+   if (ir->coordinate)
+      ir->coordinate->accept(this);
    fs_reg coordinate = this->result;
 
    if (ir->offset != NULL) {
@@ -1000,7 +1056,8 @@
     * texture coordinates.  We use the program parameter state
     * tracking to get the scaling factor.
     */
-   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
+   if (intel->gen < 6 &&
+       ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
       struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
       int tokens[STATE_LENGTH] = {
 	 STATE_INTERNAL,
@@ -1046,7 +1103,7 @@
    /* Writemasking doesn't eliminate channels on SIMD8 texture
     * samples, so don't worry about them.
     */
-   fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+   fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
 
    if (intel->gen >= 7) {
       inst = emit_texture_gen7(ir, dst, coordinate, sampler);
@@ -1070,6 +1127,7 @@
       if (hw_compare_supported) {
 	 inst->shadow_compare = true;
       } else {
+	 this->result = reg_undef;
 	 ir->shadow_comparitor->accept(this);
 	 fs_reg ref = this->result;
 
@@ -1465,8 +1523,8 @@
       inst->predicated = true;
    }
 
-   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list(node, &ir->then_instructions) {
+      ir_instruction *ir = (ir_instruction *)node;
       this->base_ir = ir;
       this->result = reg_undef;
       ir->accept(this);
@@ -1475,8 +1533,8 @@
    if (!ir->else_instructions.is_empty()) {
       emit(BRW_OPCODE_ELSE);
 
-      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &ir->else_instructions) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 this->base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);
@@ -1526,8 +1584,8 @@
       inst->predicated = true;
    }
 
-   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list(node, &ir->body_instructions) {
+      ir_instruction *ir = (ir_instruction *)node;
 
       this->base_ir = ir;
       this->result = reg_undef;
@@ -1583,8 +1641,8 @@
 
       assert(sig);
 
-      foreach_iter(exec_list_iterator, iter, sig->body) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &sig->body) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 this->base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);
@@ -1684,7 +1742,7 @@
 	interp_reg(FRAG_ATTRIB_WPOS, 3));
    /* Compute the pixel 1/W value from wpos.w. */
    this->pixel_w = fs_reg(this, glsl_type::float_type);
-   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
+   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
    this->current_annotation = NULL;
 }
 
@@ -1721,7 +1779,7 @@
    this->current_annotation = "compute pos.w";
    this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
    this->wpos_w = fs_reg(this, glsl_type::float_type);
-   emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
+   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
 
    this->delta_x = fs_reg(brw_vec8_grf(2, 0));
    this->delta_y = fs_reg(brw_vec8_grf(3, 0));
@@ -1733,6 +1791,7 @@
 fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
 {
    int reg_width = c->dispatch_width / 8;
+   fs_inst *inst;
 
    if (c->dispatch_width == 8 || intel->gen == 6) {
       /* SIMD8 write looks like:
@@ -1751,8 +1810,10 @@
        * m + 6: a0
        * m + 7: a1
        */
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
-	   color);
+      inst = emit(BRW_OPCODE_MOV,
+		  fs_reg(MRF, first_color_mrf + index * reg_width),
+		  color);
+      inst->saturate = c->key.clamp_fragment_color;
    } else {
       /* pre-gen6 SIMD16 single source DP write looks like:
        * m + 0: r0
@@ -1770,16 +1831,22 @@
 	  * usual destination + 1 for the second half we get
 	  * destination + 4.
 	  */
-	 emit(BRW_OPCODE_MOV,
-	      fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
+	 inst = emit(BRW_OPCODE_MOV,
+		     fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
       } else {
 	 push_force_uncompressed();
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+	 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
 	 pop_force_uncompressed();
 
 	 push_force_sechalf();
 	 color.sechalf = true;
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+	 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
 	 pop_force_sechalf();
 	 color.sechalf = false;
       }
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 03cebbb..f7e6e7c 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -46,7 +46,7 @@
    struct gl_context *ctx = &intel->ctx;
 
    BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE);
+   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
    OUT_BATCH(0); /* xmin, ymin */
    OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
 	    ((ctx->DrawBuffer->Height - 1) << 16));
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 6674f16..09b5be4 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -226,6 +226,34 @@
    return GL_TRUE;
 }
 
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+int
+brw_get_scratch_size(int size)
+{
+   int i;
+
+   for (i = 1024; i < size; i *= 2)
+      ;
+
+   return i;
+}
+
+void
+brw_get_scratch_bo(struct intel_context *intel,
+		   drm_intel_bo **scratch_bo, int size)
+{
+   drm_intel_bo *old_bo = *scratch_bo;
+
+   if (old_bo && old_bo->size < size) {
+      drm_intel_bo_unreference(old_bo);
+      old_bo = NULL;
+   }
+
+   if (!old_bo) {
+      *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096);
+   }
+}
+
 void brwInitFragProgFuncs( struct dd_function_table *functions )
 {
    assert(functions->ProgramStringNotify == _tnl_program_string); 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 9471883..3ff6bba 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -24,6 +24,7 @@
 extern "C" {
 #include "main/macros.h"
 #include "brw_context.h"
+#include "brw_vs.h"
 }
 #include "brw_fs.h"
 #include "../glsl/ir_optimization.h"
@@ -67,6 +68,9 @@
    if (!brw_fs_precompile(ctx, prog))
       return false;
 
+   if (!brw_vs_precompile(ctx, prog))
+      return false;
+
    return true;
 }
 
@@ -75,10 +79,15 @@
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_context *intel = &brw->intel;
+   unsigned int stage;
 
-   struct brw_shader *shader =
-      (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-   if (shader != NULL) {
+   for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) {
+      struct brw_shader *shader =
+	 (struct brw_shader *)prog->_LinkedShaders[stage];
+
+      if (!shader)
+	 continue;
+
       void *mem_ctx = ralloc_context(NULL);
       bool progress;
 
@@ -106,18 +115,22 @@
       brw_do_cubemap_normalize(shader->ir);
       lower_noise(shader->ir);
       lower_quadop_vector(shader->ir, false);
+
+      bool input = true;
+      bool output = stage == MESA_SHADER_FRAGMENT;
+      bool temp = stage == MESA_SHADER_FRAGMENT;
+      bool uniform = true;
+
       lower_variable_index_to_cond_assign(shader->ir,
-					  GL_TRUE, /* input */
-					  GL_TRUE, /* output */
-					  GL_TRUE, /* temp */
-					  GL_TRUE /* uniform */
-					  );
+					  input, output, temp, uniform);
 
       do {
 	 progress = false;
 
-	 brw_do_channel_expressions(shader->ir);
-	 brw_do_vector_splitting(shader->ir);
+	 if (stage == MESA_SHADER_FRAGMENT) {
+	    brw_do_channel_expressions(shader->ir);
+	    brw_do_vector_splitting(shader->ir);
+	 }
 
 	 progress = do_lower_jumps(shader->ir, true, true,
 				   true, /* main return */
@@ -192,3 +205,29 @@
       return BRW_CONDITIONAL_NZ;
    }
 }
+
+uint32_t
+brw_math_function(enum opcode op)
+{
+   switch (op) {
+   case SHADER_OPCODE_RCP:
+      return BRW_MATH_FUNCTION_INV;
+   case SHADER_OPCODE_RSQ:
+      return BRW_MATH_FUNCTION_RSQ;
+   case SHADER_OPCODE_SQRT:
+      return BRW_MATH_FUNCTION_SQRT;
+   case SHADER_OPCODE_EXP2:
+      return BRW_MATH_FUNCTION_EXP;
+   case SHADER_OPCODE_LOG2:
+      return BRW_MATH_FUNCTION_LOG;
+   case SHADER_OPCODE_POW:
+      return BRW_MATH_FUNCTION_POW;
+   case SHADER_OPCODE_SIN:
+      return BRW_MATH_FUNCTION_SIN;
+   case SHADER_OPCODE_COS:
+      return BRW_MATH_FUNCTION_COS;
+   default:
+      assert(!"not reached: unknown math function");
+      return 0;
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 4c568a2..1054d7a 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -21,5 +21,11 @@
  * IN THE SOFTWARE.
  */
 
+#include <stdint.h>
+#include "brw_defines.h"
+
+#pragma once
+
 int brw_type_for_base_type(const struct glsl_type *type);
 uint32_t brw_conditional_for_comparison(unsigned int op);
+uint32_t brw_math_function(enum opcode op);
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b9e5cc1..cb7a3ef 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -455,6 +455,23 @@
    }
 }
 
+static void
+dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
+{
+   const char *name = "WM_CONST";
+   struct intel_context *intel = &brw->intel;
+   uint32_t *as_uint = intel->batch.bo->virtual + offset;
+   float *as_float = intel->batch.bo->virtual + offset;
+   int i;
+
+   for (i = 0; i < size / 4; i += 4) {
+      batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
+		i / 4,
+		as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
+		as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]);
+   }
+}
+
 static void dump_binding_table(struct brw_context *brw, uint32_t offset,
 			       uint32_t size)
 {
@@ -602,6 +619,9 @@
       case AUB_TRACE_VS_CONSTANTS:
 	 dump_vs_constants(brw, offset, size);
 	 break;
+      case AUB_TRACE_WM_CONSTANTS:
+	 dump_wm_constants(brw, offset, size);
+	 break;
       default:
 	 break;
       }
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index f462f32..46a417a 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -60,7 +60,7 @@
 	   * given in Volume 1 of the BSpec.
 	   */
 	  h0 = ALIGN(mt->height0, align_h);
-	  h1 = ALIGN(minify(h0), align_h);
+	  h1 = ALIGN(minify(mt->height0), align_h);
 	  qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h);
           if (mt->compressed)
 	     qpitch /= 4;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
new file mode 100644
index 0000000..760bc1f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
+
+#define MAX_INSTRUCTION (1 << 30)
+
+namespace brw {
+
+void
+vec4_visitor::calculate_live_intervals()
+{
+   int *def = ralloc_array(mem_ctx, int, virtual_grf_count);
+   int *use = ralloc_array(mem_ctx, int, virtual_grf_count);
+   int loop_depth = 0;
+   int loop_start = 0;
+
+   if (this->live_intervals_valid)
+      return;
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      def[i] = MAX_INSTRUCTION;
+      use[i] = -1;
+   }
+
+   int ip = 0;
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->opcode == BRW_OPCODE_DO) {
+	 if (loop_depth++ == 0)
+	    loop_start = ip;
+      } else if (inst->opcode == BRW_OPCODE_WHILE) {
+	 loop_depth--;
+
+	 if (loop_depth == 0) {
+	    /* Patches up the use of vars marked for being live across
+	     * the whole loop.
+	     */
+	    for (int i = 0; i < virtual_grf_count; i++) {
+	       if (use[i] == loop_start) {
+		  use[i] = ip;
+	       }
+	    }
+	 }
+      } else {
+	 for (unsigned int i = 0; i < 3; i++) {
+	    if (inst->src[i].file == GRF) {
+	       int reg = inst->src[i].reg;
+
+	       if (!loop_depth) {
+		  use[reg] = ip;
+	       } else {
+		  def[reg] = MIN2(loop_start, def[reg]);
+		  use[reg] = loop_start;
+
+		  /* Nobody else is going to go smash our start to
+		   * later in the loop now, because def[reg] now
+		   * points before the bb header.
+		   */
+	       }
+	    }
+	 }
+	 if (inst->dst.file == GRF) {
+	    int reg = inst->dst.reg;
+
+	    if (!loop_depth) {
+	       def[reg] = MIN2(def[reg], ip);
+	    } else {
+	       def[reg] = MIN2(def[reg], loop_start);
+	    }
+	 }
+      }
+
+      ip++;
+   }
+
+   ralloc_free(this->virtual_grf_def);
+   ralloc_free(this->virtual_grf_use);
+   this->virtual_grf_def = def;
+   this->virtual_grf_use = use;
+
+   this->live_intervals_valid = true;
+}
+
+bool
+vec4_visitor::virtual_grf_interferes(int a, int b)
+{
+   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
+   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
+
+   /* We can't handle dead register writes here, without iterating
+    * over the whole instruction stream to find every single dead
+    * write to that register to compare to the live interval of the
+    * other register.  Just assert that dead_code_eliminate() has been
+    * called.
+    */
+   assert((this->virtual_grf_use[a] != -1 ||
+	   this->virtual_grf_def[a] == MAX_INSTRUCTION) &&
+	  (this->virtual_grf_use[b] != -1 ||
+	   this->virtual_grf_def[b] == MAX_INSTRUCTION));
+
+   return start < end;
+}
+
+/**
+ * Must be called after calculate_live_intervales() to remove unused
+ * writes to registers -- register allocation will fail otherwise
+ * because something deffed but not used won't be considered to
+ * interfere with other regs.
+ */
+bool
+vec4_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+   int pc = 0;
+
+   calculate_live_intervals();
+
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
+	 inst->remove();
+	 progress = true;
+      }
+
+      pc++;
+   }
+
+   if (progress)
+      live_intervals_valid = false;
+
+   return progress;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
new file mode 100644
index 0000000..1db910e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -0,0 +1,489 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_H
+#define BRW_VEC4_H
+
+#include <stdint.h>
+#include "brw_shader.h"
+#include "main/compiler.h"
+#include "program/hash_table.h"
+
+extern "C" {
+#include "brw_vs.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+};
+
+#include "../glsl/ir.h"
+
+namespace brw {
+
+class dst_reg;
+
+/**
+ * Common helper for constructing swizzles.  When only a subset of
+ * channels of a vec4 are used, we don't want to reference the other
+ * channels, as that will tell optimization passes that those other
+ * channels are used.
+ */
+static int
+swizzle_for_size(int size)
+{
+   int size_swizzles[4] = {
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+   return size_swizzles[size - 1];
+}
+
+enum register_file {
+   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+   GRF = BRW_GENERAL_REGISTER_FILE,
+   MRF = BRW_MESSAGE_REGISTER_FILE,
+   IMM = BRW_IMMEDIATE_VALUE,
+   HW_REG, /* a struct brw_reg */
+   ATTR,
+   UNIFORM, /* prog_data->params[hw_reg] */
+   BAD_FILE
+};
+
+class reg
+{
+public:
+   /** Register file: ARF, GRF, MRF, IMM. */
+   enum register_file file;
+   /** virtual register number.  0 = fixed hw reg */
+   int reg;
+   /** Offset within the virtual register. */
+   int reg_offset;
+   /** Register type.  BRW_REGISTER_TYPE_* */
+   int type;
+   bool sechalf;
+   struct brw_reg fixed_hw_reg;
+   int smear; /* -1, or a channel of the reg to smear to all channels. */
+
+   /** Value for file == BRW_IMMMEDIATE_FILE */
+   union {
+      int32_t i;
+      uint32_t u;
+      float f;
+   } imm;
+};
+
+class src_reg : public reg
+{
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+
+      this->file = BAD_FILE;
+   }
+
+   src_reg(register_file file, int reg, const glsl_type *type)
+   {
+      init();
+
+      this->file = file;
+      this->reg = reg;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+	 this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+	 this->swizzle = SWIZZLE_XYZW;
+   }
+
+   /** Generic unset register constructor. */
+   src_reg()
+   {
+      init();
+   }
+
+   src_reg(float f)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_F;
+      this->imm.f = f;
+   }
+
+   src_reg(uint32_t u)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_UD;
+      this->imm.f = u;
+   }
+
+   src_reg(int32_t i)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_D;
+      this->imm.i = i;
+   }
+
+   src_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit src_reg(dst_reg reg);
+
+   GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
+   bool negate;
+   bool abs;
+
+   src_reg *reladdr;
+};
+
+class dst_reg : public reg
+{
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+      this->file = BAD_FILE;
+      this->writemask = WRITEMASK_XYZW;
+   }
+
+   dst_reg()
+   {
+      init();
+   }
+
+   dst_reg(register_file file, int reg)
+   {
+      init();
+
+      this->file = file;
+      this->reg = reg;
+   }
+
+   dst_reg(struct brw_reg reg)
+   {
+      init();
+
+      this->file = HW_REG;
+      this->fixed_hw_reg = reg;
+   }
+
+   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit dst_reg(src_reg reg);
+
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+
+   src_reg *reladdr;
+};
+
+class vec4_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   struct brw_reg get_dst(void);
+   struct brw_reg get_src(int i);
+
+   enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+   dst_reg dst;
+   src_reg src[3];
+
+   bool saturate;
+   bool predicate_inverse;
+   uint32_t predicate;
+
+   int conditional_mod; /**< BRW_CONDITIONAL_* */
+
+   int sampler;
+   int target; /**< MRT target. */
+   bool shadow_compare;
+
+   bool eot;
+   bool header_present;
+   int mlen; /**< SEND message length */
+   int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
+
+   uint32_t offset; /* spill/unspill offset */
+   /** @{
+    * Annotation for the generated IR.  One of the two can be set.
+    */
+   ir_instruction *ir;
+   const char *annotation;
+};
+
+class vec4_visitor : public ir_visitor
+{
+public:
+   vec4_visitor(struct brw_vs_compile *c,
+		struct gl_shader_program *prog, struct brw_shader *shader);
+   ~vec4_visitor();
+
+   dst_reg dst_null_f()
+   {
+      return dst_reg(brw_null_reg());
+   }
+
+   dst_reg dst_null_d()
+   {
+      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   }
+
+   dst_reg dst_null_cmp()
+   {
+      if (intel->gen > 4)
+	 return dst_null_d();
+      else
+	 return dst_null_f();
+   }
+
+   struct brw_context *brw;
+   const struct gl_vertex_program *vp;
+   struct intel_context *intel;
+   struct gl_context *ctx;
+   struct brw_vs_compile *c;
+   struct brw_vs_prog_data *prog_data;
+   struct brw_compile *p;
+   struct brw_shader *shader;
+   struct gl_shader_program *prog;
+   void *mem_ctx;
+   exec_list instructions;
+
+   char *fail_msg;
+   bool failed;
+
+   /**
+    * GLSL IR currently being processed, which is associated with our
+    * driver IR instructions for debugging purposes.
+    */
+   ir_instruction *base_ir;
+   const char *current_annotation;
+
+   int *virtual_grf_sizes;
+   int virtual_grf_count;
+   int virtual_grf_array_size;
+   int first_non_payload_grf;
+   int *virtual_grf_def;
+   int *virtual_grf_use;
+   bool live_intervals_valid;
+
+   dst_reg *variable_storage(ir_variable *var);
+
+   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
+
+   src_reg src_reg_for_float(float val);
+
+   /**
+    * \name Visit methods
+    *
+    * As typical for the visitor pattern, there must be one \c visit method for
+    * each concrete subclass of \c ir_instruction.  Virtual base classes within
+    * the hierarchy should not have \c visit methods.
+    */
+   /*@{*/
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_dereference_variable  *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_if *);
+   /*@}*/
+
+   src_reg result;
+
+   /* Regs for vertex results.  Generated at ir_variable visiting time
+    * for the ir->location's used.
+    */
+   dst_reg output_reg[VERT_RESULT_MAX];
+   int uniform_size[MAX_UNIFORMS];
+   int uniforms;
+
+   struct hash_table *variable_ht;
+
+   bool run(void);
+   void fail(const char *msg, ...);
+
+   int virtual_grf_alloc(int size);
+   int setup_uniform_values(int loc, const glsl_type *type);
+   void setup_builtin_uniform_values(ir_variable *ir);
+   int setup_attributes(int payload_reg);
+   int setup_uniforms(int payload_reg);
+   void setup_payload();
+   void reg_allocate_trivial();
+   void reg_allocate();
+   void move_grf_array_access_to_scratch();
+   void calculate_live_intervals();
+   bool dead_code_eliminate();
+   bool virtual_grf_interferes(int a, int b);
+
+   vec4_instruction *emit(enum opcode opcode);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+			  src_reg src0, src_reg src1);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+			  src_reg src0, src_reg src1, src_reg src2);
+
+   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
+			       dst_reg dst,
+			       src_reg src,
+			       vec4_instruction *pre_rhs_inst,
+			       vec4_instruction *last_rhs_inst);
+
+   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
+   void visit_instructions(const exec_list *list);
+
+   void emit_bool_to_cond_code(ir_rvalue *ir);
+   void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_if_gen6(ir_if *ir);
+
+   void emit_block_move(dst_reg *dst, src_reg *src,
+			const struct glsl_type *type, bool predicated);
+
+   void emit_constant_values(dst_reg *dst, ir_constant *value);
+
+   /**
+    * Emit the correct dot-product instruction for the type of arguments
+    */
+   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+		    dst_reg dst, src_reg src0);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+		    dst_reg dst, src_reg src0, src_reg src1);
+
+   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+		 dst_reg dst, const src_reg &src);
+
+   void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+
+   int emit_vue_header_gen6(int header_mrf);
+   int emit_vue_header_gen4(int header_mrf);
+   void emit_urb_writes(void);
+
+   src_reg get_scratch_offset(vec4_instruction *inst,
+			      src_reg *reladdr, int reg_offset);
+   void emit_scratch_read(vec4_instruction *inst,
+			  dst_reg dst,
+			  src_reg orig_src,
+			  int base_offset);
+   void emit_scratch_write(vec4_instruction *inst,
+			   src_reg temp,
+			   dst_reg orig_dst,
+			   int base_offset);
+
+   GLboolean try_emit_sat(ir_expression *ir);
+
+   bool process_move_condition(ir_rvalue *ir);
+
+   void generate_code();
+   void generate_vs_instruction(vec4_instruction *inst,
+				struct brw_reg dst,
+				struct brw_reg *src);
+
+   void generate_math1_gen4(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src);
+   void generate_math1_gen6(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src);
+   void generate_math2_gen4(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src0,
+			    struct brw_reg src1);
+   void generate_math2_gen6(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src0,
+			    struct brw_reg src1);
+
+   void generate_urb_write(vec4_instruction *inst);
+   void generate_oword_dual_block_offsets(struct brw_reg m1,
+					  struct brw_reg index);
+   void generate_scratch_write(vec4_instruction *inst,
+			       struct brw_reg dst,
+			       struct brw_reg src,
+			       struct brw_reg index);
+   void generate_scratch_read(vec4_instruction *inst,
+			      struct brw_reg dst,
+			      struct brw_reg index);
+};
+
+} /* namespace brw */
+
+#endif /* BRW_VEC4_H */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
new file mode 100644
index 0000000..65ac7d9
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -0,0 +1,854 @@
+/* Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+extern "C" {
+#include "brw_eu.h"
+};
+
+using namespace brw;
+
+namespace brw {
+
+int
+vec4_visitor::setup_attributes(int payload_reg)
+{
+   int nr_attributes;
+   int attribute_map[VERT_ATTRIB_MAX];
+
+   nr_attributes = 0;
+   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
+	 attribute_map[i] = payload_reg + nr_attributes;
+	 nr_attributes++;
+
+	 /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED
+	  * attributes come in as floating point conversions of the
+	  * integer values.
+	  */
+	 if (c->key.gl_fixed_input_size[i] != 0) {
+	    struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0);
+
+	    brw_MUL(p,
+		    brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1),
+		    reg, brw_imm_f(1.0 / 65536.0));
+	 }
+      }
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0; i < 3; i++) {
+	 if (inst->src[i].file != ATTR)
+	    continue;
+
+	 int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
+
+	 struct brw_reg reg = brw_vec8_grf(grf, 0);
+	 reg.dw1.bits.swizzle = inst->src[i].swizzle;
+	 if (inst->src[i].abs)
+	    reg = brw_abs(reg);
+	 if (inst->src[i].negate)
+	    reg = negate(reg);
+
+	 inst->src[i].file = HW_REG;
+	 inst->src[i].fixed_hw_reg = reg;
+      }
+   }
+
+   /* The BSpec says we always have to read at least one thing from
+    * the VF, and it appears that the hardware wedges otherwise.
+    */
+   if (nr_attributes == 0)
+      nr_attributes = 1;
+
+   prog_data->urb_read_length = (nr_attributes + 1) / 2;
+
+   return payload_reg + nr_attributes;
+}
+
+int
+vec4_visitor::setup_uniforms(int reg)
+{
+   /* User clip planes from curbe:
+    */
+   if (c->key.nr_userclip) {
+      if (intel->gen >= 6) {
+	 for (int i = 0; i < c->key.nr_userclip; i++) {
+	    c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
+						  (i % 2) * 4), 0, 4, 1);
+	 }
+	 reg += ALIGN(c->key.nr_userclip, 2) / 2;
+      } else {
+	 for (int i = 0; i < c->key.nr_userclip; i++) {
+	    c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
+						  (i % 2) * 4), 0, 4, 1);
+	 }
+	 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
+      }
+   }
+
+   /* The pre-gen6 VS requires that some push constants get loaded no
+    * matter what, or the GPU would hang.
+    */
+   if (intel->gen < 6 && this->uniforms == 0) {
+      this->uniform_size[this->uniforms] = 1;
+
+      for (unsigned int i = 0; i < 4; i++) {
+	 unsigned int slot = this->uniforms * 4 + i;
+
+	 c->prog_data.param[slot] = NULL;
+	 c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
+      }
+
+      this->uniforms++;
+      reg++;
+   } else {
+      reg += ALIGN(uniforms, 2) / 2;
+   }
+
+   /* for now, we are not doing any elimination of unused slots, nor
+    * are we packing our uniforms.
+    */
+   c->prog_data.nr_params = this->uniforms * 4;
+
+   c->prog_data.curb_read_length = reg - 1;
+   c->prog_data.uses_new_param_layout = true;
+
+   return reg;
+}
+
+void
+vec4_visitor::setup_payload(void)
+{
+   int reg = 0;
+
+   /* The payload always contains important data in g0, which contains
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread.  So, we always start push constants at g1.
+    */
+   reg++;
+
+   reg = setup_uniforms(reg);
+
+   reg = setup_attributes(reg);
+
+   this->first_non_payload_grf = reg;
+}
+
+struct brw_reg
+vec4_instruction::get_dst(void)
+{
+   struct brw_reg brw_reg;
+
+   switch (dst.file) {
+   case GRF:
+      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+      brw_reg = retype(brw_reg, dst.type);
+      brw_reg.dw1.bits.writemask = dst.writemask;
+      break;
+
+   case HW_REG:
+      brw_reg = dst.fixed_hw_reg;
+      break;
+
+   case BAD_FILE:
+      brw_reg = brw_null_reg();
+      break;
+
+   default:
+      assert(!"not reached");
+      brw_reg = brw_null_reg();
+      break;
+   }
+   return brw_reg;
+}
+
+struct brw_reg
+vec4_instruction::get_src(int i)
+{
+   struct brw_reg brw_reg;
+
+   switch (src[i].file) {
+   case GRF:
+      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
+      brw_reg = retype(brw_reg, src[i].type);
+      brw_reg.dw1.bits.swizzle = src[i].swizzle;
+      if (src[i].abs)
+	 brw_reg = brw_abs(brw_reg);
+      if (src[i].negate)
+	 brw_reg = negate(brw_reg);
+      break;
+
+   case IMM:
+      switch (src[i].type) {
+      case BRW_REGISTER_TYPE_F:
+	 brw_reg = brw_imm_f(src[i].imm.f);
+	 break;
+      case BRW_REGISTER_TYPE_D:
+	 brw_reg = brw_imm_d(src[i].imm.i);
+	 break;
+      case BRW_REGISTER_TYPE_UD:
+	 brw_reg = brw_imm_ud(src[i].imm.u);
+	 break;
+      default:
+	 assert(!"not reached");
+	 brw_reg = brw_null_reg();
+	 break;
+      }
+      break;
+
+   case UNIFORM:
+      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
+				    ((src[i].reg + src[i].reg_offset) % 2) * 4),
+		       0, 4, 1);
+      brw_reg = retype(brw_reg, src[i].type);
+      brw_reg.dw1.bits.swizzle = src[i].swizzle;
+      if (src[i].abs)
+	 brw_reg = brw_abs(brw_reg);
+      if (src[i].negate)
+	 brw_reg = negate(brw_reg);
+      break;
+
+   case HW_REG:
+      brw_reg = src[i].fixed_hw_reg;
+      break;
+
+   case BAD_FILE:
+      /* Probably unused. */
+      brw_reg = brw_null_reg();
+      break;
+   case ATTR:
+   default:
+      assert(!"not reached");
+      brw_reg = brw_null_reg();
+      break;
+   }
+
+   return brw_reg;
+}
+
+void
+vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src)
+{
+   brw_math(p,
+	    dst,
+	    brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE,
+	    inst->base_mrf,
+	    src,
+	    BRW_MATH_DATA_SCALAR,
+	    BRW_MATH_PRECISION_FULL);
+}
+
+static void
+check_gen6_math_src_arg(struct brw_reg src)
+{
+   /* Source swizzles are ignored. */
+   assert(!src.abs);
+   assert(!src.negate);
+   assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW);
+}
+
+void
+vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   check_gen6_math_src_arg(src);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math(p,
+	    dst,
+	    brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE,
+	    inst->base_mrf,
+	    src,
+	    BRW_MATH_DATA_SCALAR,
+	    BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src0,
+				  struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   /* Source swizzles are ignored. */
+   check_gen6_math_src_arg(src0);
+   check_gen6_math_src_arg(src1);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math2(p,
+	     dst,
+	     brw_math_function(inst->opcode),
+	     src0, src1);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_math2_gen4(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src0,
+				  struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+
+   brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math(p,
+	    dst,
+	    brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE,
+	    inst->base_mrf,
+	    src0,
+	    BRW_MATH_DATA_VECTOR,
+	    BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_urb_write(vec4_instruction *inst)
+{
+   brw_urb_WRITE(p,
+		 brw_null_reg(), /* dest */
+		 inst->base_mrf, /* starting mrf reg nr */
+		 brw_vec8_grf(0, 0), /* src */
+		 false,		/* allocate */
+		 true,		/* used */
+		 inst->mlen,
+		 0,		/* response len */
+		 inst->eot,	/* eot */
+		 inst->eot,	/* writes complete */
+		 inst->offset,	/* urb destination offset */
+		 BRW_URB_SWIZZLE_INTERLEAVE);
+}
+
+void
+vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1,
+						struct brw_reg index)
+{
+   int second_vertex_offset;
+
+   if (intel->gen >= 6)
+      second_vertex_offset = 1;
+   else
+      second_vertex_offset = 16;
+
+   m1 = retype(m1, BRW_REGISTER_TYPE_D);
+
+   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
+    * M1.4 are used, and the rest are ignored.
+    */
+   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
+   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
+   struct brw_reg index_0 = suboffset(vec1(index), 0);
+   struct brw_reg index_4 = suboffset(vec1(index), 4);
+
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+
+   brw_MOV(p, m1_0, index_0);
+
+   brw_set_predicate_inverse(p, true);
+   if (index.file == BRW_IMMEDIATE_VALUE) {
+      index_4.dw1.ud++;
+      brw_MOV(p, m1_4, index_4);
+   } else {
+      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
+   }
+
+   brw_pop_insn_state(p);
+}
+
+void
+vec4_visitor::generate_scratch_read(vec4_instruction *inst,
+				    struct brw_reg dst,
+				    struct brw_reg index)
+{
+   if (intel->gen >= 6) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p,
+	      retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
+	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+      brw_pop_insn_state(p);
+   }
+
+   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+				     index);
+
+   uint32_t msg_type;
+
+   if (intel->gen >= 6)
+      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   else if (intel->gen == 5 || intel->is_g4x)
+      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   else
+      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
+   brw_set_dp_read_message(p, send,
+			   255, /* binding table index: stateless access */
+			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+			   msg_type,
+			   BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+			   2, /* mlen */
+			   1 /* rlen */);
+}
+
+void
+vec4_visitor::generate_scratch_write(vec4_instruction *inst,
+				     struct brw_reg dst,
+				     struct brw_reg src,
+				     struct brw_reg index)
+{
+   /* If the instruction is predicated, we'll predicate the send, not
+    * the header setup.
+    */
+   brw_set_predicate_control(p, false);
+
+   if (intel->gen >= 6) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p,
+	      retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
+	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+      brw_pop_insn_state(p);
+   }
+
+   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+				     index);
+
+   brw_MOV(p,
+	   retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
+	   retype(src, BRW_REGISTER_TYPE_D));
+
+   uint32_t msg_type;
+
+   if (intel->gen >= 6)
+      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+   else
+      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+
+   brw_set_predicate_control(p, inst->predicate);
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
+   brw_set_dp_write_message(p, send,
+			    255, /* binding table index: stateless access */
+			    BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+			    msg_type,
+			    3, /* mlen */
+			    true, /* header present */
+			    false, /* pixel scoreboard */
+			    0, /* rlen */
+			    false, /* eot */
+			    false /* commit */);
+}
+
+void
+vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
+				      struct brw_reg dst,
+				      struct brw_reg *src)
+{
+   vec4_instruction *inst = (vec4_instruction *)instruction;
+
+   switch (inst->opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      if (intel->gen >= 6) {
+	 generate_math1_gen6(inst, dst, src[0]);
+      } else {
+	 generate_math1_gen4(inst, dst, src[0]);
+      }
+      break;
+
+   case SHADER_OPCODE_POW:
+      if (intel->gen >= 6) {
+	 generate_math2_gen6(inst, dst, src[0], src[1]);
+      } else {
+	 generate_math2_gen4(inst, dst, src[0], src[1]);
+      }
+      break;
+
+   case VS_OPCODE_URB_WRITE:
+      generate_urb_write(inst);
+      break;
+
+   case VS_OPCODE_SCRATCH_READ:
+      generate_scratch_read(inst, dst, src[0]);
+      break;
+
+   case VS_OPCODE_SCRATCH_WRITE:
+      generate_scratch_write(inst, dst, src[0], src[1]);
+      break;
+
+   default:
+      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+	 fail("unsupported opcode in `%s' in VS\n",
+	      brw_opcodes[inst->opcode].name);
+      } else {
+	 fail("Unsupported opcode %d in VS", inst->opcode);
+      }
+   }
+}
+
+bool
+vec4_visitor::run()
+{
+   /* Generate VS IR for main().  (the visitor only descends into
+    * functions called "main").
+    */
+   visit_instructions(shader->ir);
+
+   emit_urb_writes();
+
+   /* Before any optimization, push array accesses out to scratch
+    * space where we need them to be.  This pass may allocate new
+    * virtual GRFs, so we want to do it early.  It also makes sure
+    * that we have reladdr computations available for CSE, since we'll
+    * often do repeated subexpressions for those.
+    */
+   move_grf_array_access_to_scratch();
+
+   bool progress;
+   do {
+      progress = false;
+      progress = dead_code_eliminate() || progress;
+   } while (progress);
+
+   if (failed)
+      return false;
+
+   setup_payload();
+   reg_allocate();
+
+   if (failed)
+      return false;
+
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   generate_code();
+
+   return !failed;
+}
+
+void
+vec4_visitor::generate_code()
+{
+   int last_native_inst = p->nr_insn;
+   const char *last_annotation_string = NULL;
+   ir_instruction *last_annotation_ir = NULL;
+
+   int loop_stack_array_size = 16;
+   int loop_stack_depth = 0;
+   brw_instruction **loop_stack =
+      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+   int *if_depth_in_loop =
+      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
+
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("Native code for vertex shader %d:\n", prog->Name);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+      struct brw_reg src[3], dst;
+
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 if (last_annotation_ir != inst->ir) {
+	    last_annotation_ir = inst->ir;
+	    if (last_annotation_ir) {
+	       printf("   ");
+	       last_annotation_ir->print();
+	       printf("\n");
+	    }
+	 }
+	 if (last_annotation_string != inst->annotation) {
+	    last_annotation_string = inst->annotation;
+	    if (last_annotation_string)
+	       printf("   %s\n", last_annotation_string);
+	 }
+      }
+
+      for (unsigned int i = 0; i < 3; i++) {
+	 src[i] = inst->get_src(i);
+      }
+      dst = inst->get_dst();
+
+      brw_set_conditionalmod(p, inst->conditional_mod);
+      brw_set_predicate_control(p, inst->predicate);
+      brw_set_predicate_inverse(p, inst->predicate_inverse);
+      brw_set_saturate(p, inst->saturate);
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+	 brw_MOV(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_ADD:
+	 brw_ADD(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_MUL:
+	 brw_MUL(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_MACH:
+	 brw_set_acc_write_control(p, 1);
+	 brw_MACH(p, dst, src[0], src[1]);
+	 brw_set_acc_write_control(p, 0);
+	 break;
+
+      case BRW_OPCODE_FRC:
+	 brw_FRC(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDD:
+	 brw_RNDD(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDE:
+	 brw_RNDE(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDZ:
+	 brw_RNDZ(p, dst, src[0]);
+	 break;
+
+      case BRW_OPCODE_AND:
+	 brw_AND(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_OR:
+	 brw_OR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_XOR:
+	 brw_XOR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_NOT:
+	 brw_NOT(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_ASR:
+	 brw_ASR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SHR:
+	 brw_SHR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SHL:
+	 brw_SHL(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_CMP:
+	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SEL:
+	 brw_SEL(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_DP4:
+	 brw_DP4(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_DP3:
+	 brw_DP3(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_DP2:
+	 brw_DP2(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_IF:
+	 if (inst->src[0].file != BAD_FILE) {
+	    /* The instruction has an embedded compare (only allowed on gen6) */
+	    assert(intel->gen == 6);
+	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+	 } else {
+	    struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
+	    brw_inst->header.predicate_control = inst->predicate;
+	 }
+	 if_depth_in_loop[loop_stack_depth]++;
+	 break;
+
+      case BRW_OPCODE_ELSE:
+	 brw_ELSE(p);
+	 break;
+      case BRW_OPCODE_ENDIF:
+	 brw_ENDIF(p);
+	 if_depth_in_loop[loop_stack_depth]--;
+	 break;
+
+      case BRW_OPCODE_DO:
+	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+	 if (loop_stack_array_size <= loop_stack_depth) {
+	    loop_stack_array_size *= 2;
+	    loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+				  loop_stack_array_size);
+	    if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+				        loop_stack_array_size);
+	 }
+	 if_depth_in_loop[loop_stack_depth] = 0;
+	 break;
+
+      case BRW_OPCODE_BREAK:
+	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 break;
+      case BRW_OPCODE_CONTINUE:
+	 /* FINISHME: We need to write the loop instruction support still. */
+	 if (intel->gen >= 6)
+	    gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+	 else
+	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 break;
+
+      case BRW_OPCODE_WHILE: {
+	 struct brw_instruction *inst0, *inst1;
+	 GLuint br = 1;
+
+	 if (intel->gen >= 5)
+	    br = 2;
+
+	 assert(loop_stack_depth > 0);
+	 loop_stack_depth--;
+	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+	 if (intel->gen < 6) {
+	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
+	    while (inst0 > loop_stack[loop_stack_depth]) {
+	       inst0--;
+	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+		   inst0->bits3.if_else.jump_count == 0) {
+		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+	    }
+	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			inst0->bits3.if_else.jump_count == 0) {
+		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+	       }
+	    }
+	 }
+      }
+	 break;
+
+      default:
+	 generate_vs_instruction(inst, dst, src);
+	 break;
+      }
+
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+	    if (0) {
+	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+		      ((uint32_t *)&p->store[i])[3],
+		      ((uint32_t *)&p->store[i])[2],
+		      ((uint32_t *)&p->store[i])[1],
+		      ((uint32_t *)&p->store[i])[0]);
+	    }
+	    brw_disasm(stdout, &p->store[i], intel->gen);
+	 }
+      }
+
+      last_native_inst = p->nr_insn;
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("\n");
+   }
+
+   ralloc_free(loop_stack);
+   ralloc_free(if_depth_in_loop);
+
+   brw_set_uip_jip(p);
+
+   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
+    * emit issues, it doesn't get the jump distances into the output,
+    * which is often something we want to debug.  So this is here in
+    * case you're doing that.
+    */
+   if (0) {
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 for (unsigned int i = 0; i < p->nr_insn; i++) {
+	    printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+		   ((uint32_t *)&p->store[i])[3],
+		   ((uint32_t *)&p->store[i])[2],
+		   ((uint32_t *)&p->store[i])[1],
+		   ((uint32_t *)&p->store[i])[0]);
+	    brw_disasm(stdout, &p->store[i], intel->gen);
+	 }
+      }
+   }
+}
+
+extern "C" {
+
+bool
+brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
+{
+   if (!prog)
+      return false;
+
+   struct brw_shader *shader =
+     (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (!shader)
+      return false;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n\n");
+   }
+
+   vec4_visitor v(c, prog, shader);
+   if (!v.run()) {
+      prog->LinkStatus = GL_FALSE;
+      ralloc_strcat(&prog->InfoLog, v.fail_msg);
+      return false;
+   }
+
+   return true;
+}
+
+} /* extern "C" */
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
new file mode 100644
index 0000000..3f052ff
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+extern "C" {
+#include "main/macros.h"
+#include "program/register_allocate.h"
+} /* extern "C" */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+using namespace brw;
+
+namespace brw {
+
+static void
+assign(int *reg_hw_locations, reg *reg)
+{
+   if (reg->file == GRF) {
+      reg->reg = reg_hw_locations[reg->reg];
+   }
+}
+
+void
+vec4_visitor::reg_allocate_trivial()
+{
+   int hw_reg_mapping[this->virtual_grf_count];
+   bool virtual_grf_used[this->virtual_grf_count];
+   int i;
+   int next;
+
+   /* Calculate which virtual GRFs are actually in use after whatever
+    * optimization passes have occurred.
+    */
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      virtual_grf_used[i] = false;
+   }
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      if (inst->dst.file == GRF)
+	 virtual_grf_used[inst->dst.reg] = true;
+
+      for (int i = 0; i < 3; i++) {
+	 if (inst->src[i].file == GRF)
+	    virtual_grf_used[inst->src[i].reg] = true;
+      }
+   }
+
+   hw_reg_mapping[0] = this->first_non_payload_grf;
+   next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
+   for (i = 1; i < this->virtual_grf_count; i++) {
+      if (virtual_grf_used[i]) {
+	 hw_reg_mapping[i] = next;
+	 next += this->virtual_grf_sizes[i];
+      }
+   }
+   prog_data->total_grf = next;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   if (prog_data->total_grf > BRW_MAX_GRF) {
+      fail("Ran out of regs on trivial allocator (%d/%d)\n",
+	   prog_data->total_grf, BRW_MAX_GRF);
+   }
+}
+
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+			      int *class_sizes,
+			      int class_count,
+			      int base_reg_count)
+{
+   /* Compute the total number of registers across all classes. */
+   int ra_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+   }
+
+   ralloc_free(brw->vs.ra_reg_to_grf);
+   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->vs.regs);
+   brw->vs.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->vs.classes);
+   brw->vs.classes = ralloc_array(brw, int, class_count + 1);
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0;
+   for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
+
+      for (int j = 0; j < class_reg_count; j++) {
+	 ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
+
+	 brw->vs.ra_reg_to_grf[reg] = j;
+
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
+	 }
+
+	 reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
+   ra_set_finalize(brw->vs.regs);
+}
+
+void
+vec4_visitor::reg_allocate()
+{
+   int hw_reg_mapping[virtual_grf_count];
+   int first_assigned_grf = this->first_non_payload_grf;
+   int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   /* Using the trivial allocator can be useful in debugging undefined
+    * register access as a result of broken optimization passes.
+    */
+   if (0) {
+      reg_allocate_trivial();
+      return;
+   }
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.
+    *
+    * The base registers store a vec4.  However, we'll need larger
+    * storage for arrays, structures, and matrices, which will be sets
+    * of contiguous registers.
+    */
+   class_sizes[class_count++] = 1;
+
+   for (int r = 0; r < virtual_grf_count; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+	 if (class_sizes[i] == this->virtual_grf_sizes[r])
+	    break;
+      }
+      if (i == class_count) {
+	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
+	    fail("Object too large to register allocate.\n");
+	 }
+
+	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
+
+   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
+						    virtual_grf_count);
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      for (int c = 0; c < class_count; c++) {
+	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+	    ra_set_node_class(g, i, brw->vs.classes[c]);
+	    break;
+	 }
+      }
+
+      for (int j = 0; j < i; j++) {
+	 if (virtual_grf_interferes(i, j)) {
+	    ra_add_node_interference(g, i, j);
+	 }
+      }
+   }
+
+   if (!ra_allocate_no_spills(g)) {
+      ralloc_free(g);
+      fail("No register spilling support yet\n");
+   }
+
+   /* Get the chosen virtual registers for each node, and map virtual
+    * regs in the register classes back down to real hardware reg
+    * numbers.
+    */
+   prog_data->total_grf = first_assigned_grf;
+   for (int i = 0; i < virtual_grf_count; i++) {
+      int reg = ra_get_node_reg(g, i);
+
+      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
+      prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + 1);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   ralloc_free(g);
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
new file mode 100644
index 0000000..b3a07bd
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -0,0 +1,2156 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
+
+namespace brw {
+
+src_reg::src_reg(dst_reg reg)
+{
+   init();
+
+   this->file = reg.file;
+   this->reg = reg.reg;
+   this->reg_offset = reg.reg_offset;
+   this->type = reg.type;
+   this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
+
+   int swizzles[4];
+   int next_chan = 0;
+   int last = 0;
+
+   for (int i = 0; i < 4; i++) {
+      if (!(reg.writemask & (1 << i)))
+	 continue;
+
+      swizzles[next_chan++] = last = i;
+   }
+
+   for (; next_chan < 4; next_chan++) {
+      swizzles[next_chan] = last;
+   }
+
+   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+				swizzles[2], swizzles[3]);
+}
+
+dst_reg::dst_reg(src_reg reg)
+{
+   init();
+
+   this->file = reg.file;
+   this->reg = reg.reg;
+   this->reg_offset = reg.reg_offset;
+   this->type = reg.type;
+   this->writemask = WRITEMASK_XYZW;
+   this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst,
+		   src_reg src0, src_reg src1, src_reg src2)
+{
+   vec4_instruction *inst = new(mem_ctx) vec4_instruction();
+
+   inst->opcode = opcode;
+   inst->dst = dst;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+   inst->ir = this->base_ir;
+   inst->annotation = this->current_annotation;
+
+   this->instructions.push_tail(inst);
+
+   return inst;
+}
+
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
+{
+   return emit(opcode, dst, src0, src1, src_reg());
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
+{
+   assert(dst.writemask != 0);
+   return emit(opcode, dst, src0, src_reg(), src_reg());
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode)
+{
+   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
+}
+
+void
+vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
+{
+   static enum opcode dot_opcodes[] = {
+      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
+   };
+
+   emit(dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+void
+vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.
+    */
+   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
+
+   if (dst.writemask != WRITEMASK_XYZW) {
+      /* The gen6 math instruction must be align1, so we can't do
+       * writemasks.
+       */
+      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
+
+      emit(opcode, temp_dst, temp_src);
+
+      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
+   } else {
+      emit(opcode, dst, temp_src);
+   }
+}
+
+void
+vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   vec4_instruction *inst = emit(opcode, dst, src);
+   inst->base_mrf = 1;
+   inst->mlen = 1;
+}
+
+void
+vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
+{
+   switch (opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      break;
+   default:
+      assert(!"not reached: bad math opcode");
+      return;
+   }
+
+   if (intel->gen >= 6) {
+      return emit_math1_gen6(opcode, dst, src);
+   } else {
+      return emit_math1_gen4(opcode, dst, src);
+   }
+}
+
+void
+vec4_visitor::emit_math2_gen6(enum opcode opcode,
+			      dst_reg dst, src_reg src0, src_reg src1)
+{
+   src_reg expanded;
+
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.  Move the sources to temporaries to make it
+    * generally work.
+    */
+
+   expanded = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
+   src0 = expanded;
+
+   expanded = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
+   src1 = expanded;
+
+   if (dst.writemask != WRITEMASK_XYZW) {
+      /* The gen6 math instruction must be align1, so we can't do
+       * writemasks.
+       */
+      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
+
+      emit(opcode, temp_dst, src0, src1);
+
+      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
+   } else {
+      emit(opcode, dst, src0, src1);
+   }
+}
+
+void
+vec4_visitor::emit_math2_gen4(enum opcode opcode,
+			      dst_reg dst, src_reg src0, src_reg src1)
+{
+   vec4_instruction *inst = emit(opcode, dst, src0, src1);
+   inst->base_mrf = 1;
+   inst->mlen = 2;
+}
+
+void
+vec4_visitor::emit_math(enum opcode opcode,
+			dst_reg dst, src_reg src0, src_reg src1)
+{
+   assert(opcode == SHADER_OPCODE_POW);
+
+   if (intel->gen >= 6) {
+      return emit_math2_gen6(opcode, dst, src0, src1);
+   } else {
+      return emit_math2_gen4(opcode, dst, src0, src1);
+   }
+}
+
+void
+vec4_visitor::visit_instructions(const exec_list *list)
+{
+   foreach_list(node, list) {
+      ir_instruction *ir = (ir_instruction *)node;
+
+      base_ir = ir;
+      ir->accept(this);
+   }
+}
+
+
+static int
+type_size(const struct glsl_type *type)
+{
+   unsigned int i;
+   int size;
+
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (type->is_matrix()) {
+	 return type->matrix_columns;
+      } else {
+	 /* Regardless of size of vector, it gets a vec4. This is bad
+	  * packing for things like floats, but otherwise arrays become a
+	  * mess.  Hopefully a later pass over the code can pack scalars
+	  * down if appropriate.
+	  */
+	 return 1;
+      }
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+   case GLSL_TYPE_STRUCT:
+      size = 0;
+      for (i = 0; i < type->length; i++) {
+	 size += type_size(type->fields.structure[i].type);
+      }
+      return size;
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+int
+vec4_visitor::virtual_grf_alloc(int size)
+{
+   if (virtual_grf_array_size <= virtual_grf_count) {
+      if (virtual_grf_array_size == 0)
+	 virtual_grf_array_size = 16;
+      else
+	 virtual_grf_array_size *= 2;
+      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
+				   virtual_grf_array_size);
+   }
+   virtual_grf_sizes[virtual_grf_count] = size;
+   return virtual_grf_count++;
+}
+
+src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+   init();
+
+   this->file = GRF;
+   this->reg = v->virtual_grf_alloc(type_size(type));
+
+   if (type->is_array() || type->is_record()) {
+      this->swizzle = BRW_SWIZZLE_NOOP;
+   } else {
+      this->swizzle = swizzle_for_size(type->vector_elements);
+   }
+
+   this->type = brw_type_for_base_type(type);
+}
+
+dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+   init();
+
+   this->file = GRF;
+   this->reg = v->virtual_grf_alloc(type_size(type));
+
+   if (type->is_array() || type->is_record()) {
+      this->writemask = WRITEMASK_XYZW;
+   } else {
+      this->writemask = (1 << type->vector_elements) - 1;
+   }
+
+   this->type = brw_type_for_base_type(type);
+}
+
+/* Our support for uniforms is piggy-backed on the struct
+ * gl_fragment_program, because that's where the values actually
+ * get stored, rather than in some global gl_shader_program uniform
+ * store.
+ */
+int
+vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
+{
+   unsigned int offset = 0;
+   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
+
+   if (type->is_matrix()) {
+      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+							type->vector_elements,
+							1);
+
+      for (unsigned int i = 0; i < type->matrix_columns; i++) {
+	 offset += setup_uniform_values(loc + offset, column);
+      }
+
+      return offset;
+   }
+
+   switch (type->base_type) {
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_BOOL:
+      for (unsigned int i = 0; i < type->vector_elements; i++) {
+	 int slot = this->uniforms * 4 + i;
+	 switch (type->base_type) {
+	 case GLSL_TYPE_FLOAT:
+	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+	    break;
+	 case GLSL_TYPE_UINT:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
+	    break;
+	 case GLSL_TYPE_INT:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
+	    break;
+	 case GLSL_TYPE_BOOL:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
+	    break;
+	 default:
+	    assert(!"not reached");
+	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+	    break;
+	 }
+	 c->prog_data.param[slot] = &values[i];
+      }
+
+      for (unsigned int i = type->vector_elements; i < 4; i++) {
+	 c->prog_data.param_convert[this->uniforms * 4 + i] =
+	    PARAM_CONVERT_ZERO;
+	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
+      }
+
+      this->uniform_size[this->uniforms] = type->vector_elements;
+      this->uniforms++;
+
+      return 1;
+
+   case GLSL_TYPE_STRUCT:
+      for (unsigned int i = 0; i < type->length; i++) {
+	 offset += setup_uniform_values(loc + offset,
+					type->fields.structure[i].type);
+      }
+      return offset;
+
+   case GLSL_TYPE_ARRAY:
+      for (unsigned int i = 0; i < type->length; i++) {
+	 offset += setup_uniform_values(loc + offset, type->fields.array);
+      }
+      return offset;
+
+   case GLSL_TYPE_SAMPLER:
+      /* The sampler takes up a slot, but we don't use any values from it. */
+      return 1;
+
+   default:
+      assert(!"not reached");
+      return 0;
+   }
+}
+
+/* Our support for builtin uniforms is even scarier than non-builtin.
+ * It sits on top of the PROG_STATE_VAR parameters that are
+ * automatically updated from GL context state.
+ */
+void
+vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
+{
+   const ir_state_slot *const slots = ir->state_slots;
+   assert(ir->state_slots != NULL);
+
+   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+      /* This state reference has already been setup by ir_to_mesa,
+       * but we'll get the same index back here.  We can reference
+       * ParameterValues directly, since unlike brw_fs.cpp, we never
+       * add new state references during compile.
+       */
+      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
+					    (gl_state_index *)slots[i].tokens);
+      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
+
+      this->uniform_size[this->uniforms] = 0;
+      /* Add each of the unique swizzled channels of the element.
+       * This will end up matching the size of the glsl_type of this field.
+       */
+      int last_swiz = -1;
+      for (unsigned int j = 0; j < 4; j++) {
+	 int swiz = GET_SWZ(slots[i].swizzle, j);
+	 last_swiz = swiz;
+
+	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
+	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
+	 if (swiz <= last_swiz)
+	    this->uniform_size[this->uniforms]++;
+      }
+      this->uniforms++;
+   }
+}
+
+dst_reg *
+vec4_visitor::variable_storage(ir_variable *var)
+{
+   return (dst_reg *)hash_table_find(this->variable_ht, var);
+}
+
+void
+vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
+{
+   ir_expression *expr = ir->as_expression();
+
+   if (expr) {
+      src_reg op[2];
+      vec4_instruction *inst;
+
+      assert(expr->get_num_operands() <= 2);
+      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+	 assert(expr->operands[i]->type->is_scalar());
+
+	 expr->operands[i]->accept(this);
+	 op[i] = this->result;
+      }
+
+      switch (expr->operation) {
+      case ir_unop_logic_not:
+	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+	 break;
+
+      case ir_binop_logic_xor:
+	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_logic_or:
+	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_logic_and:
+	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_unop_f2b:
+	 if (intel->gen >= 6) {
+	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
+	 } else {
+	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
+	 }
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_unop_i2b:
+	 if (intel->gen >= 6) {
+	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+	 } else {
+	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
+	 }
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_greater:
+      case ir_binop_gequal:
+      case ir_binop_less:
+      case ir_binop_lequal:
+      case ir_binop_equal:
+      case ir_binop_all_equal:
+      case ir_binop_nequal:
+      case ir_binop_any_nequal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+	 inst->conditional_mod =
+	    brw_conditional_for_comparison(expr->operation);
+	 break;
+
+      default:
+	 assert(!"not reached");
+	 break;
+      }
+      return;
+   }
+
+   ir->accept(this);
+
+   if (intel->gen >= 6) {
+      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
+			       this->result, src_reg(1));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   } else {
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   }
+}
+
+/**
+ * Emit a gen6 IF statement with the comparison folded into the IF
+ * instruction.
+ */
+void
+vec4_visitor::emit_if_gen6(ir_if *ir)
+{
+   ir_expression *expr = ir->condition->as_expression();
+
+   if (expr) {
+      src_reg op[2];
+      vec4_instruction *inst;
+      dst_reg temp;
+
+      assert(expr->get_num_operands() <= 2);
+      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+	 expr->operands[i]->accept(this);
+	 op[i] = this->result;
+      }
+
+      switch (expr->operation) {
+      case ir_unop_logic_not:
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+	 return;
+
+      case ir_binop_logic_xor:
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_binop_logic_or:
+	 temp = dst_reg(this, glsl_type::bool_type);
+	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_binop_logic_and:
+	 temp = dst_reg(this, glsl_type::bool_type);
+	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_unop_f2b:
+	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_unop_i2b:
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_binop_greater:
+      case ir_binop_gequal:
+      case ir_binop_less:
+      case ir_binop_lequal:
+      case ir_binop_equal:
+      case ir_binop_nequal:
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod =
+	    brw_conditional_for_comparison(expr->operation);
+	 return;
+
+      case ir_binop_all_equal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+	 return;
+
+      case ir_binop_any_nequal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+	 return;
+
+      case ir_unop_any:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+	 return;
+
+      default:
+	 assert(!"not reached");
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+      }
+      return;
+   }
+
+   ir->condition->accept(this);
+
+   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
+			    this->result, src_reg(0));
+   inst->conditional_mod = BRW_CONDITIONAL_NZ;
+}
+
+void
+vec4_visitor::visit(ir_variable *ir)
+{
+   dst_reg *reg = NULL;
+
+   if (variable_storage(ir))
+      return;
+
+   switch (ir->mode) {
+   case ir_var_in:
+      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
+      break;
+
+   case ir_var_out:
+      reg = new(mem_ctx) dst_reg(this, ir->type);
+
+      for (int i = 0; i < type_size(ir->type); i++) {
+	 output_reg[ir->location + i] = *reg;
+	 output_reg[ir->location + i].reg_offset = i;
+	 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
+      }
+      break;
+
+   case ir_var_auto:
+   case ir_var_temporary:
+      reg = new(mem_ctx) dst_reg(this, ir->type);
+      break;
+
+   case ir_var_uniform:
+      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
+
+      if (!strncmp(ir->name, "gl_", 3)) {
+	 setup_builtin_uniform_values(ir);
+      } else {
+	 setup_uniform_values(ir->location, ir->type);
+      }
+      break;
+
+   default:
+      assert(!"not reached");
+   }
+
+   reg->type = brw_type_for_base_type(ir->type);
+   hash_table_insert(this->variable_ht, reg, ir);
+}
+
+void
+vec4_visitor::visit(ir_loop *ir)
+{
+   dst_reg counter;
+
+   /* We don't want debugging output to print the whole body of the
+    * loop as the annotation.
+    */
+   this->base_ir = NULL;
+
+   if (ir->counter != NULL) {
+      this->base_ir = ir->counter;
+      ir->counter->accept(this);
+      counter = *(variable_storage(ir->counter));
+
+      if (ir->from != NULL) {
+	 this->base_ir = ir->from;
+	 ir->from->accept(this);
+
+	 emit(BRW_OPCODE_MOV, counter, this->result);
+      }
+   }
+
+   emit(BRW_OPCODE_DO);
+
+   if (ir->to) {
+      this->base_ir = ir->to;
+      ir->to->accept(this);
+
+      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
+				    src_reg(counter), this->result);
+      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
+
+      inst = emit(BRW_OPCODE_BREAK);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+   }
+
+   visit_instructions(&ir->body_instructions);
+
+
+   if (ir->increment) {
+      this->base_ir = ir->increment;
+      ir->increment->accept(this);
+      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
+   }
+
+   emit(BRW_OPCODE_WHILE);
+}
+
+void
+vec4_visitor::visit(ir_loop_jump *ir)
+{
+   switch (ir->mode) {
+   case ir_loop_jump::jump_break:
+      emit(BRW_OPCODE_BREAK);
+      break;
+   case ir_loop_jump::jump_continue:
+      emit(BRW_OPCODE_CONTINUE);
+      break;
+   }
+}
+
+
+void
+vec4_visitor::visit(ir_function_signature *ir)
+{
+   assert(0);
+   (void)ir;
+}
+
+void
+vec4_visitor::visit(ir_function *ir)
+{
+   /* Ignore function bodies other than main() -- we shouldn't see calls to
+    * them since they should all be inlined.
+    */
+   if (strcmp(ir->name, "main") == 0) {
+      const ir_function_signature *sig;
+      exec_list empty;
+
+      sig = ir->matching_signature(&empty);
+
+      assert(sig);
+
+      visit_instructions(&sig->body);
+   }
+}
+
+GLboolean
+vec4_visitor::try_emit_sat(ir_expression *ir)
+{
+   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+   if (!sat_src)
+      return false;
+
+   sat_src->accept(this);
+   src_reg src = this->result;
+
+   this->result = src_reg(this, ir->type);
+   vec4_instruction *inst;
+   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
+   inst->saturate = true;
+
+   return true;
+}
+
+void
+vec4_visitor::emit_bool_comparison(unsigned int op,
+				 dst_reg dst, src_reg src0, src_reg src1)
+{
+   /* original gen4 does destination conversion before comparison. */
+   if (intel->gen < 5)
+      dst.type = src0.type;
+
+   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
+   inst->conditional_mod = brw_conditional_for_comparison(op);
+
+   dst.type = BRW_REGISTER_TYPE_D;
+   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
+}
+
+void
+vec4_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   src_reg op[Elements(ir->operands)];
+   src_reg result_src;
+   dst_reg result_dst;
+   vec4_instruction *inst;
+
+   if (try_emit_sat(ir))
+      return;
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result.file = BAD_FILE;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == BAD_FILE) {
+	 printf("Failed to get tree for expression operand:\n");
+	 ir->operands[operand]->print();
+	 exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   int vector_elements = ir->operands[0]->type->vector_elements;
+   if (ir->operands[1]) {
+      vector_elements = MAX2(vector_elements,
+			     ir->operands[1]->type->vector_elements);
+   }
+
+   this->result.file = BAD_FILE;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = src_reg(this, ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = dst_reg(result_src);
+   /* If nothing special happens, this is the result. */
+   this->result = result_src;
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
+       * ones complement of the whole register, not just bit 0.
+       */
+      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
+      break;
+   case ir_unop_neg:
+      op[0].negate = !op[0].negate;
+      this->result = op[0];
+      break;
+   case ir_unop_abs:
+      op[0].abs = true;
+      op[0].negate = false;
+      this->result = op[0];
+      break;
+
+   case ir_unop_sign:
+      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
+
+      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_G;
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_L;
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      break;
+
+   case ir_unop_rcp:
+      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
+      break;
+   case ir_unop_log2:
+      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_sin:
+   case ir_unop_sin_reduced:
+      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+   case ir_unop_cos_reduced:
+      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+   case ir_unop_dFdy:
+      assert(!"derivatives not valid in vertex shader");
+      break;
+
+   case ir_unop_noise:
+      assert(!"not reached: should be handled by lower_noise");
+      break;
+
+   case ir_binop_add:
+      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      assert(!"not reached: should be handled by ir_sub_to_add_neg");
+      break;
+
+   case ir_binop_mul:
+      if (ir->type->is_integer()) {
+	 /* For integer multiplication, the MUL uses the low 16 bits
+	  * of one of the operands (src0 on gen6, src1 on gen7).  The
+	  * MACH accumulates in the contribution of the upper 16 bits
+	  * of that operand.
+	  *
+	  * FINISHME: Emit just the MUL if we know an operand is small
+	  * enough.
+	  */
+	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+	 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+	 emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
+	 emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
+      } else {
+	 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      }
+      break;
+   case ir_binop_div:
+      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+   case ir_binop_mod:
+      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      break;
+
+   case ir_binop_less:
+   case ir_binop_greater:
+   case ir_binop_lequal:
+   case ir_binop_gequal:
+   case ir_binop_equal:
+   case ir_binop_nequal: {
+      dst_reg temp = result_dst;
+      /* original gen4 does implicit conversion before comparison. */
+      if (intel->gen < 5)
+	 temp.type = op[0].type;
+
+      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
+      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
+      break;
+   }
+
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+	  ir->operands[1]->type->is_vector()) {
+	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+      } else {
+	 dst_reg temp = result_dst;
+	 /* original gen4 does implicit conversion before comparison. */
+	 if (intel->gen < 5)
+	    temp.type = op[0].type;
+
+	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+	  ir->operands[1]->type->is_vector()) {
+	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      } else {
+	 dst_reg temp = result_dst;
+	 /* original gen4 does implicit conversion before comparison. */
+	 if (intel->gen < 5)
+	    temp.type = op[0].type;
+
+	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      }
+      break;
+
+   case ir_unop_any:
+      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      break;
+
+   case ir_binop_logic_xor:
+      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or:
+      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_and:
+      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
+      break;
+   case ir_unop_rsq:
+      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+   case ir_unop_u2f:
+   case ir_unop_b2f:
+   case ir_unop_b2i:
+   case ir_unop_f2i:
+      emit(BRW_OPCODE_MOV, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b: {
+      dst_reg temp = result_dst;
+      /* original gen4 does implicit conversion before comparison. */
+      if (intel->gen < 5)
+	 temp.type = op[0].type;
+
+      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
+      break;
+   }
+
+   case ir_unop_trunc:
+      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      op[0].negate = !op[0].negate;
+      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+      this->result.negate = true;
+      break;
+   case ir_unop_floor:
+      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
+      break;
+   case ir_unop_round_even:
+      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_L;
+
+      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+   case ir_binop_max:
+      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_G;
+
+      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+
+   case ir_binop_pow:
+      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   case ir_unop_bit_not:
+      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
+      break;
+   case ir_binop_bit_and:
+      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_bit_xor:
+      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_bit_or:
+      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      assert(!"not reached: should be handled by lower_quadop_vector");
+      break;
+   }
+}
+
+
+void
+vec4_visitor::visit(ir_swizzle *ir)
+{
+   src_reg src;
+   int i = 0;
+   int swizzle[4];
+
+   /* Note that this is only swizzles in expressions, not those on the left
+    * hand side of an assignment, which do write masking.  See ir_assignment
+    * for that.
+    */
+
+   ir->val->accept(this);
+   src = this->result;
+   assert(src.file != BAD_FILE);
+
+   for (i = 0; i < ir->type->vector_elements; i++) {
+      switch (i) {
+      case 0:
+	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
+	 break;
+      case 1:
+	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
+	 break;
+      case 2:
+	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
+	 break;
+      case 3:
+	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
+	    break;
+      }
+   }
+   for (; i < 4; i++) {
+      /* Replicate the last channel out. */
+      swizzle[i] = swizzle[ir->type->vector_elements - 1];
+   }
+
+   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src;
+}
+
+void
+vec4_visitor::visit(ir_dereference_variable *ir)
+{
+   const struct glsl_type *type = ir->type;
+   dst_reg *reg = variable_storage(ir->var);
+
+   if (!reg) {
+      fail("Failed to find variable storage for %s\n", ir->var->name);
+      this->result = src_reg(brw_null_reg());
+      return;
+   }
+
+   this->result = src_reg(*reg);
+
+   if (type->is_scalar() || type->is_vector() || type->is_matrix())
+      this->result.swizzle = swizzle_for_size(type->vector_elements);
+}
+
+void
+vec4_visitor::visit(ir_dereference_array *ir)
+{
+   ir_constant *constant_index;
+   src_reg src;
+   int element_size = type_size(ir->type);
+
+   constant_index = ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   src = this->result;
+
+   if (constant_index) {
+      src.reg_offset += constant_index->value.i[0] * element_size;
+   } else {
+      /* Variable index array dereference.  It eats the "vec4" of the
+       * base of the array and an index that offsets the Mesa register
+       * index.
+       */
+      ir->array_index->accept(this);
+
+      src_reg index_reg;
+
+      if (element_size == 1) {
+	 index_reg = this->result;
+      } else {
+	 index_reg = src_reg(this, glsl_type::int_type);
+
+	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
+	      this->result, src_reg(element_size));
+      }
+
+      if (src.reladdr) {
+	 src_reg temp = src_reg(this, glsl_type::int_type);
+
+	 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);
+
+	 index_reg = temp;
+      }
+
+      src.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      src.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      src.swizzle = BRW_SWIZZLE_NOOP;
+   src.type = brw_type_for_base_type(ir->type);
+
+   this->result = src;
+}
+
+void
+vec4_visitor::visit(ir_dereference_record *ir)
+{
+   unsigned int i;
+   const glsl_type *struct_type = ir->record->type;
+   int offset = 0;
+
+   ir->record->accept(this);
+
+   for (i = 0; i < struct_type->length; i++) {
+      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+	 break;
+      offset += type_size(struct_type->fields.structure[i].type);
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      this->result.swizzle = BRW_SWIZZLE_NOOP;
+   this->result.type = brw_type_for_base_type(ir->type);
+
+   this->result.reg_offset += offset;
+}
+
+/**
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.
+ */
+static dst_reg
+get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
+{
+   /* The LHS must be a dereference.  If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series conditional moves
+    * before reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   ir_dereference_array *deref_array = ir->as_dereference_array();
+   if (deref_array) {
+      assert(!deref_array->array->type->is_vector());
+   }
+
+   /* Use the rvalue deref handler for the most part.  We'll ignore
+    * swizzles in it and write swizzles using writemask, though.
+    */
+   ir->accept(v);
+   return dst_reg(v->result);
+}
+
+void
+vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
+			      const struct glsl_type *type, bool predicated)
+{
+   if (type->base_type == GLSL_TYPE_STRUCT) {
+      for (unsigned int i = 0; i < type->length; i++) {
+	 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
+      }
+      return;
+   }
+
+   if (type->is_array()) {
+      for (unsigned int i = 0; i < type->length; i++) {
+	 emit_block_move(dst, src, type->fields.array, predicated);
+      }
+      return;
+   }
+
+   if (type->is_matrix()) {
+      const struct glsl_type *vec_type;
+
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+					 type->vector_elements, 1);
+
+      for (int i = 0; i < type->matrix_columns; i++) {
+	 emit_block_move(dst, src, vec_type, predicated);
+      }
+      return;
+   }
+
+   assert(type->is_scalar() || type->is_vector());
+
+   dst->type = brw_type_for_base_type(type);
+   src->type = dst->type;
+
+   dst->writemask = (1 << type->vector_elements) - 1;
+
+   /* Do we need to worry about swizzling a swizzle? */
+   assert(src->swizzle = BRW_SWIZZLE_NOOP);
+   src->swizzle = swizzle_for_size(type->vector_elements);
+
+   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
+   if (predicated)
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+   dst->reg_offset++;
+   src->reg_offset++;
+}
+
+
+/* If the RHS processing resulted in an instruction generating a
+ * temporary value, and it would be easy to rewrite the instruction to
+ * generate its result right into the LHS instead, do so.  This ends
+ * up reliably removing instructions where it can be tricky to do so
+ * later without real UD chain information.
+ */
+bool
+vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
+				     dst_reg dst,
+				     src_reg src,
+				     vec4_instruction *pre_rhs_inst,
+				     vec4_instruction *last_rhs_inst)
+{
+   /* This could be supported, but it would take more smarts. */
+   if (ir->condition)
+      return false;
+
+   if (pre_rhs_inst == last_rhs_inst)
+      return false; /* No instructions generated to work with. */
+
+   /* Make sure the last instruction generated our source reg. */
+   if (src.file != GRF ||
+       src.file != last_rhs_inst->dst.file ||
+       src.reg != last_rhs_inst->dst.reg ||
+       src.reg_offset != last_rhs_inst->dst.reg_offset ||
+       src.reladdr ||
+       src.abs ||
+       src.negate ||
+       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
+      return false;
+
+   /* Check that that last instruction fully initialized the channels
+    * we want to use, in the order we want to use them.  We could
+    * potentially reswizzle the operands of many instructions so that
+    * we could handle out of order channels, but don't yet.
+    */
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i)) {
+	 if (!(last_rhs_inst->dst.writemask & (1 << i)))
+	    return false;
+
+	 if (BRW_GET_SWZ(src.swizzle, i) != i)
+	    return false;
+      }
+   }
+
+   /* Success!  Rewrite the instruction. */
+   last_rhs_inst->dst.file = dst.file;
+   last_rhs_inst->dst.reg = dst.reg;
+   last_rhs_inst->dst.reg_offset = dst.reg_offset;
+   last_rhs_inst->dst.reladdr = dst.reladdr;
+   last_rhs_inst->dst.writemask &= dst.writemask;
+
+   return true;
+}
+
+void
+vec4_visitor::visit(ir_assignment *ir)
+{
+   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+   if (!ir->lhs->type->is_scalar() &&
+       !ir->lhs->type->is_vector()) {
+      ir->rhs->accept(this);
+      src_reg src = this->result;
+
+      if (ir->condition) {
+	 emit_bool_to_cond_code(ir->condition);
+      }
+
+      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
+      return;
+   }
+
+   /* Now we're down to just a scalar/vector with writemasks. */
+   int i;
+
+   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
+   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+
+   ir->rhs->accept(this);
+
+   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+
+   src_reg src = this->result;
+
+   int swizzles[4];
+   int first_enabled_chan = 0;
+   int src_chan = 0;
+
+   assert(ir->lhs->type->is_vector() ||
+	  ir->lhs->type->is_scalar());
+   dst.writemask = ir->write_mask;
+
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i)) {
+	 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
+	 break;
+      }
+   }
+
+   /* Swizzle a small RHS vector into the channels being written.
+    *
+    * glsl ir treats write_mask as dictating how many channels are
+    * present on the RHS while in our instructions we need to make
+    * those channels appear in the slots of the vec4 they're written to.
+    */
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i))
+	 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
+      else
+	 swizzles[i] = first_enabled_chan;
+   }
+   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+			      swizzles[2], swizzles[3]);
+
+   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
+      return;
+   }
+
+   if (ir->condition) {
+      emit_bool_to_cond_code(ir->condition);
+   }
+
+   for (i = 0; i < type_size(ir->lhs->type); i++) {
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
+
+      if (ir->condition)
+	 inst->predicate = BRW_PREDICATE_NORMAL;
+
+      dst.reg_offset++;
+      src.reg_offset++;
+   }
+}
+
+void
+vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
+{
+   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+      foreach_list(node, &ir->components) {
+	 ir_constant *field_value = (ir_constant *)node;
+
+	 emit_constant_values(dst, field_value);
+      }
+      return;
+   }
+
+   if (ir->type->is_array()) {
+      for (unsigned int i = 0; i < ir->type->length; i++) {
+	 emit_constant_values(dst, ir->array_elements[i]);
+      }
+      return;
+   }
+
+   if (ir->type->is_matrix()) {
+      for (int i = 0; i < ir->type->matrix_columns; i++) {
+	 for (int j = 0; j < ir->type->vector_elements; j++) {
+	    dst->writemask = 1 << j;
+	    dst->type = BRW_REGISTER_TYPE_F;
+
+	    emit(BRW_OPCODE_MOV, *dst,
+		 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
+	 }
+	 dst->reg_offset++;
+      }
+      return;
+   }
+
+   for (int i = 0; i < ir->type->vector_elements; i++) {
+      dst->writemask = 1 << i;
+      dst->type = brw_type_for_base_type(ir->type);
+
+      switch (ir->type->base_type) {
+      case GLSL_TYPE_FLOAT:
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
+	 break;
+      case GLSL_TYPE_INT:
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
+	 break;
+      case GLSL_TYPE_UINT:
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
+	 break;
+      case GLSL_TYPE_BOOL:
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
+	 break;
+      default:
+	 assert(!"Non-float/uint/int/bool constant");
+	 break;
+      }
+   }
+   dst->reg_offset++;
+}
+
+void
+vec4_visitor::visit(ir_constant *ir)
+{
+   dst_reg dst = dst_reg(this, ir->type);
+   this->result = src_reg(dst);
+
+   emit_constant_values(&dst, ir);
+}
+
+void
+vec4_visitor::visit(ir_call *ir)
+{
+   assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_texture *ir)
+{
+   /* FINISHME: Implement vertex texturing.
+    *
+    * With 0 vertex samplers available, the linker will reject
+    * programs that do vertex texturing, but after our visitor has
+    * run.
+    */
+}
+
+void
+vec4_visitor::visit(ir_return *ir)
+{
+   assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_discard *ir)
+{
+   assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_if *ir)
+{
+   /* Don't point the annotation at the if statement, because then it plus
+    * the then and else blocks get printed.
+    */
+   this->base_ir = ir->condition;
+
+   if (intel->gen == 6) {
+      emit_if_gen6(ir);
+   } else {
+      emit_bool_to_cond_code(ir->condition);
+      vec4_instruction *inst = emit(BRW_OPCODE_IF);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+   }
+
+   visit_instructions(&ir->then_instructions);
+
+   if (!ir->else_instructions.is_empty()) {
+      this->base_ir = ir->condition;
+      emit(BRW_OPCODE_ELSE);
+
+      visit_instructions(&ir->else_instructions);
+   }
+
+   this->base_ir = ir->condition;
+   emit(BRW_OPCODE_ENDIF);
+}
+
+int
+vec4_visitor::emit_vue_header_gen4(int header_mrf)
+{
+   /* Get the position */
+   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
+
+   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
+   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
+
+   current_annotation = "NDC";
+   dst_reg ndc_w = ndc;
+   ndc_w.writemask = WRITEMASK_W;
+   src_reg pos_w = pos;
+   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
+
+   dst_reg ndc_xyz = ndc;
+   ndc_xyz.writemask = WRITEMASK_XYZ;
+
+   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
+
+   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
+       c->key.nr_userclip || brw->has_negative_rhw_bug) {
+      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
+      GLuint i;
+
+      emit(BRW_OPCODE_MOV, header1, 0u);
+
+      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+	 assert(!"finishme: psiz");
+	 src_reg psiz;
+
+	 header1.writemask = WRITEMASK_W;
+	 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
+	 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
+      }
+
+      for (i = 0; i < c->key.nr_userclip; i++) {
+	 vec4_instruction *inst;
+
+	 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
+		     pos, src_reg(c->userplane[i]));
+	 inst->conditional_mod = BRW_CONDITIONAL_L;
+
+	 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
+	 inst->predicate = BRW_PREDICATE_NORMAL;
+      }
+
+      /* i965 clipping workaround:
+       * 1) Test for -ve rhw
+       * 2) If set,
+       *      set ndc = (0,0,0,0)
+       *      set ucp[6] = 1
+       *
+       * Later, clipping will detect ucp[6] and ensure the primitive is
+       * clipped against all fixed planes.
+       */
+      if (brw->has_negative_rhw_bug) {
+#if 0
+	 /* FINISHME */
+	 brw_CMP(p,
+		 vec8(brw_null_reg()),
+		 BRW_CONDITIONAL_L,
+		 brw_swizzle1(ndc, 3),
+		 brw_imm_f(0));
+
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+	 brw_MOV(p, ndc, brw_imm_f(0));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
+      }
+
+      header1.writemask = WRITEMASK_XYZW;
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
+   } else {
+      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
+				  BRW_REGISTER_TYPE_UD), 0u);
+   }
+
+   if (intel->gen == 5) {
+      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the ndc position (set above)
+       * dword 8-11 (m3) of the vertex header is the 4D space position
+       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+       * m6 is a pad so that the vertex element data is aligned
+       * m7 is the first vertex data we fill.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+
+      /* user clip distance. */
+      header_mrf += 2;
+
+      /* Pad so that vertex element data is aligned. */
+      header_mrf++;
+   } else {
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 (m1) is indices, point width, clip flags.
+       * dword 4-7 (m2) is ndc position (set above)
+       *
+       * dword 8-11 (m3) is the first vertex data.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+   }
+
+   return header_mrf;
+}
+
+int
+vec4_visitor::emit_vue_header_gen6(int header_mrf)
+{
+   struct brw_reg reg;
+
+   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+    * dword 0-3 (m2) of the header is indices, point width, clip flags.
+    * dword 4-7 (m3) is the 4D space position
+    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
+    * enabled.
+    *
+    * m4 or 6 is the first vertex element data we fill.
+    */
+
+   current_annotation = "indices, point width, clip flags";
+   reg = brw_message_reg(header_mrf++);
+   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
+   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
+	   src_reg(output_reg[VERT_RESULT_PSIZ]));
+   }
+
+   current_annotation = "gl_Position";
+   emit(BRW_OPCODE_MOV,
+	brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
+
+   current_annotation = "user clip distances";
+   if (c->key.nr_userclip) {
+      for (int i = 0; i < c->key.nr_userclip; i++) {
+	 struct brw_reg m;
+	 if (i < 4)
+	    m = brw_message_reg(header_mrf);
+	 else
+	    m = brw_message_reg(header_mrf + 1);
+
+	 emit(BRW_OPCODE_DP4,
+	      dst_reg(brw_writemask(m, 1 << (i & 3))),
+	      src_reg(c->userplane[i]));
+      }
+      header_mrf += 2;
+   }
+
+   current_annotation = NULL;
+
+   return header_mrf;
+}
+
+static int
+align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (intel->gen >= 6) {
+      /* URB data written (does not include the message header reg) must
+       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
+       * section 5.4.3.2.2: URB_INTERLEAVED.
+       *
+       * URB entries are allocated on a multiple of 1024 bits, so an
+       * extra 128 bits written here to make the end align to 256 is
+       * no problem.
+       */
+      if ((mlen % 2) != 1)
+	 mlen++;
+   }
+
+   return mlen;
+}
+
+/**
+ * Generates the VUE payload plus the 1 or 2 URB write instructions to
+ * complete the VS thread.
+ *
+ * The VUE layout is documented in Volume 2a.
+ */
+void
+vec4_visitor::emit_urb_writes()
+{
+   /* MRF 0 is reserved for the debugger, so start with message header
+    * in MRF 1.
+    */
+   int base_mrf = 1;
+   int mrf = base_mrf;
+   int urb_entry_size;
+   uint64_t outputs_remaining = c->prog_data.outputs_written;
+   /* In the process of generating our URB write message contents, we
+    * may need to unspill a register or load from an array.  Those
+    * reads would use MRFs 14-15.
+    */
+   int max_usable_mrf = 13;
+
+   /* FINISHME: edgeflag */
+
+   /* First mrf is the g0-based message header containing URB handles and such,
+    * which is implied in VS_OPCODE_URB_WRITE.
+    */
+   mrf++;
+
+   if (intel->gen >= 6) {
+      mrf = emit_vue_header_gen6(mrf);
+   } else {
+      mrf = emit_vue_header_gen4(mrf);
+   }
+
+   /* Set up the VUE data for the first URB write */
+   int attr;
+   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
+      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+	 continue;
+
+      outputs_remaining &= ~BITFIELD64_BIT(attr);
+
+      /* This is set up in the VUE header. */
+      if (attr == VERT_RESULT_HPOS)
+	 continue;
+
+      /* This is loaded into the VUE header, and thus doesn't occupy
+       * an attribute slot.
+       */
+      if (attr == VERT_RESULT_PSIZ)
+	 continue;
+
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
+				    src_reg(output_reg[attr]));
+
+      if ((attr == VERT_RESULT_COL0 ||
+	   attr == VERT_RESULT_COL1 ||
+	   attr == VERT_RESULT_BFC0 ||
+	   attr == VERT_RESULT_BFC1) &&
+	  c->key.clamp_vertex_color) {
+	 inst->saturate = true;
+      }
+
+      /* If this was MRF 15, we can't fit anything more into this URB
+       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
+       * even-numbered amount of URB write data, which will meet
+       * gen6's requirements for length alignment.
+       */
+      if (mrf > max_usable_mrf) {
+	 attr++;
+	 break;
+      }
+   }
+
+   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+   inst->base_mrf = base_mrf;
+   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+   inst->eot = !outputs_remaining;
+
+   urb_entry_size = mrf - base_mrf;
+
+   /* Optional second URB write */
+   if (outputs_remaining) {
+      mrf = base_mrf + 1;
+
+      for (; attr < VERT_RESULT_MAX; attr++) {
+	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+	    continue;
+
+	 assert(mrf < max_usable_mrf);
+
+	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+      }
+
+      inst = emit(VS_OPCODE_URB_WRITE);
+      inst->base_mrf = base_mrf;
+      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+      inst->eot = true;
+      /* URB destination offset.  In the previous write, we got MRFs
+       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
+       * URB row increments, and each of our MRFs is half of one of
+       * those, since we're doing interleaved writes.
+       */
+      inst->offset = (max_usable_mrf - base_mrf) / 2;
+
+      urb_entry_size += mrf - base_mrf;
+   }
+
+   if (intel->gen == 6)
+      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
+   else
+      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
+}
+
+src_reg
+vec4_visitor::get_scratch_offset(vec4_instruction *inst,
+				 src_reg *reladdr, int reg_offset)
+{
+   /* Because we store the values to scratch interleaved like our
+    * vertex data, we need to scale the vec4 index by 2.
+    */
+   int message_header_scale = 2;
+
+   /* Pre-gen6, the message header uses byte offsets instead of vec4
+    * (16-byte) offset units.
+    */
+   if (intel->gen < 6)
+      message_header_scale *= 16;
+
+   if (reladdr) {
+      src_reg index = src_reg(this, glsl_type::int_type);
+
+      vec4_instruction *add = emit(BRW_OPCODE_ADD,
+				   dst_reg(index),
+				   *reladdr,
+				   src_reg(reg_offset));
+      /* Move our new instruction from the tail to its correct place. */
+      add->remove();
+      inst->insert_before(add);
+
+      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
+				   index, src_reg(message_header_scale));
+      mul->remove();
+      inst->insert_before(mul);
+
+      return index;
+   } else {
+      return src_reg(reg_offset * message_header_scale);
+   }
+}
+
+/**
+ * Emits an instruction before @inst to load the value named by @orig_src
+ * from scratch space at @base_offset to @temp.
+ */
+void
+vec4_visitor::emit_scratch_read(vec4_instruction *inst,
+				dst_reg temp, src_reg orig_src,
+				int base_offset)
+{
+   int reg_offset = base_offset + orig_src.reg_offset;
+   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
+
+   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
+					      temp, index);
+
+   scratch_read_inst->base_mrf = 14;
+   scratch_read_inst->mlen = 1;
+   /* Move our instruction from the tail to its correct place. */
+   scratch_read_inst->remove();
+   inst->insert_before(scratch_read_inst);
+}
+
+/**
+ * Emits an instruction after @inst to store the value to be written
+ * to @orig_dst to scratch space at @base_offset, from @temp.
+ */
+void
+vec4_visitor::emit_scratch_write(vec4_instruction *inst,
+				 src_reg temp, dst_reg orig_dst,
+				 int base_offset)
+{
+   int reg_offset = base_offset + orig_dst.reg_offset;
+   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
+
+   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
+				       orig_dst.writemask));
+   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
+					       dst, temp, index);
+   scratch_write_inst->base_mrf = 13;
+   scratch_write_inst->mlen = 2;
+   scratch_write_inst->predicate = inst->predicate;
+   /* Move our instruction from the tail to its correct place. */
+   scratch_write_inst->remove();
+   inst->insert_after(scratch_write_inst);
+}
+
+/**
+ * We can't generally support array access in GRF space, because a
+ * single instruction's destination can only span 2 contiguous
+ * registers.  So, we send all GRF arrays that get variable index
+ * access to scratch space.
+ */
+void
+vec4_visitor::move_grf_array_access_to_scratch()
+{
+   int scratch_loc[this->virtual_grf_count];
+
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      scratch_loc[i] = -1;
+   }
+
+   /* First, calculate the set of virtual GRFs that need to be punted
+    * to scratch due to having any array access on them, and where in
+    * scratch.
+    */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->dst.file == GRF && inst->dst.reladdr &&
+	  scratch_loc[inst->dst.reg] == -1) {
+	 scratch_loc[inst->dst.reg] = c->last_scratch;
+	 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
+      }
+
+      for (int i = 0 ; i < 3; i++) {
+	 src_reg *src = &inst->src[i];
+
+	 if (src->file == GRF && src->reladdr &&
+	     scratch_loc[src->reg] == -1) {
+	    scratch_loc[src->reg] = c->last_scratch;
+	    c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
+	 }
+      }
+   }
+
+   /* Now, for anything that will be accessed through scratch, rewrite
+    * it to load/store.  Note that this is a _safe list walk, because
+    * we may generate a new scratch_write instruction after the one
+    * we're processing.
+    */
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* Set up the annotation tracking for new generated instructions. */
+      base_ir = inst->ir;
+      current_annotation = inst->annotation;
+
+      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
+	 src_reg temp = src_reg(this, glsl_type::vec4_type);
+
+	 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
+
+	 inst->dst.file = temp.file;
+	 inst->dst.reg = temp.reg;
+	 inst->dst.reg_offset = temp.reg_offset;
+	 inst->dst.reladdr = NULL;
+      }
+
+      for (int i = 0 ; i < 3; i++) {
+	 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
+	    continue;
+
+	 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+
+	 emit_scratch_read(inst, temp, inst->src[i],
+			   scratch_loc[inst->src[i].reg]);
+
+	 inst->src[i].file = temp.file;
+	 inst->src[i].reg = temp.reg;
+	 inst->src[i].reg_offset = temp.reg_offset;
+	 inst->src[i].reladdr = NULL;
+      }
+   }
+}
+
+
+vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
+			   struct gl_shader_program *prog,
+			   struct brw_shader *shader)
+{
+   this->c = c;
+   this->p = &c->func;
+   this->brw = p->brw;
+   this->intel = &brw->intel;
+   this->ctx = &intel->ctx;
+   this->prog = prog;
+   this->shader = shader;
+
+   this->mem_ctx = ralloc_context(NULL);
+   this->failed = false;
+
+   this->base_ir = NULL;
+   this->current_annotation = NULL;
+
+   this->c = c;
+   this->vp = prog->VertexProgram;
+   this->prog_data = &c->prog_data;
+
+   this->variable_ht = hash_table_ctor(0,
+				       hash_table_pointer_hash,
+				       hash_table_pointer_compare);
+
+   this->virtual_grf_def = NULL;
+   this->virtual_grf_use = NULL;
+   this->virtual_grf_sizes = NULL;
+   this->virtual_grf_count = 0;
+   this->virtual_grf_array_size = 0;
+   this->live_intervals_valid = false;
+
+   this->uniforms = 0;
+
+   this->variable_ht = hash_table_ctor(0,
+				       hash_table_pointer_hash,
+				       hash_table_pointer_compare);
+}
+
+vec4_visitor::~vec4_visitor()
+{
+   ralloc_free(this->mem_ctx);
+   hash_table_dtor(this->variable_ht);
+}
+
+
+void
+vec4_visitor::fail(const char *format, ...)
+{
+   va_list va;
+   char *msg;
+
+   if (failed)
+      return;
+
+   failed = true;
+
+   va_start(va, format);
+   msg = ralloc_vasprintf(mem_ctx, format, va);
+   va_end(va);
+   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
+
+   this->fail_msg = msg;
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      fprintf(stderr, "%s",  msg);
+   }
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a9ad531..3373e70 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -30,6 +30,7 @@
   */
            
 
+#include "main/compiler.h"
 #include "brw_context.h"
 #include "brw_vs.h"
 #include "brw_util.h"
@@ -39,17 +40,21 @@
 
 #include "../glsl/ralloc.h"
 
-static void do_vs_prog( struct brw_context *brw, 
-			struct brw_vertex_program *vp,
-			struct brw_vs_prog_key *key )
+static bool
+do_vs_prog(struct brw_context *brw,
+	   struct gl_shader_program *prog,
+	   struct brw_vertex_program *vp,
+	   struct brw_vs_prog_key *key)
 {
    struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
    GLuint program_size;
    const GLuint *program;
    struct brw_vs_compile c;
    void *mem_ctx;
    int aux_size;
    int i;
+   static int new_vs = -1;
 
    memset(&c, 0, sizeof(c));
    memcpy(&c.key, key, sizeof(*key));
@@ -85,7 +90,25 @@
 
    /* Emit GEN4 code.
     */
-   brw_vs_emit(&c);
+   if (new_vs == -1)
+      new_vs = getenv("INTEL_NEW_VS") != NULL;
+
+   if (new_vs && prog) {
+      if (!brw_vs_emit(prog, &c)) {
+	 ralloc_free(mem_ctx);
+	 return false;
+      }
+   } else {
+      brw_old_vs_emit(&c);
+   }
+
+   /* Scratch space is used for register spilling */
+   if (c.last_scratch) {
+      c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
+
+      brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+			 c.prog_data.total_scratch * brw->vs_max_threads);
+   }
 
    /* get the program
     */
@@ -111,6 +134,8 @@
 		    &c.prog_data, aux_size,
 		    &brw->vs.prog_offset, &brw->vs.prog_data);
    ralloc_free(mem_ctx);
+
+   return true;
 }
 
 
@@ -155,13 +180,15 @@
    if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
 			 &key, sizeof(key),
 			 &brw->vs.prog_offset, &brw->vs.prog_data)) {
-      do_vs_prog(brw, vp, &key);
+      bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
+				vp, &key);
+
+      assert(success);
    }
    brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
 			   sizeof(*brw->vs.prog_data));
 }
 
-
 /* See brw_vs.c:
  */
 const struct brw_tracked_state brw_vs_prog = {
@@ -174,3 +201,30 @@
    },
    .prepare = brw_upload_vs_prog
 };
+
+bool
+brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_vs_prog_key key;
+   struct gl_vertex_program *vp = prog->VertexProgram;
+   struct brw_vertex_program *bvp = brw_vertex_program(vp);
+   uint32_t old_prog_offset = brw->vs.prog_offset;
+   struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
+   bool success;
+
+   if (!vp)
+      return true;
+
+   memset(&key, 0, sizeof(key));
+
+   key.program_string_id = bvp->id;
+   key.clamp_vertex_color = true;
+
+   success = do_vs_prog(brw, prog, bvp, &key);
+
+   brw->vs.prog_offset = old_prog_offset;
+   brw->vs.prog_data = old_prog_data;
+
+   return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 432994a..beccb38 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -66,6 +66,7 @@
    GLuint first_output;
    GLuint nr_outputs;
    GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+   GLuint last_scratch;
 
    GLuint first_tmp;
    GLuint last_tmp;
@@ -92,6 +93,8 @@
    GLboolean needs_stack;
 };
 
-void brw_vs_emit( struct brw_vs_compile *c );
+bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c);
+void brw_old_vs_emit(struct brw_vs_compile *c);
+bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
 
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index 9fdfebe..47cc0a7 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -194,19 +194,11 @@
    /* BRW_NEW_VERTEX_PROGRAM */
    const struct brw_vertex_program *vp =
       brw_vertex_program_const(brw->vertex_program);
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
    /* BRW_NEW_INPUT_DIMENSIONS */
    struct tracker t;
    GLuint insn;
    GLuint i;
 
-   /* If we're going to go through brw_fs.cpp, we don't end up using
-    * brw->wm.input_size_masks.
-    */
-   if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
-      return;
-
    memset(&t, 0, sizeof(t));
 
    /* _NEW_LIGHT */
@@ -246,9 +238,7 @@
 const struct brw_tracked_state brw_wm_input_sizes = {
    .dirty = {
       .mesa  = _NEW_LIGHT,
-      .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-		BRW_NEW_VERTEX_PROGRAM |
-		BRW_NEW_INPUT_DIMENSIONS),
+      .brw   = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
       .cache = 0
    },
    .prepare = calc_wm_input_sizes
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 9d73334..bfee811 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1096,31 +1096,6 @@
    brw_MAC(p, dst, arg0, arg1);
 }
 
-/** 3 or 4-component vector normalization */
-static void emit_nrm( struct brw_vs_compile *c, 
-                      struct brw_reg dst,
-                      struct brw_reg arg0,
-                      int num_comps)
-{
-   struct brw_compile *p = &c->func;
-   struct brw_reg tmp = get_tmp(c);
-
-   /* tmp = dot(arg0, arg0) */
-   if (num_comps == 3)
-      brw_DP3(p, tmp, arg0, arg0);
-   else
-      brw_DP4(p, tmp, arg0, arg0);
-
-   /* tmp = 1 / sqrt(tmp) */
-   emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
-
-   /* dst = arg0 * tmp */
-   brw_MUL(p, dst, arg0, tmp);
-
-   release_tmp(c, tmp);
-}
-
-
 static struct brw_reg
 get_constant(struct brw_vs_compile *c,
              const struct prog_instruction *inst,
@@ -1359,7 +1334,7 @@
 
 	 if (component >= 0) {
 	    params = c->vp->program.Base.Parameters;
-	    f = params->ParameterValues[src->Index][component];
+	    f = params->ParameterValues[src->Index][component].f;
 
 	    if (src->Abs)
 	       f = fabs(f);
@@ -1821,6 +1796,9 @@
    if (val.address_mode != BRW_ADDRESS_DIRECT)
       return GL_FALSE;
 
+   if (val.negate || val.abs)
+      return GL_FALSE;
+
    switch (prev_insn->header.opcode) {
    case BRW_OPCODE_MOV:
    case BRW_OPCODE_MAC:
@@ -1900,7 +1878,7 @@
 
 /* Emit the vertex program instructions here.
  */
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_old_vs_emit(struct brw_vs_compile *c )
 {
 #define MAX_IF_DEPTH 32
 #define MAX_LOOP_DEPTH 32
@@ -1980,9 +1958,22 @@
 	      const struct prog_src_register *src = &inst->SrcReg[i];
 	      index = src->Index;
 	      file = src->File;	
-	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
-		  args[i] = c->output_regs[index].reg;
-	      else
+	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) {
+		 /* Can't just make get_arg "do the right thing" here because
+		  * other callers of get_arg and get_src_reg don't expect any
+		  * special behavior for the c->output_regs[index].used_in_src
+		  * case.
+		  */
+		 args[i] = c->output_regs[index].reg;
+		 args[i].dw1.bits.swizzle =
+		    BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+				 GET_SWZ(src->Swizzle, 1),
+				 GET_SWZ(src->Swizzle, 2),
+				 GET_SWZ(src->Swizzle, 3));
+
+		 /* Note this is ok for non-swizzle ARB_vp instructions */
+		 args[i].negate = src->Negate ? 1 : 0;
+	      } else
                   args[i] = get_arg(c, inst, i);
 	  }
 
@@ -1993,7 +1984,11 @@
       index = inst->DstReg.Index;
       file = inst->DstReg.File;
       if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
-	  dst = c->output_regs[index].reg;
+	 /* Can't just make get_dst "do the right thing" here because other
+	  * callers of get_dst don't expect any special behavior for the
+	  * c->output_regs[index].used_in_src case.
+	  */
+	 dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask);
       else
 	  dst = get_dst(c, inst->DstReg);
 
@@ -2025,12 +2020,6 @@
       case OPCODE_DPH:
 	 brw_DPH(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_NRM3:
-	 emit_nrm(c, dst, args[0], 3);
-	 break;
-      case OPCODE_NRM4:
-	 emit_nrm(c, dst, args[0], 4);
-	 break;
       case OPCODE_DST:
 	 unalias2(c, dst, args[0], args[1], emit_dst_noalias); 
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index fc4373a..29b3e47 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -77,6 +77,16 @@
    else
       vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
 
+   if (brw->vs.prog_data->total_scratch != 0) {
+      vs->thread2.scratch_space_base_pointer =
+	 brw->vs.scratch_bo->offset >> 10; /* reloc */
+      vs->thread2.per_thread_scratch_space =
+	 ffs(brw->vs.prog_data->total_scratch) - 11;
+   } else {
+      vs->thread2.scratch_space_base_pointer = 0;
+      vs->thread2.per_thread_scratch_space = 0;
+   }
+
    vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
    vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
    vs->thread3.dispatch_grf_start_reg = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 55dbd4f..40360b2 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -213,6 +213,7 @@
    brw->state_batch_count = 0;
 
    brw->vb.nr_current_buffers = 0;
+   brw->ib.type = -1;
 
    /* Mark that the current program cache BO has been used by the GPU.
     * It will be reallocated if we need to put new programs in for the
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index b0dfdd5..e768325 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -206,10 +206,6 @@
           */
          return false;
       }
-      c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
-      c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
-      c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
-      c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
    } else {
       void *instruction = c->instruction;
       void *prog_instructions = c->prog_instructions;
@@ -232,6 +228,13 @@
       if (!brw_wm_fs_emit(brw, c, prog))
 	 return false;
    } else {
+      if (!c->instruction) {
+	 c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
+	 c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
+	 c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
+	 c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
+      }
+
       /* Fallback for fixed function and ARB_fp shaders. */
       c->dispatch_width = 16;
       brw_wm_payload_setup(brw, c);
@@ -241,29 +244,10 @@
 
    /* Scratch space is used for register spilling */
    if (c->last_scratch) {
-      uint32_t total_scratch;
+      c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
 
-      /* Per-thread scratch space is power-of-two sized. */
-      for (c->prog_data.total_scratch = 1024;
-	   c->prog_data.total_scratch <= c->last_scratch;
-	   c->prog_data.total_scratch *= 2) {
-	 /* empty */
-      }
-      total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
-
-      if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
-	 drm_intel_bo_unreference(brw->wm.scratch_bo);
-	 brw->wm.scratch_bo = NULL;
-      }
-      if (brw->wm.scratch_bo == NULL) {
-	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
-						 "wm scratch",
-						 total_scratch,
-						 4096);
-      }
-   }
-   else {
-      c->prog_data.total_scratch = 0;
+      brw_get_scratch_bo(intel, &brw->wm.scratch_bo,
+			 c->prog_data.total_scratch * brw->wm_max_threads);
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM))
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index f61757a..6ea4a7d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1094,9 +1094,16 @@
    if (intel->gen < 5 && c->dispatch_width == 8)
       nr_texcoords = 3;
 
-   /* For shadow comparisons, we have to supply u,v,r. */
-   if (shadow)
-      nr_texcoords = 3;
+   if (shadow) {
+      if (intel->gen < 7) {
+	 /* For shadow comparisons, we have to supply u,v,r. */
+	 nr_texcoords = 3;
+      } else {
+	 /* On Ivybridge, the shadow comparitor comes first. Just load it. */
+	 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+	 cur_mrf += mrf_per_channel;
+      }
+   }
 
    /* Emit the texcoords. */
    for (i = 0; i < nr_texcoords; i++) {
@@ -1113,7 +1120,7 @@
    }
 
    /* Fill in the shadow comparison reference value. */
-   if (shadow) {
+   if (shadow && intel->gen < 7) {
       if (intel->gen >= 5) {
 	 /* Fill in the cube map array index value. */
 	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 7cd3eda..bd46bd8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -535,15 +535,15 @@
 						     GLfloat s3)
 {
    struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   GLfloat values[4];
+   gl_constant_value values[4];
    GLuint idx;
    GLuint swizzle;
    struct prog_src_register reg;
 
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
+   values[0].f = s0;
+   values[1].f = s1;
+   values[2].f = s2;
+   values[3].f = s3;
 
    idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
    reg = src_reg(PROGRAM_STATE_VAR, idx);
@@ -664,6 +664,8 @@
 static void precalc_tex( struct brw_wm_compile *c,
 			 const struct prog_instruction *inst )
 {
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct prog_src_register coord;
    struct prog_dst_register tmpcoord = { 0 };
    const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
@@ -727,7 +729,7 @@
        release_temp(c, tmp0);
        release_temp(c, tmp1);
    }
-   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+   else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
       struct prog_src_register scale = 
 	 search_or_add_param5( c, 
 			       STATE_INTERNAL, 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index f78bdc3..ccf9dc2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -205,14 +205,14 @@
 	 case PROGRAM_CONSTANT:
 	    /* These are invarient:
 	     */
-	    ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+	    ref = get_const_ref(c, &plist->ParameterValues[idx][component].f);
 	    break;
 
 	 case PROGRAM_STATE_VAR:
 	 case PROGRAM_UNIFORM:
 	    /* These may change from run to run:
 	     */
-	    ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+	    ref = get_param_ref(c, &plist->ParameterValues[idx][component].f );
 	    break;
 
 	 default:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 9814613..6834eba 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -289,6 +289,13 @@
    sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
    sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
 
+   /* On Gen6+, the sampler can handle non-normalized texture
+    * rectangle coordinates natively
+    */
+   if (intel->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
+      sampler->ss3.non_normalized_coord = 1;
+   }
+
    upload_default_color(brw, gl_sampler, unit);
 
    if (intel->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index fb4fb14..ad90978 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -342,7 +342,7 @@
    constants = brw->wm.const_bo->virtual;
    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
       constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i],
-				   *brw->wm.prog_data->pull_param[i]);
+				   brw->wm.prog_data->pull_param[i]);
    }
    drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
 
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index fb4cdba..b94121e 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -81,12 +81,21 @@
 	 params_uploaded++;
       }
 
-      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
-	 if (brw->vs.constant_map[i] != -1) {
-	    memcpy(param + brw->vs.constant_map[i] * 4,
-		   vp->program.Base.Parameters->ParameterValues[i],
-		   4 * sizeof(float));
-	    params_uploaded++;
+      if (brw->vs.prog_data->uses_new_param_layout) {
+	 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+	    *param = convert_param(brw->vs.prog_data->param_convert[i],
+				   brw->vs.prog_data->param[i]);
+	    param++;
+	 }
+	 params_uploaded += brw->vs.prog_data->nr_params / 4;
+      } else {
+	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+	    if (brw->vs.constant_map[i] != -1) {
+	       memcpy(param + brw->vs.constant_map[i] * 4,
+		      vp->program.Base.Parameters->ParameterValues[i],
+		      4 * sizeof(float));
+	       params_uploaded++;
+	    }
 	 }
       }
 
@@ -151,7 +160,15 @@
    OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
 	     GEN6_VS_FLOATING_POINT_MODE_ALT |
 	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
-   OUT_BATCH(0); /* scratch space base offset */
+
+   if (brw->vs.prog_data->total_scratch) {
+      OUT_RELOC(brw->vs.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->vs.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
+
    OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
@@ -160,6 +177,32 @@
 	     GEN6_VS_STATISTICS_ENABLE |
 	     GEN6_VS_ENABLE);
    ADVANCE_BATCH();
+
+   /* Based on my reading of the simulator, the VS constants don't get
+    * pulled into the VS FF unit until an appropriate pipeline flush
+    * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
+    * references to them into a little FIFO.  The flushes are common,
+    * but don't reliably happen between this and a 3DPRIMITIVE, causing
+    * the primitive to use the wrong constants.  Then the FIFO
+    * containing the constant setup gets added to again on the next
+    * constants change, and eventually when a flush does happen the
+    * unit is overwhelmed by constant changes and dies.
+    *
+    * To avoid this, send a PIPE_CONTROL down the line that will
+    * update the unit immediately loading the constants.  The flush
+    * type bits here were those set by the STATE_BASE_ADDRESS whose
+    * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
+    * bug reports that led to this workaround, and may be more than
+    * what is strictly required to avoid the issue.
+    */
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_INSTRUCTION_FLUSH |
+	     PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state gen6_vs_state = {
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 185da9c..07e9995 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -54,14 +54,14 @@
       float *constants;
       unsigned int i;
 
-      constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE,
+      constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
 				  brw->wm.prog_data->nr_params *
 				  sizeof(float),
 				  32, &brw->wm.push_const_offset);
 
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
-				      *brw->wm.prog_data->param[i]);
+				      brw->wm.prog_data->param[i]);
       }
 
       if (0) {
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index e787c21..aee67c8 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -157,6 +157,13 @@
    sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8);
    sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8);
 
+   /* The sampler can handle non-normalized texture rectangle coordinates
+    * natively
+    */
+   if (texObj->Target == GL_TEXTURE_RECTANGLE) {
+      sampler->ss3.non_normalized_coord = 1;
+   }
+
    upload_default_color(brw, gl_sampler, unit);
 
    sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0fad3d2..f3cd5d1 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -71,7 +71,15 @@
    OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
 	     GEN6_VS_FLOATING_POINT_MODE_ALT |
 	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
-   OUT_BATCH(0); /* scratch space base offset */
+
+   if (brw->vs.prog_data->total_scratch) {
+      OUT_RELOC(brw->vs.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->vs.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
+
    OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index a102ca7..55a603e 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -58,7 +58,7 @@
 
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
-				      *brw->wm.prog_data->param[i]);
+				      brw->wm.prog_data->param[i]);
       }
 
       if (0) {
@@ -228,7 +228,13 @@
    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
    OUT_BATCH(brw->wm.prog_offset);
    OUT_BATCH(dw2);
-   OUT_BATCH(0); /* scratch space base offset */
+   if (brw->wm.prog_data->total_scratch) {
+      OUT_RELOC(brw->wm.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->wm.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
    OUT_BATCH(dw4);
    OUT_BATCH(dw5);
    OUT_BATCH(0); /* kernel 1 pointer */
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index b61a2ff..db4343b 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -308,12 +308,29 @@
  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
  *
- * XXX: There is also a workaround that would appear to apply to this
- * workaround, but it doesn't appear to be necessary so far:
+ * And the workaround for these two requires this workaround first:
  *
- * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
  * BEFORE the pipe-control with a post-sync op and no write-cache
  * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
  */
 void
 intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
@@ -323,9 +340,17 @@
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_CS_STALL |
+	     PIPE_CONTROL_STALL_AT_SCOREBOARD);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
    OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
    OUT_RELOC(intel->batch.workaround_bo,
-	     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0);
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
    OUT_BATCH(0); /* write data */
    ADVANCE_BATCH();
 
@@ -365,6 +390,7 @@
 	 OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
 		   PIPE_CONTROL_WRITE_FLUSH |
 		   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		   PIPE_CONTROL_TC_FLUSH |
 		   PIPE_CONTROL_NO_WRITE);
 	 OUT_BATCH(0); /* write address */
 	 OUT_BATCH(0); /* write data */
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 30be1b9..b18dd29 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -541,8 +541,8 @@
 
    /* get dest x/y in destination texture */
    intel_miptree_get_image_offset(intel_image->mt,
-				  intel_image->level,
-				  intel_image->face,
+				  intel_image->base.Level,
+				  intel_image->base.Face,
 				  0,
 				  &image_x, &image_y);
 
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 439d6fc..d908975 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -41,8 +41,7 @@
 #include "intel_regions.h"
 
 static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
-                      GLenum target, struct gl_buffer_object *obj);
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);
 
 /** Allocates a new drm_intel_bo to store the data for the buffer object. */
 static void
@@ -122,7 +121,7 @@
     * (though it does if you call glDeleteBuffers)
     */
    if (obj->Pointer)
-      intel_bufferobj_unmap(ctx, 0, obj);
+      intel_bufferobj_unmap(ctx, obj);
 
    free(intel_obj->sys_buffer);
    if (intel_obj->region) {
@@ -203,7 +202,6 @@
  */
 static void
 intel_bufferobj_subdata(struct gl_context * ctx,
-                        GLenum target,
                         GLintptrARB offset,
                         GLsizeiptrARB size,
                         const GLvoid * data, struct gl_buffer_object *obj)
@@ -276,82 +274,28 @@
  */
 static void
 intel_bufferobj_get_subdata(struct gl_context * ctx,
-                            GLenum target,
                             GLintptrARB offset,
                             GLsizeiptrARB size,
                             GLvoid * data, struct gl_buffer_object *obj)
 {
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   struct intel_context *intel = intel_context(ctx);
 
    assert(intel_obj);
    if (intel_obj->sys_buffer)
       memcpy(data, (char *)intel_obj->sys_buffer + offset, size);
-   else
+   else {
+      if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
+	 intel_batchbuffer_flush(intel);
+      }
       drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
+   }
 }
 
 
 
 /**
- * Called via glMapBufferARB().
- */
-static void *
-intel_bufferobj_map(struct gl_context * ctx,
-                    GLenum target,
-                    GLenum access, struct gl_buffer_object *obj)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-   GLboolean read_only = (access == GL_READ_ONLY_ARB);
-   GLboolean write_only = (access == GL_WRITE_ONLY_ARB);
-
-   assert(intel_obj);
-
-   if (intel_obj->sys_buffer) {
-      if (!read_only && intel_obj->source) {
-	 release_buffer(intel_obj);
-      }
-
-      if (!intel_obj->buffer || intel_obj->source) {
-	 obj->Pointer = intel_obj->sys_buffer;
-	 obj->Length = obj->Size;
-	 obj->Offset = 0;
-	 return obj->Pointer;
-      }
-
-      free(intel_obj->sys_buffer);
-      intel_obj->sys_buffer = NULL;
-   }
-
-   /* Flush any existing batchbuffer that might reference this data. */
-   if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer))
-      intel_flush(ctx);
-
-   if (intel_obj->region)
-      intel_bufferobj_cow(intel, intel_obj);
-
-   if (intel_obj->buffer == NULL) {
-      obj->Pointer = NULL;
-      return NULL;
-   }
-
-   if (write_only) {
-      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
-      intel_obj->mapped_gtt = GL_TRUE;
-   } else {
-      drm_intel_bo_map(intel_obj->buffer, !read_only);
-      intel_obj->mapped_gtt = GL_FALSE;
-   }
-
-   obj->Pointer = intel_obj->buffer->virtual;
-   obj->Length = obj->Size;
-   obj->Offset = 0;
-
-   return obj->Pointer;
-}
-
-/**
- * Called via glMapBufferRange().
+ * Called via glMapBufferRange and glMapBuffer
  *
  * The goal of this extension is to allow apps to accumulate their rendering
  * at the same time as they accumulate their buffer object.  Without it,
@@ -368,12 +312,11 @@
  */
 static void *
 intel_bufferobj_map_range(struct gl_context * ctx,
-			  GLenum target, GLintptr offset, GLsizeiptr length,
+			  GLintptr offset, GLsizeiptr length,
 			  GLbitfield access, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-   GLboolean read_only = (access == GL_READ_ONLY_ARB);
 
    assert(intel_obj);
 
@@ -385,6 +328,9 @@
    obj->AccessFlags = access;
 
    if (intel_obj->sys_buffer) {
+      const bool read_only =
+	 (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;
+
       if (!read_only && intel_obj->source)
 	 release_buffer(intel_obj);
 
@@ -468,7 +414,7 @@
  * would defeat the point.
  */
 static void
-intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
+intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
 				   GLintptr offset, GLsizeiptr length,
 				   struct gl_buffer_object *obj)
 {
@@ -502,8 +448,7 @@
  * Called via glUnmapBuffer().
  */
 static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
-                      GLenum target, struct gl_buffer_object *obj)
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
@@ -758,23 +703,23 @@
        * not overlap.
        */
       if (src == dst) {
-	 char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
-					 GL_READ_WRITE, dst);
+	 char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
+					       GL_MAP_READ_BIT, dst);
 	 memmove(ptr + write_offset, ptr + read_offset, size);
-	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+	 intel_bufferobj_unmap(ctx, dst);
       } else {
 	 const char *src_ptr;
 	 char *dst_ptr;
 
-	 src_ptr =  intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
-					GL_READ_ONLY, src);
-	 dst_ptr =  intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
-					GL_WRITE_ONLY, dst);
+	 src_ptr =  intel_bufferobj_map_range(ctx, 0, src->Size,
+					      GL_MAP_READ_BIT, src);
+	 dst_ptr =  intel_bufferobj_map_range(ctx, 0, dst->Size,
+					      GL_MAP_WRITE_BIT, dst);
 
 	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
 
-	 intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
-	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+	 intel_bufferobj_unmap(ctx, src);
+	 intel_bufferobj_unmap(ctx, dst);
       }
       return;
    }
@@ -924,7 +869,6 @@
    functions->BufferData = intel_bufferobj_data;
    functions->BufferSubData = intel_bufferobj_subdata;
    functions->GetBufferSubData = intel_bufferobj_get_subdata;
-   functions->MapBuffer = intel_bufferobj_map;
    functions->MapBufferRange = intel_bufferobj_map_range;
    functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = intel_bufferobj_unmap;
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index dfca03c..76d33f9 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -116,13 +116,13 @@
    }
 
    /* HW color buffers (front, back, aux, generic FBO, etc) */
-   if (colorMask == ~0) {
+   if (intel->gen < 6 && colorMask == ~0) {
       /* clear all R,G,B,A */
       blit_mask |= (mask & BUFFER_BITS_COLOR);
    }
    else {
       /* glColorMask in effect */
-      tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+      tri_mask |= (mask & BUFFER_BITS_COLOR);
    }
 
    /* Make sure we have up to date buffers before we start looking at
@@ -143,6 +143,12 @@
 	     */
             tri_mask |= BUFFER_BIT_STENCIL;
          }
+	 else if (intel->has_separate_stencil &&
+	       stencilRegion->tiling == I915_TILING_NONE) {
+	    /* The stencil buffer is actually W tiled, which the hardware
+	     * cannot blit to. */
+	    tri_mask |= BUFFER_BIT_STENCIL;
+	 }
          else {
             /* clearing all stencil bits, use blitting */
             blit_mask |= BUFFER_BIT_STENCIL;
@@ -182,7 +188,10 @@
 
    if (tri_mask) {
       debug_mask("tri", tri_mask);
-      _mesa_meta_Clear(&intel->ctx, tri_mask);
+      if (ctx->Extensions.ARB_fragment_shader)
+	 _mesa_meta_glsl_Clear(&intel->ctx, tri_mask);
+      else
+	 _mesa_meta_Clear(&intel->ctx, tri_mask);
    }
 }
 
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 2ba1363..14342ef 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1439,7 +1439,12 @@
       assert(stencil_rb->Base.Format == MESA_FORMAT_S8);
       assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24);
 
-      if (stencil_rb->region->tiling == I915_TILING_Y) {
+      if (stencil_rb->region->tiling == I915_TILING_NONE) {
+	 /*
+	  * The stencil buffer is actually W tiled. The region's tiling is
+	  * I915_TILING_NONE, however, because the GTT is incapable of W
+	  * fencing.
+	  */
 	 intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE;
 	 return;
       } else {
@@ -1449,6 +1454,13 @@
 	  * a combined depth/stencil buffer. Discard the hiz buffer too.
 	  */
 	 intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_FALSE;
+	 if (intel->must_use_separate_stencil) {
+	    _mesa_problem(&intel->ctx,
+			  "intel_context requires separate stencil, but the "
+			  "DRIscreen does not support it. You may need to "
+			  "upgrade the Intel X driver to 2.16.0");
+	    abort();
+	 }
 
 	 /* 1. Discard depth and stencil renderbuffers. */
 	 _mesa_remove_renderbuffer(fb, BUFFER_DEPTH);
@@ -1527,7 +1539,7 @@
        * Presently, however, no verification or clean up is necessary, and
        * execution should not reach here. If the framebuffer still has a hiz
        * region, then we have already set dri2_has_hiz to true after
-       * confirming above that the stencil buffer is Y tiled.
+       * confirming above that the stencil buffer is W tiled.
        */
       assert(0);
    }
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 55bcc75..754f9f2 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -173,6 +173,9 @@
 
    if (irb->Base.Format == MESA_FORMAT_S8) {
       /*
+       * The stencil buffer is W tiled. However, we request from the kernel a
+       * non-tiled buffer because the GTT is incapable of W fencing.
+       *
        * The stencil buffer has quirky pitch requirements.  From Vol 2a,
        * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
        *    The pitch must be set to 2x the value computed based on width, as
@@ -180,14 +183,13 @@
        * To accomplish this, we resort to the nasty hack of doubling the drm
        * region's cpp and halving its height.
        *
-       * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt()
-       * maps the memory incorrectly.
+       * If we neglect to double the pitch, then render corruption occurs.
        */
       irb->region = intel_region_alloc(intel->intelScreen,
-				       I915_TILING_Y,
+				       I915_TILING_NONE,
 				       cpp * 2,
-				       width,
-				       height / 2,
+				       ALIGN(width, 64),
+				       ALIGN((height + 1) / 2, 64),
 				       GL_TRUE);
       if (!irb->region)
 	return false;
@@ -594,17 +596,15 @@
 				   struct intel_texture_image *intel_image,
 				   int zoffset)
 {
-   struct intel_mipmap_tree *mt = intel_image->mt;
    unsigned int dst_x, dst_y;
 
    /* compute offset of the particular 2D image within the texture region */
    intel_miptree_get_image_offset(intel_image->mt,
-				  intel_image->level,
-				  intel_image->face,
+				  intel_image->base.Level,
+				  intel_image->base.Face,
 				  zoffset,
 				  &dst_x, &dst_y);
 
-   irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp;
    irb->draw_x = dst_x;
    irb->draw_y = dst_y;
 }
@@ -645,6 +645,22 @@
    }
 }
 
+#ifndef I915
+static bool
+need_tile_offset_workaround(struct brw_context *brw,
+			    struct intel_renderbuffer *irb)
+{
+   uint32_t tile_x, tile_y;
+
+   if (brw->has_surface_tile_offset)
+      return false;
+
+   intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
+
+   return tile_x != 0 || tile_y != 0;
+}
+#endif
+
 /**
  * Called by glFramebufferTexture[123]DEXT() (and other places) to
  * prepare for rendering into texture memory.  This might be called
@@ -698,8 +714,7 @@
    intel_image->used_as_render_target = GL_TRUE;
 
 #ifndef I915
-   if (!brw_context(ctx)->has_surface_tile_offset &&
-       (irb->draw_offset & 4095) != 0) {
+   if (need_tile_offset_workaround(brw_context(ctx), irb)) {
       /* Original gen4 hardware couldn't draw to a non-tile-aligned
        * destination in a miptree unless you actually setup your
        * renderbuffer as a miptree and used the fragile
@@ -713,8 +728,8 @@
 
       new_mt = intel_miptree_create(intel, image->TexObject->Target,
 				    intel_image->base.TexFormat,
-				    intel_image->level,
-				    intel_image->level,
+				    intel_image->base.Level,
+				    intel_image->base.Level,
 				    intel_image->base.Width,
 				    intel_image->base.Height,
 				    intel_image->base.Depth,
@@ -722,8 +737,8 @@
 
       intel_miptree_image_copy(intel,
                                new_mt,
-                               intel_image->face,
-			       intel_image->level,
+			       intel_image->base.Face,
+			       intel_image->base.Level,
 			       old_mt);
 
       intel_miptree_release(intel, &intel_image->mt);
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index f7f99a4..2487994 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -58,7 +58,6 @@
 
    /** \} */
 
-   GLuint draw_offset; /**< Offset of drawing address within the region */
    GLuint draw_x, draw_y; /**< Offset of drawing within the region */
 };
 
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 4e711de..f36240d 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -227,7 +227,7 @@
                           struct gl_texture_image *image)
 {
    struct intel_texture_image *intelImage = intel_texture_image(image);
-   GLuint level = intelImage->level;
+   GLuint level = intelImage->base.Level;
 
    /* Images with borders are never pulled into mipmap trees. */
    if (image->Border)
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 86d0ef2..d9873a3 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -74,9 +74,9 @@
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-					   GL_READ_ONLY_ARB,
-					   unpack->BufferObj);
+   buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+						GL_MAP_READ_BIT,
+						unpack->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
       return NULL;
@@ -292,8 +292,7 @@
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
       /* done with PBO so unmap it now */
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 
    intel_check_front_buffer_rendering(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 5aa6291..a98a669 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -75,6 +75,7 @@
 #define PIPE_CONTROL_VF_CACHE_INVALIDATE	(1 << 4)
 #define PIPE_CONTROL_CONST_CACHE_INVALIDATE	(1 << 3)
 #define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD	(1 << 1)
 #define PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1 << 0)
 #define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
 #define PIPE_CONTROL_GLOBAL_GTT_WRITE	(1 << 2)
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
index b2013af..9dd6a52 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -63,9 +63,12 @@
  * x8_z24 and s8).
  *
  * Eventually, intel_update_renderbuffers() makes a DRI2 request for
- * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled,
- * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if
- * nothing happend.
+ * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is
+ * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to
+ * true and continue as if nothing happend.
+ *
+ * [1] The stencil buffer is actually W tiled. However, we request from the
+ *     kernel a non-tiled buffer because the GTT is incapable of W fencing.
  *
  * If the buffers are X tiled, however, the handshake has failed and we must
  * clean up.
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 153803f..2e1c80c 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -131,38 +131,84 @@
    int miny = 0;							\
    int maxx = rb->Width;						\
    int maxy = rb->Height;						\
-   int stride = rb->RowStride;						\
-   uint8_t *buf = rb->Data;						\
+									\
+   /*									\
+    * Here we ignore rb->Data and rb->RowStride as set by		\
+    * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile	\
+    * manually, the region's *real* base address and stride is		\
+    * required.								\
+    */									\
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
+   uint8_t *buf = irb->region->buffer->virtual;				\
+   unsigned stride = irb->region->pitch;				\
+   unsigned height = 2 * irb->region->height;				\
+   bool flip = rb->Name == 0;						\
+   int y_scale = flip ? -1 : 1;						\
+   int y_bias = flip ? (height - 1) : 0;				\
 
-/* Don't flip y. */
 #undef Y_FLIP
-#define Y_FLIP(y) y
+#define Y_FLIP(y) (y_scale * (y) + y_bias)
 
 /**
  * \brief Get pointer offset into stencil buffer.
  *
- * The stencil buffer interleaves two rows into one. Yay for crazy hardware.
- * The table below demonstrates how the pointer arithmetic behaves for a buffer
- * with positive stride (s=stride).
+ * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
+ * must decode the tile's layout in software.
  *
- *     x    | y     | byte offset
- *     --------------------------
- *     0    | 0     | 0
- *     0    | 1     | 1
- *     1    | 0     | 2
- *     1    | 1     | 3
- *     ...  | ...   | ...
- *     0    | 2     | s
- *     0    | 3     | s + 1
- *     1    | 2     | s + 2
- *     1    | 3     | s + 3
+ * See
+ *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
+ *     Format.
+ *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
  *
- *
+ * Even though the returned offset is always positive, the return type is
+ * signed due to
+ *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
+ *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
  */
 static inline intptr_t
-intel_offset_S8(int stride, GLint x, GLint y)
+intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
 {
-   return 2 * ((y / 2) * stride + x) + y % 2;
+   uint32_t tile_size = 4096;
+   uint32_t tile_width = 64;
+   uint32_t tile_height = 64;
+   uint32_t row_size = 64 * stride;
+
+   uint32_t tile_x = x / tile_width;
+   uint32_t tile_y = y / tile_height;
+
+   /* The byte's address relative to the tile's base addres. */
+   uint32_t byte_x = x % tile_width;
+   uint32_t byte_y = y % tile_height;
+
+   uintptr_t u = tile_y * row_size
+               + tile_x * tile_size
+               + 512 * (byte_x / 8)
+               +  64 * (byte_y / 8)
+               +  32 * ((byte_y / 4) % 2)
+               +  16 * ((byte_x / 4) % 2)
+               +   8 * ((byte_y / 2) % 2)
+               +   4 * ((byte_x / 2) % 2)
+               +   2 * (byte_y % 2)
+               +   1 * (byte_x % 2);
+
+   /*
+    * Errata for Gen5:
+    *
+    * An additional offset is needed which is not documented in the PRM.
+    *
+    * if ((byte_x / 8) % 2 == 1) {
+    *    if ((byte_y / 8) % 2) == 0) {
+    *       u += 64;
+    *    } else {
+    *       u -= 64;
+    *    }
+    * }
+    *
+    * The offset is expressed more tersely as
+    * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1));
+    */
+
+   return u;
 }
 
 #define WRITE_STENCIL(x, y, src)  buf[intel_offset_S8(stride, x, y)] = src;
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index 21c4a1d..ee0cd25 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -95,17 +95,12 @@
       if (!_mesa_is_format_compressed(first_image->TexFormat)) {
          GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
          GLuint face, i;
-         /* Update the level information in our private data in the new images,
-          * since it didn't get set as part of a normal TexImage path.
-          */
          for (face = 0; face < nr_faces; face++) {
             for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
                struct intel_texture_image *intelImage =
                   intel_texture_image(texObj->Image[face][i]);
                if (!intelImage)
                   break;
-               intelImage->level = i;
-               intelImage->face = face;
                /* Unreference the miptree to signal that the new Data is a
                 * bare pointer from mesa.
                 */
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 1a3643d..600bd12 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -118,8 +118,8 @@
 
       /* get dest x/y in destination texture */
       intel_miptree_get_image_offset(intelImage->mt,
-				     intelImage->level,
-				     intelImage->face,
+				     intelImage->base.Level,
+				     intelImage->base.Face,
 				     0,
 				     &image_x, &image_y);
 
@@ -164,101 +164,6 @@
 
 
 static void
-intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
-                    GLenum internalFormat,
-                    GLint x, GLint y, GLsizei width, GLint border)
-{
-   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-   int srcx, srcy, dstx, dsty, height;
-
-   if (border)
-      goto fail;
-
-   /* Setup or redefine the texture object, mipmap tree and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                          width, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-   srcx = x;
-   srcy = y;
-   dstx = 0;
-   dsty = 0;
-   height = 1;
-   if (!_mesa_clip_copytexsubimage(ctx,
-				   &dstx, &dsty,
-				   &srcx, &srcy,
-				   &width, &height))
-      return;
-
-   if (!intel_copy_texsubimage(intel_context(ctx), target,
-                               intel_texture_image(texImage),
-                               internalFormat, 0, 0, x, y, width, height))
-      goto fail;
-
-   return;
-
- fail:
-   fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
-   _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y,
-                             width, border);
-}
-
-
-static void
-intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
-                    GLenum internalFormat,
-                    GLint x, GLint y, GLsizei width, GLsizei height,
-                    GLint border)
-{
-   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-   int srcx, srcy, dstx, dsty;
-
-   if (border)
-      goto fail;
-
-   /* Setup or redefine the texture object, mipmap tree and texture
-    * image.  Don't populate yet.
-    */
-   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                          width, height, border,
-                          GL_RGBA, GL_UNSIGNED_BYTE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   srcx = x;
-   srcy = y;
-   dstx = 0;
-   dsty = 0;
-   if (!_mesa_clip_copytexsubimage(ctx,
-				   &dstx, &dsty,
-				   &srcx, &srcy,
-				   &width, &height))
-      return;
-
-   if (!intel_copy_texsubimage(intel_context(ctx), target,
-                               intel_texture_image(texImage),
-                               internalFormat, 0, 0, x, y, width, height))
-      goto fail;
-
-   return;
-
- fail:
-   fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
-   _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
-                             width, height, border);
-}
-
-
-static void
 intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
                        GLint xoffset, GLint x, GLint y, GLsizei width)
 {
@@ -312,8 +217,6 @@
 void
 intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
 {
-   functions->CopyTexImage1D = intelCopyTexImage1D;
-   functions->CopyTexImage2D = intelCopyTexImage2D;
    functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
    functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 1f8b885..4ee6684 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -63,7 +63,7 @@
    if (intelImage->base.Border)
       return NULL;
 
-   if (intelImage->level > intelObj->base.BaseLevel &&
+   if (intelImage->base.Level > intelObj->base.BaseLevel &&
        (intelImage->base.Width == 1 ||
         (intelObj->base.Target != GL_TEXTURE_1D &&
          intelImage->base.Height == 1) ||
@@ -74,19 +74,19 @@
        * likely base level width/height/depth for a full mipmap stack
        * from this info, so just allocate this one level.
        */
-      firstLevel = intelImage->level;
-      lastLevel = intelImage->level;
+      firstLevel = intelImage->base.Level;
+      lastLevel = intelImage->base.Level;
    } else {
       /* If this image disrespects BaseLevel, allocate from level zero.
        * Usually BaseLevel == 0, so it's unlikely to happen.
        */
-      if (intelImage->level < intelObj->base.BaseLevel)
+      if (intelImage->base.Level < intelObj->base.BaseLevel)
 	 firstLevel = 0;
       else
 	 firstLevel = intelObj->base.BaseLevel;
 
       /* Figure out image dimensions at start level. */
-      for (i = intelImage->level; i > firstLevel; i--) {
+      for (i = intelImage->base.Level; i > firstLevel; i--) {
 	 width <<= 1;
 	 if (height != 1)
 	    height <<= 1;
@@ -101,7 +101,7 @@
        */
       if ((intelObj->base.Sampler.MinFilter == GL_NEAREST ||
 	   intelObj->base.Sampler.MinFilter == GL_LINEAR) &&
-	  intelImage->level == firstLevel &&
+	  intelImage->base.Level == firstLevel &&
 	  (intel->gen < 4 || firstLevel == 0)) {
 	 lastLevel = firstLevel;
       } else {
@@ -186,8 +186,8 @@
    else
       src_stride = width;
 
-   intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
-				  intelImage->face, 0,
+   intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+				  intelImage->base.Face, 0,
 				  &dst_x, &dst_y);
 
    dst_stride = intelImage->mt->region->pitch;
@@ -243,8 +243,8 @@
    else
       src_stride = width;
 
-   intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
-				  intelImage->face, 0,
+   intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+				  intelImage->base.Face, 0,
 				  &dst_x, &dst_y);
 
    dst_stride = intelImage->mt->region->pitch;
@@ -407,9 +407,6 @@
    DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = level;
-
    if (_mesa_is_format_compressed(texImage->TexFormat)) {
       texelBytes = 0;
    }
@@ -514,8 +511,8 @@
 	 }
          texImage->Data = intel_miptree_image_map(intel,
                                                   intelImage->mt,
-                                                  intelImage->face,
-                                                  intelImage->level,
+                                                  intelImage->base.Face,
+                                                  intelImage->base.Level,
                                                   &dstRowStride,
                                                   intelImage->base.ImageOffsets);
       }
@@ -684,8 +681,8 @@
       intelImage->base.Data =
          intel_miptree_image_map(intel,
                                  intelImage->mt,
-                                 intelImage->face,
-                                 intelImage->level,
+                                 intelImage->base.Face,
+                                 intelImage->base.Level,
                                  &intelImage->base.RowStride,
                                  intelImage->base.ImageOffsets);
       intelImage->base.RowStride /= intelImage->mt->cpp;
@@ -816,8 +813,6 @@
 			      rb->region->width, rb->region->height, 1,
 			      0, internalFormat, texFormat);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = level;
    texImage->RowStride = rb->region->pitch;
    intel_miptree_reference(&intelImage->mt, intelObj->mt);
 
@@ -874,8 +869,6 @@
 			      image->region->width, image->region->height, 1,
 			      0, image->internal_format, image->format);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = 0;
    texImage->RowStride = image->region->pitch;
    intel_miptree_reference(&intelImage->mt, intelObj->mt);
 
diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
index a9ae2ec..e7a4318 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
@@ -52,11 +52,6 @@
 {
    struct gl_texture_image base;
 
-   /* These aren't stored in gl_texture_image 
-    */
-   GLuint level;
-   GLuint face;
-
    /* If intelImage->mt != NULL, image data is stored here.
     * Else if intelImage->base.Data != NULL, image is stored there.
     * Else there is no image data.
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 8b43c40..5fd2cc3 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -113,7 +113,7 @@
 	 dstRowStride = pitch;
 
 	 intel_miptree_get_image_offset(intelImage->mt, level,
-					intelImage->face, 0,
+					intelImage->base.Face, 0,
 					&blit_x, &blit_y);
 	 blit_x += xoffset;
 	 blit_y += yoffset;
@@ -122,8 +122,8 @@
       } else {
 	 texImage->Data = intel_miptree_image_map(intel,
 						  intelImage->mt,
-						  intelImage->face,
-						  intelImage->level,
+						  intelImage->base.Face,
+						  intelImage->base.Level,
 						  &dstRowStride,
 						  texImage->ImageOffsets);
       }
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 7135a62..31ac689 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -42,8 +42,8 @@
        */
       intel_miptree_image_copy(intel,
                                intelObj->mt,
-                               intelImage->face,
-                               intelImage->level, intelImage->mt);
+                               intelImage->base.Face,
+                               intelImage->base.Level, intelImage->mt);
 
       intel_miptree_release(intel, &intelImage->mt);
    }
@@ -54,8 +54,8 @@
        */
       intel_miptree_image_data(intel,
                                intelObj->mt,
-                               intelImage->face,
-                               intelImage->level,
+                               intelImage->base.Face,
+                               intelImage->base.Level,
                                intelImage->base.Data,
                                intelImage->base.RowStride,
                                intelImage->base.RowStride *
@@ -177,8 +177,8 @@
 	 intelImage->base.Data =
 	    intel_miptree_image_map(intel,
 				    intelImage->mt,
-				    intelImage->face,
-				    intelImage->level,
+				    intelImage->base.Face,
+				    intelImage->base.Level,
 				    &intelImage->base.RowStride,
 				    intelImage->base.ImageOffsets);
 	 /* convert stride to texels, not bytes */
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index e60b91f..433590c 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -107,7 +107,7 @@
 }
 
 static void
-nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, const GLvoid *data,
 			  struct gl_buffer_object *obj)
 {
@@ -115,7 +115,7 @@
 }
 
 static void
-nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset,
 			   GLsizeiptrARB size, GLvoid *data,
 			   struct gl_buffer_object *obj)
 {
@@ -123,23 +123,6 @@
 }
 
 static void *
-nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
-		   struct gl_buffer_object *obj)
-{
-	unsigned flags = 0;
-
-	if (access == GL_READ_ONLY_ARB ||
-	    access == GL_READ_WRITE_ARB)
-		flags |= GL_MAP_READ_BIT;
-	if (access == GL_WRITE_ONLY_ARB ||
-	    access == GL_READ_WRITE_ARB)
-		flags |= GL_MAP_WRITE_BIT;
-
-	return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags,
-					  obj);
-}
-
-static void *
 nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset,
 			    GLsizeiptr length, GLbitfield access,
 			    struct gl_buffer_object *obj)
@@ -169,7 +152,7 @@
 }
 
 static GLboolean
-nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
 {
 	assert(obj->Pointer);
 
@@ -189,7 +172,6 @@
 	functions->BufferData = nouveau_bufferobj_data;
 	functions->BufferSubData = nouveau_bufferobj_subdata;
 	functions->GetBufferSubData = nouveau_bufferobj_get_subdata;
-	functions->MapBuffer = nouveau_bufferobj_map;
 	functions->MapBufferRange = nouveau_bufferobj_map_range;
 	functions->UnmapBuffer = nouveau_bufferobj_unmap;
 }
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
index 02201cb..44a794d 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
@@ -185,7 +185,6 @@
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
    GLuint flags = 0;
-   GLuint color_mask = 0;
    GLuint orig_mask = mask;
 
    if ( R200_DEBUG & RADEON_IOCTL ) {
@@ -206,13 +205,11 @@
 
    if ( mask & BUFFER_BIT_FRONT_LEFT ) {
       flags |= RADEON_FRONT;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_FRONT_LEFT;
    }
 
    if ( mask & BUFFER_BIT_BACK_LEFT ) {
       flags |= RADEON_BACK;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_BACK_LEFT;
    }
 
diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
index d42e8f1..91e77f9 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.c
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -527,7 +527,6 @@
    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
 
    if (radeon->radeonScreen->kernel_mm) {
-      functions->CopyTexImage2D = radeonCopyTexImage2D;
       functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
    }
 
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 7adf9ad..8c9bd6d 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -773,18 +773,12 @@
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	r200ContextPtr rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, format;
 	gl_format texFormat;
 
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index 63e03b0..cf44d7f 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -126,10 +126,10 @@
       case PROGRAM_NAMED_PARAM:
       //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
       case PROGRAM_CONSTANT:
-	 *fcmd++ = paramList->ParameterValues[pi][0];
-	 *fcmd++ = paramList->ParameterValues[pi][1];
-	 *fcmd++ = paramList->ParameterValues[pi][2];
-	 *fcmd++ = paramList->ParameterValues[pi][3];
+	 *fcmd++ = paramList->ParameterValues[pi][0].f;
+	 *fcmd++ = paramList->ParameterValues[pi][1].f;
+	 *fcmd++ = paramList->ParameterValues[pi][2].f;
+	 *fcmd++ = paramList->ParameterValues[pi][3].f;
 	 break;
       default:
 	 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index b242742..39dcb21 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -561,28 +561,29 @@
 	struct rc_instruction * inst_add)
 {
 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
-	struct rc_src_register * src1 = NULL;
-	unsigned int i;
-
-	if (!is_presub_candidate(c, inst_add))
-		return 0;
+        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
 
 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
 		return 0;
 
-	/* XXX This isn't fully implemented, is it? */
-	/*   src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
-	for (i = 0; i < 2; i++) {
-		if (inst_add->U.I.SrcReg[i].Abs)
-			return 0;
+	/* src0 and src1 can't have absolute values */
+	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+	        return 0;
 
-		/* XXX This looks weird, but it's basically what was here before this commit (see git blame): */
-		if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) {
-			src1 = &inst_add->U.I.SrcReg[i];
-		}
-	}
+	/* presub_replace_add() assumes only one is negative */
+	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+	        return 0;
 
-	if (!src1)
+        /* if src0 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+	        return 0;
+
+        /* if src1 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+	        return 0;
+
+	if (!is_presub_candidate(c, inst_add))
 		return 0;
 
 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
@@ -615,7 +616,7 @@
  * of the add instruction must have the constatnt 1 swizzle.  This function
  * does not check const registers to see if their value is 1.0, so it should
  * be called after the constant_folding optimization.
- * @return 
+ * @return
  * 	0 if the ADD instruction is still part of the program.
  * 	1 if the ADD instruction is no longer part of the program.
  */
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 0c4d853..5587c16 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -84,7 +84,8 @@
 	GLboolean mapped_named_bo = GL_FALSE;
 
 	if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 		mapped_named_bo = GL_TRUE;
 		assert(mesa_ind_buf->obj->Pointer != NULL);
 	}
@@ -138,7 +139,7 @@
 	r300->ind_buf.count = mesa_ind_buf->count;
 
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+		ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
 	}
 }
 
@@ -163,7 +164,10 @@
 		GLboolean mapped_named_bo = GL_FALSE;
 
 		if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+			ctx->Driver.MapBufferRange(ctx, 0,
+						   mesa_ind_buf->obj->Size,
+						   GL_MAP_READ_BIT,
+						   mesa_ind_buf->obj);
 			assert(mesa_ind_buf->obj->Pointer != NULL);
 			mapped_named_bo = GL_TRUE;
 		}
@@ -184,7 +188,7 @@
 		r300->ind_buf.count = mesa_ind_buf->count;
 
 		if (mapped_named_bo) {
-			ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+			ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
 		}
 	} else {
 		r300FixupIndexBuffer(ctx, mesa_ind_buf);
@@ -235,7 +239,8 @@
 
 	if (input->BufferObj->Name) {
 		if (!input->BufferObj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+			ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+					      GL_MAP_READ_BIT, input->BufferObj);
 			mapped_named_bo = GL_TRUE;
 		}
 
@@ -286,7 +291,7 @@
 
 	radeon_bo_unmap(attr->bo);
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+		ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
 	}
 }
 
@@ -302,7 +307,8 @@
 	radeon_bo_map(attr->bo, 1);
 
 	if (!input->BufferObj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+		ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+					   GL_MAP_READ_BIT, input->BufferObj);
 		mapped_named_bo = GL_TRUE;
 	}
 
@@ -321,7 +327,7 @@
 	}
 
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+		ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
 	}
 
 	radeon_bo_unmap(attr->bo);
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 590d9af..93d8fe1 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -379,7 +379,6 @@
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index e24ad6f..e4388a0 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -427,13 +427,8 @@
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
 	gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
 	rmesa = pDRICtx->driverPrivate;
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
index e527c37..cc584ca2 100644
--- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
@@ -752,10 +752,10 @@
 	    unNumParamData = paramList->NumParameters;
 
 	    for(ui=0; ui<unNumParamData; ui++) {
-		        evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
 	    }
 
 	    /* alloc multiple of 16 constants */
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index 4507be2..74563ca 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -403,7 +403,8 @@
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+	    ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				       GL_MAP_READ_BIT, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -456,7 +457,7 @@
 
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 }
 
@@ -470,7 +471,8 @@
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+				   GL_MAP_READ_BIT, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -531,7 +533,7 @@
 
     if (mapped_named_bo)
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
     }
 }
 
@@ -606,7 +608,8 @@
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	        ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
@@ -629,7 +632,7 @@
 
         if (mapped_named_bo)
         {
-	        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+	        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
         }
     }
     else
@@ -655,7 +658,8 @@
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+	ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				   GL_MAP_READ_BIT, input->BufferObj->obj);
         mapped_named_bo = GL_TRUE;
     }
 
@@ -675,7 +679,7 @@
     radeon_bo_unmap(attr->bo);
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 
     attr->stride = dst_stride;
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
index 33a5f27..d240a21 100644
--- a/src/mesa/drivers/dri/r600/evergreen_tex.c
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -1288,19 +1288,12 @@
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	context_t *rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
 	gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
@@ -1688,7 +1681,6 @@
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
index 018869b..117916a 100644
--- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
@@ -684,17 +684,17 @@
 	    for(ui=0; ui<unNumParamData; ui++) {
             if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) 
             {
-                evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
-		        evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
-		        evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
-		        evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+                evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+		        evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+		        evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+		        evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
             }
             else
             {
-		        evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
             }
 	    }
 
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index ce2f777..74f048b 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -259,13 +259,11 @@
                                   uint32_t * reloc_chunk,
                                   uint32_t * length_dw_reloc_chunk) 
 {
-    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
     struct r600_cs_reloc_legacy *relocs;
     int i, j, r;
 
     uint32_t offset_dw = 0;
 
-    csm = (struct r600_cs_manager_legacy*)csi->csm;
     relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
 restart:
     for (i = 0; i < csi->crelocs; i++) {
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index eb7ed30..3efa1d1 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -470,7 +470,6 @@
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 949db29..65fae71 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -1141,13 +1141,8 @@
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
         gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
 	rmesa = pDRICtx->driverPrivate;
 
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 40494cd..6f9834e 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -778,10 +778,10 @@
 	    unNumParamData = paramList->NumParameters;
 
 	    for(ui=0; ui<unNumParamData; ui++) {
-		        r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
 	    }
 
         /* Load fp constants to gpu */
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 0f7a7a4..a565c9f 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -490,7 +490,8 @@
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+	   ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				      GL_MAP_READ_BIT, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -543,7 +544,7 @@
 
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 }
 
@@ -564,7 +565,8 @@
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+        ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				   GL_MAP_READ_BIT, input->BufferObj);
         mapped_named_bo = GL_TRUE;
     }
 
@@ -584,7 +586,7 @@
     radeon_bo_unmap(attr->bo);
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 
     attr->stride = dst_stride;
@@ -727,7 +729,8 @@
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+				   GL_MAP_READ_BIT, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -788,7 +791,7 @@
 
     if (mapped_named_bo)
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
     }
 }
 
@@ -813,7 +816,8 @@
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
@@ -836,7 +840,7 @@
 
         if (mapped_named_bo)
         {
-	        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+	        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
         }
     }
     else
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 7d4be91..b1e2742 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -720,17 +720,17 @@
 	    for(ui=0; ui<unNumParamData; ui++) {
             if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) 
             {
-                r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
-		        r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
-		        r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
-		        r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+              r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+		        r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+		        r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+		        r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
             }
             else
             {
-		        r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
             }
 	    }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
index 607b747..a74c6c7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
@@ -78,6 +78,9 @@
 
 static inline void *radeon_bo_manager_gem_ctor(int fd)
 {
+  fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+	  __func__, __LINE__);
+
   return NULL;
 }
 
@@ -87,6 +90,9 @@
 
 static inline void *radeon_cs_manager_gem_ctor(int fd)
 {
+  fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+	  __func__, __LINE__);
+
   return NULL;
 }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index 0d1af72..7b59c03 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -130,7 +130,6 @@
  */
 static void
 radeonBufferSubData(struct gl_context * ctx,
-                    GLenum target,
                     GLintptrARB offset,
                     GLsizeiptrARB size,
                     const GLvoid * data,
@@ -155,7 +154,6 @@
  */
 static void
 radeonGetBufferSubData(struct gl_context * ctx,
-                       GLenum target,
                        GLintptrARB offset,
                        GLsizeiptrARB size,
                        GLvoid * data,
@@ -171,17 +169,18 @@
 }
 
 /**
- * Called via glMapBufferARB()
+ * Called via glMapBuffer() and glMapBufferRange()
  */
 static void *
-radeonMapBuffer(struct gl_context * ctx,
-                GLenum target,
-                GLenum access,
-                struct gl_buffer_object *obj)
+radeonMapBufferRange(struct gl_context * ctx,
+		     GLintptr offset, GLsizeiptr length,
+		     GLbitfield access, struct gl_buffer_object *obj)
 {
     struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+    const GLboolean write_only =
+       (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT;
 
-    if (access == GL_WRITE_ONLY_ARB) {
+    if (write_only) {
         ctx->Driver.Flush(ctx);
     }
 
@@ -190,12 +189,13 @@
         return NULL;
     }
 
-    radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB);
+    obj->Offset = offset;
+    obj->Length = length;
+    obj->AccessFlags = access;
 
-    obj->Pointer = radeon_obj->bo->ptr;
-    obj->Length = obj->Size;
-    obj->Offset = 0;
+    radeon_bo_map(radeon_obj->bo, write_only);
 
+    obj->Pointer = radeon_obj->bo->ptr + offset;
     return obj->Pointer;
 }
 
@@ -205,7 +205,6 @@
  */
 static GLboolean
 radeonUnmapBuffer(struct gl_context * ctx,
-                  GLenum target,
                   struct gl_buffer_object *obj)
 {
     struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
@@ -229,6 +228,6 @@
     functions->BufferData = radeonBufferData;
     functions->BufferSubData = radeonBufferSubData;
     functions->GetBufferSubData = radeonGetBufferSubData;
-    functions->MapBuffer = radeonMapBuffer;
+    functions->MapBufferRange = radeonMapBufferRange;
     functions->UnmapBuffer = radeonUnmapBuffer;
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index bfc307c..e7a6623 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -436,7 +436,6 @@
 		       const drm_clip_rect_t	  *rect)
 {
 	radeonContextPtr rmesa;
-	struct radeon_framebuffer *rfb;
 	GLint nbox, i, ret;
 
 	assert(dPriv);
@@ -447,8 +446,6 @@
 
 	LOCK_HARDWARE(rmesa);
 
-	rfb = dPriv->driverPrivate;
-
 	if ( RADEON_DEBUG & RADEON_IOCTL ) {
 		fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
 	}
@@ -527,8 +524,6 @@
 {
 	radeonContextPtr radeon;
 	GLint ret;
-	__DRIscreen *psp;
-	struct radeon_renderbuffer *rrb;
 	struct radeon_framebuffer *rfb;
 
 	assert(dPriv);
@@ -537,9 +532,6 @@
 
 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
 	rfb = dPriv->driverPrivate;
-	rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-
-	psp = dPriv->driScreenPriv;
 
 	LOCK_HARDWARE(radeon);
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index bf8925f..c08b794 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -515,7 +515,6 @@
     __DRIcontext *driContext = radeon->dri.context;
     __DRIdrawable *drawable;
     __DRIscreen *screen;
-    struct radeon_framebuffer *draw;
 
     screen = driContext->driScreenPriv;
     if (!screen->dri2.loader)
@@ -527,7 +526,6 @@
 	    radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
 
 	/* Intel driver does the equivalent of this, no clue if it is needed:*/
-	draw = drawable->driverPrivate;
 	radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
 
 	driContext->dri2.draw_stamp = drawable->dri2.stamp;
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
index c2722a4..5595b70 100644
--- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
@@ -218,11 +218,9 @@
 
 static int cs_process_relocs(struct radeon_cs_int *cs)
 {
-    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
     struct cs_reloc_legacy *relocs;
     int i, j, r;
 
-    csm = (struct cs_manager_legacy*)cs->csm;
     relocs = (struct cs_reloc_legacy *)cs->relocs;
 restart:
     for (i = 0; i < cs->crelocs; i++) 
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
index a91d872..c23e9c2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -560,7 +560,6 @@
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
    __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
    GLuint flags = 0;
-   GLuint color_mask = 0;
    GLuint orig_mask = mask;
 
    if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
@@ -582,13 +581,11 @@
 
    if ( mask & BUFFER_BIT_FRONT_LEFT ) {
       flags |= RADEON_FRONT;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_FRONT_LEFT;
    }
 
    if ( mask & BUFFER_BIT_BACK_LEFT ) {
       flags |= RADEON_BACK;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_BACK_LEFT;
    }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
index 7b6bd36..ae8a212 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -114,16 +114,6 @@
 		)
 {
 	char ret = 0;
-	struct radeon_framebuffer *rfb = NULL;
-	struct radeon_renderbuffer *rrb = NULL;
-
-	if (radeon_get_drawable(radeon)) {
-		rfb = radeon_get_drawable(radeon)->driverPrivate;
-
-		if (rfb)
-			rrb = radeon_get_renderbuffer(&rfb->base,
-						      rfb->base._ColorDrawBufferIndexes[0]);
-	}
 
 	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
 		if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1)
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index 25a8ddf..a0b5506 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -455,7 +455,6 @@
    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
 
    if (radeon->radeonScreen->kernel_mm) {
-      functions->CopyTexImage2D = radeonCopyTexImage2D;
       functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
    }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index f14dfa2..94ff3c4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -141,61 +141,6 @@
 }
 
 void
-radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                     GLenum internalFormat,
-                     GLint x, GLint y, GLsizei width, GLsizei height,
-                     GLint border)
-{
-    struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-    struct gl_texture_object *texObj =
-        _mesa_select_tex_object(ctx, texUnit, target);
-    struct gl_texture_image *texImage =
-        _mesa_select_tex_image(ctx, texObj, target, level);
-    int srcx, srcy, dstx, dsty;
-
-    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-    radeon_prepare_render(radeon);
-
-    if (border)
-        goto fail;
-
-    /* Setup or redefine the texture object, mipmap tree and texture
-     * image.  Don't populate yet.
-     */
-    ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                           width, height, border,
-                           GL_RGBA, GL_UNSIGNED_BYTE, NULL,
-                           &ctx->DefaultPacking, texObj, texImage);
-
-    srcx = x;
-    srcy = y;
-    dstx = 0;
-    dsty = 0;
-    if (!_mesa_clip_copytexsubimage(ctx,
-                                    &dstx, &dsty,
-                                    &srcx, &srcy,
-                                    &width, &height)) {
-        return;
-    }
-
-    if (!do_copy_texsubimage(ctx, target, level,
-                             radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
-                             0, 0, x, y, width, height)) {
-        goto fail;
-    }
-
-    return;
-
-fail:
-    radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
-                 "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n",
-                 _mesa_lookup_enum_by_nr(internalFormat), border);
-
-    _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
-                              width, height, border);
-}
-
-void
 radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
                         GLint xoffset, GLint yoffset,
                         GLint x, GLint y,
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 9ba98e3..4303093 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -648,18 +648,12 @@
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	r100ContextPtr rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, format;
 	gl_format texFormat;
 
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = _mesa_get_current_tex_unit(radeon->glCtx);
@@ -1018,7 +1012,7 @@
 static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
 {
    const struct gl_texture_image *firstImage;
-   GLint log2Width, log2Height, log2Depth, texelBytes;
+   GLint log2Width, log2Height, texelBytes;
 
    if ( t->bo ) {
 	return GL_TRUE;
@@ -1033,7 +1027,6 @@
 
    log2Width  = firstImage->WidthLog2;
    log2Height = firstImage->HeightLog2;
-   log2Depth  = firstImage->DepthLog2;
    texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
 
    if (!t->image_override) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index ce0df32..ad7e4c1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -787,18 +787,6 @@
 	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
 			"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
 			__func__, dims, texObj, texImage, face, level);
-	{
-		struct radeon_bo *bo;
-		bo = !image->mt ? image->bo : image->mt->bo;
-		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
-			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
-				"%s Calling teximage for texture that is "
-				"queued for GPU processing.\n",
-				__func__);
-			radeon_firevertices(rmesa);
-		}
-	}
-
 
 	t->validated = GL_FALSE;
 
@@ -820,6 +808,18 @@
 		}
 	}
 
+	{
+		struct radeon_bo *bo;
+		bo = !image->mt ? image->bo : image->mt->bo;
+		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+				"%s Calling teximage for texture that is "
+				"queued for GPU processing.\n",
+				__func__);
+			radeon_firevertices(rmesa);
+		}
+	}
+
 	/* Upload texture image; note that the spec allows pixels to be NULL */
 	if (compressed) {
 		pixels = _mesa_validate_pbo_compressed_teximage(
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index 538a07f..6fc06d9 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -126,11 +126,6 @@
 				 struct gl_texture_object *texObj,
 				 struct gl_texture_image *texImage);
 
-void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-			GLenum internalFormat,
-			GLint x, GLint y, GLsizei width, GLsizei height,
-			GLint border);
-
 void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
 			GLint xoffset, GLint yoffset,
 			GLint x, GLint y,
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 81f48f9..81d000b 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -454,10 +454,10 @@
                         "glDrawPixels(invalid PBO access)");
             return;
          }
-         buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
-                                                 GL_READ_ONLY_ARB,
-                                                 unpack->BufferObj);
+         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						      unpack->BufferObj->Size,
+						      GL_MAP_READ_BIT,
+						      unpack->BufferObj);
          if (!buf) {
             /* buffer is already mapped - that's an error */
             _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -508,8 +508,7 @@
       }
 
       if (_mesa_is_bufferobj(unpack->BufferObj)) {
-         ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                 unpack->BufferObj);
+         ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
       }
    }
    else {
@@ -589,10 +588,10 @@
                         "glDrawPixels(invalid PBO access)");
             return;
          }
-         buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
-                                                 GL_READ_ONLY_ARB,
-                                                 unpack->BufferObj);
+         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						      unpack->BufferObj->Size,
+						      GL_MAP_READ_BIT,
+						      unpack->BufferObj);
          if (!buf) {
             /* buffer is already mapped - that's an error */
             _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -642,8 +641,7 @@
       }
 
       if (unpack->BufferObj->Name) {
-         ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                 unpack->BufferObj);
+         ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
       }
    }
    else {
diff --git a/src/mesa/main/.gitignore b/src/mesa/main/.gitignore
index 2575f44..d0744e3 100644
--- a/src/mesa/main/.gitignore
+++ b/src/mesa/main/.gitignore
@@ -4,3 +4,7 @@
 get_es2.c
 git_sha1.h
 git_sha1.h.tmp
+api_exec_es1_dispatch.h
+api_exec_es1_remap_helper.h
+api_exec_es2_dispatch.h
+api_exec_es2_remap_helper.h
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index f88da84..b93a057 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1602,10 +1602,10 @@
       _ae_update_state(ctx);
 
    for (i = 0; i < actx->nr_vbos; i++)
-      ctx->Driver.MapBuffer(ctx,
-			    GL_ARRAY_BUFFER_ARB,
-			    GL_DYNAMIC_DRAW_ARB,
-			    actx->vbo[i]);
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 actx->vbo[i]->Size,
+				 GL_MAP_READ_BIT,
+				 actx->vbo[i]);
 
    if (actx->nr_vbos)
       actx->mapped_vbos = GL_TRUE;
@@ -1622,9 +1622,7 @@
    assert (!actx->NewState);
 
    for (i = 0; i < actx->nr_vbos; i++)
-      ctx->Driver.UnmapBuffer(ctx,
-			      GL_ARRAY_BUFFER_ARB,
-			      actx->vbo[i]);
+      ctx->Driver.UnmapBuffer(ctx, actx->vbo[i]);
 
    actx->mapped_vbos = GL_FALSE;
 }
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 2981d42..699b414 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -65,8 +65,8 @@
 
    if (_mesa_is_bufferobj(elementBuf)) {
       /* elements are in a user-defined buffer object.  need to map it */
-      map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER,
-                                  GL_READ_ONLY, elementBuf);
+      map = ctx->Driver.MapBufferRange(ctx, 0, elementBuf->Size,
+				       GL_MAP_READ_BIT, elementBuf);
       /* Actual address is the sum of pointers */
       indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices);
    }
@@ -89,7 +89,7 @@
    }
 
    if (map) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuf);
+      ctx->Driver.UnmapBuffer(ctx, elementBuf);
    }
 
    return max;
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index c52358e..c453f9c 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -386,11 +386,11 @@
  * \sa glBufferSubDataARB, dd_function_table::BufferSubData.
  */
 static void
-_mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset,
+_mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset,
 		      GLsizeiptrARB size, const GLvoid * data,
 		      struct gl_buffer_object * bufObj )
 {
-   (void) ctx; (void) target;
+   (void) ctx;
 
    /* this should have been caught in _mesa_BufferSubData() */
    ASSERT(size + offset <= bufObj->Size);
@@ -419,12 +419,11 @@
  * \sa glBufferGetSubDataARB, dd_function_table::GetBufferSubData.
  */
 static void
-_mesa_buffer_get_subdata( struct gl_context *ctx,
-                          GLenum target, GLintptrARB offset,
+_mesa_buffer_get_subdata( struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, GLvoid * data,
 			  struct gl_buffer_object * bufObj )
 {
-   (void) ctx; (void) target;
+   (void) ctx;
 
    if (bufObj->Data && ((GLsizeiptrARB) (size + offset) <= bufObj->Size)) {
       memcpy( data, (GLubyte *) bufObj->Data + offset, size );
@@ -433,49 +432,15 @@
 
 
 /**
- * Default callback for \c dd_function_tabel::MapBuffer().
- *
- * The function parameters will have been already tested for errors.
- *
- * \param ctx     GL context.
- * \param target  Buffer object target on which to operate.
- * \param access  Information about how the buffer will be accessed.
- * \param bufObj  Object to be mapped.
- * \return  A pointer to the object's internal data store that can be accessed
- *          by the processor
- *
- * \sa glMapBufferARB, dd_function_table::MapBuffer
- */
-static void *
-_mesa_buffer_map( struct gl_context *ctx, GLenum target, GLenum access,
-		  struct gl_buffer_object *bufObj )
-{
-   (void) ctx;
-   (void) target;
-   (void) access;
-   /* Just return a direct pointer to the data */
-   if (_mesa_bufferobj_mapped(bufObj)) {
-      /* already mapped! */
-      return NULL;
-   }
-   bufObj->Pointer = bufObj->Data;
-   bufObj->Length = bufObj->Size;
-   bufObj->Offset = 0;
-   return bufObj->Pointer;
-}
-
-
-/**
  * Default fallback for \c dd_function_table::MapBufferRange().
  * Called via glMapBufferRange().
  */
 static void *
-_mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset,
+_mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset,
                         GLsizeiptr length, GLbitfield access,
                         struct gl_buffer_object *bufObj )
 {
    (void) ctx;
-   (void) target;
    assert(!_mesa_bufferobj_mapped(bufObj));
    /* Just return a direct pointer to the data */
    bufObj->Pointer = bufObj->Data + offset;
@@ -491,12 +456,11 @@
  * Called via glFlushMappedBufferRange().
  */
 static void
-_mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target, 
+_mesa_buffer_flush_mapped_range( struct gl_context *ctx,
                                  GLintptr offset, GLsizeiptr length,
                                  struct gl_buffer_object *obj )
 {
    (void) ctx;
-   (void) target;
    (void) offset;
    (void) length;
    (void) obj;
@@ -512,11 +476,9 @@
  * \sa glUnmapBufferARB, dd_function_table::UnmapBuffer
  */
 static GLboolean
-_mesa_buffer_unmap( struct gl_context *ctx, GLenum target,
-                    struct gl_buffer_object *bufObj )
+_mesa_buffer_unmap( struct gl_context *ctx, struct gl_buffer_object *bufObj )
 {
    (void) ctx;
-   (void) target;
    /* XXX we might assert here that bufObj->Pointer is non-null */
    bufObj->Pointer = NULL;
    bufObj->Length = 0;
@@ -543,16 +505,16 @@
    assert(!_mesa_bufferobj_mapped(src));
    assert(!_mesa_bufferobj_mapped(dst));
 
-   srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_READ_BUFFER,
-                                              GL_READ_ONLY, src);
-   dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_WRITE_BUFFER,
-                                              GL_WRITE_ONLY, dst);
+   srcPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, src->Size,
+						   GL_MAP_READ_BIT, src);
+   dstPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, dst->Size,
+						   GL_MAP_WRITE_BIT, dst);
 
    if (srcPtr && dstPtr)
       memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
 
-   ctx->Driver.UnmapBuffer(ctx, GL_COPY_READ_BUFFER, src);
-   ctx->Driver.UnmapBuffer(ctx, GL_COPY_WRITE_BUFFER, dst);
+   ctx->Driver.UnmapBuffer(ctx, src);
+   ctx->Driver.UnmapBuffer(ctx, dst);
 }
 
 
@@ -712,7 +674,6 @@
    driver->BufferData = _mesa_buffer_data;
    driver->BufferSubData = _mesa_buffer_subdata;
    driver->GetBufferSubData = _mesa_buffer_get_subdata;
-   driver->MapBuffer = _mesa_buffer_map;
    driver->UnmapBuffer = _mesa_buffer_unmap;
 
    /* GL_ARB_map_buffer_range */
@@ -774,7 +735,7 @@
 
          if (_mesa_bufferobj_mapped(bufObj)) {
             /* if mapped, unmap it now */
-            ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+            ctx->Driver.UnmapBuffer(ctx, bufObj);
             bufObj->AccessFlags = DEFAULT_ACCESS;
             bufObj->Pointer = NULL;
          }
@@ -934,7 +895,7 @@
    
    if (_mesa_bufferobj_mapped(bufObj)) {
       /* Unmap the existing buffer.  We'll replace it now.  Not an error. */
-      ctx->Driver.UnmapBuffer(ctx, target, bufObj);
+      ctx->Driver.UnmapBuffer(ctx, bufObj);
       bufObj->AccessFlags = DEFAULT_ACCESS;
       ASSERT(bufObj->Pointer == NULL);
    }  
@@ -980,7 +941,7 @@
    bufObj->Written = GL_TRUE;
 
    ASSERT(ctx->Driver.BufferSubData);
-   ctx->Driver.BufferSubData( ctx, target, offset, size, data, bufObj );
+   ctx->Driver.BufferSubData( ctx, offset, size, data, bufObj );
 }
 
 
@@ -1000,7 +961,7 @@
    }
 
    ASSERT(ctx->Driver.GetBufferSubData);
-   ctx->Driver.GetBufferSubData( ctx, target, offset, size, data, bufObj );
+   ctx->Driver.GetBufferSubData( ctx, offset, size, data, bufObj );
 }
 
 
@@ -1043,8 +1004,8 @@
       return NULL;
    }
 
-   ASSERT(ctx->Driver.MapBuffer);
-   map = ctx->Driver.MapBuffer( ctx, target, access, bufObj );
+   ASSERT(ctx->Driver.MapBufferRange);
+   map = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, accessFlags, bufObj);
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
       return NULL;
@@ -1147,7 +1108,7 @@
    }
 #endif
 
-   status = ctx->Driver.UnmapBuffer( ctx, target, bufObj );
+   status = ctx->Driver.UnmapBuffer( ctx, bufObj );
    bufObj->AccessFlags = DEFAULT_ACCESS;
    ASSERT(bufObj->Pointer == NULL);
    ASSERT(bufObj->Offset == 0);
@@ -1451,8 +1412,7 @@
    }
       
    ASSERT(ctx->Driver.MapBufferRange);
-   map = ctx->Driver.MapBufferRange(ctx, target, offset, length,
-                                    access, bufObj);
+   map = ctx->Driver.MapBufferRange(ctx, offset, length, access, bufObj);
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
    }
@@ -1535,7 +1495,7 @@
    ASSERT(bufObj->AccessFlags & GL_MAP_WRITE_BIT);
 
    if (ctx->Driver.FlushMappedBufferRange)
-      ctx->Driver.FlushMappedBufferRange(ctx, target, offset, length, bufObj);
+      ctx->Driver.FlushMappedBufferRange(ctx, offset, length, bufObj);
 }
 
 
diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index 743841b..8ed1c6f 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -45,9 +45,6 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
-#if defined(__linux__) && defined(__i386__)
-#include <fpu_control.h>
-#endif
 #include <float.h>
 #include <stdarg.h>
 
@@ -60,29 +57,7 @@
 /**
  * Get standard integer types
  */
-#if defined(_MSC_VER)
-   typedef __int8             int8_t;
-   typedef unsigned __int8    uint8_t;
-   typedef __int16            int16_t;
-   typedef unsigned __int16   uint16_t;
-   typedef __int32            int32_t;
-   typedef unsigned __int32   uint32_t;
-   typedef __int64            int64_t;
-   typedef unsigned __int64   uint64_t;
-
-#  if defined(_WIN64)
-     typedef __int64            intptr_t;
-     typedef unsigned __int64   uintptr_t;
-#  else
-     typedef __int32            intptr_t;
-     typedef unsigned __int32   uintptr_t;
-#  endif
-
-#  define INT64_C(__val) __val##i64
-#  define UINT64_C(__val) __val##ui64
-#else
-#  include <stdint.h>
-#endif
+#include <stdint.h>
 
 
 /**
@@ -139,26 +114,28 @@
 /**
  * Function inlining
  */
-#if defined(__GNUC__)
-#  define INLINE __inline__
-#elif defined(__MSC__)
-#  define INLINE __inline
-#elif defined(_MSC_VER)
-#  define INLINE __inline
-#elif defined(__ICL)
-#  define INLINE __inline
-#elif defined(__INTEL_COMPILER)
-#  define INLINE inline
-#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-#  define INLINE __inline
-#elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-#  define INLINE inline
-#  define __inline inline
-#  define __inline__ inline
-#elif (__STDC_VERSION__ >= 199901L) /* C99 */
-#  define INLINE inline
-#else
-#  define INLINE
+#ifndef INLINE
+#  if defined(__GNUC__)
+#    define INLINE __inline__
+#  elif defined(__MSC__)
+#    define INLINE __inline
+#  elif defined(_MSC_VER)
+#    define INLINE __inline
+#  elif defined(__ICL)
+#    define INLINE __inline
+#  elif defined(__INTEL_COMPILER)
+#    define INLINE inline
+#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+#    define INLINE __inline
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+#    define INLINE inline
+#    define __inline inline
+#    define __inline__ inline
+#  elif (__STDC_VERSION__ >= 199901L) /* C99 */
+#    define INLINE inline
+#  else
+#    define INLINE
+#  endif
 #endif
 
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 9fe6d52..fcf40ec 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -189,31 +189,22 @@
    /*@{*/
 
    /**
-    * Choose texture format.
-    * 
-    * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback
-    * functions.  The driver should examine \p internalFormat and return a
-    * gl_format value.
+    * Choose actual hardware texture format given the user-provided source
+    * image format and type and the desired internal format.  In some
+    * cases, srcFormat and srcType can be GL_NONE.
+    * Called by glTexImage(), etc.
     */
-   GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
+   gl_format (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
                                      GLenum srcFormat, GLenum srcType );
 
    /**
-    * Called by glTexImage1D().
-    * 
-    * \param target user specified.
-    * \param format user specified.
-    * \param type user specified.
-    * \param pixels user specified.
-    * \param packing indicates the image packing of pixels.
+    * Called by glTexImage1D().  Simply copy the source texture data into the
+    * destination texture memory.  The gl_texture_image fields, etc. will be
+    * fully initialized.
+    * The parameters are the same as glTexImage1D(), plus:
+    * \param packing describes how to unpack the source data.
     * \param texObj is the target texture object.
-    * \param texImage is the target texture image.  It will have the texture \p
-    * width, \p height, \p depth, \p border and \p internalFormat information.
-    * 
-    * \p retainInternalCopy is returned by this function and indicates whether
-    * core Mesa should keep an internal copy of the texture image.
-    *
-    * Drivers should call a fallback routine from texstore.c if needed.
+    * \param texImage is the target texture image.
     */
    void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level,
                        GLint internalFormat,
@@ -250,25 +241,9 @@
                        struct gl_texture_image *texImage );
 
    /**
-    * Called by glTexSubImage1D().
-    *
-    * \param target user specified.
-    * \param level user specified.
-    * \param xoffset user specified.
-    * \param yoffset user specified.
-    * \param zoffset user specified.
-    * \param width user specified.
-    * \param height user specified.
-    * \param depth user specified.
-    * \param format user specified.
-    * \param type user specified.
-    * \param pixels user specified.
-    * \param packing indicates the image packing of pixels.
-    * \param texObj is the target texture object.
-    * \param texImage is the target texture image.  It will have the texture \p
-    * width, \p height, \p border and \p internalFormat information.
-    *
-    * The driver should use a fallback routine from texstore.c if needed.
+    * Called by glTexSubImage1D().  Replace a subset of the target texture
+    * with new texel data.
+    * \sa dd_function_table::TexImage1D.
     */
    void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level,
                           GLint xoffset, GLsizei width,
@@ -315,24 +290,6 @@
                         struct gl_texture_image *texImage );
 
    /**
-    * Called by glCopyTexImage1D().
-    * 
-    * Drivers should use a fallback routine from texstore.c if needed.
-    */
-   void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level,
-                           GLenum internalFormat, GLint x, GLint y,
-                           GLsizei width, GLint border );
-
-   /**
-    * Called by glCopyTexImage2D().
-    * 
-    * Drivers should use a fallback routine from texstore.c if needed.
-    */
-   void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level,
-                           GLenum internalFormat, GLint x, GLint y,
-                           GLsizei width, GLsizei height, GLint border );
-
-   /**
     * Called by glCopyTexSubImage1D().
     * 
     * Drivers should use a fallback routine from texstore.c if needed.
@@ -741,17 +698,14 @@
                             const GLvoid *data, GLenum usage,
                             struct gl_buffer_object *obj );
 
-   void (*BufferSubData)( struct gl_context *ctx, GLenum target, GLintptrARB offset,
+   void (*BufferSubData)( struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, const GLvoid *data,
 			  struct gl_buffer_object *obj );
 
-   void (*GetBufferSubData)( struct gl_context *ctx, GLenum target,
+   void (*GetBufferSubData)( struct gl_context *ctx,
 			     GLintptrARB offset, GLsizeiptrARB size,
 			     GLvoid *data, struct gl_buffer_object *obj );
 
-   void * (*MapBuffer)( struct gl_context *ctx, GLenum target, GLenum access,
-			struct gl_buffer_object *obj );
-
    void (*CopyBufferSubData)( struct gl_context *ctx,
                               struct gl_buffer_object *src,
                               struct gl_buffer_object *dst,
@@ -760,15 +714,15 @@
 
    /* May return NULL if MESA_MAP_NOWAIT_BIT is set in access:
     */
-   void * (*MapBufferRange)( struct gl_context *ctx, GLenum target, GLintptr offset,
+   void * (*MapBufferRange)( struct gl_context *ctx, GLintptr offset,
                              GLsizeiptr length, GLbitfield access,
                              struct gl_buffer_object *obj);
 
-   void (*FlushMappedBufferRange)(struct gl_context *ctx, GLenum target, 
+   void (*FlushMappedBufferRange)(struct gl_context *ctx,
                                   GLintptr offset, GLsizeiptr length,
                                   struct gl_buffer_object *obj);
 
-   GLboolean (*UnmapBuffer)( struct gl_context *ctx, GLenum target,
+   GLboolean (*UnmapBuffer)( struct gl_context *ctx,
 			     struct gl_buffer_object *obj );
    /*@}*/
 
diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c
index e7f6be9..b1fc096 100644
--- a/src/mesa/main/debug.c
+++ b/src/mesa/main/debug.c
@@ -192,17 +192,6 @@
    if (strstr(debug, "flush"))
       MESA_DEBUG_FLAGS |= DEBUG_ALWAYS_FLUSH;
 
-#if defined(_FPU_GETCW) && defined(_FPU_SETCW)
-   if (strstr(debug, "fpexceptions")) {
-      /* raise FP exceptions */
-      fpu_control_t mask;
-      _FPU_GETCW(mask);
-      mask &= ~(_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM
-                | _FPU_MASK_OM | _FPU_MASK_UM);
-      _FPU_SETCW(mask);
-   }
-#endif
-
 #else
    (void) debug;
 #endif
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index f928239..6e075b4 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -894,8 +894,8 @@
       GLvoid *image;
 
       map = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                               GL_READ_ONLY_ARB, unpack->BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+				    GL_MAP_READ_BIT, unpack->BufferObj);
       if (!map) {
          /* unable to map src buffer! */
          _mesa_error(ctx, GL_INVALID_OPERATION, "unable to map PBO");
@@ -906,8 +906,7 @@
       image = _mesa_unpack_image(dimensions, width, height, depth,
                                  format, type, src, unpack);
 
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
 
       if (!image) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "display list construction");
diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c
index 2089cdf..83485a9 100644
--- a/src/mesa/main/drawtex.c
+++ b/src/mesa/main/drawtex.c
@@ -45,11 +45,15 @@
       return;
    }
 
+   _mesa_set_vp_override(ctx, GL_TRUE);
+
    if (ctx->NewState)
       _mesa_update_state(ctx);
 
    ASSERT(ctx->Driver.DrawTex);
    ctx->Driver.DrawTex(ctx, x, y, z, width, height);
+
+   _mesa_set_vp_override(ctx, GL_FALSE);
 }
 
 
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index aac8b9c..3ba4df6 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -5,7 +5,6 @@
 
 /*
  * Mesa 3-D graphics library
- * Version:  7.0.3
  *
  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
  *
@@ -560,7 +559,6 @@
          ctx->Polygon.OffsetLine = state;
          break;
       case GL_POLYGON_OFFSET_FILL:
-         /*case GL_POLYGON_OFFSET_EXT:*/
          if (ctx->Polygon.OffsetFill == state)
             return;
          FLUSH_VERTICES(ctx, _NEW_POLYGON);
@@ -643,9 +641,7 @@
          break;
 #endif
 
-      /*
-       * CLIENT STATE!!!
-       */
+      /* client-side state */
       case GL_VERTEX_ARRAY:
       case GL_NORMAL_ARRAY:
       case GL_COLOR_ARRAY:
@@ -1174,7 +1170,6 @@
       case GL_POLYGON_OFFSET_LINE:
 	 return ctx->Polygon.OffsetLine;
       case GL_POLYGON_OFFSET_FILL:
-      /*case GL_POLYGON_OFFSET_EXT:*/
 	 return ctx->Polygon.OffsetFill;
       case GL_RESCALE_NORMAL_EXT:
          return ctx->Transform.RescaleNormals;
@@ -1213,9 +1208,7 @@
          }
 #endif
 
-      /*
-       * CLIENT STATE!!!
-       */
+      /* client-side state */
       case GL_VERTEX_ARRAY:
          return (ctx->Array.ArrayObj->Vertex.Enabled != 0);
       case GL_NORMAL_ARRAY:
diff --git a/src/mesa/main/es_generator.py b/src/mesa/main/es_generator.py
index c0b0a44..cad3dea 100644
--- a/src/mesa/main/es_generator.py
+++ b/src/mesa/main/es_generator.py
@@ -681,10 +681,10 @@
 #if FEATURE_remap_table
 
 /* define esLocalRemapTable */
-#include "%sapi/main/dispatch.h"
+#include "main/api_exec_%s_dispatch.h"
 
 #define need_MESA_remap_table
-#include "%sapi/main/remap_helper.h"
+#include "main/api_exec_%s_remap_helper.h"
 
 static void
 init_remap_table(void)
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index bc61c50..14b0cf9 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -81,6 +81,7 @@
    { "GL_ARB_blend_func_extended",                 o(ARB_blend_func_extended),                 GL,             2009 },
    { "GL_ARB_color_buffer_float",                  o(ARB_color_buffer_float),                  GL,             2004 },
    { "GL_ARB_copy_buffer",                         o(ARB_copy_buffer),                         GL,             2008 },
+   { "GL_ARB_conservative_depth",                  o(AMD_conservative_depth),                  GL,             2011 },
    { "GL_ARB_depth_buffer_float",                  o(ARB_depth_buffer_float),                  GL,             2008 },
    { "GL_ARB_depth_clamp",                         o(ARB_depth_clamp),                         GL,             2003 },
    { "GL_ARB_depth_texture",                       o(ARB_depth_texture),                       GL,             2001 },
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 8496936..0b48fc7 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1984,10 +1984,26 @@
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if ((texture != 0) && (textarget != GL_TEXTURE_1D)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glFramebufferTexture1DEXT(textarget)");
-      return;
+   if (texture != 0) {
+      GLboolean error;
+
+      switch (textarget) {
+      case GL_TEXTURE_1D:
+         error = GL_FALSE;
+         break;
+      case GL_TEXTURE_1D_ARRAY:
+         error = !ctx->Extensions.EXT_texture_array;
+         break;
+      default:
+         error = GL_TRUE;
+      }
+
+      if (error) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glFramebufferTexture1DEXT(textarget=%s)",
+                     _mesa_lookup_enum_by_nr(textarget));
+         return;
+      }
    }
 
    framebuffer_texture(ctx, "1D", target, attachment, textarget, texture,
@@ -2001,13 +2017,37 @@
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if ((texture != 0) &&
-       (textarget != GL_TEXTURE_2D) &&
-       (textarget != GL_TEXTURE_RECTANGLE_ARB) &&
-       (!is_cube_face(textarget))) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glFramebufferTexture2DEXT(textarget=0x%x)", textarget);
-      return;
+   if (texture != 0) {
+      GLboolean error;
+
+      switch (textarget) {
+      case GL_TEXTURE_2D:
+         error = GL_FALSE;
+         break;
+      case GL_TEXTURE_RECTANGLE:
+         error = !ctx->Extensions.NV_texture_rectangle;
+         break;
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+         error = !ctx->Extensions.ARB_texture_cube_map;
+         break;
+      case GL_TEXTURE_2D_ARRAY:
+         error = !ctx->Extensions.EXT_texture_array;
+         break;
+      default:
+         error = GL_FALSE;
+      }
+
+      if (error) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glFramebufferTexture2DEXT(textarget=%s)",
+                     _mesa_lookup_enum_by_nr(textarget));
+         return;
+      }
    }
 
    framebuffer_texture(ctx, "2D", target, attachment, textarget, texture,
@@ -2023,7 +2063,7 @@
    GET_CURRENT_CONTEXT(ctx);
 
    if ((texture != 0) && (textarget != GL_TEXTURE_3D)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
+      _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glFramebufferTexture3DEXT(textarget)");
       return;
    }
@@ -2134,10 +2174,14 @@
 {
    const struct gl_renderbuffer_attachment *att;
    struct gl_framebuffer *buffer;
+   GLenum err;
    GET_CURRENT_CONTEXT(ctx);
 
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
+   /* The error differs in GL andd GLES. */
+   err = ctx->API == API_OPENGL ? GL_INVALID_OPERATION : GL_INVALID_ENUM;
+
    buffer = get_framebuffer_target(ctx, target);
    if (!buffer) {
       _mesa_error(ctx, GL_INVALID_ENUM,
@@ -2188,7 +2232,12 @@
       }
       else {
          assert(att->Type == GL_NONE);
-         *params = 0;
+         if (ctx->API == API_OPENGL) {
+            *params = 0;
+         } else {
+            _mesa_error(ctx, GL_INVALID_ENUM,
+                        "glGetFramebufferAttachmentParameterivEXT(pname)");
+         }
       }
       return;
    case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT:
@@ -2196,7 +2245,7 @@
 	 *params = att->TextureLevel;
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2214,7 +2263,7 @@
          }
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2232,7 +2281,7 @@
          }
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2246,7 +2295,7 @@
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2267,7 +2316,7 @@
          return;
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2301,7 +2350,7 @@
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Texture) {
@@ -2337,6 +2386,8 @@
 _mesa_GenerateMipmapEXT(GLenum target)
 {
    struct gl_texture_object *texObj;
+   GLboolean error;
+
    GET_CURRENT_CONTEXT(ctx);
 
    ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -2346,12 +2397,22 @@
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_3D:
+      error = GL_FALSE;
+      break;
    case GL_TEXTURE_CUBE_MAP:
-      /* OK, legal value */
+      error = !ctx->Extensions.ARB_texture_cube_map;
+      break;
+   case GL_TEXTURE_1D_ARRAY:
+   case GL_TEXTURE_2D_ARRAY:
+      error = !ctx->Extensions.EXT_texture_array;
       break;
    default:
-      /* XXX need to implement GL_TEXTURE_1D_ARRAY and GL_TEXTURE_2D_ARRAY */
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target)");
+      error = GL_TRUE;
+   }
+
+   if (error) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target=%s)",
+                  _mesa_lookup_enum_by_nr(target));
       return;
    }
 
diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index 0b53c28..7cc1721 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -330,8 +330,7 @@
       /* _NEW_RENDERMODE */
       fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
    }
-   else if (!(vertexProgram || vertexShader) ||
-            !ctx->VertexProgram._Current) {
+   else if (!(vertexProgram || vertexShader)) {
       /* Fixed function vertex logic */
       /* _NEW_ARRAY */
       GLbitfield varying_inputs = ctx->varying_vp_inputs;
@@ -875,7 +874,8 @@
    values[1] = s1;
    values[2] = s2;
    values[3] = s3;
-   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
+   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters,
+                                     (gl_constant_value *) values, 4,
                                      &swizzle );
    r = make_ureg(PROGRAM_CONSTANT, idx);
    r.swz = swizzle;
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index b8e49a3..2d2485c 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -455,13 +455,13 @@
 			      GLfloat s2,
 			      GLfloat s3)
 {
-   GLfloat values[4];
+   gl_constant_value values[4];
    GLint idx;
    GLuint swizzle;
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
+   values[0].f = s0;
+   values[1].f = s1;
+   values[2].f = s2;
+   values[3].f = s3;
    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
                                      &swizzle );
    ASSERT(swizzle == SWIZZLE_NOOP);
diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index e27569a..23fa1b2 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -548,6 +548,7 @@
             fb->Visual.rgbBits = fb->Visual.redBits
                + fb->Visual.greenBits + fb->Visual.blueBits;
             fb->Visual.samples = rb->NumSamples;
+            fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0;
             if (_mesa_get_format_color_encoding(fmt) == GL_SRGB)
                 fb->Visual.sRGBCapable = ctx->Const.sRGBCapable;
             break;
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 0492e15..d32c68a 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1569,11 +1569,11 @@
       break;
 
    case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB:
-      v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE);
+      v->value_int = _mesa_get_compressed_formats(ctx, NULL);
       break;
    case GL_COMPRESSED_TEXTURE_FORMATS_ARB:
       v->value_int_n.n = 
-	 _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE);
+	 _mesa_get_compressed_formats(ctx, v->value_int_n.ints);
       ASSERT(v->value_int_n.n <= 100);
       break;
 
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 0a572ec..8f09719 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -753,7 +753,8 @@
 float
 _mesa_strtof( const char *s, char **end )
 {
-#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__)
+#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \
+    !defined(ANDROID)
    static locale_t loc = NULL;
    if (!loc) {
       loc = newlocale(LC_CTYPE_MASK, "C", NULL);
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 3fa1db0..70defdc 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -134,7 +134,13 @@
 #define exp2f(f) ((float) exp2(f))
 #define floorf(f) ((float) floor(f))
 #define logf(f) ((float) log(f))
+
+#ifdef ANDROID
+#define log2f(f) (logf(f) * (float) (1.0 / M_LN2))
+#else
 #define log2f(f) ((float) log2(f))
+#endif
+
 #define powf(x,y) ((float) pow(x,y))
 #define sinf(f) ((float) sin(f))
 #define sinhf(f) ((float) sinh(f))
@@ -562,7 +568,7 @@
 
 #ifdef __GNUC__
 
-#ifdef __MINGW32__
+#if defined(__MINGW32__) || defined(ANDROID)
 #define ffs __builtin_ffs
 #define ffsll __builtin_ffsll
 #endif
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b881183..f2eb889 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1279,6 +1279,9 @@
    GLboolean _IsPowerOfTwo;	/**< Are all dimensions powers of two? */
 
    struct gl_texture_object *TexObject;  /**< Pointer back to parent object */
+   GLuint Level;                /**< Which mipmap level am I? */
+   /** Cube map face: index into gl_texture_object::Image[] array */
+   GLuint Face;
 
    FetchTexelFuncC FetchTexelc;	/**< GLchan texel fetch function pointer */
    FetchTexelFuncF FetchTexelf;	/**< Float texel fetch function pointer */
@@ -2252,8 +2255,6 @@
     */
    struct gl_shader_program *ActiveProgram;
 
-   void *MemPool;
-
    GLbitfield Flags;                    /**< Mask of GLSL_x flags */
 };
 
@@ -2719,6 +2720,12 @@
 
    GLuint GLSLVersion;  /**< GLSL version supported (ex: 120 = 1.20) */
 
+   /**
+    * Does the driver support real 32-bit integers?  (Otherwise, integers are
+    * simulated via floats.)
+    */
+   GLboolean NativeIntegers;
+
    /** Which texture units support GL_ATI_envmap_bumpmap as targets */
    GLbitfield SupportedBumpUnits;
 
diff --git a/src/mesa/main/nvprogram.c b/src/mesa/main/nvprogram.c
index dd198b8..7ff7645 100644
--- a/src/mesa/main/nvprogram.c
+++ b/src/mesa/main/nvprogram.c
@@ -812,7 +812,7 @@
 {
    struct gl_program *prog;
    struct gl_fragment_program *fragProg;
-   GLfloat *v;
+   gl_constant_value *v;
 
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -834,10 +834,10 @@
    v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len,
                                     (char *) name);
    if (v) {
-      v[0] = x;
-      v[1] = y;
-      v[2] = z;
-      v[3] = w;
+      v[0].f = x;
+      v[1].f = y;
+      v[2].f = z;
+      v[3].f = w;
       return;
    }
 
@@ -878,7 +878,7 @@
 {
    struct gl_program *prog;
    struct gl_fragment_program *fragProg;
-   const GLfloat *v;
+   const gl_constant_value *v;
 
    GET_CURRENT_CONTEXT(ctx);
 
@@ -899,10 +899,10 @@
    v = _mesa_lookup_parameter_value(fragProg->Base.Parameters,
                                     len, (char *) name);
    if (v) {
-      params[0] = v[0];
-      params[1] = v[1];
-      params[2] = v[2];
-      params[3] = v[3];
+      params[0] = v[0].f;
+      params[1] = v[1].f;
+      params[2] = v[2].f;
+      params[3] = v[3].f;
       return;
    }
 
diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index 15e0480..4e7e6f9 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -128,9 +128,10 @@
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
       /* unpack from PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                              GL_READ_ONLY_ARB,
-                                              unpack->BufferObj);
+      buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						   unpack->BufferObj->Size,
+						   GL_MAP_READ_BIT,
+						   unpack->BufferObj);
       if (!buf)
          return NULL;
 
@@ -201,8 +202,7 @@
 {
    ASSERT(unpack != &ctx->Pack); /* catch pack/unpack mismatch */
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 }
 
@@ -224,9 +224,10 @@
 
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       /* pack into PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                                              GL_WRITE_ONLY_ARB,
-                                              pack->BufferObj);
+      buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						   pack->BufferObj->Size,
+						   GL_MAP_WRITE_BIT,
+						   pack->BufferObj);
       if (!buf)
          return NULL;
 
@@ -297,7 +298,7 @@
 {
    ASSERT(pack != &ctx->Unpack); /* catch pack/unpack mismatch */
    if (_mesa_is_bufferobj(pack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, pack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, pack->BufferObj);
    }
 }
 
@@ -327,8 +328,9 @@
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                          GL_READ_ONLY_ARB, unpack->BufferObj);
+   buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+						GL_MAP_READ_BIT,
+						unpack->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped)");
       return NULL;
@@ -364,8 +366,10 @@
       return NULL;
    }
 
-   buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                         GL_READ_ONLY_ARB, packing->BufferObj);
+   buf = (GLubyte*) ctx->Driver.MapBufferRange(ctx, 0,
+					       packing->BufferObj->Size,
+					       GL_MAP_READ_BIT,
+					       packing->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped");
       return NULL;
@@ -384,8 +388,7 @@
                          const struct gl_pixelstore_attrib *unpack)
 {
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 }
 
diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c
index 944ad43..eaedf7c 100644
--- a/src/mesa/main/querymatrix.c
+++ b/src/mesa/main/querymatrix.c
@@ -73,7 +73,7 @@
 #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \
      defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
      (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \
-     (defined(__sun) && defined(__GNUC__))
+     (defined(__sun) && defined(__GNUC__)) || defined(ANDROID)
 
 /* fpclassify is available. */
 
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 8df25c3..74997ea 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1125,7 +1125,7 @@
 validate_program(struct gl_context *ctx, GLuint program)
 {
    struct gl_shader_program *shProg;
-   char errMsg[100];
+   char errMsg[100] = "";
 
    shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram");
    if (!shProg) {
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 33d91ad..f128648 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -244,6 +244,8 @@
    prog->Geom.InputType = GL_TRIANGLES;
    prog->Geom.OutputType = GL_TRIANGLE_STRIP;
 #endif
+
+   prog->InfoLog = ralloc_strdup(prog, "");
 }
 
 /**
@@ -283,6 +285,10 @@
       _mesa_free_parameter_list(shProg->Varying);
       shProg->Varying = NULL;
    }
+
+   assert(shProg->InfoLog != NULL);
+   ralloc_free(shProg->InfoLog);
+   shProg->InfoLog = ralloc_strdup(shProg, "");
 }
 
 
@@ -317,11 +323,6 @@
       shProg->Shaders = NULL;
    }
 
-   if (shProg->InfoLog) {
-      ralloc_free(shProg->InfoLog);
-      shProg->InfoLog = NULL;
-   }
-
    /* Transform feedback varying vars */
    for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) {
       free(shProg->TransformFeedback.VaryingNames[i]);
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index d84f596..8b7159d 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -200,7 +200,7 @@
    struct gl_buffer_object *bufObj = (struct gl_buffer_object *) data;
    struct gl_context *ctx = (struct gl_context *) userData;
    if (_mesa_bufferobj_mapped(bufObj)) {
-      ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+      ctx->Driver.UnmapBuffer(ctx, bufObj);
       bufObj->Pointer = NULL;
    }
    _mesa_reference_buffer_object(ctx, &bufObj, NULL);
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index d820ae9..42bd1ee 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -40,19 +40,192 @@
 
 
 /**
+ * Get the GL base format of a specified GL compressed texture format
+ *
+ * From page 232 of the OpenGL 3.3 (Compatiblity Profile) spec:
+ *
+ *     "Compressed Internal Format      Base Internal Format    Type
+ *     ---------------------------     --------------------    ---------
+ *     COMPRESSED_ALPHA                ALPHA                   Generic
+ *     COMPRESSED_LUMINANCE            LUMINANCE               Generic
+ *     COMPRESSED_LUMINANCE_ALPHA      LUMINANCE_ALPHA         Generic
+ *     COMPRESSED_INTENSITY            INTENSITY               Generic
+ *     COMPRESSED_RED                  RED                     Generic
+ *     COMPRESSED_RG                   RG                      Generic
+ *     COMPRESSED_RGB                  RGB                     Generic
+ *     COMPRESSED_RGBA                 RGBA                    Generic
+ *     COMPRESSED_SRGB                 RGB                     Generic
+ *     COMPRESSED_SRGB_ALPHA           RGBA                    Generic
+ *     COMPRESSED_SLUMINANCE           LUMINANCE               Generic
+ *     COMPRESSED_SLUMINANCE_ALPHA     LUMINANCE_ALPHA         Generic
+ *     COMPRESSED_RED_RGTC1            RED                     Specific
+ *     COMPRESSED_SIGNED_RED_RGTC1     RED                     Specific
+ *     COMPRESSED_RG_RGTC2             RG                      Specific
+ *     COMPRESSED_SIGNED_RG_RGTC2      RG                      Specific"
+ *
+ * \return
+ * The base format of \c format if \c format is a compressed format (either
+ * generic or specific.  Otherwise 0 is returned.
+ */
+GLenum
+_mesa_gl_compressed_format_base_format(GLenum format)
+{
+   switch (format) {
+   case GL_COMPRESSED_RED:
+   case GL_COMPRESSED_RED_RGTC1:
+   case GL_COMPRESSED_SIGNED_RED_RGTC1:
+      return GL_RED;
+
+   case GL_COMPRESSED_RG:
+   case GL_COMPRESSED_RG_RGTC2:
+   case GL_COMPRESSED_SIGNED_RG_RGTC2:
+      return GL_RG;
+
+   case GL_COMPRESSED_RGB:
+   case GL_COMPRESSED_SRGB:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+      return GL_RGB;
+
+   case GL_COMPRESSED_RGBA:
+   case GL_COMPRESSED_SRGB_ALPHA:
+   case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB:
+   case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB:
+   case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB:
+   case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB:
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+      return GL_RGBA;
+
+   case GL_COMPRESSED_ALPHA:
+      return GL_ALPHA;
+
+   case GL_COMPRESSED_LUMINANCE:
+   case GL_COMPRESSED_SLUMINANCE:
+   case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
+   case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
+      return GL_LUMINANCE;
+
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+   case GL_COMPRESSED_SLUMINANCE_ALPHA:
+   case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
+   case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
+   case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
+      return GL_LUMINANCE_ALPHA;
+
+   case GL_COMPRESSED_INTENSITY:
+      return GL_INTENSITY;
+
+   default:
+      return 0;
+   }
+}
+
+/**
  * Return list of (and count of) all specific texture compression
  * formats that are supported.
  *
+ * Some formats are \b not returned by this function.  The
+ * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are
+ * "suitable for general-purpose usage."  All texture compression extensions
+ * have taken this to mean either linear RGB or linear RGBA.
+ *
+ * The GL_ARB_texture_compress_rgtc spec says:
+ *
+ *    "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats?
+ *
+ *        RESOLVED:  No.
+ *
+ *        The OpenGL 2.1 specification says "The only values returned
+ *        by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those
+ *        corresponding to formats suitable for general-purpose usage.
+ *        The renderer will not enumerate formats with restrictions that
+ *        need to be specifically understood prior to use."
+ *
+ *        Compressed textures with just red or red-green components are
+ *        not general-purpose so should not be returned by these queries
+ *        because they have restrictions.
+ *
+ *        Applications that seek to use the RGTC formats should do so
+ *        by looking for this extension's name in the string returned by
+ *        glGetString(GL_EXTENSIONS) rather than
+ *        what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS return."
+ *
+ * There is nearly identical wording in the GL_EXT_texture_compression_rgtc
+ * spec.
+ *
+ * The GL_EXT_texture_rRGB spec says:
+ *
+ *    "22) Should the new COMPRESSED_SRGB_* formats be listed in an
+ *        implementation's GL_COMPRESSED_TEXTURE_FORMATS list?
+ *
+ *        RESOLVED:  No.  Section 3.8.1 says formats listed by
+ *        GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose
+ *        usage."  The non-linear distribution of red, green, and
+ *        blue for these sRGB compressed formats makes them not really
+ *        general-purpose."
+ *
+ * The GL_EXT_texture_compression_latc spec says:
+ *
+ *    "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats?
+ *
+ *        RESOLVED:  No.
+ *
+ *        The OpenGL 2.1 specification says "The only values returned
+ *        by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those
+ *        corresponding to formats suitable for general-purpose usage.
+ *        The renderer will not enumerate formats with restrictions that
+ *        need to be specifically understood prior to use."
+ *
+ *        Historically, OpenGL implementation have advertised the RGB and
+ *        RGBA versions of the S3TC extensions compressed format tokens
+ *        through this mechanism.
+ *
+ *        The specification is not sufficiently clear about what "suitable
+ *        for general-purpose usage" means.  Historically that seems to mean
+ *        unsigned RGB or unsigned RGBA.  The DXT1 format supporting alpha
+ *        (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at
+ *        least for NVIDIA drivers) because the alpha is always 1.0 expect
+ *        when it is 0.0 when RGB is required to be black.  NVIDIA's even
+ *        limits itself to true linear RGB or RGBA formats, specifically
+ *        not including EXT_texture_sRGB's sRGB S3TC compressed formats.
+ *
+ *        Adding luminance and luminance-alpha texture formats (and
+ *        certainly signed versions of luminance and luminance-alpha
+ *        formats!) invites potential comptaibility problems with old
+ *        applications using this mechanism since old applications are
+ *        unlikely to expect non-RGB or non-RGBA formats to be advertised
+ *        through this mechanism.  However no specific misinteractions
+ *        with old applications is known.
+ *
+ *        Applications that seek to use the LATC formats should do so
+ *        by looking for this extension's name in the string returned by
+ *        glGetString(GL_EXTENSIONS) rather than
+ *        what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS return."
+ *
+ * There is no formal spec for GL_ATI_texture_compression_3dc.  Since the
+ * formats added by this extension are luminance-alpha formats, it is
+ * reasonable to expect them to follow the same rules as
+ * GL_EXT_texture_compression_latc.  At the very least, Catalyst 11.6 does not
+ * expose the 3dc formats through this mechanism.
+ *
  * \param ctx  the GL context
  * \param formats  the resulting format list (may be NULL).
- * \param all  if true return all formats, even those with  some kind
- *             of restrictions/limitations (See GL_ARB_texture_compression
- *             spec for more info).
  *
  * \return number of formats.
  */
 GLuint
-_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all)
+_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
 {
    GLuint n = 0;
    if (ctx->Extensions.TDFX_texture_compression_FXT1) {
@@ -64,24 +237,15 @@
          n += 2;
       }
    }
-   /* don't return RGTC - ARB_texture_compression_rgtc query 19 */
+
    if (ctx->Extensions.EXT_texture_compression_s3tc) {
       if (formats) {
          formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
-         /* This format has some restrictions/limitations and so should
-          * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query.
-          * Specifically, all transparent pixels become black.  NVIDIA
-          * omits this format too.
-          */
-         if (all)
-             formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
          formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
          formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
       }
       else {
          n += 3;
-         if (all)
-             n += 1;
       }
    }
    if (ctx->Extensions.S3_s3tc) {
@@ -95,19 +259,6 @@
          n += 4;
       }
    }
-#if FEATURE_EXT_texture_sRGB
-   if (ctx->Extensions.EXT_texture_sRGB) {
-      if (formats) {
-         formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;
-      }
-      else {
-         n += 4;
-      }
-   }
-#endif /* FEATURE_EXT_texture_sRGB */
    return n;
 
 #if FEATURE_ES1 || FEATURE_ES2
diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
index 19b08bb..375cf90 100644
--- a/src/mesa/main/texcompress.h
+++ b/src/mesa/main/texcompress.h
@@ -33,8 +33,11 @@
 
 #if _HAVE_FULL_GL
 
+extern GLenum
+_mesa_gl_compressed_format_base_format(GLenum format);
+
 extern GLuint
-_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all);
+_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats);
 
 extern gl_format
 _mesa_glenum_to_compressed_format(GLenum format);
diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h
index c8bf082..48bbd37 100644
--- a/src/mesa/main/texcompress_rgtc_tmp.h
+++ b/src/mesa/main/texcompress_rgtc_tmp.h
@@ -181,7 +181,7 @@
       fprintf(stderr, "%d ", alphaenc1[i]);
    }
    fprintf(stderr, "cutVals ");
-   for (i = 0; i < 8; i++) {
+   for (i = 0; i < 7; i++) {
       fprintf(stderr, "%d ", acutValues[i]);
    }
    fprintf(stderr, "srcVals ");
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 26c2ff9..b2ebb0d 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -441,8 +441,8 @@
        * texture data to the PBO if the PBO is in VRAM along with the texture.
        */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                               GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size,
+				    GL_MAP_WRITE_BIT, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage(map PBO failed)");
@@ -474,8 +474,7 @@
    }
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                              ctx->Pack.BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj);
    }
 }
 
@@ -500,8 +499,8 @@
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       /* pack texture image into a PBO */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                               GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size,
+				    GL_MAP_WRITE_BIT, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY,
@@ -531,8 +530,7 @@
    }
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                              ctx->Pack.BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj);
    }
 }
 
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 6f53686..a005d29 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1,6 +1,5 @@
 /*
- * mesa 3-D graphics library
- * Version:  7.6
+ * Mesa 3-D graphics library
  *
  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
@@ -556,8 +555,6 @@
  * \param target texture target.
  * \param level image level.
  * \param texImage texture image.
- * 
- * This was basically prompted by the introduction of cube maps.
  */
 void
 _mesa_set_tex_image(struct gl_texture_object *tObj,
@@ -574,6 +571,8 @@
 
    /* Set the 'back' pointer */
    texImage->TexObject = tObj;
+   texImage->Level = level;
+   texImage->Face = face;
 }
 
 
@@ -709,15 +708,13 @@
 
 /**
  * Get the texture object that corresponds to the target of the given
- * texture unit.
+ * texture unit.  The target should have already been checked for validity.
  *
  * \param ctx GL context.
  * \param texUnit texture unit.
  * \param target texture target.
  *
  * \return pointer to the texture object on success, or NULL on failure.
- * 
- * \sa gl_texture_unit.
  */
 struct gl_texture_object *
 _mesa_select_tex_object(struct gl_context *ctx,
@@ -2797,29 +2794,43 @@
 	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
       }
       else {
-         gl_format texFormat;
-
-         if (texImage->Data) {
-            ctx->Driver.FreeTexImageData( ctx, texImage );
-         }
-
-         ASSERT(texImage->Data == NULL);
-
-         texFormat = _mesa_choose_texture_format(ctx, texObj, target, level,
-                                                 internalFormat, GL_NONE,
-                                                 GL_NONE);
+         /* choose actual hw format */
+         gl_format texFormat = _mesa_choose_texture_format(ctx, texObj,
+                                                           target, level,
+                                                           internalFormat,
+                                                           GL_NONE, GL_NONE);
 
          if (legal_texture_size(ctx, texFormat, width, height, 1)) {
+            GLint srcX = x, srcY = y, dstX = 0, dstY = 0;
+
+            /* Free old texture image */
+            ctx->Driver.FreeTexImageData(ctx, texImage);
+
             _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1,
                                        border, internalFormat, texFormat);
 
-            ASSERT(ctx->Driver.CopyTexImage2D);
-            if (dims == 1)
-               ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat,
-                                          x, y, width, border);
-            else
-               ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat,
-                                          x, y, width, height, border);
+            /* Allocate texture memory (no pixel data yet) */
+            if (dims == 1) {
+               ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                                      width, border, GL_NONE, GL_NONE, NULL,
+                                      &ctx->Unpack, texObj, texImage);
+            }
+            else {
+               ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                                      width, height, border, GL_NONE, GL_NONE,
+                                      NULL, &ctx->Unpack, texObj, texImage);
+            }
+
+            if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY,
+                                           &width, &height)) {
+               if (dims == 1)
+                  ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX,
+                                                srcX, srcY, width);
+                                                
+               else
+                  ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY,
+                                                srcX, srcY, width, height);
+            }
 
             check_gen_mipmap(ctx, target, texObj, level);
 
@@ -2830,6 +2841,7 @@
             ctx->NewState |= _NEW_TEXTURE;
          }
          else {
+            /* probably too large of image */
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
          }
       }
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 3021716..078a43a 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -842,7 +842,7 @@
       struct gl_texture_object *texObj;
       GLuint name = first + i;
       GLenum target = 0;
-      texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target);
+      texObj = ctx->Driver.NewTextureObject(ctx, name, target);
       if (!texObj) {
          _glthread_UNLOCK_MUTEX(ctx->Shared->Mutex);
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures");
@@ -1066,7 +1066,7 @@
       }
       else {
          /* if this is a new texture id, allocate a texture object now */
-         newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target);
+         newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target);
          if (!newTexObj) {
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture");
             return;
@@ -1108,7 +1108,7 @@
 
    /* Pass BindTexture call to device driver */
    if (ctx->Driver.BindTexture)
-      (*ctx->Driver.BindTexture)( ctx, target, newTexObj );
+      ctx->Driver.BindTexture(ctx, target, newTexObj);
 }
 
 
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 4b9dcb5..bbbb306 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -888,7 +888,7 @@
    texObj = _mesa_select_tex_object(ctx, texUnit, target);
 
    img = _mesa_select_tex_image(ctx, texObj, target, level);
-   if (!img || !img->TexFormat) {
+   if (!img || img->TexFormat == MESA_FORMAT_NONE) {
       /* undefined texture image */
       if (pname == GL_TEXTURE_COMPONENTS)
          *params = 1;
@@ -915,9 +915,23 @@
             *params = _mesa_compressed_format_to_glenum(ctx, texFormat);
          }
          else {
-            /* return the user's requested internal format */
-            *params = img->InternalFormat;
-         }
+	    /* If the true internal format is not compressed but the user
+	     * requested a generic compressed format, we have to return the
+	     * generic base format that matches.
+	     *
+	     * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec:
+	     *
+	     *     "If no specific compressed format is available,
+	     *     internalformat is instead replaced by the corresponding base
+	     *     internal format."
+	     *
+	     * Otherwise just return the user's requested internal format
+	     */
+	    const GLenum f =
+	       _mesa_gl_compressed_format_base_format(img->InternalFormat);
+
+	    *params = (f != 0) ? f : img->InternalFormat;
+	 }
          break;
       case GL_TEXTURE_BORDER:
          *params = img->Border;
@@ -980,28 +994,21 @@
             *params = 0;
          break;
       case GL_TEXTURE_DEPTH_SIZE_ARB:
-         if (ctx->Extensions.ARB_depth_texture)
-            *params = _mesa_get_format_bits(texFormat, pname);
-         else
+         if (!ctx->Extensions.ARB_depth_texture)
             goto invalid_pname;
+         *params = _mesa_get_format_bits(texFormat, pname);
          break;
       case GL_TEXTURE_STENCIL_SIZE_EXT:
-         if (ctx->Extensions.EXT_packed_depth_stencil ||
-             ctx->Extensions.ARB_framebuffer_object) {
-            *params = _mesa_get_format_bits(texFormat, pname);
-         }
-         else {
+         if (!ctx->Extensions.EXT_packed_depth_stencil &&
+             !ctx->Extensions.ARB_framebuffer_object)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, pname);
          break;
       case GL_TEXTURE_SHARED_SIZE:
-         if (ctx->VersionMajor >= 3 ||
-             ctx->Extensions.EXT_texture_shared_exponent) {
-            *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
-         }
-         else {
+         if (ctx->VersionMajor < 3 &&
+             !ctx->Extensions.EXT_texture_shared_exponent)
             goto invalid_pname;
-         }
+         *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
          break;
 
       /* GL_ARB_texture_compression */
@@ -1022,67 +1029,46 @@
 
       /* GL_ARB_texture_float */
       case GL_TEXTURE_RED_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_GREEN_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_BLUE_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_ALPHA_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_LUMINANCE_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_INTENSITY_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_DEPTH_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
 
       default:
@@ -1104,7 +1090,6 @@
 _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
 {
    struct gl_texture_object *obj;
-   GLboolean error = GL_FALSE;
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
@@ -1130,17 +1115,15 @@
          *params = ENUM_TO_FLOAT(obj->Sampler.WrapR);
          break;
       case GL_TEXTURE_BORDER_COLOR:
-         if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
+         if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
             _mesa_update_state_locked(ctx);
-         if(ctx->Color._ClampFragmentColor)
-         {
+         if (ctx->Color._ClampFragmentColor) {
             params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F);
             params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F);
             params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F);
             params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F);
          }
-         else
-         {
+         else {
             params[0] = obj->Sampler.BorderColor.f[0];
             params[1] = obj->Sampler.BorderColor.f[1];
             params[2] = obj->Sampler.BorderColor.f[2];
@@ -1148,14 +1131,8 @@
          }
          break;
       case GL_TEXTURE_RESIDENT:
-         {
-            GLboolean resident;
-            if (ctx->Driver.IsTextureResident)
-               resident = ctx->Driver.IsTextureResident(ctx, obj);
-            else
-               resident = GL_TRUE;
-            *params = ENUM_TO_FLOAT(resident);
-         }
+         *params = ctx->Driver.IsTextureResident ?
+            ctx->Driver.IsTextureResident(ctx, obj) : 1.0F;
          break;
       case GL_TEXTURE_PRIORITY:
          *params = obj->Priority;
@@ -1173,49 +1150,37 @@
          *params = (GLfloat) obj->MaxLevel;
          break;
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-         if (ctx->Extensions.EXT_texture_filter_anisotropic) {
-            *params = obj->Sampler.MaxAnisotropy;
-         }
-	 else
-	    error = GL_TRUE;
+         if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+            goto invalid_pname;
+         *params = obj->Sampler.MaxAnisotropy;
          break;
       case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
-         if (ctx->Extensions.ARB_shadow_ambient) {
-            *params = obj->Sampler.CompareFailValue;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow_ambient)
+            goto invalid_pname;
+         *params = obj->Sampler.CompareFailValue;
          break;
       case GL_GENERATE_MIPMAP_SGIS:
 	 *params = (GLfloat) obj->GenerateMipmap;
          break;
       case GL_TEXTURE_COMPARE_MODE_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLfloat) obj->Sampler.CompareMode;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CompareMode;
          break;
       case GL_TEXTURE_COMPARE_FUNC_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLfloat) obj->Sampler.CompareFunc;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CompareFunc;
          break;
       case GL_DEPTH_TEXTURE_MODE_ARB:
-         if (ctx->Extensions.ARB_depth_texture) {
-            *params = (GLfloat) obj->Sampler.DepthMode;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_depth_texture)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.DepthMode;
          break;
       case GL_TEXTURE_LOD_BIAS:
-         if (ctx->Extensions.EXT_texture_lod_bias) {
-            *params = obj->Sampler.LodBias;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.EXT_texture_lod_bias)
+            goto invalid_pname;
+         *params = obj->Sampler.LodBias;
          break;
 #if FEATURE_OES_draw_texture
       case GL_TEXTURE_CROP_RECT_OES:
@@ -1230,45 +1195,40 @@
       case GL_TEXTURE_SWIZZLE_G_EXT:
       case GL_TEXTURE_SWIZZLE_B_EXT:
       case GL_TEXTURE_SWIZZLE_A_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
-            *params = (GLfloat) obj->Swizzle[comp];
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
          break;
 
       case GL_TEXTURE_SWIZZLE_RGBA_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
+         if (!ctx->Extensions.EXT_texture_swizzle) {
+            goto invalid_pname;
+         }
+         else {
             GLuint comp;
             for (comp = 0; comp < 4; comp++) {
                params[comp] = (GLfloat) obj->Swizzle[comp];
             }
          }
-         else {
-            error = GL_TRUE;
-         }
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-      if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
+         if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+            goto invalid_pname;
          *params = (GLfloat) obj->Sampler.CubeMapSeamless;
-      }
-      else {
-         error = GL_TRUE;
-      }
+         break;
 
       default:
-	 error = GL_TRUE;
-	 break;
+         goto invalid_pname;
    }
 
-   if (error)
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)",
-		  pname);
-
+   /* no error if we get here */
    _mesa_unlock_texture(ctx, obj);
+   return;
+
+invalid_pname:
+   _mesa_unlock_texture(ctx, obj);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname);
 }
 
 
@@ -1276,13 +1236,12 @@
 _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
 {
    struct gl_texture_object *obj;
-   GLboolean error = GL_FALSE;
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-    obj = get_texobj(ctx, target, GL_TRUE);
-    if (!obj)
-       return;
+   obj = get_texobj(ctx, target, GL_TRUE);
+   if (!obj)
+      return;
 
    _mesa_lock_texture(ctx, obj);
    switch (pname) {
@@ -1315,14 +1274,8 @@
          }
          break;;
       case GL_TEXTURE_RESIDENT:
-         {
-            GLboolean resident;
-            if (ctx->Driver.IsTextureResident)
-               resident = ctx->Driver.IsTextureResident(ctx, obj);
-            else
-               resident = GL_TRUE;
-            *params = (GLint) resident;
-         }
+         *params = ctx->Driver.IsTextureResident ?
+            ctx->Driver.IsTextureResident(ctx, obj) : 1;
          break;;
       case GL_TEXTURE_PRIORITY:
          *params = FLOAT_TO_INT(obj->Priority);
@@ -1340,55 +1293,37 @@
          *params = obj->MaxLevel;
          break;;
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-         if (ctx->Extensions.EXT_texture_filter_anisotropic) {
-            *params = (GLint) obj->Sampler.MaxAnisotropy;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.MaxAnisotropy;
          break;
       case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
-         if (ctx->Extensions.ARB_shadow_ambient) {
-            *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow_ambient)
+            goto invalid_pname;
+         *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
          break;
       case GL_GENERATE_MIPMAP_SGIS:
 	 *params = (GLint) obj->GenerateMipmap;
          break;
       case GL_TEXTURE_COMPARE_MODE_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLint) obj->Sampler.CompareMode;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CompareMode;
          break;
       case GL_TEXTURE_COMPARE_FUNC_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLint) obj->Sampler.CompareFunc;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CompareFunc;
          break;
       case GL_DEPTH_TEXTURE_MODE_ARB:
-         if (ctx->Extensions.ARB_depth_texture) {
-            *params = (GLint) obj->Sampler.DepthMode;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_depth_texture)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.DepthMode;
          break;
       case GL_TEXTURE_LOD_BIAS:
-         if (ctx->Extensions.EXT_texture_lod_bias) {
-            *params = (GLint) obj->Sampler.LodBias;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_lod_bias)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.LodBias;
          break;
 #if FEATURE_OES_draw_texture
       case GL_TEXTURE_CROP_RECT_OES:
@@ -1402,41 +1337,34 @@
       case GL_TEXTURE_SWIZZLE_G_EXT:
       case GL_TEXTURE_SWIZZLE_B_EXT:
       case GL_TEXTURE_SWIZZLE_A_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
-            *params = obj->Swizzle[comp];
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
          break;
 
       case GL_TEXTURE_SWIZZLE_RGBA_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            COPY_4V(params, obj->Swizzle);
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         COPY_4V(params, obj->Swizzle);
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-            *params = (GLint) obj->Sampler.CubeMapSeamless;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CubeMapSeamless;
+         break;
 
       default:
-         ; /* silence warnings */
+         goto invalid_pname;
    }
 
-   if (error)
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)",
-		  pname);
-
+   /* no error if we get here */
    _mesa_unlock_texture(ctx, obj);
+   return;
+
+invalid_pname:
+   _mesa_unlock_texture(ctx, obj);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname);
 }
 
 
@@ -1449,6 +1377,8 @@
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
    texObj = get_texobj(ctx, target, GL_TRUE);
+   if (!texObj)
+      return;
    
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:
@@ -1469,6 +1399,8 @@
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
    texObj = get_texobj(ctx, target, GL_TRUE);
+   if (!texObj)
+      return;
    
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 6e1e63b..c4aeaa8 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -4577,8 +4577,7 @@
 
 
 /**
- * This is the software fallback for Driver.TexImage1D()
- * and Driver.CopyTexImage1D().
+ * This is the software fallback for Driver.TexImage1D().
  * \sa _mesa_store_teximage2d()
  */
 void
@@ -4629,8 +4628,7 @@
 
 
 /**
- * This is the software fallback for Driver.TexImage2D()
- * and Driver.CopyTexImage2D().
+ * This is the software fallback for Driver.TexImage2D().
  *
  * This function is oriented toward storing images in main memory, rather
  * than VRAM.  Device driver's can easily plug in their own replacement.
@@ -4684,8 +4682,7 @@
 
 
 /**
- * This is the software fallback for Driver.TexImage3D()
- * and Driver.CopyTexImage3D().
+ * This is the software fallback for Driver.TexImage3D().
  * \sa _mesa_store_teximage2d()
  */
 void
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index dd069a3..cda840f 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -429,7 +429,7 @@
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = prog->Parameters->ParameterValues[base][j];
+                  params[k++] = prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -442,7 +442,7 @@
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
                   params[k++] = (GLdouble)
-                     prog->Parameters->ParameterValues[base][j];
+                     prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -454,8 +454,9 @@
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = (GLint)
-                     prog->Parameters->ParameterValues[base][j];
+                  params[k++] = ctx->Const.NativeIntegers ?
+                     prog->Parameters->ParameterValues[base][j].i :
+                     (GLint) prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -467,8 +468,9 @@
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = (GLuint)
-                     prog->Parameters->ParameterValues[base][j];
+                  params[k++] = ctx->Const.NativeIntegers ?
+                     prog->Parameters->ParameterValues[base][j].u :
+                     (GLuint) prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -670,7 +672,7 @@
       /* loop over number of samplers to change */
       for (i = 0; i < count; i++) {
          GLuint sampler = (GLuint)
-            program->Parameters->ParameterValues[index + offset + i][0];
+            program->Parameters->ParameterValues[index+offset + i][0].f;
          GLuint texUnit = ((GLuint *) values)[i];
 
          /* check that the sampler (tex unit index) is legal */
@@ -735,42 +737,52 @@
 
       /* loop over number of array elements */
       for (k = 0; k < count; k++) {
-         GLfloat *uniformVal;
+         gl_constant_value *uniformVal;
 
          if (offset + k >= slots) {
             /* Extra array data is ignored */
             break;
          }
 
-         /* uniformVal (the destination) is always float[4] */
+         /* uniformVal (the destination) is always gl_constant_value[4] */
          uniformVal = program->Parameters->ParameterValues[index + offset + k];
 
          if (basicType == GL_INT) {
-            /* convert user's ints to floats */
             const GLint *iValues = ((const GLint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = (GLfloat) iValues[i];
+               if (!ctx->Const.NativeIntegers)
+                  uniformVal[i].f = (GLfloat) iValues[i];
+               else
+                  uniformVal[i].i = iValues[i];
             }
          }
          else if (basicType == GL_UNSIGNED_INT) {
-            /* convert user's uints to floats */
             const GLuint *iValues = ((const GLuint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = (GLfloat) iValues[i];
+               if (!ctx->Const.NativeIntegers)
+                  uniformVal[i].f = (GLfloat)(GLuint) iValues[i];
+               else
+                  uniformVal[i].u = iValues[i];
             }
          }
          else {
             const GLfloat *fValues = ((const GLfloat *) values) + k * elems;
             assert(basicType == GL_FLOAT);
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = fValues[i];
+               uniformVal[i].f = fValues[i];
             }
          }
 
-         /* if the uniform is bool-valued, convert to 1.0 or 0.0 */
+         /* if the uniform is bool-valued, convert to 1 or 0 */
          if (isUniformBool) {
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f;
+               if (basicType == GL_FLOAT)
+                  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
+               else
+                  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
+               
+               if (!ctx->Const.NativeIntegers)
+                  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
             }
          }
       }
@@ -936,7 +948,7 @@
             /* Ignore writes beyond the end of (the used part of) an array */
             return;
          }
-         v = program->Parameters->ParameterValues[index + offset];
+         v = (GLfloat *) program->Parameters->ParameterValues[index + offset];
          for (row = 0; row < rows; row++) {
             if (transpose) {
                v[row] = values[src + row * cols + col];
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d8e5a3a..6820e4c 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -134,7 +134,7 @@
    this->index = reg.index;
    this->swizzle = SWIZZLE_XYZW;
    this->negate = 0;
-   this->reladdr = NULL;
+   this->reladdr = reg.reladdr;
 }
 
 dst_reg::dst_reg(src_reg reg)
@@ -297,11 +297,11 @@
    /**
     * Emit the correct dot-product instruction for the type of arguments
     */
-   void emit_dp(ir_instruction *ir,
-	        dst_reg dst,
-	        src_reg src0,
-	        src_reg src1,
-	        unsigned elements);
+   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
+				    dst_reg dst,
+				    src_reg src0,
+				    src_reg src1,
+				    unsigned elements);
 
    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
 		    dst_reg dst, src_reg src0);
@@ -312,9 +312,11 @@
    void emit_scs(ir_instruction *ir, enum prog_opcode op,
 		 dst_reg dst, const src_reg &src);
 
-   GLboolean try_emit_mad(ir_expression *ir,
+   bool try_emit_mad(ir_expression *ir,
 			  int mul_operand);
-   GLboolean try_emit_sat(ir_expression *ir);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+				 int mul_operand);
+   bool try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -331,20 +333,6 @@
 
 dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
 
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
-
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...)
-{
-   va_list args;
-   va_start(args, fmt);
-   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
-   va_end(args);
-
-   prog->LinkStatus = GL_FALSE;
-}
-
 static int
 swizzle_for_size(int size)
 {
@@ -422,7 +410,7 @@
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
 
-void
+ir_to_mesa_instruction *
 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
 			    dst_reg dst, src_reg src0, src_reg src1,
 			    unsigned elements)
@@ -431,7 +419,7 @@
       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
    };
 
-   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**
@@ -593,13 +581,13 @@
    }
 }
 
-struct src_reg
+src_reg
 ir_to_mesa_visitor::src_reg_for_float(float val)
 {
    src_reg src(PROGRAM_CONSTANT, -1, NULL);
 
    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-					  &val, 1, &src.swizzle);
+					  (const gl_constant_value *)&val, 1, &src.swizzle);
 
    return src;
 }
@@ -655,8 +643,6 @@
 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 {
    src_reg src;
-   int swizzle[4];
-   int i;
 
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
@@ -666,12 +652,7 @@
    if (type->is_array() || type->is_record()) {
       src.swizzle = SWIZZLE_NOOP;
    } else {
-      for (i = 0; i < type->vector_elements; i++)
-	 swizzle[i] = i;
-      for (; i < 4; i++)
-	 swizzle[i] = type->vector_elements - 1;
-      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
-				  swizzle[2], swizzle[3]);
+      src.swizzle = swizzle_for_size(type->vector_elements);
    }
    src.negate = 0;
 
@@ -744,7 +725,7 @@
 	 }
       }
 
-      struct variable_storage *storage;
+      variable_storage *storage;
       dst_reg dst;
       if (i == ir->num_state_slots) {
 	 /* We'll set the index later. */
@@ -789,10 +770,11 @@
 
       if (storage->file == PROGRAM_TEMPORARY &&
 	  dst.index != storage->index + (int) ir->num_state_slots) {
-	 fail_link(this->shader_program,
-		   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
-		   ir->name, dst.index - storage->index,
-		   type_size(ir->type));
+	 linker_error(this->shader_program,
+		      "failed to load builtin uniform `%s' "
+		      "(%d/%d regs loaded)\n",
+		      ir->name, dst.index - storage->index,
+		      type_size(ir->type));
       }
    }
 }
@@ -889,7 +871,7 @@
    }
 }
 
-GLboolean
+bool
 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 {
    int nonmul_operand = 1 - mul_operand;
@@ -912,7 +894,47 @@
    return true;
 }
 
-GLboolean
+/**
+ * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
+bool
 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
 {
    /* Saturates were only introduced to vertex programs in
@@ -928,10 +950,30 @@
    sat_src->accept(this);
    src_reg src = this->result;
 
-   this->result = get_temp(ir->type);
-   ir_to_mesa_instruction *inst;
-   inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
-   inst->saturate = true;
+   /* If we generated an expression instruction into a temporary in
+    * processing the saturate's operand, apply the saturate to that
+    * instruction.  Otherwise, generate a MOV to do the saturate.
+    *
+    * Note that we have to be careful to only do this optimization if
+    * the instruction in question was what generated src->result.  For
+    * example, ir_dereference_array might generate a MUL instruction
+    * to create the reladdr, and return us a src reg using that
+    * reladdr.  That MUL result is not the value we're trying to
+    * saturate.
+    */
+   ir_expression *sat_src_expr = sat_src->as_expression();
+   ir_to_mesa_instruction *new_inst;
+   new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
+   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+			sat_src_expr->operation == ir_binop_add ||
+			sat_src_expr->operation == ir_binop_dot)) {
+      new_inst->saturate = true;
+   } else {
+      this->result = get_temp(ir->type);
+      ir_to_mesa_instruction *inst;
+      inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
+      inst->saturate = true;
+   }
 
    return true;
 }
@@ -1088,6 +1130,16 @@
       if (try_emit_mad(ir, 0))
 	 return;
    }
+
+   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+	 return;
+      if (try_emit_mad_for_and_not(ir, 0))
+	 return;
+   }
+
    if (try_emit_sat(ir))
       return;
 
@@ -1135,7 +1187,13 @@
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0));
+      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+       * older GPUs implement SEQ using multiple instructions (i915 uses two
+       * SGE instructions and a MUL instruction).  Since our logic values are
+       * 0.0 and 1.0, 1-x also implements !x.
+       */
+      op[0].negate = ~op[0].negate;
+      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
       break;
    case ir_unop_neg:
       op[0].negate = ~op[0].negate;
@@ -1231,8 +1289,19 @@
 	  ir->operands[1]->type->is_vector()) {
 	 src_reg temp = get_temp(glsl_type::vec4_type);
 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
+
+	 /* After the dot-product, the value will be an integer on the
+	  * range [0,4].  Zero becomes 1.0, and positive values become zero.
+	  */
 	 emit_dp(ir, result_dst, temp, temp, vector_elements);
-	 emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
+
+	 /* Negating the result of the dot-product gives values on the range
+	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
+	  * achieved using SGE.
+	  */
+	 src_reg sge_src = result_src;
+	 sge_src.negate = ~sge_src.negate;
+	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
       } else {
 	 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
       }
@@ -1243,29 +1312,83 @@
 	  ir->operands[1]->type->is_vector()) {
 	 src_reg temp = get_temp(glsl_type::vec4_type);
 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
-	 emit_dp(ir, result_dst, temp, temp, vector_elements);
-	 emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+	 /* After the dot-product, the value will be an integer on the
+	  * range [0,4].  Zero stays zero, and positive values become 1.0.
+	  */
+	 ir_to_mesa_instruction *const dp =
+	    emit_dp(ir, result_dst, temp, temp, vector_elements);
+	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	    /* The clamping to [0,1] can be done for free in the fragment
+	     * shader with a saturate.
+	     */
+	    dp->saturate = true;
+	 } else {
+	    /* Negating the result of the dot-product gives values on the range
+	     * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	     * achieved using SLT.
+	     */
+	    src_reg slt_src = result_src;
+	    slt_src.negate = ~slt_src.negate;
+	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+	 }
       } else {
 	 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
       }
       break;
 
-   case ir_unop_any:
+   case ir_unop_any: {
       assert(ir->operands[0]->type->is_vector());
-      emit_dp(ir, result_dst, op[0], op[0],
-	      ir->operands[0]->type->vector_elements);
-      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      ir_to_mesa_instruction *const dp =
+	 emit_dp(ir, result_dst, op[0], op[0],
+		 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	 /* The clamping to [0,1] can be done for free in the fragment
+	  * shader with a saturate.
+	  */
+	 dp->saturate = true;
+      } else {
+	 /* Negating the result of the dot-product gives values on the range
+	  * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	  * is achieved using SLT.
+	  */
+	 src_reg slt_src = result_src;
+	 slt_src.negate = ~slt_src.negate;
+	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+      }
       break;
+   }
 
    case ir_binop_logic_xor:
       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
-   case ir_binop_logic_or:
-      /* This could be a saturated add and skip the SNE. */
-      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer on the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      ir_to_mesa_instruction *add =
+	 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	 /* The clamping to [0,1] can be done for free in the fragment
+	  * shader with a saturate.
+	  */
+	 add->saturate = true;
+      } else {
+	 /* Negating the result of the addition gives values on the range
+	  * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+	  * is achieved using SLT.
+	  */
+	 src_reg slt_src = result_src;
+	 slt_src.negate = ~slt_src.negate;
+	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+      }
       break;
+   }
 
    case ir_binop_logic_and:
       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
@@ -1496,6 +1619,18 @@
 	      this->result, src_reg_for_float(element_size));
       }
 
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (src.reladdr != NULL)  {
+	 src_reg accum_reg = get_temp(glsl_type::float_type);
+
+	 emit(ir, OPCODE_ADD, dst_reg(accum_reg),
+	      index_reg, *src.reladdr);
+
+	 index_reg = accum_reg;
+      }
+
       src.reladdr = ralloc(mem_ctx, src_reg);
       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
    }
@@ -1796,7 +1931,7 @@
 
 	 src = src_reg(PROGRAM_CONSTANT, -1, NULL);
 	 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-						values,
+						(gl_constant_value *) values,
 						ir->type->vector_elements,
 						&src.swizzle);
 	 emit(ir, OPCODE_MOV, mat_column, src);
@@ -1834,7 +1969,7 @@
 
    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-						   values,
+						   (gl_constant_value *) values,
 						   ir->type->vector_elements,
 						   &this->result.swizzle);
 }
@@ -1969,7 +2104,10 @@
    ir_to_mesa_instruction *inst = NULL;
    prog_opcode opcode = OPCODE_NOP;
 
-   ir->coordinate->accept(this);
+   if (ir->op == ir_txs)
+      this->result = src_reg_for_float(0.0);
+   else
+      ir->coordinate->accept(this);
 
    /* Put our coords in a temp.  We'll need to modify them for shadow,
     * projection, or LOD, so the only case we'd use it as is is if
@@ -1993,6 +2131,7 @@
 
    switch (ir->op) {
    case ir_tex:
+   case ir_txs:
       opcode = OPCODE_TEX;
       break;
    case ir_txb:
@@ -2401,29 +2540,32 @@
    case GL_VERTEX_PROGRAM_ARB:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxVertexTextureImageUnits) {
-         fail_link(shader_program, "Too many vertex shader texture samplers");
+         linker_error(shader_program,
+		      "Too many vertex shader texture samplers");
       }
       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many vertex shader constants");
+         linker_error(shader_program, "Too many vertex shader constants");
       }
       break;
    case MESA_GEOMETRY_PROGRAM:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxGeometryTextureImageUnits) {
-         fail_link(shader_program, "Too many geometry shader texture samplers");
+         linker_error(shader_program,
+		      "Too many geometry shader texture samplers");
       }
       if (prog->Parameters->NumParameters >
           MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
-         fail_link(shader_program, "Too many geometry shader constants");
+         linker_error(shader_program, "Too many geometry shader constants");
       }
       break;
    case GL_FRAGMENT_PROGRAM_ARB:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxTextureImageUnits) {
-         fail_link(shader_program, "Too many fragment shader texture samplers");
+         linker_error(shader_program,
+		      "Too many fragment shader texture samplers");
       }
       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many fragment shader constants");
+         linker_error(shader_program, "Too many fragment shader constants");
       }
       break;
    default:
@@ -2531,16 +2673,17 @@
 	  */
 	 if (file == PROGRAM_SAMPLER) {
 	    for (unsigned int j = 0; j < size / 4; j++)
-	       prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+	       prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
 	 }
 
 	 /* The location chosen in the Parameters list here (returned
 	  * from _mesa_add_uniform) has to match what the linker chose.
 	  */
 	 if (index != parameter_index) {
-	    fail_link(shader_program, "Allocation of uniform `%s' to target "
-		      "failed (%d vs %d)\n",
-		      uniform->Name, index, parameter_index);
+	    linker_error(shader_program,
+			 "Allocation of uniform `%s' to target failed "
+			 "(%d vs %d)\n",
+			 uniform->Name, index, parameter_index);
 	 }
       }
    }
@@ -2573,8 +2716,8 @@
    int loc = _mesa_get_uniform_location(ctx, shader_program, name);
 
    if (loc == -1) {
-      fail_link(shader_program,
-		"Couldn't find uniform for initializer %s\n", name);
+      linker_error(shader_program,
+		   "Couldn't find uniform for initializer %s\n", name);
       return;
    }
 
@@ -2974,11 +3117,31 @@
          if (mesa_inst->SrcReg[src].RelAddr)
             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
 
-      if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
-	 fail_link(shader_program, "Couldn't flatten if statement\n");
-      }
-
       switch (mesa_inst->Opcode) {
+      case OPCODE_IF:
+	 if (options->EmitNoIfs) {
+	    linker_warning(shader_program,
+			   "Couldn't flatten if-statement.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
+      case OPCODE_BGNLOOP:
+	 if (options->EmitNoLoops) {
+	    linker_warning(shader_program,
+			   "Couldn't unroll loop.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
+      case OPCODE_CONT:
+	 if (options->EmitNoCont) {
+	    linker_warning(shader_program,
+			   "Couldn't lower continue-statement.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
       case OPCODE_BGNSUB:
 	 inst->function->inst = i;
 	 mesa_inst->Comment = strdup(inst->function->sig->function_name());
@@ -3246,7 +3409,7 @@
 
    for (i = 0; i < prog->NumShaders; i++) {
       if (!prog->Shaders[i]->CompileStatus) {
-	 fail_link(prog, "linking with uncompiled shader");
+	 linker_error(prog, "linking with uncompiled shader");
 	 prog->LinkStatus = GL_FALSE;
       }
    }
diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c
index 8516b5f..ce72c61 100644
--- a/src/mesa/program/nvfragparse.c
+++ b/src/mesa/program/nvfragparse.c
@@ -472,8 +472,9 @@
       const GLfloat *constant;
       if (!Parse_Identifier(parseState, ident))
          RETURN_ERROR1("Expected an identifier");
-      constant = _mesa_lookup_parameter_value(parseState->parameters,
-                                              -1, (const char *) ident);
+      constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters,
+                                                         -1, 
+                                                         (const char *) ident);
       /* XXX Check that it's a constant and not a parameter */
       if (!constant) {
          RETURN_ERROR1("Undefined symbol");
@@ -1039,7 +1040,8 @@
       if (!Parse_ScalarConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;
    }
@@ -1051,7 +1053,8 @@
       if (!Parse_VectorConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;      
    }
@@ -1145,7 +1148,8 @@
       if (!Parse_VectorConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;      
    }
@@ -1170,7 +1174,8 @@
       if (!Parse_ScalarConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->Index = paramIndex;      
       srcReg->File = PROGRAM_NAMED_PARAM;
       needSuffix = GL_FALSE;
@@ -1296,7 +1301,8 @@
             RETURN_ERROR2(id, "already defined");
          }
          _mesa_add_named_parameter(parseState->parameters,
-                                   (const char *) id, value);
+                                   (const char *) id,
+                                   (gl_constant_value *) value);
       }
       else if (Parse_String(parseState, "DECLARE")) {
          GLubyte id[100];
@@ -1315,7 +1321,8 @@
             RETURN_ERROR2(id, "already declared");
          }
          _mesa_add_named_parameter(parseState->parameters,
-                                   (const char *) id, value);
+                                   (const char *) id,
+                                   (gl_constant_value *) value);
       }
       else if (Parse_String(parseState, "END")) {
          inst->Opcode = OPCODE_END;
diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index e7553c6..77f842a 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -157,7 +157,7 @@
    case PROGRAM_NAMED_PARAM:
       if (reg >= (GLint) prog->Parameters->NumParameters)
          return ZeroVec;
-      return prog->Parameters->ParameterValues[reg];
+      return (GLfloat *) prog->Parameters->ParameterValues[reg];
 
    case PROGRAM_SYSTEM_VALUE:
       assert(reg < Elements(machine->SystemValues));
@@ -639,7 +639,7 @@
                       struct gl_program_machine *machine)
 {
    const GLuint numInst = program->NumInstructions;
-   const GLuint maxExec = 10000;
+   const GLuint maxExec = 65536;
    GLuint pc, numExec = 0;
 
    machine->CurProgram = program;
@@ -1651,6 +1651,14 @@
             GLfloat texcoord[4], color[4];
             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
 
+            /* For TEX, texcoord.Q should not be used and its value should not
+             * matter (at most, we pass coord.xyz to texture3D() in GLSL).
+             * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
+             * which is effectively what happens when the texcoord swizzle
+             * is .xyzz
+             */
+            texcoord[3] = 1.0f;
+
             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
 
             if (DEBUG_PROG) {
diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c
new file mode 100644
index 0000000..e2418b5
--- /dev/null
+++ b/src/mesa/program/prog_opt_constant_fold.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_parameter.h"
+#include <stdbool.h>
+
+static bool
+src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
+{
+   unsigned i;
+
+   for (i = 0; i < num_srcs; i++) {
+      if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
+	 return false;
+   }
+
+   return true;
+}
+
+static struct prog_src_register
+src_reg_for_float(struct gl_program *prog, float val)
+{
+   struct prog_src_register src;
+   unsigned swiz;
+
+   memset(&src, 0, sizeof(src));
+
+   src.File = PROGRAM_CONSTANT;
+   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+					  (gl_constant_value *) &val, 1, &swiz);
+   src.Swizzle = swiz;
+   return src;
+}
+
+static struct prog_src_register
+src_reg_for_vec4(struct gl_program *prog, const float *val)
+{
+   struct prog_src_register src;
+   unsigned swiz;
+
+   memset(&src, 0, sizeof(src));
+
+   src.File = PROGRAM_CONSTANT;
+   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+					  (gl_constant_value *) val, 4, &swiz);
+   src.Swizzle = swiz;
+   return src;
+}
+
+static bool
+src_regs_are_same(const struct prog_src_register *a,
+		  const struct prog_src_register *b)
+{
+   return (a->File == b->File)
+      && (a->Index == b->Index)
+      && (a->Swizzle == b->Swizzle)
+      && (a->Abs == b->Abs)
+      && (a->Negate == b->Negate)
+      && (a->RelAddr == 0)
+      && (b->RelAddr == 0);
+}
+
+static void
+get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
+{
+   const gl_constant_value *const value =
+      prog->Parameters->ParameterValues[r->Index];
+
+   data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
+   data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
+   data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
+   data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
+
+   if (r->Abs) {
+      data[0] = fabsf(data[0]);
+      data[1] = fabsf(data[1]);
+      data[2] = fabsf(data[2]);
+      data[3] = fabsf(data[3]);
+   }
+
+   if (r->Negate & 0x01) {
+      data[0] = -data[0];
+   }
+
+   if (r->Negate & 0x02) {
+      data[1] = -data[1];
+   }
+
+   if (r->Negate & 0x04) {
+      data[2] = -data[2];
+   }
+
+   if (r->Negate & 0x08) {
+      data[3] = -data[3];
+   }
+}
+
+/**
+ * Try to replace instructions that produce a constant result with simple moves
+ *
+ * The hope is that a following copy propagation pass will eliminate the
+ * unnecessary move instructions.
+ */
+GLboolean
+_mesa_constant_fold(struct gl_program *prog)
+{
+   bool progress = false;
+   unsigned i;
+
+   for (i = 0; i < prog->NumInstructions; i++) {
+      struct prog_instruction *const inst = &prog->Instructions[i];
+
+      switch (inst->Opcode) {
+      case OPCODE_ADD:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = a[0] + b[0];
+	    result[1] = a[1] + b[1];
+	    result[2] = a[2] + b[2];
+	    result[3] = a[3] + b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_CMP:
+	 /* FINISHME: We could also optimize CMP instructions where the first
+	  * FINISHME: source is a constant that is either all < 0.0 or all
+	  * FINISHME: >= 0.0.
+	  */
+	 if (src_regs_are_constant(inst, 3)) {
+	    float a[4];
+	    float b[4];
+	    float c[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+	    get_value(prog, &inst->SrcReg[2], c);
+
+            result[0] = a[0] < 0.0f ? b[0] : c[0];
+            result[1] = a[1] < 0.0f ? b[1] : c[1];
+            result[2] = a[2] < 0.0f ? b[2] : c[2];
+            result[3] = a[3] < 0.0f ? b[3] : c[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_DP2:
+      case OPCODE_DP3:
+      case OPCODE_DP4:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result;
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    /* It seems like a loop could be used here, but we cleverly put
+	     * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from
+	     * the opcode results in various failures of the loop control.
+	     */
+	    result = (a[0] * b[0]) + (a[1] * b[1]);
+
+	    if (inst->Opcode >= OPCODE_DP3)
+	       result += a[2] * b[2];
+
+	    if (inst->Opcode == OPCODE_DP4)
+	       result += a[3] * b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_MUL:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = a[0] * b[0];
+	    result[1] = a[1] * b[1];
+	    result[2] = a[2] * b[2];
+	    result[3] = a[3] * b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SEQ:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SGE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SGT:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SLE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SLT:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SNE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      default:
+	 break;
+      }
+   }
+
+   return progress;
+}
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 8a40fa6..25d9684 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -472,8 +472,7 @@
       mov->SrcReg[0].HasIndex2 == 0 &&
       mov->SrcReg[0].RelAddr2 == 0 &&
       mov->DstReg.RelAddr == 0 &&
-      mov->DstReg.CondMask == COND_TR &&
-      mov->SaturateMode == SATURATE_OFF;
+      mov->DstReg.CondMask == COND_TR;
 }
 
 
@@ -482,7 +481,8 @@
 {
    return
       can_downward_mov_be_modifed(mov) &&
-      mov->DstReg.File == PROGRAM_TEMPORARY;
+      mov->DstReg.File == PROGRAM_TEMPORARY &&
+      mov->SaturateMode == SATURATE_OFF;
 }
 
 
@@ -657,6 +657,8 @@
    if (mask != (inst->DstReg.WriteMask & mask))
       return GL_FALSE;
 
+   inst->SaturateMode |= mov->SaturateMode;
+
    /* Depending on the instruction, we may need to recompute the swizzles.
     * Also, some other instructions (like TEX) are not linear. We will only
     * consider completely active sources and destinations
@@ -1319,6 +1321,15 @@
 
          inst->Opcode = OPCODE_MOV;
          inst->SrcReg[0] = inst->SrcReg[1];
+
+	 /* Unused operands are expected to have the file set to
+	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
+	  * all of the sources.
+	  */
+	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
       }
    }
    if (dbg) {
@@ -1347,6 +1358,8 @@
          any_change = GL_TRUE;
       if (_mesa_remove_dead_code_local(program))
          any_change = GL_TRUE;
+
+      any_change = _mesa_constant_fold(program) || any_change;
       _mesa_reallocate_registers(program);
    } while (any_change);
 }
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 463f5fc..9854fb7 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -44,4 +44,7 @@
 extern void
 _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
 
+extern GLboolean
+_mesa_constant_fold(struct gl_program *prog);
+
 #endif
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index 3570cab..49b3ffb 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -56,8 +56,8 @@
       p->Parameters = (struct gl_program_parameter *)
 	 calloc(1, size * sizeof(struct gl_program_parameter));
 
-      p->ParameterValues = (GLfloat (*)[4])
-         _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16);
+      p->ParameterValues = (gl_constant_value (*)[4])
+         _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16);
 
 
       if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
@@ -101,14 +101,15 @@
  * \param name  the parameter name, will be duplicated/copied!
  * \param size  number of elements in 'values' vector (1..4, or more)
  * \param datatype  GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE.
- * \param values  initial parameter value, up to 4 GLfloats, or NULL
+ * \param values  initial parameter value, up to 4 gl_constant_values, or NULL
  * \param state  state indexes, or NULL
  * \return  index of new parameter in the list, or -1 if error (out of mem)
  */
 GLint
 _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     gl_register_file type, const char *name,
-                    GLuint size, GLenum datatype, const GLfloat *values,
+                    GLuint size, GLenum datatype,
+                    const gl_constant_value *values,
                     const gl_state_index state[STATE_LENGTH],
                     GLbitfield flags)
 {
@@ -127,10 +128,10 @@
 		       oldNum * sizeof(struct gl_program_parameter),
 		       paramList->Size * sizeof(struct gl_program_parameter));
 
-      paramList->ParameterValues = (GLfloat (*)[4])
+      paramList->ParameterValues = (gl_constant_value (*)[4])
          _mesa_align_realloc(paramList->ParameterValues,         /* old buf */
-                             oldNum * 4 * sizeof(GLfloat),      /* old size */
-                             paramList->Size * 4 *sizeof(GLfloat), /* new sz */
+                             oldNum * 4 * sizeof(gl_constant_value),/* old sz */
+                             paramList->Size*4*sizeof(gl_constant_value),/*new*/
                              16);
    }
 
@@ -142,7 +143,7 @@
       return -1;
    }
    else {
-      GLuint i;
+      GLuint i, j;
 
       paramList->NumParameters = oldNum + sz4;
 
@@ -163,7 +164,8 @@
          }
          else {
             /* silence valgrind */
-            ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0);
+            for (j = 0; j < 4; j++)
+            	paramList->ParameterValues[oldNum + i][j].f = 0;
          }
          size -= 4;
       }
@@ -184,7 +186,7 @@
  */
 GLint
 _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
-                          const char *name, const GLfloat values[4])
+                          const char *name, const gl_constant_value values[4])
 {
    return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name,
                               4, GL_NONE, values, NULL, 0x0);
@@ -204,17 +206,17 @@
  */
 GLint
 _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const GLfloat values[4],
+                         const char *name, const gl_constant_value values[4],
                          GLuint size)
 {
    /* first check if this is a duplicate constant */
    GLint pos;
    for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
-      const GLfloat *pvals = paramList->ParameterValues[pos];
-      if (pvals[0] == values[0] &&
-          pvals[1] == values[1] &&
-          pvals[2] == values[2] &&
-          pvals[3] == values[3] &&
+      const gl_constant_value *pvals = paramList->ParameterValues[pos];
+      if (pvals[0].u == values[0].u &&
+          pvals[1].u == values[1].u &&
+          pvals[2].u == values[2].u &&
+          pvals[3].u == values[3].u &&
           strcmp(paramList->Parameters[pos].Name, name) == 0) {
          /* Same name and value is already in the param list - reuse it */
          return pos;
@@ -239,9 +241,9 @@
  * \return index/position of the new parameter in the parameter list.
  */
 GLint
-_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
-                           const GLfloat values[4], GLuint size,
-                           GLuint *swizzleOut)
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLenum datatype, GLuint *swizzleOut)
 {
    GLint pos;
    ASSERT(size >= 1);
@@ -262,7 +264,7 @@
          struct gl_program_parameter *p = paramList->Parameters + pos;
          if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) {
             /* ok, found room */
-            GLfloat *pVal = paramList->ParameterValues[pos];
+            gl_constant_value *pVal = paramList->ParameterValues[pos];
             GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */
             pVal[p->Size] = values[0];
             p->Size++;
@@ -274,7 +276,7 @@
 
    /* add a new parameter to store this constant */
    pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL,
-                             size, GL_NONE, values, NULL, 0x0);
+                             size, datatype, values, NULL, 0x0);
    if (pos >= 0 && swizzleOut) {
       if (size == 1)
          *swizzleOut = SWIZZLE_XXXX;
@@ -285,6 +287,28 @@
 }
 
 /**
+ * Add a new unnamed constant to the parameter list.  This will be used
+ * when a fragment/vertex program contains something like this:
+ *    MOV r, { 0, 1, 2, 3 };
+ * If swizzleOut is non-null we'll search the parameter list for an
+ * existing instance of the constant which matches with a swizzle.
+ *
+ * \param paramList  the parameter list
+ * \param values  four float values
+ * \param swizzleOut  returns swizzle mask for accessing the constant
+ * \return index/position of the new parameter in the parameter list.
+ * \sa _mesa_add_typed_unnamed_constant
+ */
+GLint
+_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLuint *swizzleOut)
+{
+   return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE,
+                                           swizzleOut);
+}
+
+/**
  * Add parameter representing a varying variable.
  */
 GLint
@@ -401,7 +425,7 @@
  * Lookup a parameter value by name in the given parameter list.
  * \return pointer to the float[4] values.
  */
-GLfloat *
+gl_constant_value *
 _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
                              GLsizei nameLen, const char *name)
 {
@@ -465,7 +489,7 @@
  */
 GLboolean
 _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
-                                const GLfloat v[], GLuint vSize,
+                                const gl_constant_value v[], GLuint vSize,
                                 GLint *posOut, GLuint *swizzleOut)
 {
    GLuint i;
@@ -484,7 +508,7 @@
             /* swizzle not allowed */
             GLuint j, match = 0;
             for (j = 0; j < vSize; j++) {
-               if (v[j] == list->ParameterValues[i][j])
+               if (v[j].u == list->ParameterValues[i][j].u)
                   match++;
             }
             if (match == vSize) {
@@ -498,7 +522,7 @@
                 /* look for v[0] anywhere within float[4] value */
                 GLuint j;
                 for (j = 0; j < list->Parameters[i].Size; j++) {
-                   if (list->ParameterValues[i][j] == v[0]) {
+                   if (list->ParameterValues[i][j].u == v[0].u) {
                       /* found it */
                       *posOut = i;
                       *swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
@@ -511,13 +535,13 @@
                 GLuint swz[4];
                 GLuint match = 0, j, k;
                 for (j = 0; j < vSize; j++) {
-                   if (v[j] == list->ParameterValues[i][j]) {
+                   if (v[j].u == list->ParameterValues[i][j].u) {
                       swz[j] = j;
                       match++;
                    }
                    else {
                       for (k = 0; k < list->Parameters[i].Size; k++) {
-                         if (v[j] == list->ParameterValues[i][k]) {
+                         if (v[j].u == list->ParameterValues[i][k].u) {
                             swz[j] = k;
                             match++;
                             break;
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index 10cbbe5..1a5ed34 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -47,6 +47,17 @@
 /*@}*/
 
 
+/**
+ * Actual data for constant values of parameters.
+ */
+typedef union gl_constant_value
+{
+   GLfloat f;
+   GLboolean b;
+   GLint i;
+   GLuint u;
+} gl_constant_value;
+
 
 /**
  * Program parameter.
@@ -81,7 +92,7 @@
    GLuint Size;           /**< allocated size of Parameters, ParameterValues */
    GLuint NumParameters;  /**< number of parameters in arrays */
    struct gl_program_parameter *Parameters; /**< Array [Size] */
-   GLfloat (*ParameterValues)[4];        /**< Array [Size] of GLfloat[4] */
+   gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */
    GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes
                                might invalidate ParameterValues[] */
 };
@@ -112,22 +123,28 @@
 extern GLint
 _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     gl_register_file type, const char *name,
-                    GLuint size, GLenum datatype, const GLfloat *values,
+                    GLuint size, GLenum datatype,
+                    const gl_constant_value *values,
                     const gl_state_index state[STATE_LENGTH],
                     GLbitfield flags);
 
 extern GLint
 _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
-                          const char *name, const GLfloat values[4]);
+                          const char *name, const gl_constant_value values[4]);
 
 extern GLint
 _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const GLfloat values[4],
+                         const char *name, const gl_constant_value values[4],
                          GLuint size);
 
 extern GLint
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLenum datatype, GLuint *swizzleOut);
+
+extern GLint
 _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
-                           const GLfloat values[4], GLuint size,
+                           const gl_constant_value values[4], GLuint size,
                            GLuint *swizzleOut);
 
 extern GLint
@@ -143,7 +160,7 @@
 _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
                           const gl_state_index stateTokens[STATE_LENGTH]);
 
-extern GLfloat *
+extern gl_constant_value *
 _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
                              GLsizei nameLen, const char *name);
 
@@ -153,7 +170,7 @@
 
 extern GLboolean
 _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
-                                const GLfloat v[], GLuint vSize,
+                                const gl_constant_value v[], GLuint vSize,
                                 GLint *posOut, GLuint *swizzleOut);
 
 extern GLuint
diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c
index 90a9771..28fca3b 100644
--- a/src/mesa/program/prog_parameter_layout.c
+++ b/src/mesa/program/prog_parameter_layout.c
@@ -182,7 +182,7 @@
 
 	 switch (p->Type) {
 	 case PROGRAM_CONSTANT: {
-	    const float *const v =
+	    const gl_constant_value *const v =
 	       state->prog->Parameters->ParameterValues[idx];
 
 	    inst->Base.SrcReg[i].Index =
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 7c3b490..70412b1 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -985,7 +985,7 @@
    fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags);
    for (i = 0; i < list->NumParameters; i++){
       struct gl_program_parameter *param = list->Parameters + i;
-      const GLfloat *v = list->ParameterValues[i];
+      const GLfloat *v = (GLfloat *) list->ParameterValues[i];
       fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}",
 	      i, param->Size,
 	      _mesa_register_file_name(list->Parameters[i].Type),
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 16f9690..6aa2409 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -1111,7 +1111,7 @@
       if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
          _mesa_fetch_state(ctx,
 			   paramList->Parameters[i].StateIndexes,
-                           paramList->ParameterValues[i]);
+                           &paramList->ParameterValues[i][0].f);
       }
    }
 }
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index adca094..ecff234 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -388,8 +388,9 @@
    if (prog->String)
       free(prog->String);
 
-   _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
-
+   if (prog->Instructions) {
+      _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
+   }
    if (prog->Parameters) {
       _mesa_free_parameter_list(prog->Parameters);
    }
@@ -1031,7 +1032,8 @@
    GLuint i;
    GLuint whiteSwizzle;
    GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters,
-                                                 white, 4, &whiteSwizzle);
+                                                 (gl_constant_value *) white,
+                                                 4, &whiteSwizzle);
 
    (void) whiteIndex;
 
diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index dbf5aba..dec3503 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -1854,64 +1854,64 @@
 paramConstScalarDecl: signedFloatConstant
 	{
 	   $$.count = 4;
-	   $$.data[0] = $1;
-	   $$.data[1] = $1;
-	   $$.data[2] = $1;
-	   $$.data[3] = $1;
+	   $$.data[0].f = $1;
+	   $$.data[1].f = $1;
+	   $$.data[2].f = $1;
+	   $$.data[3].f = $1;
 	}
 	;
 
 paramConstScalarUse: REAL
 	{
 	   $$.count = 1;
-	   $$.data[0] = $1;
-	   $$.data[1] = $1;
-	   $$.data[2] = $1;
-	   $$.data[3] = $1;
+	   $$.data[0].f = $1;
+	   $$.data[1].f = $1;
+	   $$.data[2].f = $1;
+	   $$.data[3].f = $1;
 	}
 	| INTEGER
 	{
 	   $$.count = 1;
-	   $$.data[0] = (float) $1;
-	   $$.data[1] = (float) $1;
-	   $$.data[2] = (float) $1;
-	   $$.data[3] = (float) $1;
+	   $$.data[0].f = (float) $1;
+	   $$.data[1].f = (float) $1;
+	   $$.data[2].f = (float) $1;
+	   $$.data[3].f = (float) $1;
 	}
 	;
 
 paramConstVector: '{' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = 0.0f;
-	   $$.data[2] = 0.0f;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = 0.0f;
+	   $$.data[2].f = 0.0f;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = 0.0f;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = 0.0f;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant ','
               signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = $6;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = $6;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant ','
               signedFloatConstant ',' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = $6;
-	   $$.data[3] = $8;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = $6;
+	   $$.data[3].f = $8;
 	}
 	;
 
diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h
index 8e5aaee..5637598 100644
--- a/src/mesa/program/program_parser.h
+++ b/src/mesa/program/program_parser.h
@@ -23,6 +23,7 @@
 #pragma once
 
 #include "main/config.h"
+#include "program/prog_parameter.h"
 
 struct gl_context;
 
@@ -96,7 +97,7 @@
 
 struct asm_vector {
    unsigned count;
-   float    data[4];
+   gl_constant_value data[4];
 };
 
 
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index de96eb4..f5b5174 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -200,6 +200,27 @@
    }
 }
 
+/**
+ * Adds a conflict between base_reg and reg, and also between reg and
+ * anything that base_reg conflicts with.
+ *
+ * This can simplify code for setting up multiple register classes
+ * which are aggregates of some base hardware registers, compared to
+ * explicitly using ra_add_reg_conflict.
+ */
+void
+ra_add_transitive_reg_conflict(struct ra_regs *regs,
+			       unsigned int base_reg, unsigned int reg)
+{
+   int i;
+
+   ra_add_reg_conflict(regs, reg, base_reg);
+
+   for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
+      ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
+   }
+}
+
 unsigned int
 ra_alloc_reg_class(struct ra_regs *regs)
 {
diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h
index 5b95833..ee2e58a 100644
--- a/src/mesa/program/register_allocate.h
+++ b/src/mesa/program/register_allocate.h
@@ -40,6 +40,8 @@
 unsigned int ra_alloc_reg_class(struct ra_regs *regs);
 void ra_add_reg_conflict(struct ra_regs *regs,
 			 unsigned int r1, unsigned int r2);
+void ra_add_transitive_reg_conflict(struct ra_regs *regs,
+				    unsigned int base_reg, unsigned int reg);
 void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
 void ra_set_finalize(struct ra_regs *regs);
 /** @} */
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index 1457d11..e8d34c6 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -132,6 +132,6 @@
 
    index += getname.offset;
 
-   return prog->Parameters->ParameterValues[index][0];
+   return prog->Parameters->ParameterValues[index][0].f;
 }
 }
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 4b2ec08..5e77e0f 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -251,6 +251,7 @@
 	program/prog_instruction.c \
 	program/prog_noise.c \
 	program/prog_optimize.c \
+	program/prog_opt_constant_fold.c \
 	program/prog_parameter.c \
 	program/prog_parameter_layout.c \
 	program/prog_print.c \
@@ -336,7 +337,8 @@
 
 MESA_GALLIUM_CXX_SOURCES = \
 	$(MAIN_CXX_SOURCES) \
-	$(SHADER_CXX_SOURCES)
+	$(SHADER_CXX_SOURCES) \
+	state_tracker/st_glsl_to_tgsi.cpp
 
 # All the core C sources, for dependency checking
 ALL_SOURCES = \
diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index 1f833d2..12b5bc5 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -84,26 +84,6 @@
 }
 
 
-static struct pipe_resource *
-create_color_map_texture(struct gl_context *ctx)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_resource *pt;
-   enum pipe_format format;
-   const uint texSize = 256; /* simple, and usually perfect */
-
-   /* find an RGBA texture format */
-   format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE,
-                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
-
-   /* create texture for color map/table */
-   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
-                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
-   return pt;
-}
-
-
 /**
  * Update the pixelmap texture with the contents of the R/G/B/A pixel maps.
  */
@@ -219,7 +199,7 @@
 
       /* create the colormap/texture now if not already done */
       if (!st->pixel_xfer.pixelmap_texture) {
-         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
          st->pixel_xfer.pixelmap_sampler_view =
             st_create_texture_sampler_view(st->pipe,
                                            st->pixel_xfer.pixelmap_texture);
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 800a9f1..3115a25 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -221,9 +221,9 @@
 
       if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) &&
 	  (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) {
-	 /* don't do sRGB->RGB conversion.  Interpret the texture
-	  * texture data as linear values.
-	  */
+         /* Don't do sRGB->RGB conversion.  Interpret the texture data as
+          * linear values.
+          */
 	 const gl_format linearFormat =
 	    _mesa_get_srgb_format_linear(texFormat);
 	 firstImageFormat = st_mesa_format_to_pipe_format(linearFormat);
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 49b1960..beb5e7c 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -172,6 +172,23 @@
 }
 
 
+static struct gl_program *
+make_bitmap_fragment_program_glsl(struct st_context *st,
+                                  struct st_fragment_program *orig,
+                                  GLuint samplerIndex)
+{
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (!fp)
+      return NULL;
+   
+   get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex);
+   return &fp->Base.Base;
+}
+
+
 static int
 find_free_bit(uint bitfield)
 {
@@ -199,6 +216,7 @@
                                 GLuint *bitmap_sampler)
 {
    struct st_fragment_program *bitmap_prog;
+   struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn;
    struct gl_program *newProg;
    uint sampler;
 
@@ -207,13 +225,18 @@
     * with the bitmap sampler/kill instructions.
     */
    sampler = find_free_bit(fpIn->Base.SamplersUsed);
-   bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
+   
+   if (stfpIn->glsl_to_tgsi)
+      newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler);
+   else {
+      bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
 
-   newProg = _mesa_combine_programs(st->ctx,
-                                    &bitmap_prog->Base.Base,
-                                    &fpIn->Base);
-   /* done with this after combining */
-   st_reference_fragprog(st, &bitmap_prog, NULL);
+      newProg = _mesa_combine_programs(st->ctx,
+                                       &bitmap_prog->Base.Base,
+                                       &fpIn->Base);
+      /* done with this after combining */
+      st_reference_fragprog(st, &bitmap_prog, NULL);
+   }
 
 #if 0
    {
@@ -328,8 +351,8 @@
 
    if(!normalized)
    {
-      sRight = width;
-      tBot = height;
+      sRight = (GLfloat) width;
+      tBot = (GLfloat) height;
    }
 
    /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
@@ -381,7 +404,7 @@
    /* same for all verts: */
    for (i = 0; i < 4; i++) {
       st->bitmap.vertices[i][0][2] = z;
-      st->bitmap.vertices[i][0][3] = 1.0;
+      st->bitmap.vertices[i][0][3] = 1.0f;
       st->bitmap.vertices[i][1][0] = color[0];
       st->bitmap.vertices[i][1][1] = color[1];
       st->bitmap.vertices[i][1][2] = color[2];
@@ -513,7 +536,7 @@
    cso_set_vertex_elements(cso, 3, st->velems_util_draw);
 
    /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
-   z = z * 2.0 - 1.0;
+   z = z * 2.0f - 1.0f;
 
    /* draw textured quad */
    offset = setup_bitmap_vertex_data(st,
diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 416be19..750f541 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -62,6 +62,84 @@
 #if FEATURE_EXT_framebuffer_blit
 
 static void
+st_BlitFramebuffer_resolve(struct gl_context *ctx,
+                           GLbitfield mask,
+                           struct pipe_resolve_info *info)
+{
+   const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+
+   struct st_context *st = st_context(ctx);
+
+   struct st_renderbuffer *srcRb, *dstRb;
+
+   if (mask & GL_COLOR_BUFFER_BIT) {
+      srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+      dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
+
+      info->mask = PIPE_MASK_RGBA;
+
+      info->src.res = srcRb->texture;
+      info->src.layer = srcRb->surface->u.tex.first_layer;
+      info->dst.res = dstRb->texture;
+      info->dst.level = dstRb->surface->u.tex.level;
+      info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+      st->pipe->resource_resolve(st->pipe, info);
+   }
+
+   if (mask & depthStencil) {
+      struct gl_renderbuffer_attachment *srcDepth, *srcStencil;
+      struct gl_renderbuffer_attachment *dstDepth, *dstStencil;
+      boolean combined;
+
+      srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH];
+      dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH];
+      srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
+      dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL];
+
+      combined =
+         st_is_depth_stencil_combined(srcDepth, srcStencil) &&
+         st_is_depth_stencil_combined(dstDepth, dstStencil);
+
+      if ((mask & GL_DEPTH_BUFFER_BIT) || combined) {
+         /* resolve depth and, if combined and requested, stencil as well */
+         srcRb = st_renderbuffer(srcDepth->Renderbuffer);
+         dstRb = st_renderbuffer(dstDepth->Renderbuffer);
+
+         info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0;
+         if (combined && (mask & GL_STENCIL_BUFFER_BIT)) {
+            mask &= ~GL_STENCIL_BUFFER_BIT;
+            info->mask |= PIPE_MASK_S;
+         }
+
+         info->src.res = srcRb->texture;
+         info->src.layer = srcRb->surface->u.tex.first_layer;
+         info->dst.res = dstRb->texture;
+         info->dst.level = dstRb->surface->u.tex.level;
+         info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+         st->pipe->resource_resolve(st->pipe, info);
+      }
+
+      if (mask & GL_STENCIL_BUFFER_BIT) {
+         /* resolve separate stencil buffer */
+         srcRb = st_renderbuffer(srcStencil->Renderbuffer);
+         dstRb = st_renderbuffer(dstStencil->Renderbuffer);
+
+         info->mask = PIPE_MASK_S;
+
+         info->src.res = srcRb->texture;
+         info->src.layer = srcRb->surface->u.tex.first_layer;
+         info->dst.res = dstRb->texture;
+         info->dst.level = dstRb->surface->u.tex.level;
+         info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+         st->pipe->resource_resolve(st->pipe, info);
+      }
+   }
+}
+
+static void
 st_BlitFramebuffer(struct gl_context *ctx,
                    GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                    GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
@@ -95,6 +173,42 @@
       srcY1 = readFB->Height - srcY1;
    }
 
+   /* Disable conditional rendering. */
+   if (st->render_condition) {
+      st->pipe->render_condition(st->pipe, NULL, 0);
+   }
+
+   if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) {
+      struct pipe_resolve_info info;
+
+      if (dstX0 < dstX1) {
+         info.dst.x0 = dstX0;
+         info.dst.x1 = dstX1;
+         info.src.x0 = srcX0;
+         info.src.x1 = srcX1;
+      } else {
+         info.dst.x0 = dstX1;
+         info.dst.x1 = dstX0;
+         info.src.x0 = srcX1;
+         info.src.x1 = srcX0;
+      }
+      if (dstY0 < dstY1) {
+         info.dst.y0 = dstY0;
+         info.dst.y1 = dstY1;
+         info.src.y0 = srcY0;
+         info.src.y1 = srcY1;
+      } else {
+         info.dst.y0 = dstY1;
+         info.dst.y1 = dstY0;
+         info.src.y0 = srcY1;
+         info.src.y1 = srcY0;
+      }
+
+      st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */
+
+      goto done;
+   }
+
    if (srcY0 > srcY1 && dstY0 > dstY1) {
       /* Both src and dst are upside down.  Swap Y to make it
        * right-side up to increase odds of using a fast path.
@@ -109,11 +223,6 @@
       dstY1 = tmp;
    }
 
-   /* Disable conditional rendering. */
-   if (st->render_condition) {
-      st->pipe->render_condition(st->pipe, NULL, 0);
-   }
-
    if (mask & GL_COLOR_BUFFER_BIT) {
       struct gl_renderbuffer_attachment *srcAtt =
          &readFB->Attachment[readFB->_ColorReadBufferIndex];
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 7374bb0..a451b44 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -93,7 +93,6 @@
  */
 static void
 st_bufferobj_subdata(struct gl_context *ctx,
-		     GLenum target,
 		     GLintptrARB offset,
 		     GLsizeiptrARB size,
 		     const GLvoid * data, struct gl_buffer_object *obj)
@@ -133,7 +132,6 @@
  */
 static void
 st_bufferobj_get_subdata(struct gl_context *ctx,
-                         GLenum target,
                          GLintptrARB offset,
                          GLsizeiptrARB size,
                          GLvoid * data, struct gl_buffer_object *obj)
@@ -238,52 +236,10 @@
 
 
 /**
- * Called via glMapBufferARB().
- */
-static void *
-st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
-                 struct gl_buffer_object *obj)
-{
-   struct st_buffer_object *st_obj = st_buffer_object(obj);
-   uint flags;
-
-   switch (access) {
-   case GL_WRITE_ONLY:
-      flags = PIPE_TRANSFER_WRITE;
-      break;
-   case GL_READ_ONLY:
-      flags = PIPE_TRANSFER_READ;
-      break;
-   case GL_READ_WRITE:
-   default:
-      flags = PIPE_TRANSFER_READ_WRITE;
-      break;      
-   }
-
-   /* Handle zero-size buffers here rather than in drivers */
-   if (obj->Size == 0) {
-      obj->Pointer = &st_bufferobj_zero_length;
-   }
-   else {
-      obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe,
-                                     st_obj->buffer,
-                                     flags,
-                                     &st_obj->transfer);
-   }
-
-   if (obj->Pointer) {
-      obj->Offset = 0;
-      obj->Length = obj->Size;
-   }
-   return obj->Pointer;
-}
-
-
-/**
  * Called via glMapBufferRange().
  */
 static void *
-st_bufferobj_map_range(struct gl_context *ctx, GLenum target, 
+st_bufferobj_map_range(struct gl_context *ctx,
                        GLintptr offset, GLsizeiptr length, GLbitfield access,
                        struct gl_buffer_object *obj)
 {
@@ -353,7 +309,7 @@
 
 
 static void
-st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, 
+st_bufferobj_flush_mapped_range(struct gl_context *ctx,
                                 GLintptr offset, GLsizeiptr length,
                                 struct gl_buffer_object *obj)
 {
@@ -378,7 +334,7 @@
  * Called via glUnmapBufferARB().
  */
 static GLboolean
-st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+st_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
 {
    struct pipe_context *pipe = st_context(ctx)->pipe;
    struct st_buffer_object *st_obj = st_buffer_object(obj);
@@ -444,7 +400,6 @@
    functions->BufferData = st_bufferobj_data;
    functions->BufferSubData = st_bufferobj_subdata;
    functions->GetBufferSubData = st_bufferobj_get_subdata;
-   functions->MapBuffer = st_bufferobj_map;
    functions->MapBufferRange = st_bufferobj_map_range;
    functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = st_bufferobj_unmap;
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 1d908c0..390c518 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -94,6 +94,46 @@
 }
 
 
+/**
+ * Returns a fragment program which implements the current pixel transfer ops.
+ */
+static struct gl_fragment_program *
+get_glsl_pixel_transfer_program(struct st_context *st,
+                                struct st_fragment_program *orig)
+{
+   int pixelMaps = 0, scaleAndBias = 0;
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (!fp)
+      return NULL;
+
+   if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 ||
+       ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 ||
+       ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 ||
+       ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) {
+      scaleAndBias = 1;
+   }
+
+   pixelMaps = ctx->Pixel.MapColorFlag;
+
+   if (pixelMaps) {
+      /* create the colormap/texture now if not already done */
+      if (!st->pixel_xfer.pixelmap_texture) {
+         st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_sampler_view =
+            st_create_texture_sampler_view(st->pipe,
+                                           st->pixel_xfer.pixelmap_texture);
+      }
+   }
+
+   get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi,
+                              scaleAndBias, pixelMaps);
+
+   return &fp->Base;
+}
+
 
 /**
  * Make fragment shader for glDraw/CopyPixels.  This shader is made
@@ -107,11 +147,15 @@
                                  struct gl_fragment_program **fpOut)
 {
    struct gl_program *newProg;
+   struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn;
 
    if (is_passthrough_program(fpIn)) {
       newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
                                              &st->pixel_xfer.program->Base);
    }
+   else if (stfp->glsl_to_tgsi != NULL) {
+      newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp);
+   }
    else {
 #if 0
       /* debug */
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 3269497..2abb4d8 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -44,6 +44,7 @@
 #include "st_program.h"
 #include "st_mesa_to_tgsi.h"
 #include "st_cb_program.h"
+#include "st_glsl_to_tgsi.h"
 
 
 
@@ -129,6 +130,9 @@
       {
          struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
          st_release_vp_variants( st, stvp );
+         
+         if (stvp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
       }
       break;
    case MESA_GEOMETRY_PROGRAM:
@@ -137,6 +141,9 @@
             (struct st_geometry_program *) prog;
 
          st_release_gp_variants(st, stgp);
+         
+         if (stgp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
 
          if (stgp->tgsi.tokens) {
             st_free_tokens((void *) stgp->tgsi.tokens);
@@ -151,6 +158,9 @@
 
          st_release_fp_variants(st, stfp);
          
+         if (stfp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
+         
          if (stfp->tgsi.tokens) {
             st_free_tokens(stfp->tgsi.tokens);
             stfp->tgsi.tokens = NULL;
@@ -242,4 +252,8 @@
    functions->DeleteProgram = st_delete_program;
    functions->IsProgramNative = st_is_program_native;
    functions->ProgramStringNotify = st_program_string_notify;
+   
+   functions->NewShader = st_new_shader;
+   functions->NewShaderProgram = st_new_shader_program;
+   functions->LinkShader = st_link_shader;
 }
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 6907cfc..a3b2ba9 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -344,7 +344,7 @@
                               stImage->base.Width2,
                               stImage->base.Height2,
                               stImage->base.Depth2,
-                              stImage->level,
+                              stImage->base.Level,
                               &width, &height, &depth)) {
       /* we can't determine the image size at level=0 */
       stObj->width0 = stObj->height0 = stObj->depth0 = 0;
@@ -367,7 +367,7 @@
         stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
         stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) &&
        !stObj->base.GenerateMipmap &&
-       stImage->level == 0) {
+       stImage->base.Level == 0) {
       /* only alloc space for a single mipmap level */
       lastLevel = 0;
    }
@@ -506,8 +506,8 @@
       assert(texImage->Depth == depth);
    }
 
-   stImage->face = _mesa_tex_target_to_face(target);
-   stImage->level = level;
+   stImage->base.Face = _mesa_tex_target_to_face(target);
+   stImage->base.Level = level;
 
    _mesa_set_fetch_functions(texImage, dims);
 
@@ -529,7 +529,7 @@
    if (stObj->pt) {
       if (level > (GLint) stObj->pt->last_level ||
           !st_texture_match_image(stObj->pt, &stImage->base,
-                                  stImage->face, stImage->level)) {
+                                  stImage->base.Face, stImage->base.Level)) {
          DBG("release it\n");
          pipe_resource_reference(&stObj->pt, NULL);
          assert(!stObj->pt);
@@ -563,7 +563,7 @@
     */
    if (stObj->pt &&
        st_texture_match_image(stObj->pt, &stImage->base,
-                              stImage->face, stImage->level)) {
+                              stImage->base.Face, stImage->base.Level)) {
 
       pipe_resource_reference(&stImage->pt, stObj->pt);
       assert(stImage->pt);
@@ -1466,34 +1466,6 @@
       depth/stencil samples per pixel? Need some transfer clarifications. */
    assert(sample_count < 2);
 
-   if (srcX < 0) {
-      width -= -srcX;
-      destX += -srcX;
-      srcX = 0;
-   }
-
-   if (srcY < 0) {
-      height -= -srcY;
-      destY += -srcY;
-      srcY = 0;
-   }
-
-   if (destX < 0) {
-      width -= -destX;
-      srcX += -destX;
-      destX = 0;
-   }
-
-   if (destY < 0) {
-      height -= -destY;
-      srcY += -destY;
-      destY = 0;
-   }
-
-   if (width < 0 || height < 0)
-      return;
-
-
    assert(strb);
    assert(strb->surface);
    assert(stImage->pt);
@@ -1529,8 +1501,8 @@
          pipe->resource_copy_region(pipe,
                                     /* dest */
                                     stImage->pt,
-                                    stImage->level,
-                                    destX, destY, destZ + stImage->face,
+                                    stImage->base.Level,
+                                    destX, destY, destZ + stImage->base.Face,
                                     /* src */
                                     strb->texture,
                                     strb->surface->u.tex.level,
@@ -1552,9 +1524,9 @@
          memset(&surf_tmpl, 0, sizeof(surf_tmpl));
          surf_tmpl.format = util_format_linear(stImage->pt->format);
          surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
-         surf_tmpl.u.tex.level = stImage->level;
-         surf_tmpl.u.tex.first_layer = stImage->face + destZ;
-         surf_tmpl.u.tex.last_layer = stImage->face + destZ;
+         surf_tmpl.u.tex.level = stImage->base.Level;
+         surf_tmpl.u.tex.first_layer = stImage->base.Face + destZ;
+         surf_tmpl.u.tex.last_layer = stImage->base.Face + destZ;
 
          dest_surface = pipe->create_surface(pipe, stImage->pt,
                                              &surf_tmpl);
@@ -1610,59 +1582,6 @@
 
 
 static void
-st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
-                  GLenum internalFormat,
-                  GLint x, GLint y, GLsizei width, GLint border)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-
-   /* Setup or redefine the texture object, texture and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                          width, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   st_copy_texsubimage(ctx, target, level,
-                       0, 0, 0,  /* destX,Y,Z */
-                       x, y, width, 1);  /* src X, Y, size */
-}
-
-
-static void
-st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
-                  GLenum internalFormat,
-                  GLint x, GLint y, GLsizei width, GLsizei height,
-                  GLint border)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-
-   /* Setup or redefine the texture object, texture and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                          width, height, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   st_copy_texsubimage(ctx, target, level,
-                       0, 0, 0,  /* destX,Y,Z */
-                       x, y, width, height);  /* src X, Y, size */
-}
-
-
-static void
 st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
                      GLint xoffset, GLint x, GLint y, GLsizei width)
 {
@@ -1710,7 +1629,7 @@
    /* debug checks */
    {
       const struct gl_texture_image *dstImage =
-         stObj->base.Image[stImage->face][dstLevel];
+         stObj->base.Image[stImage->base.Face][dstLevel];
       assert(dstImage);
       assert(dstImage->Width == stImage->base.Width);
       assert(dstImage->Height == stImage->base.Height);
@@ -1722,15 +1641,15 @@
        */
       st_texture_image_copy(st->pipe,
                             stObj->pt, dstLevel,  /* dest texture, level */
-                            stImage->pt, stImage->level, /* src texture, level */
-                            stImage->face);
+                            stImage->pt, stImage->base.Level, /* src texture, level */
+                            stImage->base.Face);
 
       pipe_resource_reference(&stImage->pt, NULL);
    }
    else if (stImage->base.Data) {
       st_texture_image_data(st,
                             stObj->pt,
-                            stImage->face,
+                            stImage->base.Face,
                             dstLevel,
                             stImage->base.Data,
                             stImage->base.RowStride * 
@@ -1947,8 +1866,6 @@
    functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D;
    functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D;
    functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D;
-   functions->CopyTexImage1D = st_CopyTexImage1D;
-   functions->CopyTexImage2D = st_CopyTexImage2D;
    functions->CopyTexSubImage1D = st_CopyTexSubImage1D;
    functions->CopyTexSubImage2D = st_CopyTexSubImage2D;
    functions->CopyTexSubImage3D = st_CopyTexSubImage3D;
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 99b231d..8e90093 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -208,6 +208,15 @@
 }
 
 
+static GLboolean st_get_s3tc_override(void)
+{
+   const char *override = _mesa_getenv("force_s3tc_enable");
+   if (override && !strcmp(override, "true"))
+      return GL_TRUE;
+   return GL_FALSE;
+}
+
+
 /**
  * Use pipe_screen::get_param() to query PIPE_CAP_ values to determine
  * which GL extensions are supported.
@@ -219,6 +228,7 @@
 {
    struct pipe_screen *screen = st->pipe->screen;
    struct gl_context *ctx = st->ctx;
+   int i;
 
    /*
     * Extensions that are supported by all Gallium drivers:
@@ -426,7 +436,7 @@
    if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA,
                                    PIPE_TEXTURE_2D, 0,
                                    PIPE_BIND_SAMPLER_VIEW) &&
-       ctx->Mesa_DXTn) {
+       (ctx->Mesa_DXTn || st_get_s3tc_override())) {
       ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE;
       ctx->Extensions.S3_s3tc = GL_TRUE;
    }
@@ -596,6 +606,16 @@
       ctx->Extensions.EXT_packed_float = GL_TRUE;
    }
 
+   /* Maximum sample count. */
+   for (i = 16; i > 0; --i) {
+      if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM,
+                                      PIPE_TEXTURE_2D, i,
+                                      PIPE_BIND_RENDER_TARGET)) {
+         ctx->Const.MaxSamples = i;
+         break;
+      }
+   }
+
    if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) {
       ctx->Extensions.ARB_seamless_cube_map = GL_TRUE;
       ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE;
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index b091129..82ca4af 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -453,7 +453,6 @@
                                  srcImage->TexFormat);
 
       stImage = st_texture_image(dstImage);
-      stImage->level = dstLevel;
 
       pipe_resource_reference(&stImage->pt, pt);
    }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
new file mode 100644
index 0000000..9cac309
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -0,0 +1,5142 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file glsl_to_tgsi.cpp
+ *
+ * Translate GLSL IR to TGSI.
+ */
+
+#include <stdio.h>
+#include "main/compiler.h"
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_print_visitor.h"
+#include "ir_expression_flattening.h"
+#include "glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "../glsl/program.h"
+#include "ir_optimization.h"
+#include "ast.h"
+
+extern "C" {
+#include "main/mtypes.h"
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/hash_table.h"
+#include "program/prog_instruction.h"
+#include "program/prog_optimize.h"
+#include "program/prog_print.h"
+#include "program/program.h"
+#include "program/prog_uniform.h"
+#include "program/prog_parameter.h"
+#include "program/sampler.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_info.h"
+#include "st_context.h"
+#include "st_program.h"
+#include "st_glsl_to_tgsi.h"
+#include "st_mesa_to_tgsi.h"
+}
+
+#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
+#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
+                           (1 << PROGRAM_ENV_PARAM) |    \
+                           (1 << PROGRAM_STATE_VAR) |    \
+                           (1 << PROGRAM_NAMED_PARAM) |  \
+                           (1 << PROGRAM_CONSTANT) |     \
+                           (1 << PROGRAM_UNIFORM))
+
+#define MAX_TEMPS         4096
+
+class st_src_reg;
+class st_dst_reg;
+
+static int swizzle_for_size(int size);
+
+/**
+ * This struct is a corresponding struct to TGSI ureg_src.
+ */
+class st_src_reg {
+public:
+   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+   {
+      this->file = file;
+      this->index = index;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+         this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+         this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
+      this->reladdr = NULL;
+   }
+
+   st_src_reg(gl_register_file file, int index, int type)
+   {
+      this->type = type;
+      this->file = file;
+      this->index = index;
+      this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   st_src_reg()
+   {
+      this->type = GLSL_TYPE_ERROR;
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->swizzle = 0;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   explicit st_src_reg(st_dst_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
+   int negate; /**< NEGATE_XYZW mask from mesa */
+   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+class st_dst_reg {
+public:
+   st_dst_reg(gl_register_file file, int writemask, int type)
+   {
+      this->file = file;
+      this->index = 0;
+      this->writemask = writemask;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+      this->type = type;
+   }
+
+   st_dst_reg()
+   {
+      this->type = GLSL_TYPE_ERROR;
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->writemask = 0;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+   }
+
+   explicit st_dst_reg(st_src_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+   GLuint cond_mask:4;
+   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+st_src_reg::st_src_reg(st_dst_reg reg)
+{
+   this->type = reg.type;
+   this->file = reg.file;
+   this->index = reg.index;
+   this->swizzle = SWIZZLE_XYZW;
+   this->negate = 0;
+   this->reladdr = reg.reladdr;
+}
+
+st_dst_reg::st_dst_reg(st_src_reg reg)
+{
+   this->type = reg.type;
+   this->file = reg.file;
+   this->index = reg.index;
+   this->writemask = WRITEMASK_XYZW;
+   this->cond_mask = COND_TR;
+   this->reladdr = reg.reladdr;
+}
+
+class glsl_to_tgsi_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   unsigned op;
+   st_dst_reg dst;
+   st_src_reg src[3];
+   /** Pointer to the ir source this tree came from for debugging */
+   ir_instruction *ir;
+   GLboolean cond_update;
+   bool saturate;
+   int sampler; /**< sampler index */
+   int tex_target; /**< One of TEXTURE_*_INDEX */
+   GLboolean tex_shadow;
+   int dead_mask; /**< Used in dead code elimination */
+
+   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
+};
+
+class variable_storage : public exec_node {
+public:
+   variable_storage(ir_variable *var, gl_register_file file, int index)
+      : file(file), index(index), var(var)
+   {
+      /* empty */
+   }
+
+   gl_register_file file;
+   int index;
+   ir_variable *var; /* variable that maps to this, if any */
+};
+
+class immediate_storage : public exec_node {
+public:
+   immediate_storage(gl_constant_value *values, int size, int type)
+   {
+      memcpy(this->values, values, size * sizeof(gl_constant_value));
+      this->size = size;
+      this->type = type;
+   }
+   
+   gl_constant_value values[4];
+   int size; /**< Number of components (1-4) */
+   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+};
+
+class function_entry : public exec_node {
+public:
+   ir_function_signature *sig;
+
+   /**
+    * identifier of this function signature used by the program.
+    *
+    * At the point that TGSI instructions for function calls are
+    * generated, we don't know the address of the first instruction of
+    * the function body.  So we make the BranchTarget that is called a
+    * small integer and rewrite them during set_branchtargets().
+    */
+   int sig_id;
+
+   /**
+    * Pointer to first instruction of the function body.
+    *
+    * Set during function body emits after main() is processed.
+    */
+   glsl_to_tgsi_instruction *bgn_inst;
+
+   /**
+    * Index of the first instruction of the function body in actual TGSI.
+    *
+    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
+    */
+   int inst;
+
+   /** Storage for the return value. */
+   st_src_reg return_reg;
+};
+
+class glsl_to_tgsi_visitor : public ir_visitor {
+public:
+   glsl_to_tgsi_visitor();
+   ~glsl_to_tgsi_visitor();
+
+   function_entry *current_function;
+
+   struct gl_context *ctx;
+   struct gl_program *prog;
+   struct gl_shader_program *shader_program;
+   struct gl_shader_compiler_options *options;
+
+   int next_temp;
+
+   int num_address_regs;
+   int samplers_used;
+   bool indirect_addr_temps;
+   bool indirect_addr_consts;
+   
+   int glsl_version;
+   bool native_integers;
+
+   variable_storage *find_variable_storage(ir_variable *var);
+
+   int add_constant(gl_register_file file, gl_constant_value values[4],
+                    int size, int datatype, GLuint *swizzle_out);
+
+   function_entry *get_function_signature(ir_function_signature *sig);
+
+   st_src_reg get_temp(const glsl_type *type);
+   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
+
+   st_src_reg st_src_reg_for_float(float val);
+   st_src_reg st_src_reg_for_int(int val);
+   st_src_reg st_src_reg_for_type(int type, int val);
+
+   /**
+    * \name Visit methods
+    *
+    * As typical for the visitor pattern, there must be one \c visit method for
+    * each concrete subclass of \c ir_instruction.  Virtual base classes within
+    * the hierarchy should not have \c visit methods.
+    */
+   /*@{*/
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_dereference_variable  *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_if *);
+   /*@}*/
+
+   st_src_reg result;
+
+   /** List of variable_storage */
+   exec_list variables;
+
+   /** List of immediate_storage */
+   exec_list immediates;
+   int num_immediates;
+
+   /** List of function_entry */
+   exec_list function_signatures;
+   int next_signature_id;
+
+   /** List of glsl_to_tgsi_instruction */
+   exec_list instructions;
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst, st_src_reg src0);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst,
+        		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
+   
+   unsigned get_opcode(ir_instruction *ir, unsigned op,
+                    st_dst_reg dst,
+                    st_src_reg src0, st_src_reg src1);
+
+   /**
+    * Emit the correct dot-product instruction for the type of arguments
+    */
+   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
+                                     st_dst_reg dst,
+                                     st_src_reg src0,
+                                     st_src_reg src1,
+                                     unsigned elements);
+
+   void emit_scalar(ir_instruction *ir, unsigned op,
+        	    st_dst_reg dst, st_src_reg src0);
+
+   void emit_scalar(ir_instruction *ir, unsigned op,
+        	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
+
+   void emit_scs(ir_instruction *ir, unsigned op,
+        	 st_dst_reg dst, const st_src_reg &src);
+
+   bool try_emit_mad(ir_expression *ir,
+              int mul_operand);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+              int mul_operand);
+   bool try_emit_sat(ir_expression *ir);
+
+   void emit_swz(ir_expression *ir);
+
+   bool process_move_condition(ir_rvalue *ir);
+
+   void remove_output_reads(gl_register_file type);
+   void simplify_cmp(void);
+
+   void rename_temp_register(int index, int new_index);
+   int get_first_temp_read(int index);
+   int get_first_temp_write(int index);
+   int get_last_temp_read(int index);
+   int get_last_temp_write(int index);
+
+   void copy_propagate(void);
+   void eliminate_dead_code(void);
+   int eliminate_dead_code_advanced(void);
+   void merge_registers(void);
+   void renumber_registers(void);
+
+   void *mem_ctx;
+};
+
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
+
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
+
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list args;
+   va_start(args, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
+   va_end(args);
+
+   prog->LinkStatus = GL_FALSE;
+}
+
+static int
+swizzle_for_size(int size)
+{
+   int size_swizzles[4] = {
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+   return size_swizzles[size - 1];
+}
+
+static bool
+is_tex_instruction(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->is_tex;
+}
+
+static unsigned
+num_inst_dst_regs(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->num_dst;
+}
+
+static unsigned
+num_inst_src_regs(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->is_tex ? info->num_src - 1 : info->num_src;
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst,
+        		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
+{
+   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
+   int num_reladdr = 0, i;
+   
+   op = get_opcode(ir, op, dst, src0, src1);
+
+   /* If we have to do relative addressing, we want to load the ARL
+    * reg directly for one of the regs, and preload the other reladdr
+    * sources into temps.
+    */
+   num_reladdr += dst.reladdr != NULL;
+   num_reladdr += src0.reladdr != NULL;
+   num_reladdr += src1.reladdr != NULL;
+   num_reladdr += src2.reladdr != NULL;
+
+   reladdr_to_temp(ir, &src2, &num_reladdr);
+   reladdr_to_temp(ir, &src1, &num_reladdr);
+   reladdr_to_temp(ir, &src0, &num_reladdr);
+
+   if (dst.reladdr) {
+      emit_arl(ir, address_reg, *dst.reladdr);
+      num_reladdr--;
+   }
+   assert(num_reladdr == 0);
+
+   inst->op = op;
+   inst->dst = dst;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+   inst->ir = ir;
+   inst->dead_mask = 0;
+
+   inst->function = NULL;
+   
+   if (op == TGSI_OPCODE_ARL)
+      this->num_address_regs = 1;
+   
+   /* Update indirect addressing status used by TGSI */
+   if (dst.reladdr) {
+      switch(dst.file) {
+      case PROGRAM_TEMPORARY:
+         this->indirect_addr_temps = true;
+         break;
+      case PROGRAM_LOCAL_PARAM:
+      case PROGRAM_ENV_PARAM:
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_NAMED_PARAM:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_UNIFORM:
+         this->indirect_addr_consts = true;
+         break;
+      case PROGRAM_IMMEDIATE:
+         assert(!"immediates should not have indirect addressing");
+         break;
+      default:
+         break;
+      }
+   }
+   else {
+      for (i=0; i<3; i++) {
+         if(inst->src[i].reladdr) {
+            switch(inst->src[i].file) {
+            case PROGRAM_TEMPORARY:
+               this->indirect_addr_temps = true;
+               break;
+            case PROGRAM_LOCAL_PARAM:
+            case PROGRAM_ENV_PARAM:
+            case PROGRAM_STATE_VAR:
+            case PROGRAM_NAMED_PARAM:
+            case PROGRAM_CONSTANT:
+            case PROGRAM_UNIFORM:
+               this->indirect_addr_consts = true;
+               break;
+            case PROGRAM_IMMEDIATE:
+               assert(!"immediates should not have indirect addressing");
+               break;
+            default:
+               break;
+            }
+         }
+      }
+   }
+
+   this->instructions.push_tail(inst);
+   
+   return inst;
+}
+
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
+{
+   return emit(ir, op, dst, src0, src1, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst, st_src_reg src0)
+{
+   assert(dst.writemask != 0);
+   return emit(ir, op, dst, src0, undef_src, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
+{
+   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
+}
+
+/**
+ * Determines whether to use an integer, unsigned integer, or float opcode 
+ * based on the operands and input opcode, then emits the result.
+ * 
+ * TODO: type checking for remaining TGSI opcodes
+ */
+unsigned
+glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst,
+        		 st_src_reg src0, st_src_reg src1)
+{
+   int type = GLSL_TYPE_FLOAT;
+   
+   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
+      type = GLSL_TYPE_FLOAT;
+   else if (native_integers)
+      type = src0.type;
+
+#define case4(c, f, i, u) \
+   case TGSI_OPCODE_##c: \
+      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
+      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
+      else op = TGSI_OPCODE_##f; \
+      break;
+#define case3(f, i, u)  case4(f, f, i, u)
+#define case2fi(f, i)   case4(f, f, i, i)
+#define case2iu(i, u)   case4(i, LAST, i, u)
+   
+   switch(op) {
+      case2fi(ADD, UADD);
+      case2fi(MUL, UMUL);
+      case2fi(MAD, UMAD);
+      case3(DIV, IDIV, UDIV);
+      case3(MAX, IMAX, UMAX);
+      case3(MIN, IMIN, UMIN);
+      case2iu(MOD, UMOD);
+      
+      case2fi(SEQ, USEQ);
+      case2fi(SNE, USNE);
+      case3(SGE, ISGE, USGE);
+      case3(SLT, ISLT, USLT);
+      
+      case2iu(SHL, SHL);
+      case2iu(ISHR, USHR);
+      case2iu(NOT, NOT);
+      case2iu(AND, AND);
+      case2iu(OR, OR);
+      case2iu(XOR, XOR);
+      
+      default: break;
+   }
+   
+   assert(op != TGSI_OPCODE_LAST);
+   return op;
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
+        		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
+        		    unsigned elements)
+{
+   static const unsigned dot_opcodes[] = {
+      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
+   };
+
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+/**
+ * Emits TGSI scalar opcodes to produce unique answers across channels.
+ *
+ * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
+ * channel determines the result across all channels.  So to do a vec4
+ * of this operation, we want to emit a scalar per source channel used
+ * to produce dest channels.
+ */
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst,
+        			st_src_reg orig_src0, st_src_reg orig_src1)
+{
+   int i, j;
+   int done_mask = ~dst.writemask;
+
+   /* TGSI RCP is a scalar operation splatting results to all channels,
+    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
+    * dst channels.
+    */
+   for (i = 0; i < 4; i++) {
+      GLuint this_mask = (1 << i);
+      glsl_to_tgsi_instruction *inst;
+      st_src_reg src0 = orig_src0;
+      st_src_reg src1 = orig_src1;
+
+      if (done_mask & this_mask)
+         continue;
+
+      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
+      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
+      for (j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src0.swizzle, j) == src0_swiz &&
+             GET_SWZ(src1.swizzle, j) == src1_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+        			   src0_swiz, src0_swiz);
+      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
+        			  src1_swiz, src1_swiz);
+
+      inst = emit(ir, op, dst, src0, src1);
+      inst->dst.writemask = this_mask;
+      done_mask |= this_mask;
+   }
+}
+
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg undef = undef_src;
+
+   undef.swizzle = SWIZZLE_XXXX;
+
+   emit_scalar(ir, op, dst, src0, undef);
+}
+
+void
+glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
+        		        st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg tmp = get_temp(glsl_type::float_type);
+
+   if (src0.type == GLSL_TYPE_INT)
+      emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
+   else if (src0.type == GLSL_TYPE_UINT)
+      emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
+   else
+      tmp = src0;
+   
+   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
+}
+
+/**
+ * Emit an TGSI_OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
+ * Instead of splatting its result across all four components of the 
+ * destination, it writes one value to the \c x component and another value to 
+ * the \c y component.
+ *
+ * \param ir        IR instruction being processed
+ * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 
+ *                  on which value is desired.
+ * \param dst       Destination register
+ * \param src       Source register
+ */
+void
+glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
+        		     st_dst_reg dst,
+        		     const st_src_reg &src)
+{
+   /* Vertex programs cannot use the SCS opcode.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      emit_scalar(ir, op, dst, src);
+      return;
+   }
+
+   const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
+   const unsigned scs_mask = (1U << component);
+   int done_mask = ~dst.writemask;
+   st_src_reg tmp;
+
+   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
+
+   /* If there are compnents in the destination that differ from the component
+    * that will be written by the SCS instrution, we'll need a temporary.
+    */
+   if (scs_mask != unsigned(dst.writemask)) {
+      tmp = get_temp(glsl_type::vec4_type);
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      unsigned this_mask = (1U << i);
+      st_src_reg src0 = src;
+
+      if ((done_mask & this_mask) != 0)
+         continue;
+
+      /* The source swizzle specified which component of the source generates
+       * sine / cosine for the current component in the destination.  The SCS
+       * instruction requires that this value be swizzle to the X component.
+       * Replace the current swizzle with a swizzle that puts the source in
+       * the X component.
+       */
+      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+        			   src0_swiz, src0_swiz);
+      for (unsigned j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src0.swizzle, j) == src0_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+
+      if (this_mask != scs_mask) {
+         glsl_to_tgsi_instruction *inst;
+         st_dst_reg tmp_dst = st_dst_reg(tmp);
+
+         /* Emit the SCS instruction.
+          */
+         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
+         inst->dst.writemask = scs_mask;
+
+         /* Move the result of the SCS instruction to the desired location in
+          * the destination.
+          */
+         tmp.swizzle = MAKE_SWIZZLE4(component, component,
+        			     component, component);
+         inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
+         inst->dst.writemask = this_mask;
+      } else {
+         /* Emit the SCS instruction to write directly to the destination.
+          */
+         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
+         inst->dst.writemask = scs_mask;
+      }
+
+      done_mask |= this_mask;
+   }
+}
+
+int
+glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+        		     gl_constant_value values[4], int size, int datatype,
+        		     GLuint *swizzle_out)
+{
+   if (file == PROGRAM_CONSTANT) {
+      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
+                                              size, datatype, swizzle_out);
+   } else {
+      int index = 0;
+      immediate_storage *entry;
+      assert(file == PROGRAM_IMMEDIATE);
+
+      /* Search immediate storage to see if we already have an identical
+       * immediate that we can use instead of adding a duplicate entry.
+       */
+      foreach_iter(exec_list_iterator, iter, this->immediates) {
+         entry = (immediate_storage *)iter.get();
+         
+         if (entry->size == size &&
+             entry->type == datatype &&
+             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
+             return index;
+         }
+         index++;
+      }
+      
+      /* Add this immediate to the list. */
+      entry = new(mem_ctx) immediate_storage(values, size, datatype);
+      this->immediates.push_tail(entry);
+      this->num_immediates++;
+      return index;
+   }
+}
+
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
+{
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
+   union gl_constant_value uval;
+
+   uval.f = val;
+   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
+
+   return src;
+}
+
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
+{
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
+   union gl_constant_value uval;
+   
+   assert(native_integers);
+
+   uval.i = val;
+   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
+
+   return src;
+}
+
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
+{
+   if (native_integers)
+      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
+                                       st_src_reg_for_int(val);
+   else
+      return st_src_reg_for_float(val);
+}
+
+static int
+type_size(const struct glsl_type *type)
+{
+   unsigned int i;
+   int size;
+
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (type->is_matrix()) {
+         return type->matrix_columns;
+      } else {
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess.  Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return 1;
+      }
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+   case GLSL_TYPE_STRUCT:
+      size = 0;
+      for (i = 0; i < type->length; i++) {
+         size += type_size(type->fields.structure[i].type);
+      }
+      return size;
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/**
+ * In the initial pass of codegen, we assign temporary numbers to
+ * intermediate results.  (not SSA -- variable assignments will reuse
+ * storage).
+ */
+st_src_reg
+glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
+{
+   st_src_reg src;
+
+   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
+   src.file = PROGRAM_TEMPORARY;
+   src.index = next_temp;
+   src.reladdr = NULL;
+   next_temp += type_size(type);
+
+   if (type->is_array() || type->is_record()) {
+      src.swizzle = SWIZZLE_NOOP;
+   } else {
+      src.swizzle = swizzle_for_size(type->vector_elements);
+   }
+   src.negate = 0;
+
+   return src;
+}
+
+variable_storage *
+glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
+{
+   
+   variable_storage *entry;
+
+   foreach_iter(exec_list_iterator, iter, this->variables) {
+      entry = (variable_storage *)iter.get();
+
+      if (entry->var == var)
+         return entry;
+   }
+
+   return NULL;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_variable *ir)
+{
+   if (strcmp(ir->name, "gl_FragCoord") == 0) {
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+
+      fp->OriginUpperLeft = ir->origin_upper_left;
+      fp->PixelCenterInteger = ir->pixel_center_integer;
+
+   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+      switch (ir->depth_layout) {
+      case ir_depth_layout_none:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
+         break;
+      case ir_depth_layout_any:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
+         break;
+      case ir_depth_layout_greater:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
+         break;
+      case ir_depth_layout_less:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
+         break;
+      case ir_depth_layout_unchanged:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+   }
+
+   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
+      unsigned int i;
+      const ir_state_slot *const slots = ir->state_slots;
+      assert(ir->state_slots != NULL);
+
+      /* Check if this statevar's setup in the STATE file exactly
+       * matches how we'll want to reference it as a
+       * struct/array/whatever.  If not, then we need to move it into
+       * temporary storage and hope that it'll get copy-propagated
+       * out.
+       */
+      for (i = 0; i < ir->num_state_slots; i++) {
+         if (slots[i].swizzle != SWIZZLE_XYZW) {
+            break;
+         }
+      }
+
+      variable_storage *storage;
+      st_dst_reg dst;
+      if (i == ir->num_state_slots) {
+         /* We'll set the index later. */
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
+         this->variables.push_tail(storage);
+
+         dst = undef_dst;
+      } else {
+         /* The variable_storage constructor allocates slots based on the size
+          * of the type.  However, this had better match the number of state
+          * elements that we're going to copy into the new temporary.
+          */
+         assert((int) ir->num_state_slots == type_size(ir->type));
+
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
+        					 this->next_temp);
+         this->variables.push_tail(storage);
+         this->next_temp += type_size(ir->type);
+
+         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
+               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
+      }
+
+
+      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+         int index = _mesa_add_state_reference(this->prog->Parameters,
+        				       (gl_state_index *)slots[i].tokens);
+
+         if (storage->file == PROGRAM_STATE_VAR) {
+            if (storage->index == -1) {
+               storage->index = index;
+            } else {
+               assert(index == storage->index + (int)i);
+            }
+         } else {
+            st_src_reg src(PROGRAM_STATE_VAR, index,
+                  native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
+            src.swizzle = slots[i].swizzle;
+            emit(ir, TGSI_OPCODE_MOV, dst, src);
+            /* even a float takes up a whole vec4 reg in a struct/array. */
+            dst.index++;
+         }
+      }
+
+      if (storage->file == PROGRAM_TEMPORARY &&
+          dst.index != storage->index + (int) ir->num_state_slots) {
+         fail_link(this->shader_program,
+        	   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
+        	   ir->name, dst.index - storage->index,
+        	   type_size(ir->type));
+      }
+   }
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_loop *ir)
+{
+   ir_dereference_variable *counter = NULL;
+
+   if (ir->counter != NULL)
+      counter = new(ir) ir_dereference_variable(ir->counter);
+
+   if (ir->from != NULL) {
+      assert(ir->counter != NULL);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
+
+      a->accept(this);
+      delete a;
+   }
+
+   emit(NULL, TGSI_OPCODE_BGNLOOP);
+
+   if (ir->to) {
+      ir_expression *e =
+         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+        		       counter, ir->to);
+      ir_if *if_stmt =  new(ir) ir_if(e);
+
+      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+      if_stmt->then_instructions.push_tail(brk);
+
+      if_stmt->accept(this);
+
+      delete if_stmt;
+      delete e;
+      delete brk;
+   }
+
+   visit_exec_list(&ir->body_instructions, this);
+
+   if (ir->increment) {
+      ir_expression *e =
+         new(ir) ir_expression(ir_binop_add, counter->type,
+        		       counter, ir->increment);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
+
+      a->accept(this);
+      delete a;
+      delete e;
+   }
+
+   emit(NULL, TGSI_OPCODE_ENDLOOP);
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
+{
+   switch (ir->mode) {
+   case ir_loop_jump::jump_break:
+      emit(NULL, TGSI_OPCODE_BRK);
+      break;
+   case ir_loop_jump::jump_continue:
+      emit(NULL, TGSI_OPCODE_CONT);
+      break;
+   }
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
+{
+   assert(0);
+   (void)ir;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_function *ir)
+{
+   /* Ignore function bodies other than main() -- we shouldn't see calls to
+    * them since they should all be inlined before we get to glsl_to_tgsi.
+    */
+   if (strcmp(ir->name, "main") == 0) {
+      const ir_function_signature *sig;
+      exec_list empty;
+
+      sig = ir->matching_signature(&empty);
+
+      assert(sig);
+
+      foreach_iter(exec_list_iterator, iter, sig->body) {
+         ir_instruction *ir = (ir_instruction *)iter.get();
+
+         ir->accept(this);
+      }
+   }
+}
+
+bool
+glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
+{
+   int nonmul_operand = 1 - mul_operand;
+   st_src_reg a, b, c;
+   st_dst_reg result_dst;
+
+   ir_expression *expr = ir->operands[mul_operand]->as_expression();
+   if (!expr || expr->operation != ir_binop_mul)
+      return false;
+
+   expr->operands[0]->accept(this);
+   a = this->result;
+   expr->operands[1]->accept(this);
+   b = this->result;
+   ir->operands[nonmul_operand]->accept(this);
+   c = this->result;
+
+   this->result = get_temp(ir->type);
+   result_dst = st_dst_reg(this->result);
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
+
+   return true;
+}
+
+/**
+ * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   st_src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
+bool
+glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
+{
+   /* Saturates were only introduced to vertex programs in
+    * NV_vertex_program3, so don't give them to drivers in the VP.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+      return false;
+
+   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+   if (!sat_src)
+      return false;
+
+   sat_src->accept(this);
+   st_src_reg src = this->result;
+
+   /* If we generated an expression instruction into a temporary in
+    * processing the saturate's operand, apply the saturate to that
+    * instruction.  Otherwise, generate a MOV to do the saturate.
+    *
+    * Note that we have to be careful to only do this optimization if
+    * the instruction in question was what generated src->result.  For
+    * example, ir_dereference_array might generate a MUL instruction
+    * to create the reladdr, and return us a src reg using that
+    * reladdr.  That MUL result is not the value we're trying to
+    * saturate.
+    */
+   ir_expression *sat_src_expr = sat_src->as_expression();
+   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+			sat_src_expr->operation == ir_binop_add ||
+			sat_src_expr->operation == ir_binop_dot)) {
+      glsl_to_tgsi_instruction *new_inst;
+      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      new_inst->saturate = true;
+   } else {
+      this->result = get_temp(ir->type);
+      st_dst_reg result_dst = st_dst_reg(this->result);
+      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+      glsl_to_tgsi_instruction *inst;
+      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
+      inst->saturate = true;
+   }
+
+   return true;
+}
+
+void
+glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
+        			    st_src_reg *reg, int *num_reladdr)
+{
+   if (!reg->reladdr)
+      return;
+
+   emit_arl(ir, address_reg, *reg->reladdr);
+
+   if (*num_reladdr != 1) {
+      st_src_reg temp = get_temp(glsl_type::vec4_type);
+
+      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
+      *reg = temp;
+   }
+
+   (*num_reladdr)--;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   st_src_reg op[Elements(ir->operands)];
+   st_src_reg result_src;
+   st_dst_reg result_dst;
+
+   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
+    */
+   if (ir->operation == ir_binop_add) {
+      if (try_emit_mad(ir, 1))
+         return;
+      if (try_emit_mad(ir, 0))
+         return;
+   }
+
+   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+	 return;
+      if (try_emit_mad_for_and_not(ir, 0))
+	 return;
+   }
+
+   if (try_emit_sat(ir))
+      return;
+
+   if (ir->operation == ir_quadop_vector)
+      assert(!"ir_quadop_vector should have been lowered");
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result.file = PROGRAM_UNDEFINED;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == PROGRAM_UNDEFINED) {
+         ir_print_visitor v;
+         printf("Failed to get tree for expression operand:\n");
+         ir->operands[operand]->accept(&v);
+         exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   int vector_elements = ir->operands[0]->type->vector_elements;
+   if (ir->operands[1]) {
+      vector_elements = MAX2(vector_elements,
+        		     ir->operands[1]->type->vector_elements);
+   }
+
+   this->result.file = PROGRAM_UNDEFINED;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = st_dst_reg(result_src);
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      if (result_dst.type != GLSL_TYPE_FLOAT)
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      else {
+         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+          * older GPUs implement SEQ using multiple instructions (i915 uses two
+          * SGE instructions and a MUL instruction).  Since our logic values are
+          * 0.0 and 1.0, 1-x also implements !x.
+          */
+         op[0].negate = ~op[0].negate;
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+      }
+      break;
+   case ir_unop_neg:
+      assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
+      if (result_dst.type == GLSL_TYPE_INT)
+         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else {
+         op[0].negate = ~op[0].negate;
+         result_src = op[0];
+      }
+      break;
+   case ir_unop_abs:
+      assert(result_dst.type == GLSL_TYPE_FLOAT);
+      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
+      break;
+   case ir_unop_sign:
+      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
+      break;
+   case ir_unop_rcp:
+      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_log2:
+      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
+      break;
+   case ir_unop_sin:
+      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
+      break;
+   case ir_unop_sin_reduced:
+      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos_reduced:
+      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
+      break;
+   case ir_unop_dFdy:
+      op[0].negate = ~op[0].negate;
+      emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
+      break;
+
+   case ir_unop_noise: {
+      /* At some point, a motivated person could add a better
+       * implementation of noise.  Currently not even the nvidia
+       * binary drivers do anything more than this.  In any case, the
+       * place to do this is in the GL state tracker, not the poor
+       * driver.
+       */
+      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
+      break;
+   }
+
+   case ir_binop_add:
+      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_mul:
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_div:
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      else
+         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_mod:
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      else
+         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_less:
+      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_greater:
+      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_lequal:
+      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_gequal:
+      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_equal:
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_nequal:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(native_integers ?
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero becomes 1.0, and positive values become zero.
+          */
+         emit_dp(ir, result_dst, temp, temp, vector_elements);
+         
+         if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
+             * This is achieved using SGE.
+             */
+            st_src_reg sge_src = result_src;
+            sge_src.negate = ~sge_src.negate;
+            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+         } else {
+            /* The TGSI negate flag doesn't work for integers, so use SEQ 0
+             * instead.
+             */
+            emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0));
+         }
+      } else {
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(native_integers ?
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero stays zero, and positive values become 1.0.
+          */
+         glsl_to_tgsi_instruction *const dp =
+               emit_dp(ir, result_dst, temp, temp, vector_elements);
+         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+             result_dst.type == GLSL_TYPE_FLOAT) {
+            /* The clamping to [0,1] can be done for free in the fragment
+             * shader with a saturate.
+             */
+            dp->saturate = true;
+         } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+             * achieved using SLT.
+             */
+            st_src_reg slt_src = result_src;
+            slt_src.negate = ~slt_src.negate;
+            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         } else {
+            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+         }
+      } else {
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      }
+      break;
+
+   case ir_unop_any: {
+      assert(ir->operands[0]->type->is_vector());
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *const dp =
+         emit_dp(ir, result_dst, op[0], op[0],
+                 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+	      /* The clamping to [0,1] can be done for free in the fragment
+	       * shader with a saturate.
+	       */
+	      dp->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+	      /* Negating the result of the dot-product gives values on the range
+	       * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	       * is achieved using SLT.
+	       */
+	      st_src_reg slt_src = result_src;
+	      slt_src.negate = ~slt_src.negate;
+	      emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      }
+      else {
+         /* Use SNE 0 if integers are being used as boolean values. */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
+      break;
+   }
+
+   case ir_binop_logic_xor:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer on the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *add =
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+         /* The clamping to [0,1] can be done for free in the fragment
+          * shader with a saturate if floats are being used as boolean values.
+          */
+         add->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+         /* Negating the result of the addition gives values on the range
+          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+          * is achieved using SLT.
+          */
+         st_src_reg slt_src = result_src;
+         slt_src.negate = ~slt_src.negate;
+         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      } else {
+         /* Use an SNE on the result of the addition.  Zero stays zero,
+          * 1 stays 1, and 2 becomes 1.
+          */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
+      break;
+   }
+
+   case ir_binop_logic_and:
+      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(ir, result_dst, op[0], op[1],
+              ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      /* sqrt(x) = x * rsq(x). */
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
+      /* For incoming channels <= 0, set the result to 0. */
+      op[0].negate = ~op[0].negate;
+      emit(ir, TGSI_OPCODE_CMP, result_dst,
+        		  op[0], result_src, st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_rsq:
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_b2f:
+      if (native_integers) {
+         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
+         break;
+      }
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+      /* Converting between signed and unsigned integers is a no-op. */
+   case ir_unop_b2i:
+      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
+      result_src = op[0];
+      break;
+   case ir_unop_f2i:
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
+      else
+         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
+            st_src_reg_for_type(result_dst.type, 0));
+      break;
+   case ir_unop_trunc:
+      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      op[0].negate = ~op[0].negate;
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
+      result_src.negate = ~result_src.negate;
+      break;
+   case ir_unop_floor:
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_max:
+      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_pow:
+      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   case ir_unop_bit_not:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+         break;
+      }
+   case ir_unop_u2f:
+      if (native_integers) {
+         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
+         break;
+      }
+   case ir_binop_lshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
+         break;
+      }
+   case ir_binop_rshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
+         break;
+      }
+   case ir_binop_bit_and:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
+         break;
+      }
+   case ir_binop_bit_xor:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
+         break;
+      }
+   case ir_binop_bit_or:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
+         break;
+      }
+   case ir_unop_round_even:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      /* This operation should have already been handled.
+       */
+      assert(!"Should not get here.");
+      break;
+   }
+
+   this->result = result_src;
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
+{
+   st_src_reg src;
+   int i;
+   int swizzle[4];
+
+   /* Note that this is only swizzles in expressions, not those on the left
+    * hand side of an assignment, which do write masking.  See ir_assignment
+    * for that.
+    */
+
+   ir->val->accept(this);
+   src = this->result;
+   assert(src.file != PROGRAM_UNDEFINED);
+
+   for (i = 0; i < 4; i++) {
+      if (i < ir->type->vector_elements) {
+         switch (i) {
+         case 0:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
+            break;
+         case 1:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
+            break;
+         case 2:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
+            break;
+         case 3:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
+            break;
+         }
+      } else {
+         /* If the type is smaller than a vec4, replicate the last
+          * channel out.
+          */
+         swizzle[i] = swizzle[ir->type->vector_elements - 1];
+      }
+   }
+
+   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
+{
+   variable_storage *entry = find_variable_storage(ir->var);
+   ir_variable *var = ir->var;
+
+   if (!entry) {
+      switch (var->mode) {
+      case ir_var_uniform:
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
+        				       var->location);
+         this->variables.push_tail(entry);
+         break;
+      case ir_var_in:
+      case ir_var_inout:
+         /* The linker assigns locations for varyings and attributes,
+          * including deprecated builtins (like gl_Color), user-assign
+          * generic attributes (glBindVertexLocation), and
+          * user-defined varyings.
+          *
+          * FINISHME: We would hit this path for function arguments.  Fix!
+          */
+         assert(var->location != -1);
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_INPUT,
+                                               var->location);
+         if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
+             var->location >= VERT_ATTRIB_GENERIC0) {
+            _mesa_add_attribute(this->prog->Attributes,
+                                var->name,
+                                _mesa_sizeof_glsl_type(var->type->gl_type),
+                                var->type->gl_type,
+                                var->location - VERT_ATTRIB_GENERIC0);
+         }
+         break;
+      case ir_var_out:
+         assert(var->location != -1);
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_OUTPUT,
+                                               var->location);
+         break;
+      case ir_var_system_value:
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_SYSTEM_VALUE,
+                                               var->location);
+         break;
+      case ir_var_auto:
+      case ir_var_temporary:
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
+        				       this->next_temp);
+         this->variables.push_tail(entry);
+
+         next_temp += type_size(var->type);
+         break;
+      }
+
+      if (!entry) {
+         printf("Failed to make storage for %s\n", var->name);
+         exit(1);
+      }
+   }
+
+   this->result = st_src_reg(entry->file, entry->index, var->type);
+   if (!native_integers)
+      this->result.type = GLSL_TYPE_FLOAT;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
+{
+   ir_constant *index;
+   st_src_reg src;
+   int element_size = type_size(ir->type);
+
+   index = ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   src = this->result;
+
+   if (index) {
+      src.index += index->value.i[0] * element_size;
+   } else {
+      /* Variable index array dereference.  It eats the "vec4" of the
+       * base of the array and an index that offsets the TGSI register
+       * index.
+       */
+      ir->array_index->accept(this);
+
+      st_src_reg index_reg;
+
+      if (element_size == 1) {
+         index_reg = this->result;
+      } else {
+         index_reg = get_temp(glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
+              this->result, st_src_reg_for_float(element_size));
+      }
+
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (src.reladdr != NULL) {
+         st_src_reg accum_reg = get_temp(glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
+              index_reg, *src.reladdr);
+
+         index_reg = accum_reg;
+      }
+
+      src.reladdr = ralloc(mem_ctx, st_src_reg);
+      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      src.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      src.swizzle = SWIZZLE_NOOP;
+
+   this->result = src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
+{
+   unsigned int i;
+   const glsl_type *struct_type = ir->record->type;
+   int offset = 0;
+
+   ir->record->accept(this);
+
+   for (i = 0; i < struct_type->length; i++) {
+      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+         break;
+      offset += type_size(struct_type->fields.structure[i].type);
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      this->result.swizzle = SWIZZLE_NOOP;
+
+   this->result.index += offset;
+}
+
+/**
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.
+ */
+static st_dst_reg
+get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
+{
+   /* The LHS must be a dereference.  If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series conditional moves
+    * before reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   ir_dereference_array *deref_array = ir->as_dereference_array();
+   if (deref_array) {
+      assert(!deref_array->array->type->is_vector());
+   }
+
+   /* Use the rvalue deref handler for the most part.  We'll ignore
+    * swizzles in it and write swizzles using writemask, though.
+    */
+   ir->accept(v);
+   return st_dst_reg(v->result);
+}
+
+/**
+ * Process the condition of a conditional assignment
+ *
+ * Examines the condition of a conditional assignment to generate the optimal
+ * first operand of a \c CMP instruction.  If the condition is a relational
+ * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
+ * used as the source for the \c CMP instruction.  Otherwise the comparison
+ * is processed to a boolean result, and the boolean result is used as the
+ * operand to the CMP instruction.
+ */
+bool
+glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
+{
+   ir_rvalue *src_ir = ir;
+   bool negate = true;
+   bool switch_order = false;
+
+   ir_expression *const expr = ir->as_expression();
+   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
+      bool zero_on_left = false;
+
+      if (expr->operands[0]->is_zero()) {
+         src_ir = expr->operands[1];
+         zero_on_left = true;
+      } else if (expr->operands[1]->is_zero()) {
+         src_ir = expr->operands[0];
+         zero_on_left = false;
+      }
+
+      /*      a is -  0  +            -  0  +
+       * (a <  0)  T  F  F  ( a < 0)  T  F  F
+       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
+       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (a >  0)  F  F  T  (-a < 0)  F  F  T
+       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
+       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       *
+       * Note that exchanging the order of 0 and 'a' in the comparison simply
+       * means that the value of 'a' should be negated.
+       */
+      if (src_ir != ir) {
+         switch (expr->operation) {
+         case ir_binop_less:
+            switch_order = false;
+            negate = zero_on_left;
+            break;
+
+         case ir_binop_greater:
+            switch_order = false;
+            negate = !zero_on_left;
+            break;
+
+         case ir_binop_lequal:
+            switch_order = true;
+            negate = !zero_on_left;
+            break;
+
+         case ir_binop_gequal:
+            switch_order = true;
+            negate = zero_on_left;
+            break;
+
+         default:
+            /* This isn't the right kind of comparison afterall, so make sure
+             * the whole condition is visited.
+             */
+            src_ir = ir;
+            break;
+         }
+      }
+   }
+
+   src_ir->accept(this);
+
+   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
+    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
+    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
+    * computing the condition.
+    */
+   if (negate)
+      this->result.negate = ~this->result.negate;
+
+   return switch_order;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_assignment *ir)
+{
+   st_dst_reg l;
+   st_src_reg r;
+   int i;
+
+   ir->rhs->accept(this);
+   r = this->result;
+
+   l = get_assignment_lhs(ir->lhs, this);
+
+   /* FINISHME: This should really set to the correct maximal writemask for each
+    * FINISHME: component written (in the loops below).  This case can only
+    * FINISHME: occur for matrices, arrays, and structures.
+    */
+   if (ir->write_mask == 0) {
+      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
+      l.writemask = WRITEMASK_XYZW;
+   } else if (ir->lhs->type->is_scalar() &&
+              ir->lhs->variable_referenced()->mode == ir_var_out) {
+      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
+       * FINISHME: W component of fragment shader output zero, work correctly.
+       */
+      l.writemask = WRITEMASK_XYZW;
+   } else {
+      int swizzles[4];
+      int first_enabled_chan = 0;
+      int rhs_chan = 0;
+
+      l.writemask = ir->write_mask;
+
+      for (int i = 0; i < 4; i++) {
+         if (l.writemask & (1 << i)) {
+            first_enabled_chan = GET_SWZ(r.swizzle, i);
+            break;
+         }
+      }
+
+      /* Swizzle a small RHS vector into the channels being written.
+       *
+       * glsl ir treats write_mask as dictating how many channels are
+       * present on the RHS while TGSI treats write_mask as just
+       * showing which channels of the vec4 RHS get written.
+       */
+      for (int i = 0; i < 4; i++) {
+         if (l.writemask & (1 << i))
+            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
+         else
+            swizzles[i] = first_enabled_chan;
+      }
+      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
+        			swizzles[2], swizzles[3]);
+   }
+
+   assert(l.file != PROGRAM_UNDEFINED);
+   assert(r.file != PROGRAM_UNDEFINED);
+
+   if (ir->condition) {
+      const bool switch_order = this->process_move_condition(ir->condition);
+      st_src_reg condition = this->result;
+
+      for (i = 0; i < type_size(ir->lhs->type); i++) {
+         st_src_reg l_src = st_src_reg(l);
+         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
+         
+         if (switch_order) {
+            emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
+         } else {
+            emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
+         }
+
+         l.index++;
+         r.index++;
+      }
+   } else if (ir->rhs->as_expression() &&
+              this->instructions.get_tail() &&
+              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
+              type_size(ir->lhs->type) == 1 &&
+              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
+      /* To avoid emitting an extra MOV when assigning an expression to a 
+       * variable, emit the last instruction of the expression again, but
+       * replace the destination register with the target of the assignment.
+       * Dead code elimination will remove the original instruction.
+       */
+      glsl_to_tgsi_instruction *inst, *new_inst;
+      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst->saturate = inst->saturate;
+   } else {
+      for (i = 0; i < type_size(ir->lhs->type); i++) {
+         emit(ir, TGSI_OPCODE_MOV, l, r);
+         l.index++;
+         r.index++;
+      }
+   }
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_constant *ir)
+{
+   st_src_reg src;
+   GLfloat stack_vals[4] = { 0 };
+   gl_constant_value *values = (gl_constant_value *) stack_vals;
+   GLenum gl_type = GL_NONE;
+   unsigned int i;
+   static int in_array = 0;
+   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
+
+   /* Unfortunately, 4 floats is all we can get into
+    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
+    * aggregate constant and move each constant value into it.  If we
+    * get lucky, copy propagation will eliminate the extra moves.
+    */
+   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+      st_src_reg temp_base = get_temp(ir->type);
+      st_dst_reg temp = st_dst_reg(temp_base);
+
+      foreach_iter(exec_list_iterator, iter, ir->components) {
+         ir_constant *field_value = (ir_constant *)iter.get();
+         int size = type_size(field_value->type);
+
+         assert(size > 0);
+
+         field_value->accept(this);
+         src = this->result;
+
+         for (i = 0; i < (unsigned int)size; i++) {
+            emit(ir, TGSI_OPCODE_MOV, temp, src);
+
+            src.index++;
+            temp.index++;
+         }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_array()) {
+      st_src_reg temp_base = get_temp(ir->type);
+      st_dst_reg temp = st_dst_reg(temp_base);
+      int size = type_size(ir->type->fields.array);
+
+      assert(size > 0);
+      in_array++;
+
+      for (i = 0; i < ir->type->length; i++) {
+         ir->array_elements[i]->accept(this);
+         src = this->result;
+         for (int j = 0; j < size; j++) {
+            emit(ir, TGSI_OPCODE_MOV, temp, src);
+
+            src.index++;
+            temp.index++;
+         }
+      }
+      this->result = temp_base;
+      in_array--;
+      return;
+   }
+
+   if (ir->type->is_matrix()) {
+      st_src_reg mat = get_temp(ir->type);
+      st_dst_reg mat_column = st_dst_reg(mat);
+
+      for (i = 0; i < ir->type->matrix_columns; i++) {
+         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
+
+         src = st_src_reg(file, -1, ir->type->base_type);
+         src.index = add_constant(file,
+                                  values,
+                                  ir->type->vector_elements,
+                                  GL_FLOAT,
+                                  &src.swizzle);
+         emit(ir, TGSI_OPCODE_MOV, mat_column, src);
+
+         mat_column.index++;
+      }
+
+      this->result = mat;
+      return;
+   }
+
+   switch (ir->type->base_type) {
+   case GLSL_TYPE_FLOAT:
+      gl_type = GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i].f = ir->value.f[i];
+      }
+      break;
+   case GLSL_TYPE_UINT:
+      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         if (native_integers)
+            values[i].u = ir->value.u[i];
+         else
+            values[i].f = ir->value.u[i];
+      }
+      break;
+   case GLSL_TYPE_INT:
+      gl_type = native_integers ? GL_INT : GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         if (native_integers)
+            values[i].i = ir->value.i[i];
+         else
+            values[i].f = ir->value.i[i];
+      }
+      break;
+   case GLSL_TYPE_BOOL:
+      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         if (native_integers)
+            values[i].b = ir->value.b[i];
+         else
+            values[i].f = ir->value.b[i];
+      }
+      break;
+   default:
+      assert(!"Non-float/uint/int/bool constant");
+   }
+
+   this->result = st_src_reg(file, -1, ir->type);
+   this->result.index = add_constant(file,
+                                     values,
+                                     ir->type->vector_elements,
+                                     gl_type,
+                                     &this->result.swizzle);
+}
+
+function_entry *
+glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
+{
+   function_entry *entry;
+
+   foreach_iter(exec_list_iterator, iter, this->function_signatures) {
+      entry = (function_entry *)iter.get();
+
+      if (entry->sig == sig)
+         return entry;
+   }
+
+   entry = ralloc(mem_ctx, function_entry);
+   entry->sig = sig;
+   entry->sig_id = this->next_signature_id++;
+   entry->bgn_inst = NULL;
+
+   /* Allocate storage for all the parameters. */
+   foreach_iter(exec_list_iterator, iter, sig->parameters) {
+      ir_variable *param = (ir_variable *)iter.get();
+      variable_storage *storage;
+
+      storage = find_variable_storage(param);
+      assert(!storage);
+
+      storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
+        				      this->next_temp);
+      this->variables.push_tail(storage);
+
+      this->next_temp += type_size(param->type);
+   }
+
+   if (!sig->return_type->is_void()) {
+      entry->return_reg = get_temp(sig->return_type);
+   } else {
+      entry->return_reg = undef_src;
+   }
+
+   this->function_signatures.push_tail(entry);
+   return entry;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_call *ir)
+{
+   glsl_to_tgsi_instruction *call_inst;
+   ir_function_signature *sig = ir->get_callee();
+   function_entry *entry = get_function_signature(sig);
+   int i;
+
+   /* Process in parameters. */
+   exec_list_iterator sig_iter = sig->parameters.iterator();
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
+      ir_variable *param = (ir_variable *)sig_iter.get();
+
+      if (param->mode == ir_var_in ||
+          param->mode == ir_var_inout) {
+         variable_storage *storage = find_variable_storage(param);
+         assert(storage);
+
+         param_rval->accept(this);
+         st_src_reg r = this->result;
+
+         st_dst_reg l;
+         l.file = storage->file;
+         l.index = storage->index;
+         l.reladdr = NULL;
+         l.writemask = WRITEMASK_XYZW;
+         l.cond_mask = COND_TR;
+
+         for (i = 0; i < type_size(param->type); i++) {
+            emit(ir, TGSI_OPCODE_MOV, l, r);
+            l.index++;
+            r.index++;
+         }
+      }
+
+      sig_iter.next();
+   }
+   assert(!sig_iter.has_next());
+
+   /* Emit call instruction */
+   call_inst = emit(ir, TGSI_OPCODE_CAL);
+   call_inst->function = entry;
+
+   /* Process out parameters. */
+   sig_iter = sig->parameters.iterator();
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
+      ir_variable *param = (ir_variable *)sig_iter.get();
+
+      if (param->mode == ir_var_out ||
+          param->mode == ir_var_inout) {
+         variable_storage *storage = find_variable_storage(param);
+         assert(storage);
+
+         st_src_reg r;
+         r.file = storage->file;
+         r.index = storage->index;
+         r.reladdr = NULL;
+         r.swizzle = SWIZZLE_NOOP;
+         r.negate = 0;
+
+         param_rval->accept(this);
+         st_dst_reg l = st_dst_reg(this->result);
+
+         for (i = 0; i < type_size(param->type); i++) {
+            emit(ir, TGSI_OPCODE_MOV, l, r);
+            l.index++;
+            r.index++;
+         }
+      }
+
+      sig_iter.next();
+   }
+   assert(!sig_iter.has_next());
+
+   /* Process return value. */
+   this->result = entry->return_reg;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_texture *ir)
+{
+   st_src_reg result_src, coord, lod_info, projector, dx, dy;
+   st_dst_reg result_dst, coord_dst;
+   glsl_to_tgsi_instruction *inst = NULL;
+   unsigned opcode = TGSI_OPCODE_NOP;
+
+   if (ir->coordinate) {
+      ir->coordinate->accept(this);
+
+      /* Put our coords in a temp.  We'll need to modify them for shadow,
+       * projection, or LOD, so the only case we'd use it as is is if
+       * we're doing plain old texturing.  The optimization passes on
+       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
+       */
+      coord = get_temp(glsl_type::vec4_type);
+      coord_dst = st_dst_reg(coord);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+   }
+
+   if (ir->projector) {
+      ir->projector->accept(this);
+      projector = this->result;
+   }
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(glsl_type::vec4_type);
+   result_dst = st_dst_reg(result_src);
+
+   switch (ir->op) {
+   case ir_tex:
+      opcode = TGSI_OPCODE_TEX;
+      break;
+   case ir_txb:
+      opcode = TGSI_OPCODE_TXB;
+      ir->lod_info.bias->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txl:
+      opcode = TGSI_OPCODE_TXL;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txd:
+      opcode = TGSI_OPCODE_TXD;
+      ir->lod_info.grad.dPdx->accept(this);
+      dx = this->result;
+      ir->lod_info.grad.dPdy->accept(this);
+      dy = this->result;
+      break;
+   case ir_txs:
+      opcode = TGSI_OPCODE_TXQ;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txf:
+      opcode = TGSI_OPCODE_TXF;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
+   }
+
+   if (ir->projector) {
+      if (opcode == TGSI_OPCODE_TEX) {
+         /* Slot the projector in as the last component of the coord. */
+         coord_dst.writemask = WRITEMASK_W;
+         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
+         coord_dst.writemask = WRITEMASK_XYZW;
+         opcode = TGSI_OPCODE_TXP;
+      } else {
+         st_src_reg coord_w = coord;
+         coord_w.swizzle = SWIZZLE_WWWW;
+
+         /* For the other TEX opcodes there's no projective version
+          * since the last slot is taken up by LOD info.  Do the
+          * projective divide now.
+          */
+         coord_dst.writemask = WRITEMASK_W;
+         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
+
+         /* In the case where we have to project the coordinates "by hand,"
+          * the shadow comparator value must also be projected.
+          */
+         st_src_reg tmp_src = coord;
+         if (ir->shadow_comparitor) {
+            /* Slot the shadow value in as the second to last component of the
+             * coord.
+             */
+            ir->shadow_comparitor->accept(this);
+
+            tmp_src = get_temp(glsl_type::vec4_type);
+            st_dst_reg tmp_dst = st_dst_reg(tmp_src);
+
+            tmp_dst.writemask = WRITEMASK_Z;
+            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
+
+            tmp_dst.writemask = WRITEMASK_XY;
+            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
+         }
+
+         coord_dst.writemask = WRITEMASK_XYZ;
+         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
+
+         coord_dst.writemask = WRITEMASK_XYZW;
+         coord.swizzle = SWIZZLE_XYZW;
+      }
+   }
+
+   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
+    * comparator was put in the correct place (and projected) by the code,
+    * above, that handles by-hand projection.
+    */
+   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
+      /* Slot the shadow value in as the second to last component of the
+       * coord.
+       */
+      ir->shadow_comparitor->accept(this);
+      coord_dst.writemask = WRITEMASK_Z;
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+      coord_dst.writemask = WRITEMASK_XYZW;
+   }
+
+   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+       opcode == TGSI_OPCODE_TXF) {
+      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
+      coord_dst.writemask = WRITEMASK_W;
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
+      coord_dst.writemask = WRITEMASK_XYZW;
+   }
+
+   if (opcode == TGSI_OPCODE_TXD)
+      inst = emit(ir, opcode, result_dst, coord, dx, dy);
+   else if (opcode == TGSI_OPCODE_TXQ)
+      inst = emit(ir, opcode, result_dst, lod_info);
+   else
+      inst = emit(ir, opcode, result_dst, coord);
+
+   if (ir->shadow_comparitor)
+      inst->tex_shadow = GL_TRUE;
+
+   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
+        					   this->shader_program,
+        					   this->prog);
+
+   const glsl_type *sampler_type = ir->sampler->type;
+
+   switch (sampler_type->sampler_dimensionality) {
+   case GLSL_SAMPLER_DIM_1D:
+      inst->tex_target = (sampler_type->sampler_array)
+         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_2D:
+      inst->tex_target = (sampler_type->sampler_array)
+         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_3D:
+      inst->tex_target = TEXTURE_3D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_CUBE:
+      inst->tex_target = TEXTURE_CUBE_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_RECT:
+      inst->tex_target = TEXTURE_RECT_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_BUF:
+      assert(!"FINISHME: Implement ARB_texture_buffer_object");
+      break;
+   default:
+      assert(!"Should not get here.");
+   }
+
+   this->result = result_src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_return *ir)
+{
+   if (ir->get_value()) {
+      st_dst_reg l;
+      int i;
+
+      assert(current_function);
+
+      ir->get_value()->accept(this);
+      st_src_reg r = this->result;
+
+      l = st_dst_reg(current_function->return_reg);
+
+      for (i = 0; i < type_size(current_function->sig->return_type); i++) {
+         emit(ir, TGSI_OPCODE_MOV, l, r);
+         l.index++;
+         r.index++;
+      }
+   }
+
+   emit(ir, TGSI_OPCODE_RET);
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_discard *ir)
+{
+   struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+
+   if (ir->condition) {
+      ir->condition->accept(this);
+      this->result.negate = ~this->result.negate;
+      emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
+   } else {
+      emit(ir, TGSI_OPCODE_KILP);
+   }
+
+   fp->UsesKill = GL_TRUE;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_if *ir)
+{
+   glsl_to_tgsi_instruction *cond_inst, *if_inst;
+   glsl_to_tgsi_instruction *prev_inst;
+
+   prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+   ir->condition->accept(this);
+   assert(this->result.file != PROGRAM_UNDEFINED);
+
+   if (this->options->EmitCondCodes) {
+      cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+      /* See if we actually generated any instruction for generating
+       * the condition.  If not, then cook up a move to a temp so we
+       * have something to set cond_update on.
+       */
+      if (cond_inst == prev_inst) {
+         st_src_reg temp = get_temp(glsl_type::bool_type);
+         cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
+      }
+      cond_inst->cond_update = GL_TRUE;
+
+      if_inst = emit(ir->condition, TGSI_OPCODE_IF);
+      if_inst->dst.cond_mask = COND_NE;
+   } else {
+      if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
+   }
+
+   this->instructions.push_tail(if_inst);
+
+   visit_exec_list(&ir->then_instructions, this);
+
+   if (!ir->else_instructions.is_empty()) {
+      emit(ir->condition, TGSI_OPCODE_ELSE);
+      visit_exec_list(&ir->else_instructions, this);
+   }
+
+   if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
+}
+
+glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
+{
+   result.file = PROGRAM_UNDEFINED;
+   next_temp = 1;
+   next_signature_id = 1;
+   num_immediates = 0;
+   current_function = NULL;
+   num_address_regs = 0;
+   indirect_addr_temps = false;
+   indirect_addr_consts = false;
+   mem_ctx = ralloc_context(NULL);
+}
+
+glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
+{
+   ralloc_free(mem_ctx);
+}
+
+extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
+{
+   delete v;
+}
+
+
+/**
+ * Count resources used by the given gpu program (number of texture
+ * samplers, etc).
+ */
+static void
+count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
+{
+   v->samplers_used = 0;
+
+   foreach_iter(exec_list_iterator, iter, v->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (is_tex_instruction(inst->op)) {
+         v->samplers_used |= 1 << inst->sampler;
+
+         prog->SamplerTargets[inst->sampler] =
+            (gl_texture_index)inst->tex_target;
+         if (inst->tex_shadow) {
+            prog->ShadowSamplers |= 1 << inst->sampler;
+         }
+      }
+   }
+   
+   prog->SamplersUsed = v->samplers_used;
+   _mesa_update_shader_textures_used(prog);
+}
+
+
+/**
+ * Check if the given vertex/fragment/shader program is within the
+ * resource limits of the context (number of texture units, etc).
+ * If any of those checks fail, record a linker error.
+ *
+ * XXX more checks are needed...
+ */
+static void
+check_resources(const struct gl_context *ctx,
+                struct gl_shader_program *shader_program,
+                glsl_to_tgsi_visitor *prog,
+                struct gl_program *proginfo)
+{
+   switch (proginfo->Target) {
+   case GL_VERTEX_PROGRAM_ARB:
+      if (_mesa_bitcount(prog->samplers_used) >
+          ctx->Const.MaxVertexTextureImageUnits) {
+         fail_link(shader_program, "Too many vertex shader texture samplers");
+      }
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
+         fail_link(shader_program, "Too many vertex shader constants");
+      }
+      break;
+   case MESA_GEOMETRY_PROGRAM:
+      if (_mesa_bitcount(prog->samplers_used) >
+          ctx->Const.MaxGeometryTextureImageUnits) {
+         fail_link(shader_program, "Too many geometry shader texture samplers");
+      }
+      if (proginfo->Parameters->NumParameters >
+          MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
+         fail_link(shader_program, "Too many geometry shader constants");
+      }
+      break;
+   case GL_FRAGMENT_PROGRAM_ARB:
+      if (_mesa_bitcount(prog->samplers_used) >
+          ctx->Const.MaxTextureImageUnits) {
+         fail_link(shader_program, "Too many fragment shader texture samplers");
+      }
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
+         fail_link(shader_program, "Too many fragment shader constants");
+      }
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected program type in check_resources()");
+   }
+}
+
+
+
+struct uniform_sort {
+   struct gl_uniform *u;
+   int pos;
+};
+
+/* The shader_program->Uniforms list is almost sorted in increasing
+ * uniform->{Frag,Vert}Pos locations, but not quite when there are
+ * uniforms shared between targets.  We need to add parameters in
+ * increasing order for the targets.
+ */
+static int
+sort_uniforms(const void *a, const void *b)
+{
+   struct uniform_sort *u1 = (struct uniform_sort *)a;
+   struct uniform_sort *u2 = (struct uniform_sort *)b;
+
+   return u1->pos - u2->pos;
+}
+
+/* Add the uniforms to the parameters.  The linker chose locations
+ * in our parameters lists (which weren't created yet), which the
+ * uniforms code will use to poke values into our parameters list
+ * when uniforms are updated.
+ */
+static void
+add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
+        			struct gl_shader *shader,
+        			struct gl_program *prog)
+{
+   unsigned int i;
+   unsigned int next_sampler = 0, num_uniforms = 0;
+   struct uniform_sort *sorted_uniforms;
+
+   sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
+        			  shader_program->Uniforms->NumUniforms);
+
+   for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
+      struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
+      int parameter_index = -1;
+
+      switch (shader->Type) {
+      case GL_VERTEX_SHADER:
+         parameter_index = uniform->VertPos;
+         break;
+      case GL_FRAGMENT_SHADER:
+         parameter_index = uniform->FragPos;
+         break;
+      case GL_GEOMETRY_SHADER:
+         parameter_index = uniform->GeomPos;
+         break;
+      }
+
+      /* Only add uniforms used in our target. */
+      if (parameter_index != -1) {
+         sorted_uniforms[num_uniforms].pos = parameter_index;
+         sorted_uniforms[num_uniforms].u = uniform;
+         num_uniforms++;
+      }
+   }
+
+   qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
+         sort_uniforms);
+
+   for (i = 0; i < num_uniforms; i++) {
+      struct gl_uniform *uniform = sorted_uniforms[i].u;
+      int parameter_index = sorted_uniforms[i].pos;
+      const glsl_type *type = uniform->Type;
+      unsigned int size;
+
+      if (type->is_vector() ||
+          type->is_scalar()) {
+         size = type->vector_elements;
+      } else {
+         size = type_size(type) * 4;
+      }
+
+      gl_register_file file;
+      if (type->is_sampler() ||
+          (type->is_array() && type->fields.array->is_sampler())) {
+         file = PROGRAM_SAMPLER;
+      } else {
+         file = PROGRAM_UNIFORM;
+      }
+
+      GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
+        					 uniform->Name);
+
+      if (index < 0) {
+         index = _mesa_add_parameter(prog->Parameters, file,
+        			     uniform->Name, size, type->gl_type,
+        			     NULL, NULL, 0x0);
+
+         /* Sampler uniform values are stored in prog->SamplerUnits,
+          * and the entry in that array is selected by this index we
+          * store in ParameterValues[].
+          */
+         if (file == PROGRAM_SAMPLER) {
+            for (unsigned int j = 0; j < size / 4; j++)
+               prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
+         }
+
+         /* The location chosen in the Parameters list here (returned
+          * from _mesa_add_uniform) has to match what the linker chose.
+          */
+         if (index != parameter_index) {
+            fail_link(shader_program, "Allocation of uniform `%s' to target "
+        	      "failed (%d vs %d)\n",
+        	      uniform->Name, index, parameter_index);
+         }
+      }
+   }
+
+   ralloc_free(sorted_uniforms);
+}
+
+static void
+set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
+        		struct gl_shader_program *shader_program,
+        		const char *name, const glsl_type *type,
+        		ir_constant *val)
+{
+   if (type->is_record()) {
+      ir_constant *field_constant;
+
+      field_constant = (ir_constant *)val->components.get_head();
+
+      for (unsigned int i = 0; i < type->length; i++) {
+         const glsl_type *field_type = type->fields.structure[i].type;
+         const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
+        				    type->fields.structure[i].name);
+         set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
+        			 field_type, field_constant);
+         field_constant = (ir_constant *)field_constant->next;
+      }
+      return;
+   }
+
+   int loc = _mesa_get_uniform_location(ctx, shader_program, name);
+
+   if (loc == -1) {
+      fail_link(shader_program,
+        	"Couldn't find uniform for initializer %s\n", name);
+      return;
+   }
+
+   for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
+      ir_constant *element;
+      const glsl_type *element_type;
+      if (type->is_array()) {
+         element = val->array_elements[i];
+         element_type = type->fields.array;
+      } else {
+         element = val;
+         element_type = type;
+      }
+
+      void *values;
+
+      if (element_type->base_type == GLSL_TYPE_BOOL) {
+         int *conv = ralloc_array(mem_ctx, int, element_type->components());
+         for (unsigned int j = 0; j < element_type->components(); j++) {
+            conv[j] = element->value.b[j];
+         }
+         values = (void *)conv;
+         element_type = glsl_type::get_instance(GLSL_TYPE_INT,
+        					element_type->vector_elements,
+        					1);
+      } else {
+         values = &element->value;
+      }
+
+      if (element_type->is_matrix()) {
+         _mesa_uniform_matrix(ctx, shader_program,
+        		      element_type->matrix_columns,
+        		      element_type->vector_elements,
+        		      loc, 1, GL_FALSE, (GLfloat *)values);
+         loc += element_type->matrix_columns;
+      } else {
+         _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
+        	       values, element_type->gl_type);
+         loc += type_size(element_type);
+      }
+   }
+}
+
+static void
+set_uniform_initializers(struct gl_context *ctx,
+        		 struct gl_shader_program *shader_program)
+{
+   void *mem_ctx = NULL;
+
+   for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
+      struct gl_shader *shader = shader_program->_LinkedShaders[i];
+
+      if (shader == NULL)
+         continue;
+
+      foreach_iter(exec_list_iterator, iter, *shader->ir) {
+         ir_instruction *ir = (ir_instruction *)iter.get();
+         ir_variable *var = ir->as_variable();
+
+         if (!var || var->mode != ir_var_uniform || !var->constant_value)
+            continue;
+
+         if (!mem_ctx)
+            mem_ctx = ralloc_context(NULL);
+
+         set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
+        			 var->type, var->constant_value);
+      }
+   }
+
+   ralloc_free(mem_ctx);
+}
+
+/*
+ * Scan/rewrite program to remove reads of custom (output) registers.
+ * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
+ * (for vertex shaders).
+ * In GLSL shaders, varying vars can be read and written.
+ * On some hardware, trying to read an output register causes trouble.
+ * So, rewrite the program to use a temporary register in this case.
+ * 
+ * Based on _mesa_remove_output_reads from programopt.c.
+ */
+void
+glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
+{
+   GLuint i;
+   GLint outputMap[VERT_RESULT_MAX];
+   GLint outputTypes[VERT_RESULT_MAX];
+   GLuint numVaryingReads = 0;
+   GLboolean usedTemps[MAX_TEMPS];
+   GLuint firstTemp = 0;
+
+   _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
+                             usedTemps, MAX_TEMPS);
+
+   assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
+   assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
+
+   for (i = 0; i < VERT_RESULT_MAX; i++)
+      outputMap[i] = -1;
+
+   /* look for instructions which read from varying vars */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const GLuint numSrc = num_inst_src_regs(inst->op);
+      GLuint j;
+      for (j = 0; j < numSrc; j++) {
+         if (inst->src[j].file == type) {
+            /* replace the read with a temp reg */
+            const GLuint var = inst->src[j].index;
+            if (outputMap[var] == -1) {
+               numVaryingReads++;
+               outputMap[var] = _mesa_find_free_register(usedTemps,
+                                                         MAX_TEMPS,
+                                                         firstTemp);
+               outputTypes[var] = inst->src[j].type;
+               firstTemp = outputMap[var] + 1;
+            }
+            inst->src[j].file = PROGRAM_TEMPORARY;
+            inst->src[j].index = outputMap[var];
+         }
+      }
+   }
+
+   if (numVaryingReads == 0)
+      return; /* nothing to be done */
+
+   /* look for instructions which write to the varying vars identified above */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
+         /* change inst to write to the temp reg, instead of the varying */
+         inst->dst.file = PROGRAM_TEMPORARY;
+         inst->dst.index = outputMap[inst->dst.index];
+      }
+   }
+   
+   /* insert new MOV instructions at the end */
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (outputMap[i] >= 0) {
+         /* MOV VAR[i], TEMP[tmp]; */
+         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
+         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
+         dst.index = i;
+         this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
+      }
+   }
+}
+
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
+ * are read from the given src in this instruction
+ */
+static int
+get_src_arg_mask(st_dst_reg dst, st_src_reg src)
+{
+   int read_mask = 0, comp;
+
+   /* Now, given the src swizzle and the written channels, find which
+    * components are actually read
+    */
+   for (comp = 0; comp < 4; ++comp) {
+      const unsigned coord = GET_SWZ(src.swizzle, comp);
+      ASSERT(coord < 4);
+      if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
+         read_mask |= 1 << coord;
+   }
+
+   return read_mask;
+}
+
+/**
+ * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
+ * instruction is the first instruction to write to register T0.  There are
+ * several lowering passes done in GLSL IR (e.g. branches and
+ * relative addressing) that create a large number of conditional assignments
+ * that ir_to_mesa converts to CMP instructions like the one mentioned above.
+ *
+ * Here is why this conversion is safe:
+ * CMP T0, T1 T2 T0 can be expanded to:
+ * if (T1 < 0.0)
+ * 	MOV T0, T2;
+ * else
+ * 	MOV T0, T0;
+ *
+ * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
+ * as the original program.  If (T1 < 0.0) evaluates to false, executing
+ * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
+ * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
+ * because any instruction that was going to read from T0 after this was going
+ * to read a garbage value anyway.
+ */
+void
+glsl_to_tgsi_visitor::simplify_cmp(void)
+{
+   unsigned tempWrites[MAX_TEMPS];
+   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
+
+   memset(tempWrites, 0, sizeof(tempWrites));
+   memset(outputWrites, 0, sizeof(outputWrites));
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned prevWriteMask = 0;
+
+      /* Give up if we encounter relative addressing or flow control. */
+      if (inst->dst.reladdr ||
+          tgsi_get_opcode_info(inst->op)->is_branch ||
+          inst->op == TGSI_OPCODE_BGNSUB ||
+          inst->op == TGSI_OPCODE_CONT ||
+          inst->op == TGSI_OPCODE_END ||
+          inst->op == TGSI_OPCODE_ENDSUB ||
+          inst->op == TGSI_OPCODE_RET) {
+         return;
+      }
+
+      if (inst->dst.file == PROGRAM_OUTPUT) {
+         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
+         prevWriteMask = outputWrites[inst->dst.index];
+         outputWrites[inst->dst.index] |= inst->dst.writemask;
+      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
+         assert(inst->dst.index < MAX_TEMPS);
+         prevWriteMask = tempWrites[inst->dst.index];
+         tempWrites[inst->dst.index] |= inst->dst.writemask;
+      }
+
+      /* For a CMP to be considered a conditional write, the destination
+       * register and source register two must be the same. */
+      if (inst->op == TGSI_OPCODE_CMP
+          && !(inst->dst.writemask & prevWriteMask)
+          && inst->src[2].file == inst->dst.file
+          && inst->src[2].index == inst->dst.index
+          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
+
+         inst->op = TGSI_OPCODE_MOV;
+         inst->src[0] = inst->src[1];
+      }
+   }
+}
+
+/* Replaces all references to a temporary register index with another index. */
+void
+glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
+{
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned j;
+      
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+             inst->src[j].index == index) {
+            inst->src[j].index = new_index;
+         }
+      }
+      
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         inst->dst.index = new_index;
+      }
+   }
+}
+
+int
+glsl_to_tgsi_visitor::get_first_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+   unsigned i = 0, j;
+   
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+             inst->src[j].index == index) {
+            return (depth == 0) ? i : loop_start;
+         }
+      }
+      
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
+         if(depth++ == 0)
+            loop_start = i;
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
+         if (--depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+      
+      i++;
+   }
+   
+   return -1;
+}
+
+int
+glsl_to_tgsi_visitor::get_first_temp_write(int index)
+{
+   int depth = 0; /* loop depth */
+   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+   int i = 0;
+   
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         return (depth == 0) ? i : loop_start;
+      }
+      
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
+         if(depth++ == 0)
+            loop_start = i;
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
+         if (--depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+      
+      i++;
+   }
+   
+   return -1;
+}
+
+int
+glsl_to_tgsi_visitor::get_last_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int last = -1; /* index of last instruction that reads the temporary */
+   unsigned i = 0, j;
+   
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+             inst->src[j].index == index) {
+            last = (depth == 0) ? i : -2;
+         }
+      }
+      
+      if (inst->op == TGSI_OPCODE_BGNLOOP)
+         depth++;
+      else if (inst->op == TGSI_OPCODE_ENDLOOP)
+         if (--depth == 0 && last == -2)
+            last = i;
+      assert(depth >= 0);
+      
+      i++;
+   }
+   
+   assert(last >= -1);
+   return last;
+}
+
+int
+glsl_to_tgsi_visitor::get_last_temp_write(int index)
+{
+   int depth = 0; /* loop depth */
+   int last = -1; /* index of last instruction that writes to the temporary */
+   int i = 0;
+   
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
+         last = (depth == 0) ? i : -2;
+      
+      if (inst->op == TGSI_OPCODE_BGNLOOP)
+         depth++;
+      else if (inst->op == TGSI_OPCODE_ENDLOOP)
+         if (--depth == 0 && last == -2)
+            last = i;
+      assert(depth >= 0);
+      
+      i++;
+   }
+   
+   assert(last >= -1);
+   return last;
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY register
+ * channels for copy propagation and updates following instructions to
+ * use the original versions.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
+ *
+ * and after:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * which allows for dead code elimination on TEMP[1]'s writes.
+ */
+void
+glsl_to_tgsi_visitor::copy_propagate(void)
+{
+   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
+        					    glsl_to_tgsi_instruction *,
+        					    this->next_temp * 4);
+   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      /* First, do any copy propagation possible into the src regs. */
+      for (int r = 0; r < 3; r++) {
+         glsl_to_tgsi_instruction *first = NULL;
+         bool good = true;
+         int acp_base = inst->src[r].index * 4;
+
+         if (inst->src[r].file != PROGRAM_TEMPORARY ||
+             inst->src[r].reladdr)
+            continue;
+
+         /* See if we can find entries in the ACP consisting of MOVs
+          * from the same src register for all the swizzled channels
+          * of this src register reference.
+          */
+         for (int i = 0; i < 4; i++) {
+            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
+
+            if (!copy_chan) {
+               good = false;
+               break;
+            }
+
+            assert(acp_level[acp_base + src_chan] <= level);
+
+            if (!first) {
+               first = copy_chan;
+            } else {
+               if (first->src[0].file != copy_chan->src[0].file ||
+        	   first->src[0].index != copy_chan->src[0].index) {
+        	  good = false;
+        	  break;
+               }
+            }
+         }
+
+         if (good) {
+            /* We've now validated that we can copy-propagate to
+             * replace this src register reference.  Do it.
+             */
+            inst->src[r].file = first->src[0].file;
+            inst->src[r].index = first->src[0].index;
+
+            int swizzle = 0;
+            for (int i = 0; i < 4; i++) {
+               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
+               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
+        		   (3 * i));
+            }
+            inst->src[r].swizzle = swizzle;
+         }
+      }
+
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the ACP entirely. */
+         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+      case TGSI_OPCODE_ELSE:
+         /* Clear all channels written inside the block from the ACP, but
+          * leaving those that were not touched.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!acp[4 * r + c])
+        	  continue;
+
+               if (acp_level[4 * r + c] >= level)
+        	  acp[4 * r + c] = NULL;
+            }
+         }
+         if (inst->op == TGSI_OPCODE_ENDIF)
+            --level;
+         break;
+
+      default:
+         /* Continuing the block, clear any written channels from
+          * the ACP.
+          */
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
+            /* Any temporary might be written, so no copy propagation
+             * across this instruction.
+             */
+            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         } else if (inst->dst.file == PROGRAM_OUTPUT &&
+        	    inst->dst.reladdr) {
+            /* Any output might be written, so no copy propagation
+             * from outputs across this instruction.
+             */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+        	  if (!acp[4 * r + c])
+        	     continue;
+
+        	  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
+        	     acp[4 * r + c] = NULL;
+               }
+            }
+         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
+        	    inst->dst.file == PROGRAM_OUTPUT) {
+            /* Clear where it's used as dst. */
+            if (inst->dst.file == PROGRAM_TEMPORARY) {
+               for (int c = 0; c < 4; c++) {
+        	  if (inst->dst.writemask & (1 << c)) {
+        	     acp[4 * inst->dst.index + c] = NULL;
+        	  }
+               }
+            }
+
+            /* Clear where it's used as src. */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+        	  if (!acp[4 * r + c])
+        	     continue;
+
+        	  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
+
+        	  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
+        	      acp[4 * r + c]->src[0].index == inst->dst.index &&
+        	      inst->dst.writemask & (1 << src_chan))
+        	  {
+        	     acp[4 * r + c] = NULL;
+        	  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this is a copy, add it to the ACP. */
+      if (inst->op == TGSI_OPCODE_MOV &&
+          inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate &&
+          !inst->src[0].reladdr &&
+          !inst->src[0].negate) {
+         for (int i = 0; i < 4; i++) {
+            if (inst->dst.writemask & (1 << i)) {
+               acp[4 * inst->dst.index + i] = inst;
+               acp_level[4 * inst->dst.index + i] = level;
+            }
+         }
+      }
+   }
+
+   ralloc_free(acp_level);
+   ralloc_free(acp);
+}
+
+/*
+ * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production after copy propagation but 
+ * before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * and after this pass:
+ *
+ * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ * 
+ * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
+ * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
+ */
+void
+glsl_to_tgsi_visitor::eliminate_dead_code(void)
+{
+   int i;
+   
+   for (i=0; i < this->next_temp; i++) {
+      int last_read = get_last_temp_read(i);
+      int j = 0;
+      
+      foreach_iter(exec_list_iterator, iter, this->instructions) {
+         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
+             j > last_read)
+         {
+            iter.remove();
+            delete inst;
+         }
+         
+         j++;
+      }
+   }
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
+ * code elimination.  This is less primitive than eliminate_dead_code(), as it
+ * is per-channel and can detect consecutive writes without a read between them
+ * as dead code.  However, there is some dead code that can be eliminated by 
+ * eliminate_dead_code() but not this function - for example, this function 
+ * cannot eliminate an instruction writing to a register that is never read and
+ * is the only instruction writing to that register.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
+ */
+int
+glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+{
+   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
+                                                     glsl_to_tgsi_instruction *,
+                                                     this->next_temp * 4);
+   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+   int removed = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+      
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the write array entirely.
+          * FIXME: This keeps us from killing dead code when the writes are
+          * on either side of a loop, even when the register isn't touched
+          * inside the loop.
+          */
+         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+         --level;
+         break;
+
+      case TGSI_OPCODE_ELSE:
+         /* Clear all channels written inside the preceding if block from the
+          * write array, but leave those that were not touched.
+          *
+          * FIXME: This destroys opportunities to remove dead code inside of
+          * IF blocks that are followed by an ELSE block.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!writes[4 * r + c])
+        	         continue;
+
+               if (write_level[4 * r + c] >= level)
+        	         writes[4 * r + c] = NULL;
+            }
+         }
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         /* fallthrough to default case to mark the condition as read */
+      
+      default:
+         /* Continuing the block, clear any channels from the write array that
+          * are read by this instruction.
+          */
+         for (unsigned i = 0; i < Elements(inst->src); i++) {
+            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
+               /* Any temporary might be read, so no dead code elimination 
+                * across this instruction.
+                */
+               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
+               /* Clear where it's used as src. */
+               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
+               
+               for (int c = 0; c < 4; c++) {
+              	   if (src_chans & (1 << c)) {
+              	      writes[4 * inst->src[i].index + c] = NULL;
+              	   }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this instruction writes to a temporary, add it to the write array.
+       * If there is already an instruction in the write array for one or more
+       * of the channels, flag that channel write as dead.
+       */
+      if (inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate) {
+         for (int c = 0; c < 4; c++) {
+            if (inst->dst.writemask & (1 << c)) {
+               if (writes[4 * inst->dst.index + c]) {
+                  if (write_level[4 * inst->dst.index + c] < level)
+                     continue;
+                  else
+                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
+               }
+               writes[4 * inst->dst.index + c] = inst;
+               write_level[4 * inst->dst.index + c] = level;
+            }
+         }
+      }
+   }
+
+   /* Anything still in the write array at this point is dead code. */
+   for (int r = 0; r < this->next_temp; r++) {
+      for (int c = 0; c < 4; c++) {
+         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
+         if (inst)
+            inst->dead_mask |= (1 << c);
+      }
+   }
+
+   /* Now actually remove the instructions that are completely dead and update
+    * the writemask of other instructions with dead channels.
+    */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      if (!inst->dead_mask || !inst->dst.writemask)
+         continue;
+      else if (inst->dead_mask == inst->dst.writemask) {
+         iter.remove();
+         delete inst;
+         removed++;
+      } else
+         inst->dst.writemask &= ~(inst->dead_mask);
+   }
+
+   ralloc_free(write_level);
+   ralloc_free(writes);
+   
+   return removed;
+}
+
+/* Merges temporary registers together where possible to reduce the number of 
+ * registers needed to run a program.
+ * 
+ * Produces optimal code only after copy propagation and dead code elimination 
+ * have been run. */
+void
+glsl_to_tgsi_visitor::merge_registers(void)
+{
+   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
+   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
+   int i, j;
+   
+   /* Read the indices of the last read and first write to each temp register
+    * into an array so that we don't have to traverse the instruction list as 
+    * much. */
+   for (i=0; i < this->next_temp; i++) {
+      last_reads[i] = get_last_temp_read(i);
+      first_writes[i] = get_first_temp_write(i);
+   }
+   
+   /* Start looking for registers with non-overlapping usages that can be 
+    * merged together. */
+   for (i=0; i < this->next_temp; i++) {
+      /* Don't touch unused registers. */
+      if (last_reads[i] < 0 || first_writes[i] < 0) continue;
+      
+      for (j=0; j < this->next_temp; j++) {
+         /* Don't touch unused registers. */
+         if (last_reads[j] < 0 || first_writes[j] < 0) continue;
+         
+         /* We can merge the two registers if the first write to j is after or 
+          * in the same instruction as the last read from i.  Note that the 
+          * register at index i will always be used earlier or at the same time 
+          * as the register at index j. */
+         if (first_writes[i] <= first_writes[j] && 
+             last_reads[i] <= first_writes[j])
+         {
+            rename_temp_register(j, i); /* Replace all references to j with i.*/
+            
+            /* Update the first_writes and last_reads arrays with the new 
+             * values for the merged register index, and mark the newly unused 
+             * register index as such. */
+            last_reads[i] = last_reads[j];
+            first_writes[j] = -1;
+            last_reads[j] = -1;
+         }
+      }
+   }
+   
+   ralloc_free(last_reads);
+   ralloc_free(first_writes);
+}
+
+/* Reassign indices to temporary registers by reusing unused indices created 
+ * by optimization passes. */
+void
+glsl_to_tgsi_visitor::renumber_registers(void)
+{
+   int i = 0;
+   int new_index = 0;
+   
+   for (i=0; i < this->next_temp; i++) {
+      if (get_first_temp_read(i) < 0) continue;
+      if (i != new_index)
+         rename_temp_register(i, new_index);
+      new_index++;
+   }
+   
+   this->next_temp = new_index;
+}
+
+/**
+ * Returns a fragment program which implements the current pixel transfer ops.
+ * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
+ */
+extern "C" void
+get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                           glsl_to_tgsi_visitor *original,
+                           int scale_and_bias, int pixel_maps)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+
+   /*
+    * Get initial pixel color from the texture.
+    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
+    */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = 0;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
+   v->samplers_used |= (1 << 0);
+
+   if (scale_and_bias) {
+      static const gl_state_index scale_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_SCALE,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      static const gl_state_index bias_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_BIAS,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      GLint scale_p, bias_p;
+      st_src_reg scale, bias;
+
+      scale_p = _mesa_add_state_reference(params, scale_state);
+      bias_p = _mesa_add_state_reference(params, bias_state);
+
+      /* MAD colorTemp, colorTemp, scale, bias; */
+      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
+      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
+      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
+   }
+
+   if (pixel_maps) {
+      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+      st_dst_reg temp_dst = st_dst_reg(temp);
+
+      assert(st->pixel_xfer.pixelmap_texture);
+
+      /* With a little effort, we can do four pixel map look-ups with
+       * two TEX instructions:
+       */
+
+      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
+      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
+      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
+      v->samplers_used |= (1 << 1);
+
+      /* MOV colorTemp, temp; */
+      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
+   }
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT &&
+             src_regs[i].index == FRAG_ATTRIB_COL0)
+         {
+            src_regs[i].file = PROGRAM_TEMPORARY;
+            src_regs[i].index = src0.index;
+         }
+         else if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_combine_parameter_lists(params,
+                                                    original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   _mesa_free_parameter_list(params);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
+/**
+ * Make fragment program for glBitmap:
+ *   Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ *
+ * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
+ */
+extern "C" void
+get_bitmap_visitor(struct st_fragment_program *fp,
+                   glsl_to_tgsi_visitor *original, int samplerIndex)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+
+   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = samplerIndex;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
+   v->samplers_used |= (1 << samplerIndex);
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
+   src0.negate = NEGATE_XYZW;
+   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+      src0.swizzle = SWIZZLE_XXXX;
+   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
+/* ------------------------- TGSI conversion stuff -------------------------- */
+struct label {
+   unsigned branch_target;
+   unsigned token;
+};
+
+/**
+ * Intermediate state used during shader translation.
+ */
+struct st_translate {
+   struct ureg_program *ureg;
+
+   struct ureg_dst temps[MAX_TEMPS];
+   struct ureg_src *constants;
+   struct ureg_src *immediates;
+   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst address[1];
+   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+
+   /* Extra info for handling point size clamping in vertex shader */
+   struct ureg_dst pointSizeResult; /**< Actual point size output register */
+   struct ureg_src pointSizeConst;  /**< Point size range constant register */
+   GLint pointSizeOutIndex;         /**< Temp point size output register */
+   GLboolean prevInstWrotePointSize;
+
+   const GLuint *inputMapping;
+   const GLuint *outputMapping;
+
+   /* For every instruction that contains a label (eg CALL), keep
+    * details so that we can go back afterwards and emit the correct
+    * tgsi instruction number for each label.
+    */
+   struct label *labels;
+   unsigned labels_size;
+   unsigned labels_count;
+
+   /* Keep a record of the tgsi instruction number that each mesa
+    * instruction starts at, will be used to fix up labels after
+    * translation.
+    */
+   unsigned *insn;
+   unsigned insn_size;
+   unsigned insn_count;
+
+   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
+
+   boolean error;
+};
+
+/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
+static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
+   TGSI_SEMANTIC_FACE,
+   TGSI_SEMANTIC_INSTANCEID
+};
+
+/**
+ * Make note of a branch to a label in the TGSI code.
+ * After we've emitted all instructions, we'll go over the list
+ * of labels built here and patch the TGSI code with the actual
+ * location of each label.
+ */
+static unsigned *get_label(struct st_translate *t, unsigned branch_target)
+{
+   unsigned i;
+
+   if (t->labels_count + 1 >= t->labels_size) {
+      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
+      t->labels = (struct label *)realloc(t->labels, 
+                                          t->labels_size * sizeof(struct label));
+      if (t->labels == NULL) {
+         static unsigned dummy;
+         t->error = TRUE;
+         return &dummy;
+      }
+   }
+
+   i = t->labels_count++;
+   t->labels[i].branch_target = branch_target;
+   return &t->labels[i].token;
+}
+
+/**
+ * Called prior to emitting the TGSI code for each instruction.
+ * Allocate additional space for instructions if needed.
+ * Update the insn[] array so the next glsl_to_tgsi_instruction points to
+ * the next TGSI instruction.
+ */
+static void set_insn_start(struct st_translate *t, unsigned start)
+{
+   if (t->insn_count + 1 >= t->insn_size) {
+      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
+      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
+      if (t->insn == NULL) {
+         t->error = TRUE;
+         return;
+      }
+   }
+
+   t->insn[t->insn_count++] = start;
+}
+
+/**
+ * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
+ */
+static struct ureg_src
+emit_immediate(struct st_translate *t,
+               gl_constant_value values[4],
+               int type, int size)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   switch(type)
+   {
+   case GL_FLOAT:
+      return ureg_DECL_immediate(ureg, &values[0].f, size);
+   case GL_INT:
+      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
+   case GL_UNSIGNED_INT:
+   case GL_BOOL:
+      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
+   default:
+      assert(!"should not get here - type must be float, int, uint, or bool");
+      return ureg_src_undef();
+   }
+}
+
+/**
+ * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
+ */
+static struct ureg_dst
+dst_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
+{
+   switch(file) {
+   case PROGRAM_UNDEFINED:
+      return ureg_dst_undef();
+
+   case PROGRAM_TEMPORARY:
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
+
+      return t->temps[index];
+
+   case PROGRAM_OUTPUT:
+      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
+         t->prevInstWrotePointSize = GL_TRUE;
+
+      if (t->procType == TGSI_PROCESSOR_VERTEX)
+         assert(index < VERT_RESULT_MAX);
+      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
+         assert(index < FRAG_RESULT_MAX);
+      else
+         assert(index < GEOM_RESULT_MAX);
+
+      assert(t->outputMapping[index] < Elements(t->outputs));
+
+      return t->outputs[t->outputMapping[index]];
+
+   case PROGRAM_ADDRESS:
+      return t->address[index];
+
+   default:
+      assert(!"unknown dst register file");
+      return ureg_dst_undef();
+   }
+}
+
+/**
+ * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
+ */
+static struct ureg_src
+src_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
+{
+   switch(file) {
+   case PROGRAM_UNDEFINED:
+      return ureg_src_undef();
+
+   case PROGRAM_TEMPORARY:
+      assert(index >= 0);
+      assert(index < Elements(t->temps));
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
+      return ureg_src(t->temps[index]);
+
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_UNIFORM:
+      assert(index >= 0);
+      return t->constants[index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:       /* ie, immediate */
+      if (index < 0)
+         return ureg_DECL_constant(t->ureg, 0);
+      else
+         return t->constants[index];
+
+   case PROGRAM_IMMEDIATE:
+      return t->immediates[index];
+
+   case PROGRAM_INPUT:
+      assert(t->inputMapping[index] < Elements(t->inputs));
+      return t->inputs[t->inputMapping[index]];
+
+   case PROGRAM_OUTPUT:
+      assert(t->outputMapping[index] < Elements(t->outputs));
+      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
+
+   case PROGRAM_ADDRESS:
+      return ureg_src(t->address[index]);
+
+   case PROGRAM_SYSTEM_VALUE:
+      assert(index < Elements(t->systemValues));
+      return t->systemValues[index];
+
+   default:
+      assert(!"unknown src register file");
+      return ureg_src_undef();
+   }
+}
+
+/**
+ * Create a TGSI ureg_dst register from an st_dst_reg.
+ */
+static struct ureg_dst
+translate_dst(struct st_translate *t,
+              const st_dst_reg *dst_reg,
+              bool saturate)
+{
+   struct ureg_dst dst = dst_register(t, 
+                                      dst_reg->file,
+                                      dst_reg->index);
+
+   dst = ureg_writemask(dst, dst_reg->writemask);
+   
+   if (saturate)
+      dst = ureg_saturate(dst);
+
+   if (dst_reg->reladdr != NULL)
+      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
+
+   return dst;
+}
+
+/**
+ * Create a TGSI ureg_src register from an st_src_reg.
+ */
+static struct ureg_src
+translate_src(struct st_translate *t, const st_src_reg *src_reg)
+{
+   struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
+
+   src = ureg_swizzle(src,
+                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 3) & 0x3);
+
+   if ((src_reg->negate & 0xf) == NEGATE_XYZW)
+      src = ureg_negate(src);
+
+   if (src_reg->reladdr != NULL) {
+      /* Normally ureg_src_indirect() would be used here, but a stupid compiler 
+       * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 
+       * set the bit for src.Negate.  So we have to do the operation manually
+       * here to work around the compiler's problems. */
+      /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
+      struct ureg_src addr = ureg_src(t->address[0]);
+      src.Indirect = 1;
+      src.IndirectFile = addr.File;
+      src.IndirectIndex = addr.Index;
+      src.IndirectSwizzle = addr.SwizzleX;
+      
+      if (src_reg->file != PROGRAM_INPUT &&
+          src_reg->file != PROGRAM_OUTPUT) {
+         /* If src_reg->index was negative, it was set to zero in
+          * src_register().  Reassign it now.  But don't do this
+          * for input/output regs since they get remapped while
+          * const buffers don't.
+          */
+         src.Index = src_reg->index;
+      }
+   }
+
+   return src;
+}
+
+static void
+compile_tgsi_instruction(struct st_translate *t,
+                         const glsl_to_tgsi_instruction *inst)
+{
+   struct ureg_program *ureg = t->ureg;
+   GLuint i;
+   struct ureg_dst dst[1];
+   struct ureg_src src[4];
+   unsigned num_dst;
+   unsigned num_src;
+
+   num_dst = num_inst_dst_regs(inst->op);
+   num_src = num_inst_src_regs(inst->op);
+
+   if (num_dst) 
+      dst[0] = translate_dst(t, 
+                             &inst->dst,
+                             inst->saturate);
+
+   for (i = 0; i < num_src; i++) 
+      src[i] = translate_src(t, &inst->src[i]);
+
+   switch(inst->op) {
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_CAL:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_IF:
+      assert(num_dst == 0);
+      ureg_label_insn(ureg,
+                      inst->op,
+                      src, num_src,
+                      get_label(t, 
+                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
+      return;
+
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXD:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXF:
+      src[num_src++] = t->samplers[inst->sampler];
+      ureg_tex_insn(ureg,
+                    inst->op,
+                    dst, num_dst, 
+                    translate_texture_target(inst->tex_target, inst->tex_shadow),
+                    src, num_src);
+      return;
+
+   case TGSI_OPCODE_SCS:
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
+      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
+      break;
+
+   default:
+      ureg_insn(ureg,
+                inst->op,
+                dst, num_dst,
+                src, num_src);
+      break;
+   }
+}
+
+/**
+ * Emit the TGSI instructions to adjust the WPOS pixel center convention
+ * Basically, add (adjX, adjY) to the fragment position.
+ */
+static void
+emit_adjusted_wpos(struct st_translate *t,
+                   const struct gl_program *program,
+                   float adjX, float adjY)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
+   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+
+   /* Note that we bias X and Y and pass Z and W through unchanged.
+    * The shader might also use gl_FragCoord.w and .z.
+    */
+   ureg_ADD(ureg, wpos_temp, wpos_input,
+            ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
+
+   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
+}
+
+
+/**
+ * Emit the TGSI instructions for inverting the WPOS y coordinate.
+ * This code is unavoidable because it also depends on whether
+ * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
+ */
+static void
+emit_wpos_inversion(struct st_translate *t,
+                    const struct gl_program *program,
+                    bool invert)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   /* Fragment program uses fragment position input.
+    * Need to replace instances of INPUT[WPOS] with temp T
+    * where T = INPUT[WPOS] by y is inverted.
+    */
+   static const gl_state_index wposTransformState[STATE_LENGTH]
+      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 
+          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+   
+   /* XXX: note we are modifying the incoming shader here!  Need to
+    * do this before emitting the constant decls below, or this
+    * will be missed:
+    */
+   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
+                                                       wposTransformState);
+
+   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
+   struct ureg_dst wpos_temp;
+   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+
+   /* MOV wpos_temp, input[wpos]
+    */
+   if (wpos_input.File == TGSI_FILE_TEMPORARY)
+      wpos_temp = ureg_dst(wpos_input);
+   else {
+      wpos_temp = ureg_DECL_temporary(ureg);
+      ureg_MOV(ureg, wpos_temp, wpos_input);
+   }
+
+   if (invert) {
+      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
+       */
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 0),
+               ureg_scalar(wpostrans, 1));
+   } else {
+      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
+       */
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 2),
+               ureg_scalar(wpostrans, 3));
+   }
+
+   /* Use wpos_temp as position input from here on:
+    */
+   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
+}
+
+
+/**
+ * Emit fragment position/ooordinate code.
+ */
+static void
+emit_wpos(struct st_context *st,
+          struct st_translate *t,
+          const struct gl_program *program,
+          struct ureg_program *ureg)
+{
+   const struct gl_fragment_program *fp =
+      (const struct gl_fragment_program *) program;
+   struct pipe_screen *pscreen = st->pipe->screen;
+   boolean invert = FALSE;
+
+   if (fp->OriginUpperLeft) {
+      /* Fragment shader wants origin in upper-left */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
+         /* the driver supports upper-left origin */
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
+         /* the driver supports lower-left origin, need to invert Y */
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+         invert = TRUE;
+      }
+      else
+         assert(0);
+   }
+   else {
+      /* Fragment shader wants origin in lower-left */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
+         /* the driver supports lower-left origin */
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
+         /* the driver supports upper-left origin, need to invert Y */
+         invert = TRUE;
+      else
+         assert(0);
+   }
+   
+   if (fp->PixelCenterInteger) {
+      /* Fragment shader wants pixel center integer */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
+         /* the driver supports pixel center integer */
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
+         /* the driver supports pixel center half integer, need to bias X,Y */
+         emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
+      else
+         assert(0);
+   }
+   else {
+      /* Fragment shader wants pixel center half integer */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
+         /* the driver supports pixel center half integer */
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
+         /* the driver supports pixel center integer, need to bias X,Y */
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+         emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
+      }
+      else
+         assert(0);
+   }
+
+   /* we invert after adjustment so that we avoid the MOV to temporary,
+    * and reuse the adjustment ADD instead */
+   emit_wpos_inversion(t, program, invert);
+}
+
+/**
+ * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
+ * TGSI uses +1 for front, -1 for back.
+ * This function converts the TGSI value to the GL value.  Simply clamping/
+ * saturating the value to [0,1] does the job.
+ */
+static void
+emit_face_var(struct st_translate *t)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
+   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
+
+   /* MOV_SAT face_temp, input[face] */
+   face_temp = ureg_saturate(face_temp);
+   ureg_MOV(ureg, face_temp, face_input);
+
+   /* Use face_temp as face input from here on: */
+   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
+}
+
+static void
+emit_edgeflags(struct st_translate *t)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
+   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
+
+   ureg_MOV(ureg, edge_dst, edge_src);
+}
+
+/**
+ * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
+ * \param program  the program to translate
+ * \param numInputs  number of input registers used
+ * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
+ *                      input indexes
+ * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
+ * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
+ *                            each input
+ * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
+ * \param numOutputs  number of output registers used
+ * \param outputMapping  maps Mesa fragment program outputs to TGSI
+ *                       generic outputs
+ * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
+ * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
+ *                             each output
+ *
+ * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
+ */
+extern "C" enum pipe_error
+st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags)
+{
+   struct st_translate translate, *t;
+   unsigned i;
+   enum pipe_error ret = PIPE_OK;
+
+   assert(numInputs <= Elements(t->inputs));
+   assert(numOutputs <= Elements(t->outputs));
+
+   t = &translate;
+   memset(t, 0, sizeof *t);
+
+   t->procType = procType;
+   t->inputMapping = inputMapping;
+   t->outputMapping = outputMapping;
+   t->ureg = ureg;
+   t->pointSizeOutIndex = -1;
+   t->prevInstWrotePointSize = GL_FALSE;
+
+   /*
+    * Declare input attributes.
+    */
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_fs_input(ureg,
+                                           inputSemanticName[i],
+                                           inputSemanticIndex[i],
+                                           interpMode[i]);
+      }
+
+      if (proginfo->InputsRead & FRAG_BIT_WPOS) {
+         /* Must do this after setting up t->inputs, and before
+          * emitting constant references, below:
+          */
+          emit_wpos(st_context(ctx), t, proginfo, ureg);
+      }
+
+      if (proginfo->InputsRead & FRAG_BIT_FACE)
+         emit_face_var(t);
+
+      /*
+       * Declare output attributes.
+       */
+      for (i = 0; i < numOutputs; i++) {
+         switch (outputSemanticName[i]) {
+         case TGSI_SEMANTIC_POSITION:
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
+            break;
+         case TGSI_SEMANTIC_STENCIL:
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
+            break;
+         case TGSI_SEMANTIC_COLOR:
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_COLOR,
+                                             outputSemanticIndex[i]);
+            break;
+         default:
+            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
+            return PIPE_ERROR_BAD_INPUT;
+         }
+      }
+   }
+   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_gs_input(ureg,
+                                           i,
+                                           inputSemanticName[i],
+                                           inputSemanticIndex[i]);
+      }
+
+      for (i = 0; i < numOutputs; i++) {
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
+      }
+   }
+   else {
+      assert(procType == TGSI_PROCESSOR_VERTEX);
+
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
+      }
+
+      for (i = 0; i < numOutputs; i++) {
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
+         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
+            /* Writing to the point size result register requires special
+             * handling to implement clamping.
+             */
+            static const gl_state_index pointSizeClampState[STATE_LENGTH]
+               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+               /* XXX: note we are modifying the incoming shader here!  Need to
+               * do this before emitting the constant decls below, or this
+               * will be missed.
+               */
+            unsigned pointSizeClampConst =
+               _mesa_add_state_reference(proginfo->Parameters,
+                                         pointSizeClampState);
+            struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
+            t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
+            t->pointSizeResult = t->outputs[i];
+            t->pointSizeOutIndex = i;
+            t->outputs[i] = psizregtemp;
+         }
+      }
+      if (passthrough_edgeflags)
+         emit_edgeflags(t);
+   }
+
+   /* Declare address register.
+    */
+   if (program->num_address_regs > 0) {
+      assert(program->num_address_regs == 1);
+      t->address[0] = ureg_DECL_address(ureg);
+   }
+
+   /* Declare misc input registers
+    */
+   {
+      GLbitfield sysInputs = proginfo->SystemValuesRead;
+      unsigned numSys = 0;
+      for (i = 0; sysInputs; i++) {
+         if (sysInputs & (1 << i)) {
+            unsigned semName = mesa_sysval_to_semantic[i];
+            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
+            numSys++;
+            sysInputs &= ~(1 << i);
+         }
+      }
+   }
+
+   if (program->indirect_addr_temps) {
+      /* If temps are accessed with indirect addressing, declare temporaries
+       * in sequential order.  Else, we declare them on demand elsewhere.
+       * (Note: the number of temporaries is equal to program->next_temp)
+       */
+      for (i = 0; i < (unsigned)program->next_temp; i++) {
+         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
+         t->temps[i] = ureg_DECL_temporary(t->ureg);
+      }
+   }
+
+   /* Emit constants and uniforms.  TGSI uses a single index space for these, 
+    * so we put all the translated regs in t->constants.
+    */
+   if (proginfo->Parameters) {
+      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
+      if (t->constants == NULL) {
+         ret = PIPE_ERROR_OUT_OF_MEMORY;
+         goto out;
+      }
+
+      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
+         switch (proginfo->Parameters->Parameters[i].Type) {
+         case PROGRAM_ENV_PARAM:
+         case PROGRAM_LOCAL_PARAM:
+         case PROGRAM_STATE_VAR:
+         case PROGRAM_NAMED_PARAM:
+         case PROGRAM_UNIFORM:
+            t->constants[i] = ureg_DECL_constant(ureg, i);
+            break;
+
+         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
+          * addressing of the const buffer.
+          * FIXME: Be smarter and recognize param arrays:
+          * indirect addressing is only valid within the referenced
+          * array.
+          */
+         case PROGRAM_CONSTANT:
+            if (program->indirect_addr_consts)
+               t->constants[i] = ureg_DECL_constant(ureg, i);
+            else
+               t->constants[i] = emit_immediate(t,
+                                                proginfo->Parameters->ParameterValues[i],
+                                                proginfo->Parameters->Parameters[i].DataType,
+                                                4);
+            break;
+         default:
+            break;
+         }
+      }
+   }
+   
+   /* Emit immediate values.
+    */
+   t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
+   if (t->immediates == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out;
+   }
+   i = 0;
+   foreach_iter(exec_list_iterator, iter, program->immediates) {
+      immediate_storage *imm = (immediate_storage *)iter.get();
+      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
+   }
+
+   /* texture samplers */
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+      if (program->samplers_used & (1 << i)) {
+         t->samplers[i] = ureg_DECL_sampler(ureg, i);
+      }
+   }
+
+   /* Emit each instruction in turn:
+    */
+   foreach_iter(exec_list_iterator, iter, program->instructions) {
+      set_insn_start(t, ureg_get_instruction_number(ureg));
+      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
+
+      if (t->prevInstWrotePointSize && proginfo->Id) {
+         /* The previous instruction wrote to the (fake) vertex point size
+          * result register.  Now we need to clamp that value to the min/max
+          * point size range, putting the result into the real point size
+          * register.
+          * Note that we can't do this easily at the end of program due to
+          * possible early return.
+          */
+         set_insn_start(t, ureg_get_instruction_number(ureg));
+         ureg_MAX(t->ureg,
+                  ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+      }
+      t->prevInstWrotePointSize = GL_FALSE;
+   }
+
+   /* Fix up all emitted labels:
+    */
+   for (i = 0; i < t->labels_count; i++) {
+      ureg_fixup_label(ureg, t->labels[i].token,
+                       t->insn[t->labels[i].branch_target]);
+   }
+
+out:
+   FREE(t->insn);
+   FREE(t->labels);
+   FREE(t->constants);
+   FREE(t->immediates);
+
+   if (t->error) {
+      debug_printf("%s: translate error flag set\n", __FUNCTION__);
+   }
+
+   return ret;
+}
+/* ----------------------------- End TGSI code ------------------------------ */
+
+/**
+ * Convert a shader's GLSL IR into a Mesa gl_program, although without 
+ * generating Mesa IR.
+ */
+static struct gl_program *
+get_mesa_program(struct gl_context *ctx,
+                 struct gl_shader_program *shader_program,
+        	 struct gl_shader *shader)
+{
+   glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
+   struct gl_program *prog;
+   GLenum target;
+   const char *target_string;
+   bool progress;
+   struct gl_shader_compiler_options *options =
+         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
+
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      target = GL_VERTEX_PROGRAM_ARB;
+      target_string = "vertex";
+      break;
+   case GL_FRAGMENT_SHADER:
+      target = GL_FRAGMENT_PROGRAM_ARB;
+      target_string = "fragment";
+      break;
+   case GL_GEOMETRY_SHADER:
+      target = GL_GEOMETRY_PROGRAM_NV;
+      target_string = "geometry";
+      break;
+   default:
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   validate_ir_tree(shader->ir);
+
+   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
+   if (!prog)
+      return NULL;
+   prog->Parameters = _mesa_new_parameter_list();
+   prog->Varying = _mesa_new_parameter_list();
+   prog->Attributes = _mesa_new_parameter_list();
+   v->ctx = ctx;
+   v->prog = prog;
+   v->shader_program = shader_program;
+   v->options = options;
+   v->glsl_version = ctx->Const.GLSLVersion;
+   v->native_integers = ctx->Const.NativeIntegers;
+
+   add_uniforms_to_parameters_list(shader_program, shader, prog);
+
+   /* Emit intermediate IR for main(). */
+   visit_exec_list(shader->ir, v);
+
+   /* Now emit bodies for any functions that were used. */
+   do {
+      progress = GL_FALSE;
+
+      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
+         function_entry *entry = (function_entry *)iter.get();
+
+         if (!entry->bgn_inst) {
+            v->current_function = entry;
+
+            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
+            entry->bgn_inst->function = entry;
+
+            visit_exec_list(&entry->sig->body, v);
+
+            glsl_to_tgsi_instruction *last;
+            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
+            if (last->op != TGSI_OPCODE_RET)
+               v->emit(NULL, TGSI_OPCODE_RET);
+
+            glsl_to_tgsi_instruction *end;
+            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
+            end->function = entry;
+
+            progress = GL_TRUE;
+         }
+      }
+   } while (progress);
+
+#if 0
+   /* Print out some information (for debugging purposes) used by the 
+    * optimization passes. */
+   for (i=0; i < v->next_temp; i++) {
+      int fr = v->get_first_temp_read(i);
+      int fw = v->get_first_temp_write(i);
+      int lr = v->get_last_temp_read(i);
+      int lw = v->get_last_temp_write(i);
+      
+      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
+      assert(fw <= fr);
+   }
+#endif
+
+   /* Remove reads to output registers, and to varyings in vertex shaders. */
+   v->remove_output_reads(PROGRAM_OUTPUT);
+   if (target == GL_VERTEX_PROGRAM_ARB)
+      v->remove_output_reads(PROGRAM_VARYING);
+   
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
+   v->simplify_cmp();
+   v->copy_propagate();
+   while (v->eliminate_dead_code_advanced());
+
+   /* FIXME: These passes to optimize temporary registers don't work when there
+    * is indirect addressing of the temporary register space.  We need proper 
+    * array support so that we don't have to give up these passes in every 
+    * shader that uses arrays.
+    */
+   if (!v->indirect_addr_temps) {
+      v->eliminate_dead_code();
+      v->merge_registers();
+      v->renumber_registers();
+   }
+   
+   /* Write the END instruction. */
+   v->emit(NULL, TGSI_OPCODE_END);
+
+   if (ctx->Shader.Flags & GLSL_DUMP) {
+      printf("\n");
+      printf("GLSL IR for linked %s program %d:\n", target_string,
+             shader_program->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n");
+      printf("\n");
+   }
+
+   prog->Instructions = NULL;
+   prog->NumInstructions = 0;
+
+   do_set_program_inouts(shader->ir, prog);
+   count_resources(v, prog);
+
+   check_resources(ctx, shader_program, v, prog);
+
+   _mesa_reference_program(ctx, &shader->Program, prog);
+   
+   struct st_vertex_program *stvp;
+   struct st_fragment_program *stfp;
+   struct st_geometry_program *stgp;
+   
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      stvp = (struct st_vertex_program *)prog;
+      stvp->glsl_to_tgsi = v;
+      break;
+   case GL_FRAGMENT_SHADER:
+      stfp = (struct st_fragment_program *)prog;
+      stfp->glsl_to_tgsi = v;
+      break;
+   case GL_GEOMETRY_SHADER:
+      stgp = (struct st_geometry_program *)prog;
+      stgp->glsl_to_tgsi = v;
+      break;
+   default:
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   return prog;
+}
+
+extern "C" {
+
+struct gl_shader *
+st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
+{
+   struct gl_shader *shader;
+   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
+          type == GL_GEOMETRY_SHADER_ARB);
+   shader = rzalloc(NULL, struct gl_shader);
+   if (shader) {
+      shader->Type = type;
+      shader->Name = name;
+      _mesa_init_shader(ctx, shader);
+   }
+   return shader;
+}
+
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name)
+{
+   struct gl_shader_program *shProg;
+   shProg = rzalloc(NULL, struct gl_shader_program);
+   if (shProg) {
+      shProg->Name = name;
+      _mesa_init_shader_program(ctx, shProg);
+   }
+   return shProg;
+}
+
+/**
+ * Link a shader.
+ * Called via ctx->Driver.LinkShader()
+ * This actually involves converting GLSL IR into an intermediate TGSI-like IR 
+ * with code lowering and other optimizations.
+ */
+GLboolean
+st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   assert(prog->LinkStatus);
+
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      bool progress;
+      exec_list *ir = prog->_LinkedShaders[i]->ir;
+      const struct gl_shader_compiler_options *options =
+            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
+
+      do {
+         progress = false;
+
+         /* Lowering */
+         do_mat_op_to_vec(ir);
+         lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+        			 | LOG_TO_LOG2
+        			 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
+
+         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
+
+         progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+
+         progress = lower_quadop_vector(ir, false) || progress;
+
+         if (options->EmitNoIfs) {
+            progress = lower_discard(ir) || progress;
+            progress = lower_if_to_cond_assign(ir) || progress;
+         }
+
+         if (options->EmitNoNoise)
+            progress = lower_noise(ir) || progress;
+
+         /* If there are forms of indirect addressing that the driver
+          * cannot handle, perform the lowering pass.
+          */
+         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
+             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
+           progress =
+             lower_variable_index_to_cond_assign(ir,
+        					 options->EmitNoIndirectInput,
+        					 options->EmitNoIndirectOutput,
+        					 options->EmitNoIndirectTemp,
+        					 options->EmitNoIndirectUniform)
+             || progress;
+
+         progress = do_vec_index_to_cond_assign(ir) || progress;
+      } while (progress);
+
+      validate_ir_tree(ir);
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      struct gl_program *linked_prog;
+
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+
+      if (linked_prog) {
+         bool ok = true;
+
+         switch (prog->_LinkedShaders[i]->Type) {
+         case GL_VERTEX_SHADER:
+            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
+                                     (struct gl_vertex_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_FRAGMENT_SHADER:
+            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
+                                     (struct gl_fragment_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_GEOMETRY_SHADER:
+            _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
+                                     (struct gl_geometry_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
+                                                 linked_prog);
+            break;
+         }
+         if (!ok) {
+            return GL_FALSE;
+         }
+      }
+
+      _mesa_reference_program(ctx, &linked_prog, NULL);
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Link a GLSL shader program.  Called via glLinkProgram().
+ */
+void
+st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   unsigned int i;
+
+   _mesa_clear_shader_program_data(ctx, prog);
+
+   prog->LinkStatus = GL_TRUE;
+
+   for (i = 0; i < prog->NumShaders; i++) {
+      if (!prog->Shaders[i]->CompileStatus) {
+         fail_link(prog, "linking with uncompiled shader");
+         prog->LinkStatus = GL_FALSE;
+      }
+   }
+
+   prog->Varying = _mesa_new_parameter_list();
+   _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
+   _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
+   _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
+
+   if (prog->LinkStatus) {
+      link_shaders(ctx, prog);
+   }
+
+   if (prog->LinkStatus) {
+      if (!ctx->Driver.LinkShader(ctx, prog)) {
+         prog->LinkStatus = GL_FALSE;
+      }
+   }
+
+   set_uniform_initializers(ctx, prog);
+
+   if (ctx->Shader.Flags & GLSL_DUMP) {
+      if (!prog->LinkStatus) {
+         printf("GLSL shader program %d failed to link\n", prog->Name);
+      }
+
+      if (prog->InfoLog && prog->InfoLog[0] != 0) {
+         printf("GLSL shader program %d info log:\n", prog->Name);
+         printf("%s\n", prog->InfoLog);
+      }
+   }
+}
+
+} /* extern "C" */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
new file mode 100644
index 0000000..d877471
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "main/glheader.h"
+#include "tgsi/tgsi_ureg.h"
+
+struct gl_context;
+struct gl_shader;
+struct gl_shader_program;
+struct glsl_to_tgsi_visitor;
+
+enum pipe_error st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   struct glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags);
+
+void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
+void get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                                struct glsl_to_tgsi_visitor *original,
+                                int scale_and_bias, int pixel_maps);
+void get_bitmap_visitor(struct st_fragment_program *fp,
+                        struct glsl_to_tgsi_visitor *original,
+                        int samplerIndex);
+
+struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
+
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name);
+
+void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 7bd82aa..d5228d3 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -587,7 +587,7 @@
          internalFormat = GL_RGB;
 
       texFormat = st_ChooseTextureFormat(ctx, internalFormat,
-                                         GL_RGBA, GL_UNSIGNED_BYTE);
+                                         GL_BGRA, GL_UNSIGNED_BYTE);
 
       _mesa_init_teximage_fields(ctx, target, texImage,
                                  tex->width0, tex->height0, 1, 0,
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index a41e5b1..656c985 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -267,7 +267,7 @@
 /**
  * Map mesa texture target to TGSI texture target.
  */
-static unsigned
+unsigned
 translate_texture_target( GLuint textarget,
                           GLboolean shadow )
 {
@@ -511,7 +511,7 @@
 
 
 
-static unsigned
+unsigned
 translate_opcode( unsigned op )
 {
    switch( op ) {
@@ -1207,7 +1207,7 @@
             else
                t->constants[i] = 
                   ureg_DECL_immediate( ureg,
-                                       program->Parameters->ParameterValues[i],
+                                       (const float*) program->Parameters->ParameterValues[i],
                                        4 );
             break;
          default:
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index 0615e52..0dbdf5f 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -64,6 +64,12 @@
 void
 st_free_tokens(const struct tgsi_token *tokens);
 
+unsigned
+translate_opcode(unsigned op);
+
+unsigned
+translate_texture_target(GLuint textarget, GLboolean shadow);
+
 
 #if defined __cplusplus
 } /* extern "C" */
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 132ebdb..a4f47ed 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -174,8 +174,8 @@
  * \param tokensOut  destination for TGSI tokens
  * \return  pointer to cached pipe_shader object.
  */
-static void
-st_prepare_vertex_program(struct st_context *st,
+void
+st_prepare_vertex_program(struct gl_context *ctx,
                             struct st_vertex_program *stvp)
 {
    GLuint attr;
@@ -184,9 +184,10 @@
    stvp->num_outputs = 0;
 
    if (stvp->Base.IsPositionInvariant)
-      _mesa_insert_mvp_code(st->ctx, &stvp->Base);
+      _mesa_insert_mvp_code(ctx, &stvp->Base);
 
-   assert(stvp->Base.Base.NumInstructions > 1);
+   if (!stvp->glsl_to_tgsi)
+      assert(stvp->Base.Base.NumInstructions > 1);
 
    /*
     * Determine number of inputs, the mappings between VERT_ATTRIB_x
@@ -292,10 +293,13 @@
    enum pipe_error error;
    unsigned num_outputs;
 
-   st_prepare_vertex_program( st, stvp );
+   st_prepare_vertex_program(st->ctx, stvp);
 
-   _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
-   _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+   if (!stvp->glsl_to_tgsi)
+   {
+      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
+      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+   }
 
    ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
    if (ureg == NULL) {
@@ -318,22 +322,41 @@
       debug_printf("\n");
    }
 
-   error = st_translate_mesa_program(st->ctx,
-                                     TGSI_PROCESSOR_VERTEX,
-                                     ureg,
-                                     &stvp->Base.Base,
-                                     /* inputs */
-                                     vpv->num_inputs,
-                                     stvp->input_to_index,
-                                     NULL, /* input semantic name */
-                                     NULL, /* input semantic index */
-                                     NULL,
-                                     /* outputs */
-                                     num_outputs,
-                                     stvp->result_to_output,
-                                     stvp->output_semantic_name,
-                                     stvp->output_semantic_index,
-                                     key->passthrough_edgeflags );
+   if (stvp->glsl_to_tgsi)
+      error = st_translate_program(st->ctx,
+                                   TGSI_PROCESSOR_VERTEX,
+                                   ureg,
+                                   stvp->glsl_to_tgsi,
+                                   &stvp->Base.Base,
+                                   /* inputs */
+                                   stvp->num_inputs,
+                                   stvp->input_to_index,
+                                   NULL, /* input semantic name */
+                                   NULL, /* input semantic index */
+                                   NULL, /* interp mode */
+                                   /* outputs */
+                                   stvp->num_outputs,
+                                   stvp->result_to_output,
+                                   stvp->output_semantic_name,
+                                   stvp->output_semantic_index,
+                                   key->passthrough_edgeflags );
+   else
+      error = st_translate_mesa_program(st->ctx,
+                                        TGSI_PROCESSOR_VERTEX,
+                                        ureg,
+                                        &stvp->Base.Base,
+                                        /* inputs */
+                                        vpv->num_inputs,
+                                        stvp->input_to_index,
+                                        NULL, /* input semantic name */
+                                        NULL, /* input semantic index */
+                                        NULL,
+                                        /* outputs */
+                                        num_outputs,
+                                        stvp->result_to_output,
+                                        stvp->output_semantic_name,
+                                        stvp->output_semantic_index,
+                                        key->passthrough_edgeflags );
 
    if (error)
       goto fail;
@@ -451,6 +474,7 @@
       GLuint attr;
       const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
       struct ureg_program *ureg;
+
       GLboolean write_all = GL_FALSE;
 
       ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
@@ -460,9 +484,9 @@
       ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
       ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
       uint fs_num_outputs = 0;
-
-
-      _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
+      
+      if (!stfp->glsl_to_tgsi)
+         _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
 
       /*
        * Convert Mesa program inputs to TGSI input register semantics.
@@ -605,21 +629,39 @@
       if (write_all == GL_TRUE)
          ureg_property_fs_color0_writes_all_cbufs(ureg, 1);
 
-      st_translate_mesa_program(st->ctx,
-                                TGSI_PROCESSOR_FRAGMENT,
-                                ureg,
-                                &stfp->Base.Base,
-                                /* inputs */
-                                fs_num_inputs,
-                                inputMapping,
-                                input_semantic_name,
-                                input_semantic_index,
-                                interpMode,
-                                /* outputs */
-                                fs_num_outputs,
-                                outputMapping,
-                                fs_output_semantic_name,
-                                fs_output_semantic_index, FALSE );
+      if (stfp->glsl_to_tgsi)
+         st_translate_program(st->ctx,
+                              TGSI_PROCESSOR_FRAGMENT,
+                              ureg,
+                              stfp->glsl_to_tgsi,
+                              &stfp->Base.Base,
+                              /* inputs */
+                              fs_num_inputs,
+                              inputMapping,
+                              input_semantic_name,
+                              input_semantic_index,
+                              interpMode,
+                              /* outputs */
+                              fs_num_outputs,
+                              outputMapping,
+                              fs_output_semantic_name,
+                              fs_output_semantic_index, FALSE );
+      else
+         st_translate_mesa_program(st->ctx,
+                                   TGSI_PROCESSOR_FRAGMENT,
+                                   ureg,
+                                   &stfp->Base.Base,
+                                   /* inputs */
+                                   fs_num_inputs,
+                                   inputMapping,
+                                   input_semantic_name,
+                                   input_semantic_index,
+                                   interpMode,
+                                   /* outputs */
+                                   fs_num_outputs,
+                                   outputMapping,
+                                   fs_output_semantic_name,
+                                   fs_output_semantic_index, FALSE );
 
       stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
       ureg_destroy( ureg );
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index c4244df..699b6e8 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -38,6 +38,7 @@
 #include "program/program.h"
 #include "pipe/p_state.h"
 #include "st_context.h"
+#include "st_glsl_to_tgsi.h"
 
 
 /** Fragment program variant key */
@@ -83,6 +84,7 @@
 struct st_fragment_program
 {
    struct gl_fragment_program Base;
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
    struct pipe_shader_state tgsi;
 
@@ -136,6 +138,7 @@
 struct st_vertex_program
 {
    struct gl_vertex_program Base;  /**< The Mesa vertex program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
    /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
    GLuint input_to_index[VERT_ATTRIB_MAX];
@@ -184,6 +187,7 @@
 struct st_geometry_program
 {
    struct gl_geometry_program Base;  /**< The Mesa geometry program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
    /** map GP input back to VP output */
    GLuint input_map[PIPE_MAX_SHADER_INPUTS];
@@ -276,6 +280,14 @@
                   const struct st_gp_variant_key *key);
 
 
+extern void
+st_prepare_vertex_program(struct gl_context *ctx,
+                          struct st_vertex_program *stvp);
+
+extern GLboolean
+st_prepare_fragment_program(struct gl_context *ctx,
+                            struct st_fragment_program *stfp);
+
 
 extern void
 st_release_vp_variants( struct st_context *st,
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index ffe7e25..232c286 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -221,8 +221,8 @@
 
    DBG("%s \n", __FUNCTION__);
 
-   stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level,
-                                         stImage->face + zoffset,
+   stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->base.Level,
+                                         stImage->base.Face + zoffset,
                                          usage, x, y, w, h);
 
    if (stImage->transfer)
@@ -396,3 +396,23 @@
    }
 }
 
+
+struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *pt;
+   enum pipe_format format;
+   const uint texSize = 256; /* simple, and usually perfect */
+
+   /* find an RGBA texture format */
+   format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE,
+                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
+
+   /* create texture for color map/table */
+   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
+                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
+   return pt;
+}
+
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index d50c3c9..50b7284 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -45,11 +45,6 @@
 {
    struct gl_texture_image base;
 
-   /* These aren't stored in gl_texture_image 
-    */
-   GLuint level;
-   GLuint face;
-
    /* If stImage->pt != NULL, image data is stored here.
     * Else if stImage->base.Data != NULL, image is stored there.
     * Else there is no image data.
@@ -232,4 +227,8 @@
                       struct pipe_resource *src, GLuint srcLevel,
                       GLuint face);
 
+
+extern struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx);
+
 #endif
diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h
index 91d4f7a..77b3ae6 100644
--- a/src/mesa/swrast/s_aatritemp.h
+++ b/src/mesa/swrast/s_aatritemp.h
@@ -181,13 +181,20 @@
       const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
       const GLfloat dxdy = majDx / majDy;
       const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F;
-      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
       GLint iy;
-      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+      for (iy = iyMin; iy < iyMax; iy++) {
+         GLfloat x = pMin[0] - (yMin - iy) * dxdy;
          GLint ix, startX = (GLint) (x - xAdj);
          GLuint count;
          GLfloat coverage = 0.0F;
 
+#ifdef _OPENMP
+         /* each thread needs to use a different (global) SpanArrays variable */
+         span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
          /* skip over fragments with zero coverage */
          while (startX < MAX_WIDTH) {
             coverage = compute_coveragef(pMin, pMid, pMax, startX, iy);
@@ -228,13 +235,12 @@
             coverage = compute_coveragef(pMin, pMid, pMax, ix, iy);
          }
          
-         if (ix <= startX)
-            continue;
-         
-         span.x = startX;
-         span.y = iy;
-         span.end = (GLuint) ix - (GLuint) startX;
-         _swrast_write_rgba_span(ctx, &span);
+         if (ix > startX) {
+            span.x = startX;
+            span.y = iy;
+            span.end = (GLuint) ix - (GLuint) startX;
+            _swrast_write_rgba_span(ctx, &span);
+         }
       }
    }
    else {
@@ -244,13 +250,20 @@
       const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
       const GLfloat dxdy = majDx / majDy;
       const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F;
-      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
       GLint iy;
-      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+      for (iy = iyMin; iy < iyMax; iy++) {
+         GLfloat x = pMin[0] - (yMin - iy) * dxdy;
          GLint ix, left, startX = (GLint) (x + xAdj);
          GLuint count, n;
          GLfloat coverage = 0.0F;
          
+#ifdef _OPENMP
+         /* each thread needs to use a different (global) SpanArrays variable */
+         span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
          /* make sure we're not past the window edge */
          if (startX >= ctx->DrawBuffer->_Xmax) {
             startX = ctx->DrawBuffer->_Xmax - 1;
@@ -296,31 +309,30 @@
          ATTRIB_LOOP_END
 #endif
 
-         if (startX <= ix)
-            continue;
+         if (startX > ix) {
+            n = (GLuint) startX - (GLuint) ix;
 
-         n = (GLuint) startX - (GLuint) ix;
+            left = ix + 1;
 
-         left = ix + 1;
-
-         /* shift all values to the left */
-         /* XXX this is temporary */
-         {
-            SWspanarrays *array = span.array;
-            GLint j;
-            for (j = 0; j < (GLint) n; j++) {
-               array->coverage[j] = array->coverage[j + left];
-               COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
+            /* shift all values to the left */
+            /* XXX this is temporary */
+            {
+               SWspanarrays *array = span.array;
+               GLint j;
+               for (j = 0; j < (GLint) n; j++) {
+                  array->coverage[j] = array->coverage[j + left];
+                  COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
 #ifdef DO_Z
-               array->z[j] = array->z[j + left];
+                  array->z[j] = array->z[j + left];
 #endif
+               }
             }
-         }
 
-         span.x = left;
-         span.y = iy;
-         span.end = n;
-         _swrast_write_rgba_span(ctx, &span);
+            span.x = left;
+            span.y = iy;
+            span.end = n;
+            _swrast_write_rgba_span(ctx, &span);
+         }
       }
    }
 }
diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index def1531..792b528 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -417,84 +417,6 @@
    swrast->BlendFunc( ctx, n, mask, src, dst, chanType );
 }
 
-
-/**
- * Make sure we have texture image data for all the textures we may need
- * for subsequent rendering.
- */
-static void
-_swrast_validate_texture_images(struct gl_context *ctx)
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   GLuint u;
-
-   if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits) {
-      /* no textures enabled, or no way to validate images! */
-      return;
-   }
-
-   for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
-      if (ctx->Texture.Unit[u]._ReallyEnabled) {
-         struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
-         ASSERT(texObj);
-         if (texObj) {
-            GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-            GLuint face;
-            for (face = 0; face < numFaces; face++) {
-               GLint lvl;
-               for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
-                  struct gl_texture_image *texImg = texObj->Image[face][lvl];
-                  if (texImg && !texImg->Data) {
-                     swrast->ValidateTextureImage(ctx, texObj, face, lvl);
-                     ASSERT(texObj->Image[face][lvl]->Data);
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-
-/**
- * Free the texture image data attached to all currently enabled
- * textures.  Meant to be called by device drivers when transitioning
- * from software to hardware rendering.
- */
-void
-_swrast_eject_texture_images(struct gl_context *ctx)
-{
-   GLuint u;
-
-   if (!ctx->Texture._EnabledUnits) {
-      /* no textures enabled */
-      return;
-   }
-
-   for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
-      if (ctx->Texture.Unit[u]._ReallyEnabled) {
-         struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
-         ASSERT(texObj);
-         if (texObj) {
-            GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-            GLuint face;
-            for (face = 0; face < numFaces; face++) {
-               GLint lvl;
-               for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
-                  struct gl_texture_image *texImg = texObj->Image[face][lvl];
-                  if (texImg && texImg->Data) {
-                     _mesa_free_texmemory(texImg->Data);
-                     texImg->Data = NULL;
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-
-
 static void
 _swrast_sleep( struct gl_context *ctx, GLbitfield new_state )
 {
@@ -640,7 +562,6 @@
 
       if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM)) {
          _swrast_update_texture_samplers( ctx );
-         _swrast_validate_texture_images(ctx);
       }
 
       if (swrast->NewState & (_NEW_COLOR | _NEW_PROGRAM))
@@ -772,6 +693,11 @@
 {
    GLuint i;
    SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext));
+#ifdef _OPENMP
+   const GLint maxThreads = omp_get_max_threads();
+#else
+   const GLint maxThreads = 1;
+#endif
 
    if (SWRAST_DEBUG) {
       _mesa_debug(ctx, "_swrast_CreateContext\n");
@@ -806,19 +732,25 @@
    for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++)
       swrast->TextureSample[i] = NULL;
 
-   swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays);
+   /* SpanArrays is global and shared by all SWspan instances. However, when
+    * using multiple threads, it is necessary to have one SpanArrays instance
+    * per thread.
+    */
+   swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays));
    if (!swrast->SpanArrays) {
       FREE(swrast);
       return GL_FALSE;
    }
-   swrast->SpanArrays->ChanType = CHAN_TYPE;
+   for(i = 0; i < maxThreads; i++) {
+      swrast->SpanArrays[i].ChanType = CHAN_TYPE;
 #if CHAN_TYPE == GL_UNSIGNED_BYTE
-   swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8;
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8;
 #elif CHAN_TYPE == GL_UNSIGNED_SHORT
-   swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16;
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16;
 #else
-   swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0];
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0];
 #endif
+   }
 
    /* init point span buffer */
    swrast->PointSpan.primitive = GL_POINT;
@@ -826,7 +758,10 @@
    swrast->PointSpan.facing = 0;
    swrast->PointSpan.array = swrast->SpanArrays;
 
-   swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits *
+   /* TexelBuffer is also global and normally shared by all SWspan instances;
+    * when running with multiple threads, create one per thread.
+    */
+   swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
                                            MAX_WIDTH * 4 * sizeof(GLfloat));
    if (!swrast->TexelBuffer) {
       FREE(swrast->SpanArrays);
diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index db102ac..9a91be3 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -212,10 +212,10 @@
 static INLINE void
 interpolate_int_colors(struct gl_context *ctx, SWspan *span)
 {
+#if CHAN_BITS != 32
    const GLuint n = span->end;
    GLuint i;
 
-#if CHAN_BITS != 32
    ASSERT(!(span->arrayMask & SPAN_RGBA));
 #endif
 
diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c
index 5bec71c..fa5093a 100644
--- a/src/mesa/swrast/s_stencil.c
+++ b/src/mesa/swrast/s_stencil.c
@@ -462,7 +462,8 @@
     * Some fragments passed the stencil test, apply depth test to them
     * and apply Zpass and Zfail stencil ops.
     */
-   if (ctx->Depth.Test == GL_FALSE) {
+   if (ctx->Depth.Test == GL_FALSE ||
+       ctx->DrawBuffer->_DepthBuffer == NULL) {
       /*
        * No depth buffer, just apply zpass stencil function to active pixels.
        */
diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c
index 086ed0b..80b9dff 100644
--- a/src/mesa/swrast/s_texcombine.c
+++ b/src/mesa/swrast/s_texcombine.c
@@ -48,7 +48,11 @@
 static INLINE float4_array
 get_texel_array(SWcontext *swrast, GLuint unit)
 {
+#ifdef _OPENMP
+   return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
+#else
    return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
+#endif
 }
 
 
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index b1967e6..86af4b7 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -280,10 +280,9 @@
 	 if (!inputs[i]->BufferObj->Pointer) {
 	    bo[*nr_bo] = inputs[i]->BufferObj;
 	    (*nr_bo)++;
-	    ctx->Driver.MapBuffer(ctx, 
-				  GL_ARRAY_BUFFER,
-				  GL_READ_ONLY_ARB,
-				  inputs[i]->BufferObj);
+	    ctx->Driver.MapBufferRange(ctx, 0, inputs[i]->BufferObj->Size,
+				       GL_MAP_READ_BIT,
+				       inputs[i]->BufferObj);
 	    
 	    assert(inputs[i]->BufferObj->Pointer);
 	 }
@@ -348,18 +347,32 @@
    }
 
    if (ib->obj->Name && !ib->obj->Pointer) {
+      unsigned map_size;
+
+      switch (ib->type) {
+      case GL_UNSIGNED_BYTE:
+	 map_size = ib->count * sizeof(GLubyte);
+	 break;
+      case GL_UNSIGNED_SHORT:
+	 map_size = ib->count * sizeof(GLushort);
+	 break;
+      case GL_UNSIGNED_INT:
+	 map_size = ib->count * sizeof(GLuint);
+	 break;
+      default:
+	 assert(0);
+	 map_size = 0;
+      }
+
       bo[*nr_bo] = ib->obj;
       (*nr_bo)++;
-      ctx->Driver.MapBuffer(ctx, 
-			    GL_ELEMENT_ARRAY_BUFFER,
-			    GL_READ_ONLY_ARB,
-			    ib->obj);
-
+      ptr = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size,
+				       GL_MAP_READ_BIT, ib->obj);
       assert(ib->obj->Pointer);
+   } else {
+      ptr = ib->ptr;
    }
 
-   ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
-
    if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) {
       VB->Elts = (GLuint *) ptr;
    }
@@ -402,9 +415,7 @@
 {
    GLuint i;
    for (i = 0; i < nr_bo; i++) { 
-      ctx->Driver.UnmapBuffer(ctx, 
-			      0, /* target -- I don't see why this would be needed */
-			      bo[i]);
+      ctx->Driver.UnmapBuffer(ctx, bo[i]);
    }
 }
 
diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c
index 18f095f..881d5d5 100644
--- a/src/mesa/tnl/t_pipeline.c
+++ b/src/mesa/tnl/t_pipeline.c
@@ -146,7 +146,17 @@
 	 _tnl_notify_pipeline_output_change( ctx );
    }
 
+#ifndef _OPENMP
+   /* Don't adjust FPU precision mode in case multiple threads are to be used.
+    * This would require that the additional threads also changed the FPU mode
+    * which is quite a mess as this had to be done in all parallelized sections;
+    * otherwise the master thread and all other threads are running in different
+    * modes, producing inconsistent results.
+    * Note that all x64 implementations don't define/use START_FAST_MATH, so
+    * this is "hack" is only used in i386 mode
+    */
    START_FAST_MATH(__tmp);
+#endif
 
    for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
@@ -154,7 +164,9 @@
 	 break;
    }
 
+#ifndef _OPENMP
    END_FAST_MATH(__tmp);
+#endif
 }
 
 
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 2b8d38e..8474c78 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -431,6 +431,24 @@
 #include "vbo_attrib_tmp.h"
 
 
+/**
+ * Flush (draw) vertices.
+ * \param  unmap - leave VBO unmapped after flushing?
+ */
+static void
+vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
+{
+   if (exec->vtx.vert_count || unmap) {
+      vbo_exec_vtx_flush( exec, unmap );
+   }
+
+   if (exec->vtx.vertex_size) {
+      vbo_exec_copy_to_current( exec );
+      reset_attrfv( exec );
+   }
+}
+
+
 #if FEATURE_beginend
 
 
@@ -535,24 +553,6 @@
 
 
 /**
- * Flush (draw) vertices.
- * \param  unmap - leave VBO unmapped after flushing?
- */
-static void
-vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
-{
-   if (exec->vtx.vert_count || unmap) {
-      vbo_exec_vtx_flush( exec, unmap );
-   }
-
-   if (exec->vtx.vertex_size) {
-      vbo_exec_copy_to_current( exec );
-      reset_attrfv( exec );
-   }
-}
-
-
-/**
  * Called via glBegin.
  */
 static void GLAPIENTRY vbo_exec_Begin( GLenum mode )
@@ -947,7 +947,7 @@
    /* Free the vertex buffer.  Unmap first if needed.
     */
    if (_mesa_bufferobj_mapped(exec->vtx.bufferobj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, exec->vtx.bufferobj);
+      ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj);
    }
    _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL);
 }
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index b908d5a..18719d5 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -95,10 +95,25 @@
    GLuint i;
 
    if (_mesa_is_bufferobj(ib->obj)) {
-      const GLvoid *map =
-         ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-                               GL_READ_ONLY, ib->obj);
-      indices = ADD_POINTERS(map, ib->ptr);
+      unsigned map_size;
+
+      switch (ib->type) {
+      case GL_UNSIGNED_INT:
+	 map_size = count * sizeof(GLuint);
+	 break;
+      case GL_UNSIGNED_SHORT:
+	 map_size = count * sizeof(GLushort);
+	 break;
+      case GL_UNSIGNED_BYTE:
+	 map_size = count * sizeof(GLubyte);
+	 break;
+      default:
+	 assert(0);
+	 map_size = 0;
+      }
+
+      indices = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size,
+					   GL_MAP_READ_BIT, ib->obj);
    } else {
       indices = ib->ptr;
    }
@@ -176,7 +191,7 @@
    }
 
    if (_mesa_is_bufferobj(ib->obj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj);
+      ctx->Driver.UnmapBuffer(ctx, ib->obj);
    }
 }
 
@@ -196,8 +211,8 @@
          if (!array->BufferObj->Pointer) {
             /* need to map now */
             array->BufferObj->Pointer =
-               ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
-                                     GL_READ_ONLY, array->BufferObj);
+               ctx->Driver.MapBufferRange(ctx, 0, array->BufferObj->Size,
+					  GL_MAP_READ_BIT, array->BufferObj);
          }
          data = ADD_POINTERS(data, array->BufferObj->Pointer);
       }
@@ -238,7 +253,7 @@
    if (array->Enabled &&
        _mesa_is_bufferobj(array->BufferObj) &&
        _mesa_bufferobj_mapped(array->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, array->BufferObj);
    }
 }
 
@@ -256,10 +271,10 @@
    GLint i, k;
 
    if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
-      elemMap = ctx->Driver.MapBuffer(ctx,
-                                      GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                      GL_READ_ONLY,
-                                      ctx->Array.ElementArrayBufferObj);
+      elemMap = ctx->Driver.MapBufferRange(ctx, 0,
+					   ctx->Array.ElementArrayBufferObj->Size,
+					   GL_MAP_READ_BIT,
+					   ctx->Array.ElementArrayBufferObj);
       elements = ADD_POINTERS(elements, elemMap);
    }
 
@@ -296,8 +311,7 @@
    }
 
    if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-			      ctx->Array.ElementArrayBufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj);
    }
 
    unmap_array_buffer(ctx, &arrayObj->Vertex);
@@ -351,8 +365,8 @@
 	     bufName);
 
       if (bufName) {
-         GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
-                                            GL_READ_ONLY_ARB, bufObj);
+         GLubyte *p = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size,
+						 GL_MAP_READ_BIT, bufObj);
          int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
          float *f = (float *) (p + offset);
          int *k = (int *) f;
@@ -364,7 +378,7 @@
          for (i = 0; i < n; i++) {
             printf("    float[%d] = 0x%08x %f\n", i, k[i], f[i]);
          }
-         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, bufObj);
+         ctx->Driver.UnmapBuffer(ctx, bufObj);
       }
    }
 }
@@ -715,10 +729,11 @@
 static void
 dump_element_buffer(struct gl_context *ctx, GLenum type)
 {
-   const GLvoid *map = ctx->Driver.MapBuffer(ctx,
-                                             GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                             GL_READ_ONLY,
-                                             ctx->Array.ElementArrayBufferObj);
+   const GLvoid *map =
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 ctx->Array.ElementArrayBufferObj->Size,
+				 GL_MAP_READ_BIT,
+				 ctx->Array.ElementArrayBufferObj);
    switch (type) {
    case GL_UNSIGNED_BYTE:
       {
@@ -760,8 +775,7 @@
       ;
    }
 
-   ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-                           ctx->Array.ElementArrayBufferObj);
+   ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj);
 }
 
 
@@ -909,11 +923,10 @@
       if (0)
          _mesa_print_arrays(ctx);
 
-#ifdef DEBUG
       /* 'end' was out of bounds, but now let's check the actual array
        * indexes to see if any of them are out of bounds.
        */
-      {
+      if (0) {
          GLuint max = _mesa_max_buffer_index(ctx, count, type, indices,
                                              ctx->Array.ElementArrayBufferObj);
          if (max >= ctx->Array.ArrayObj->_MaxElement) {
@@ -934,7 +947,6 @@
           * upper bound wrong.
           */
       }
-#endif
 
       /* Set 'end' to the max possible legal value */
       assert(ctx->Array.ArrayObj->_MaxElement >= 1);
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 7e8d860..8ffaaaa 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -260,8 +260,6 @@
 static void
 vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
 {
-   GLenum target = GL_ARRAY_BUFFER_ARB;
-
    if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
       struct gl_context *ctx = exec->ctx;
       
@@ -270,8 +268,7 @@
          GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float);
 
          if (length)
-            ctx->Driver.FlushMappedBufferRange(ctx, target,
-                                               offset, length,
+            ctx->Driver.FlushMappedBufferRange(ctx, offset, length,
                                                exec->vtx.bufferobj);
       }
 
@@ -281,7 +278,7 @@
       assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
       assert(exec->vtx.buffer_ptr != NULL);
       
-      ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
+      ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj);
       exec->vtx.buffer_map = NULL;
       exec->vtx.buffer_ptr = NULL;
       exec->vtx.max_vert = 0;
@@ -296,8 +293,6 @@
 vbo_exec_vtx_map( struct vbo_exec_context *exec )
 {
    struct gl_context *ctx = exec->ctx;
-   const GLenum target = GL_ARRAY_BUFFER_ARB;
-   const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */
    const GLenum accessRange = GL_MAP_WRITE_BIT |  /* for MapBufferRange */
                               GL_MAP_INVALIDATE_RANGE_BIT |
                               GL_MAP_UNSYNCHRONIZED_BIT |
@@ -311,12 +306,10 @@
    assert(!exec->vtx.buffer_map);
    assert(!exec->vtx.buffer_ptr);
 
-   if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 &&
-       ctx->Driver.MapBufferRange) {
+   if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024) {
       /* The VBO exists and there's room for more */
       exec->vtx.buffer_map = 
          (GLfloat *)ctx->Driver.MapBufferRange(ctx, 
-                                               target, 
                                                exec->vtx.buffer_used,
                                                (VBO_VERT_BUFFER_SIZE - 
                                                 exec->vtx.buffer_used),
@@ -329,20 +322,16 @@
       /* Need to allocate a new VBO */
       exec->vtx.buffer_used = 0;
 
-      ctx->Driver.BufferData(ctx, target, 
+      ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB,
                              VBO_VERT_BUFFER_SIZE, 
                              NULL, usage, exec->vtx.bufferobj);
 
 
-      if (ctx->Driver.MapBufferRange)
-         exec->vtx.buffer_map = 
-            (GLfloat *)ctx->Driver.MapBufferRange(ctx, target,
-                                                  0, VBO_VERT_BUFFER_SIZE,
-                                                  accessRange,
-                                                  exec->vtx.bufferobj);
-      if (!exec->vtx.buffer_map)
-         exec->vtx.buffer_map =
-            (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj);
+      exec->vtx.buffer_map =
+	 (GLfloat *)ctx->Driver.MapBufferRange(ctx,
+					       0, VBO_VERT_BUFFER_SIZE,
+					       accessRange,
+					       exec->vtx.bufferobj);
       assert(exec->vtx.buffer_map);
       exec->vtx.buffer_ptr = exec->vtx.buffer_map;
    }
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index 1de290f..a1eab75 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -159,10 +159,8 @@
       void *ptr;
 
       if (map_ib) 
-	 ctx->Driver.MapBuffer(ctx, 
-			       GL_ELEMENT_ARRAY_BUFFER,
-			       GL_READ_ONLY_ARB,
-			       ib->obj);
+	 ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
+				    ib->obj);
 
 
       ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
@@ -183,9 +181,7 @@
       }      
 
       if (map_ib) 
-	 ctx->Driver.UnmapBuffer(ctx, 
-				 GL_ELEMENT_ARRAY_BUFFER,
-				 ib->obj);
+	 ctx->Driver.UnmapBuffer(ctx, ib->obj);
 
       tmp_ib.obj = ctx->Shared->NullBufferObj;
       tmp_ib.ptr = tmp_indices;
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 9041f79..ad36e93 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -232,11 +232,10 @@
    assert(vertex_store->bufferobj);
    assert(!vertex_store->buffer);
    vertex_store->buffer =
-      (GLfloat *) ctx->Driver.MapBuffer(ctx,
-                                        GL_ARRAY_BUFFER_ARB,   /* not used */
-                                        GL_WRITE_ONLY,      /* not used */
-                                        vertex_store->
-                                        bufferobj);
+      (GLfloat *) ctx->Driver.MapBufferRange(ctx, 0,
+					     vertex_store->bufferobj->Size,
+					     GL_MAP_WRITE_BIT,    /* not used */
+					     vertex_store->bufferobj);
 
    assert(vertex_store->buffer);
    return vertex_store->buffer + vertex_store->used;
@@ -247,7 +246,7 @@
 unmap_vertex_store(struct gl_context *ctx,
                    struct vbo_save_vertex_store *vertex_store)
 {
-   ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vertex_store->bufferobj);
+   ctx->Driver.UnmapBuffer(ctx, vertex_store->bufferobj);
    vertex_store->buffer = NULL;
 }
 
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index a37af73..6cda831 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -70,7 +70,7 @@
       else
          offset = node->buffer_offset;
 
-      ctx->Driver.GetBufferSubData( ctx, 0, offset, 
+      ctx->Driver.GetBufferSubData( ctx, offset,
                                     node->vertex_size * sizeof(GLfloat), 
                                     data, node->vertex_store->bufferobj );
 
@@ -217,10 +217,11 @@
 vbo_save_loopback_vertex_list(struct gl_context *ctx,
                               const struct vbo_save_vertex_list *list)
 {
-   const char *buffer = ctx->Driver.MapBuffer(ctx, 
-					      GL_ARRAY_BUFFER_ARB, 
-					      GL_READ_ONLY, /* ? */
-                                              list->vertex_store->bufferobj);
+   const char *buffer =
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 list->vertex_store->bufferobj->Size,
+				 GL_MAP_READ_BIT, /* ? */
+				 list->vertex_store->bufferobj);
 
    vbo_loopback_vertex_list(ctx,
                             (const GLfloat *)(buffer + list->buffer_offset),
@@ -230,8 +231,7 @@
                             list->wrap_count,
                             list->vertex_size);
 
-   ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, 
-			   list->vertex_store->bufferobj);
+   ctx->Driver.UnmapBuffer(ctx, list->vertex_store->bufferobj);
 }
 
 
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index ecca117..40906e3 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -444,7 +444,7 @@
 	 copy->vertex_size += attr_size(copy->array[i]);
       
 	 if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo)) 
-	    ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY, vbo);
+	    ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo);
 
 	 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
 						 copy->array[i]->Ptr);
@@ -459,8 +459,8 @@
     */
    if (_mesa_is_bufferobj(copy->ib->obj) &&
        !_mesa_bufferobj_mapped(copy->ib->obj)) 
-      ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY,
-			    copy->ib->obj);
+      ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
+				 copy->ib->obj);
 
    srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer,
                                            copy->ib->ptr);
@@ -564,14 +564,14 @@
    for (i = 0; i < copy->nr_varying; i++) {
       struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
       if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo)) 
-	 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, vbo);
+	 ctx->Driver.UnmapBuffer(ctx, vbo);
    }
 
    /* Unmap index buffer:
     */
    if (_mesa_is_bufferobj(copy->ib->obj) &&
        _mesa_bufferobj_mapped(copy->ib->obj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, copy->ib->obj);
+      ctx->Driver.UnmapBuffer(ctx, copy->ib->obj);
    }
 }
 
diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S
index 6141e43..5abd5a2 100644
--- a/src/mesa/x86-64/xform4.S
+++ b/src/mesa/x86-64/xform4.S
@@ -118,7 +118,7 @@
 .byte  0x00, 0x00, 0x00, 0x00
 .byte  0x00, 0x00, 0x00, 0x00
 .byte  0x00, 0x00, 0x00, 0x00
-.float 0f+1.0
+.float 1.0
 
 .text
 .align 16