st/mesa: do vertex and fragment color clamping in shaders

For ARB_color_buffer_float. Most hardware can't do it and st/mesa is
the perfect place for a fallback.
The exceptions are:
- r500 (vertex clamp only)
- nv50 (both)
- nvc0 (both)
- softpipe (both)

We also have to take into account that r300 can do CLAMPED vertex colors only,
while r600 can do UNCLAMPED vertex colors only. The difference can be expressed
with the two new CAPs.
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 6fc854f..aef0ed6 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -181,6 +181,7 @@
    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
    case PIPE_CAP_TGSI_INSTANCEID:
+   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
       return 1;
 
    /* Unsupported features (boolean caps). */
@@ -196,12 +197,13 @@
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
    case PIPE_CAP_SCALED_RESOLVE:
-   case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
    case PIPE_CAP_CONDITIONAL_RENDER:
    case PIPE_CAP_TEXTURE_BARRIER:
    case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
    case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
       return 0;
 
    /* Features we can lie about (boolean caps). */
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 7ee90f1..536e064 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -112,7 +112,9 @@
       return 1;
    case PIPE_CAP_MAX_RENDER_TARGETS:
       return 8;
-   case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
       return 1;
    case PIPE_CAP_TIMER_QUERY:
    case PIPE_CAP_OCCLUSION_QUERY:
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index f9ad39d..51166469 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -99,7 +99,9 @@
       return 1;
    case PIPE_CAP_MAX_RENDER_TARGETS:
       return 8;
-   case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
       return 1;
    case PIPE_CAP_TIMER_QUERY:
    case PIPE_CAP_OCCLUSION_QUERY:
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index ead777d..2a3289f 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -72,6 +72,7 @@
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 		return 1;
 	case PIPE_CAP_DEPTH_CLIP_DISABLE:
 		return 0; // TODO: implement depth clamp
@@ -80,7 +81,8 @@
 	case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
 	case PIPE_CAP_TGSI_INSTANCEID:
 	case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
-	case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+	case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+	case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
 	case PIPE_CAP_SEAMLESS_CUBE_MAP:
 	case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
 	case PIPE_CAP_SHADER_STENCIL_EXPORT:
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index cd3c88d..48a6569 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -102,6 +102,7 @@
         case PIPE_CAP_TEXTURE_BARRIER:
         case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
         case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
             return 1;
 
         /* r300 cannot do swizzling of compressed textures. Supported otherwise. */
@@ -109,7 +110,7 @@
             return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1;
 
         /* Supported on r500 only. */
-        case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+        case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
         case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
         case PIPE_CAP_SM3:
             return is_r500 ? 1 : 0;
@@ -135,6 +136,7 @@
         case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
         case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
         case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
             return 0;
 
         /* SWTCL-only features. */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 2bc7036..5d4a895 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1036,7 +1036,8 @@
     float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
     float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */
     float point_texcoord_top = 0;   /* R300_GA_POINT_T1: 0x420c */
-    boolean vclamp = state->clamp_vertex_color;
+    boolean vclamp = state->clamp_vertex_color ||
+                     !r300_context(pipe)->screen->caps.is_r500;
     CB_LOCALS;
 
     /* Copy rasterizer state. */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 3d85160..bd9267f 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -361,11 +361,11 @@
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
 	case PIPE_CAP_SM3:
 	case PIPE_CAP_SEAMLESS_CUBE_MAP:
-	case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
 	case PIPE_CAP_PRIMITIVE_RESTART:
 	case PIPE_CAP_CONDITIONAL_RENDER:
 	case PIPE_CAP_TEXTURE_BARRIER:
 	case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+	case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
 		return 1;
 
 	/* Supported except the original R600. */
@@ -385,6 +385,8 @@
 	case PIPE_CAP_SCALED_RESOLVE:
 	case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
 	case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+	case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 		return 0;
 
 	/* Stream output. */
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 34592a9..37bbc43 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -128,7 +128,9 @@
       return 7;
    case PIPE_CAP_CONDITIONAL_RENDER:
       return 1;
-   case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: /* draw module */
+   case PIPE_CAP_VERTEX_COLOR_CLAMPED: /* draw module */
       return 1;
    default:
       return 0;
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 4425fc9..2cd11f9 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -472,7 +472,7 @@
    PIPE_CAP_SHADER_STENCIL_EXPORT = 42,
    PIPE_CAP_TGSI_INSTANCEID = 43,
    PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR = 44,
-   PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL = 45,
+   PIPE_CAP_FRAGMENT_COLOR_CLAMPED = 45,
    PIPE_CAP_MIXED_COLORBUFFER_FORMATS = 46,
    PIPE_CAP_SEAMLESS_CUBE_MAP = 47,
    PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE = 48,
@@ -485,7 +485,9 @@
    PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS = 56,
    PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME = 57,
    PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS = 58, /* temporary */
-   PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS = 59 /* temporary */
+   PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS = 59, /* temporary */
+   PIPE_CAP_VERTEX_COLOR_UNCLAMPED = 60,
+   PIPE_CAP_VERTEX_COLOR_CLAMPED = 61
 };
 
 /**
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
index f3d28e6..25799bf 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -112,7 +112,8 @@
       raster->light_twoside = 1;
    }
 
-   raster->clamp_vertex_color = ctx->Light._ClampVertexColor;
+   raster->clamp_vertex_color = !st->clamp_vert_color_in_shader &&
+                                ctx->Light._ClampVertexColor;
 
    /* _NEW_POLYGON
     */
@@ -255,7 +256,8 @@
       raster->scissor = 1;
 
    /* _NEW_FRAG_CLAMP */
-   raster->clamp_fragment_color = ctx->Color._ClampFragmentColor;
+   raster->clamp_fragment_color = !st->clamp_frag_color_in_shader &&
+                                  ctx->Color._ClampFragmentColor;
    raster->gl_rasterization_rules = 1;
 
    /* _NEW_RASTERIZER_DISCARD */
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index c311d04..ae34910 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -83,6 +83,10 @@
    memset(&key, 0, sizeof(key));
    key.st = st;
 
+   /* _NEW_FRAG_CLAMP */
+   key.clamp_color = st->clamp_frag_color_in_shader &&
+                     st->ctx->Color._ClampFragmentColor;
+
    st->fp_variant = st_get_fp_variant(st, stfp, &key);
 
    st_reference_fragprog(st, &st->fp, stfp);
@@ -141,6 +145,9 @@
                                 st->ctx->Polygon.FrontMode != GL_FILL ||
                                 st->ctx->Polygon.BackMode != GL_FILL));
 
+   key.clamp_color = st->clamp_vert_color_in_shader &&
+                     st->ctx->Light._ClampVertexColor;
+
    st->vp_variant = st_get_vp_variant(st, stvp, &key);
 
    st_reference_vertprog(st, &st->vp, stvp);
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index a970968..7c0254d 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -450,6 +450,8 @@
    memset(&key, 0, sizeof(key));
    key.st = st;
    key.bitmap = GL_TRUE;
+   key.clamp_color = st->clamp_frag_color_in_shader &&
+                     st->ctx->Color._ClampFragmentColor;
 
    fpv = st_get_fp_variant(st, st->fp, &key);
 
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 13c4f33..386eed2 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -684,7 +684,8 @@
    {
       struct pipe_rasterizer_state rasterizer;
       memset(&rasterizer, 0, sizeof(rasterizer));
-      rasterizer.clamp_fragment_color = ctx->Color._ClampFragmentColor;
+      rasterizer.clamp_fragment_color = !st->clamp_frag_color_in_shader &&
+                                        ctx->Color._ClampFragmentColor;
       rasterizer.gl_rasterization_rules = 1;
       rasterizer.depth_clip = !ctx->Transform.DepthClamp;
       rasterizer.scissor = ctx->Scissor.Enabled;
@@ -1007,6 +1008,8 @@
                        ctx->Pixel.AlphaBias != 0.0 ||
                        ctx->Pixel.AlphaScale != 1.0);
    key.pixelMaps = ctx->Pixel.MapColorFlag;
+   key.clamp_color = st->clamp_frag_color_in_shader &&
+                     st->ctx->Color._ClampFragmentColor;
 
    fpv = st_get_fp_variant(st, st->fp, &key);
 
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index dc1d33f..a3fd4db 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -76,6 +76,17 @@
 {
    struct st_context *st = st_context(ctx);
 
+   /* Replace _NEW_FRAG_CLAMP with ST_NEW_FRAGMENT_PROGRAM for the fallback. */
+   if (st->clamp_frag_color_in_shader && (new_state & _NEW_FRAG_CLAMP)) {
+      new_state &= ~_NEW_FRAG_CLAMP;
+      st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
+   }
+
+   /* Update the vertex shader if ctx->Light._ClampVertexColor was changed. */
+   if (st->clamp_vert_color_in_shader && (new_state & _NEW_LIGHT)) {
+      st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
+   }
+
    st->dirty.mesa |= new_state;
    st->dirty.st |= ST_NEW_MESA;
 
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 9db50b3..da03719 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -76,6 +76,8 @@
    struct draw_stage *selection_stage;  /**< For GL_SELECT rendermode */
    struct draw_stage *rastpos_stage;  /**< For glRasterPos */
    GLboolean sw_primitive_restart;
+   GLboolean clamp_frag_color_in_shader;
+   GLboolean clamp_vert_color_in_shader;
 
 
    /* On old libGL's for linux we need to invalidate the drawables
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 49c8747..1f3b59f 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -607,33 +607,16 @@
       ctx->Extensions.ARB_depth_clamp = GL_TRUE;
    }
 
-   /* This extension does not actually require support of floating point
-    * render targets, just clamping controls.
-    * Advertise this extension if either fragment color clamping is supported
-    * or no render targets having color values outside of the range [0, 1]
-    * are supported, in which case the fragment color clamping has no effect
-    * on rendering.
-    */
-   if (screen->get_param(screen, PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL) ||
-       (!screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SNORM,
-                                     PIPE_TEXTURE_2D, 0,
-                                     PIPE_BIND_RENDER_TARGET) &&
-        !screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_SNORM,
-                                     PIPE_TEXTURE_2D, 0,
-                                     PIPE_BIND_RENDER_TARGET) &&
-        !screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_FLOAT,
-                                     PIPE_TEXTURE_2D, 0,
-                                     PIPE_BIND_RENDER_TARGET) &&
-        !screen->is_format_supported(screen, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                                     PIPE_TEXTURE_2D, 0,
-                                     PIPE_BIND_RENDER_TARGET) &&
-        !screen->is_format_supported(screen, PIPE_FORMAT_R11G11B10_FLOAT,
-                                     PIPE_TEXTURE_2D, 0,
-                                     PIPE_BIND_RENDER_TARGET) &&
-        !screen->is_format_supported(screen, PIPE_FORMAT_R9G9B9E5_FLOAT,
-                                     PIPE_TEXTURE_2D, 0,
-                                     PIPE_BIND_RENDER_TARGET))) {
+   if (screen->get_param(screen, PIPE_CAP_VERTEX_COLOR_UNCLAMPED)) {
       ctx->Extensions.ARB_color_buffer_float = GL_TRUE;
+
+      if (!screen->get_param(screen, PIPE_CAP_VERTEX_COLOR_CLAMPED)) {
+         st->clamp_vert_color_in_shader = TRUE;
+      }
+
+      if (!screen->get_param(screen, PIPE_CAP_FRAGMENT_COLOR_CLAMPED)) {
+         st->clamp_frag_color_in_shader = TRUE;
+      }
    }
 
    if (screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 92dffe2..188e8d9 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4055,7 +4055,7 @@
 static struct ureg_dst
 translate_dst(struct st_translate *t,
               const st_dst_reg *dst_reg,
-              bool saturate)
+              bool saturate, bool clamp_color)
 {
    struct ureg_dst dst = dst_register(t, 
                                       dst_reg->file,
@@ -4065,6 +4065,27 @@
    
    if (saturate)
       dst = ureg_saturate(dst);
+   else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
+      /* Clamp colors for ARB_color_buffer_float. */
+      switch (t->procType) {
+      case TGSI_PROCESSOR_VERTEX:
+         /* XXX if the geometry shader is present, this must be done there
+          * instead of here. */
+         if (dst_reg->index == VERT_RESULT_COL0 ||
+             dst_reg->index == VERT_RESULT_COL1 ||
+             dst_reg->index == VERT_RESULT_BFC0 ||
+             dst_reg->index == VERT_RESULT_BFC1) {
+            dst = ureg_saturate(dst);
+         }
+         break;
+
+      case TGSI_PROCESSOR_FRAGMENT:
+         if (dst_reg->index >= FRAG_RESULT_COLOR) {
+            dst = ureg_saturate(dst);
+         }
+         break;
+      }
+   }
 
    if (dst_reg->reladdr != NULL)
       dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
@@ -4134,7 +4155,8 @@
 
 static void
 compile_tgsi_instruction(struct st_translate *t,
-                         const glsl_to_tgsi_instruction *inst)
+                         const glsl_to_tgsi_instruction *inst,
+                         bool clamp_dst_color_output)
 {
    struct ureg_program *ureg = t->ureg;
    GLuint i;
@@ -4151,7 +4173,8 @@
    if (num_dst) 
       dst[0] = translate_dst(t, 
                              &inst->dst,
-                             inst->saturate);
+                             inst->saturate,
+                             clamp_dst_color_output);
 
    for (i = 0; i < num_src; i++) 
       src[i] = translate_src(t, &inst->src[i]);
@@ -4457,7 +4480,8 @@
    const GLuint outputMapping[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
-   boolean passthrough_edgeflags)
+   boolean passthrough_edgeflags,
+   boolean clamp_color)
 {
    struct st_translate *t;
    unsigned i;
@@ -4697,7 +4721,8 @@
     */
    foreach_iter(exec_list_iterator, iter, program->instructions) {
       set_insn_start(t, ureg_get_instruction_number(ureg));
-      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
+      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
+                               clamp_color);
 
       if (t->prevInstWrotePointSize && proginfo->Id) {
          /* The previous instruction wrote to the (fake) vertex point size
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index 1f71f33..55d59d5 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -49,7 +49,8 @@
    const GLuint outputMapping[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
-   boolean passthrough_edgeflags);
+   boolean passthrough_edgeflags,
+   boolean clamp_color);
 
 void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
 void get_pixel_transfer_visitor(struct st_fragment_program *fp,
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 0764234..fc77089 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -306,7 +306,8 @@
 static struct ureg_dst
 translate_dst( struct st_translate *t,
                const struct prog_dst_register *DstReg,
-               boolean saturate )
+               boolean saturate,
+               boolean clamp_color)
 {
    struct ureg_dst dst = dst_register( t, 
                                        DstReg->File,
@@ -317,6 +318,27 @@
    
    if (saturate)
       dst = ureg_saturate( dst );
+   else if (clamp_color && DstReg->File == PROGRAM_OUTPUT) {
+      /* Clamp colors for ARB_color_buffer_float. */
+      switch (t->procType) {
+      case TGSI_PROCESSOR_VERTEX:
+         /* XXX if the geometry shader is present, this must be done there
+          * instead of here. */
+         if (DstReg->Index == VERT_RESULT_COL0 ||
+             DstReg->Index == VERT_RESULT_COL1 ||
+             DstReg->Index == VERT_RESULT_BFC0 ||
+             DstReg->Index == VERT_RESULT_BFC1) {
+            dst = ureg_saturate(dst);
+         }
+         break;
+
+      case TGSI_PROCESSOR_FRAGMENT:
+         if (DstReg->Index >= FRAG_RESULT_COLOR) {
+            dst = ureg_saturate(dst);
+         }
+         break;
+      }
+   }
 
    if (DstReg->RelAddr)
       dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
@@ -662,7 +684,8 @@
 static void
 compile_instruction(
    struct st_translate *t,
-   const struct prog_instruction *inst )
+   const struct prog_instruction *inst,
+   boolean clamp_dst_color_output)
 {
    struct ureg_program *ureg = t->ureg;
    GLuint i;
@@ -677,7 +700,8 @@
    if (num_dst) 
       dst[0] = translate_dst( t, 
                               &inst->DstReg,
-                              inst->SaturateMode );
+                              inst->SaturateMode,
+                              clamp_dst_color_output);
 
    for (i = 0; i < num_src; i++) 
       src[i] = translate_src( t, &inst->SrcReg[i] );
@@ -1016,7 +1040,8 @@
    const GLuint outputMapping[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
-   boolean passthrough_edgeflags )
+   boolean passthrough_edgeflags,
+   boolean clamp_color)
 {
    struct st_translate translate, *t;
    unsigned i;
@@ -1233,7 +1258,7 @@
     */
    for (i = 0; i < program->NumInstructions; i++) {
       set_insn_start( t, ureg_get_instruction_number( ureg ));
-      compile_instruction( t, &program->Instructions[i] );
+      compile_instruction( t, &program->Instructions[i], clamp_color );
 
       if (t->prevInstWrotePointSize && program->Id) {
          /* The previous instruction wrote to the (fake) vertex point size
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index 7563c80..7e1a5ab 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -59,7 +59,8 @@
    const GLuint outputMapping[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
-   boolean passthrough_edgeflags );
+   boolean passthrough_edgeflags,
+   boolean clamp_color);
 
 void
 st_free_tokens(const struct tgsi_token *tokens);
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 8d08b2b..3ae79dd 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -351,7 +351,8 @@
                                    stvp->result_to_output,
                                    stvp->output_semantic_name,
                                    stvp->output_semantic_index,
-                                   key->passthrough_edgeflags );
+                                   key->passthrough_edgeflags,
+                                   key->clamp_color);
    else
       error = st_translate_mesa_program(st->ctx,
                                         TGSI_PROCESSOR_VERTEX,
@@ -368,7 +369,8 @@
                                         stvp->result_to_output,
                                         stvp->output_semantic_name,
                                         stvp->output_semantic_index,
-                                        key->passthrough_edgeflags );
+                                        key->passthrough_edgeflags,
+                                        key->clamp_color);
 
    if (error)
       goto fail;
@@ -505,7 +507,8 @@
    }
 #endif
 
-   if (!stfp->tgsi.tokens) {
+   /* XXX this will be cleaned up in the following commit */
+   if (1) {
       /* need to translate Mesa instructions to TGSI now */
       GLuint outputMapping[FRAG_RESULT_MAX];
       GLuint inputMapping[FRAG_ATTRIB_MAX];
@@ -718,7 +721,8 @@
                               fs_num_outputs,
                               outputMapping,
                               fs_output_semantic_name,
-                              fs_output_semantic_index, FALSE );
+                              fs_output_semantic_index, FALSE,
+                              key->clamp_color );
       else
          st_translate_mesa_program(st->ctx,
                                    TGSI_PROCESSOR_FRAGMENT,
@@ -734,7 +738,8 @@
                                    fs_num_outputs,
                                    outputMapping,
                                    fs_output_semantic_name,
-                                   fs_output_semantic_index, FALSE );
+                                   fs_output_semantic_index, FALSE,
+                                   key->clamp_color);
 
       stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
       ureg_destroy( ureg );
@@ -1022,6 +1027,7 @@
                              outputMapping,
                              gs_output_semantic_name,
                              gs_output_semantic_index,
+                             FALSE,
                              FALSE);
 
    stgp->num_inputs = gs_num_inputs;
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 699b6e8..0ae3420 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -55,6 +55,9 @@
    GLuint pixelMaps:1;            /**< glDrawPixels w/ pixel lookup map? */
    GLuint drawpixels_z:1;         /**< glDrawPixels(GL_DEPTH) */
    GLuint drawpixels_stencil:1;   /**< glDrawPixels(GL_STENCIL) */
+
+   /** for ARB_color_buffer_float */
+   GLuint clamp_color:1;
 };
 
 
@@ -98,6 +101,9 @@
 {
    struct st_context *st;          /**< variants are per-context */
    boolean passthrough_edgeflags;
+
+   /** for ARB_color_buffer_float */
+   boolean clamp_color;
 };