gallium: add point size clamp to implementation limits in vertex shader

The point size min/max registers (unused by mesa state tracker) were removed
since most hardware couldn't do much with them. However, we don't want to have
to rely on hw to do point size clamping correctly to implementation
dependent limits, hence have to do that in the vertex shader. This should also
solve a potential problem with (non-AA) points smaller than 1.0 which according
to OGL still have size 1.0.
Note that OGL point rendering is odd, in particular point sprites are rasterized
differently to points. Some hardware might support those different modes, but in
any case the different clamping values used for smooth/multisampled/sprite
enabled points might help a bit for hw which rasterizes points the same as point
sprites.
Also tweak mesa's ff to vertex shader translation so don't have to clamp twice in
case of point attenuation.
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index 2d1db29..867a552 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -1496,7 +1496,7 @@
 static void build_atten_pointsize( struct tnl_program *p )
 {
    struct ureg eye = get_eye_position_z(p);
-   struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
+   struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
    struct ureg ut = get_temp(p);
diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c
index 058d4bb..460ecd4 100644
--- a/src/mesa/shader/prog_statevars.c
+++ b/src/mesa/shader/prog_statevars.c
@@ -444,6 +444,61 @@
          value[3] = (GLfloat)(ctx->Fog.Density * ONE_DIV_SQRT_LN2);
          return;
 
+      case STATE_POINT_SIZE_CLAMPED:
+         {
+           /* this includes implementation dependent limits, to avoid
+            * another potentially necessary clamp.
+            * Note: for sprites, point smooth (point AA) is ignored
+            * and we'll clamp to MinPointSizeAA and MaxPointSize, because we
+            * expect drivers will want to say their minimum for AA size is 0.0
+            * but for non-AA it's 1.0 (because normal points with size below 1.0
+            * need to get rounded up to 1.0, hence never disappear). GL does
+            * not specify max clamp size for sprites, other than it needs to be
+            * at least as large as max AA size, hence use non-AA size there.
+            */
+            GLfloat minImplSize;
+            GLfloat maxImplSize;
+            if (ctx->Point.PointSprite) {
+               minImplSize = ctx->Const.MinPointSizeAA;
+               maxImplSize = ctx->Const.MaxPointSize;
+            }
+            else if (ctx->Point.SmoothFlag || ctx->Multisample._Enabled) {
+               minImplSize = ctx->Const.MinPointSizeAA;
+               maxImplSize = ctx->Const.MaxPointSizeAA;
+            }
+            else {
+               minImplSize = ctx->Const.MinPointSize;
+               maxImplSize = ctx->Const.MaxPointSize;
+            }
+            value[0] = ctx->Point.Size;
+            value[1] = ctx->Point.MinSize >= minImplSize ? ctx->Point.MinSize : minImplSize;
+            value[2] = ctx->Point.MaxSize <= maxImplSize ? ctx->Point.MaxSize : maxImplSize;
+            value[3] = ctx->Point.Threshold;
+         }
+         return;
+      case STATE_POINT_SIZE_IMPL_CLAMP:
+         {
+           /* for implementation clamp only in vs */
+            GLfloat minImplSize;
+            GLfloat maxImplSize;
+            if (ctx->Point.PointSprite) {
+               minImplSize = ctx->Const.MinPointSizeAA;
+               maxImplSize = ctx->Const.MaxPointSize;
+            }
+            else if (ctx->Point.SmoothFlag || ctx->Multisample._Enabled) {
+               minImplSize = ctx->Const.MinPointSizeAA;
+               maxImplSize = ctx->Const.MaxPointSizeAA;
+            }
+            else {
+               minImplSize = ctx->Const.MinPointSize;
+               maxImplSize = ctx->Const.MaxPointSize;
+            }
+            value[0] = ctx->Point.Size;
+            value[1] = minImplSize;
+            value[2] = maxImplSize;
+            value[3] = ctx->Point.Threshold;
+         }
+         return;
       case STATE_LIGHT_SPOT_DIR_NORMALIZED:
          {
             /* here, state[2] is the light number */
@@ -639,6 +694,9 @@
 	 return _NEW_TEXTURE;
       case STATE_FOG_PARAMS_OPTIMIZED:
 	 return _NEW_FOG;
+      case STATE_POINT_SIZE_CLAMPED:
+      case STATE_POINT_SIZE_IMPL_CLAMP:
+         return _NEW_POINT | _NEW_MULTISAMPLE;
       case STATE_LIGHT_SPOT_DIR_NORMALIZED:
       case STATE_LIGHT_POSITION:
       case STATE_LIGHT_POSITION_NORMALIZED:
@@ -830,6 +888,12 @@
    case STATE_FOG_PARAMS_OPTIMIZED:
       append(dst, "fogParamsOptimized");
       break;
+   case STATE_POINT_SIZE_CLAMPED:
+      append(dst, "pointSizeClamped");
+      break;
+   case STATE_POINT_SIZE_IMPL_CLAMP:
+      append(dst, "pointSizeImplClamp");
+      break;
    case STATE_LIGHT_SPOT_DIR_NORMALIZED:
       append(dst, "lightSpotDirNormalized");
       break;
diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h
index 1180d9e..1753471 100644
--- a/src/mesa/shader/prog_statevars.h
+++ b/src/mesa/shader/prog_statevars.h
@@ -108,6 +108,8 @@
    STATE_NORMAL_SCALE,
    STATE_TEXRECT_SCALE,
    STATE_FOG_PARAMS_OPTIMIZED,  /* for faster fog calc */
+   STATE_POINT_SIZE_CLAMPED,    /* includes implementation dependent size clamp */
+   STATE_POINT_SIZE_IMPL_CLAMP, /* for implementation clamp only in vs */
    STATE_LIGHT_SPOT_DIR_NORMALIZED,   /* pre-normalized spot dir */
    STATE_LIGHT_POSITION,              /* object vs eye space */
    STATE_LIGHT_POSITION_NORMALIZED,   /* object vs eye space */
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 35e0874..7684ccd 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -114,6 +114,11 @@
       = _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH));
    c->MaxPointSizeAA
       = _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH_AA));
+   /* these are not queryable. Note that GL basically mandates a 1.0 minimum
+    * for non-aa sizes, but we can go down to 0.0 for aa points.
+    */
+   c->MinPointSize = 1.0f;
+   c->MinPointSizeAA = 0.0f;
 
    c->MaxTextureMaxAnisotropy
       = _maxf(2.0f, screen->get_paramf(screen, PIPE_CAP_MAX_TEXTURE_ANISOTROPY));
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index e788008..4830a8f 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -57,6 +57,10 @@
    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
    struct ureg_dst address[1];
    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+   struct ureg_dst psizregreal;
+   struct ureg_src pointSizeConst;
+   GLint psizoutindex;
+   GLboolean prevInstWrotePsiz;
 
    const GLuint *inputMapping;
    const GLuint *outputMapping;
@@ -145,6 +149,8 @@
       return t->temps[index];
 
    case PROGRAM_OUTPUT:
+      if (index == t->psizoutindex)
+         t->prevInstWrotePsiz = GL_TRUE;
       return t->outputs[t->outputMapping[index]];
 
    case PROGRAM_ADDRESS:
@@ -787,6 +793,8 @@
    t->inputMapping = inputMapping;
    t->outputMapping = outputMapping;
    t->ureg = ureg;
+   t->psizoutindex = -1;
+   t->prevInstWrotePsiz = GL_FALSE;
 
    /*_mesa_print_program(program);*/
 
@@ -845,6 +853,21 @@
          t->outputs[i] = ureg_DECL_output( ureg,
                                            outputSemanticName[i],
                                            outputSemanticIndex[i] );
+         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
+            static const gl_state_index pointSizeClampState[STATE_LENGTH]
+               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
+               /* XXX: note we are modifying the incoming shader here!  Need to
+               * do this before emitting the constant decls below, or this
+               * will be missed:
+               */
+            unsigned pointSizeClampConst = _mesa_add_state_reference(program->Parameters,
+                                                                     pointSizeClampState);
+            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
+            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+            t->psizregreal = t->outputs[i];
+            t->psizoutindex = i;
+            t->outputs[i] = psizregtemp;
+         }
       }
       if (passthrough_edgeflags)
          emit_edgeflags( t, program );
@@ -910,6 +933,19 @@
    for (i = 0; i < program->NumInstructions; i++) {
       set_insn_start( t, ureg_get_instruction_number( ureg ));
       compile_instruction( t, &program->Instructions[i] );
+
+      /* note can't do that easily at the end of prog due to
+         possible early return */
+      if (t->prevInstWrotePsiz && program->Id) {
+         set_insn_start( t, ureg_get_instruction_number( ureg ));
+         ureg_MAX( t->ureg, ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X),
+                   ureg_src(t->outputs[t->psizoutindex]),
+                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X),
+                   ureg_src(t->outputs[t->psizoutindex]),
+                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+      }
+      t->prevInstWrotePsiz = GL_FALSE;
    }
 
    /* Fix up all emitted labels: