mesa: added OPCODE_NRM3/NRM4 instructions for vector normalization. We may emit these instructions from GLSL instead of DP3/RCP/MUL. Also, implement SSG (set sign) instruction in the interpreter.

commit: f6ead50827c03017e6b730313c361b39190da92f [log] [tgz]
author: Brian Paul <brian.paul@tungstengraphics.com> Fri Nov 07 08:51:31 2008 -0700
committer: Brian Paul <brian.paul@tungstengraphics.com> Fri Nov 07 09:51:25 2008 -0700
tree: 4c2c3ee5869419d5a265b072fd8b6e906ff16f43
parent: 4550b0562d5b59890fccb0e7eb0dbef967d1ccf9 [diff] [blame]
diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c
index d843761..c0173d3 100644
--- a/src/mesa/shader/prog_execute.c
+++ b/src/mesa/shader/prog_execute.c

@@ -1019,6 +1019,36 @@
          break;
       case OPCODE_NOP:
          break;
+      case OPCODE_NRM3:        /* 3-component normalization: xyz / |xyz| */
+         {
+            GLfloat a[4], result[4];
+            GLfloat tmp;         /* squared length, then reciprocal length */
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
+            if (tmp != 0.0F)     /* leave scale at 0 to avoid dividing by zero */
+               tmp = 1.0F / (GLfloat) sqrt(tmp);  /* 1/length, not 1/length^2 */
+            result[0] = tmp * a[0];
+            result[1] = tmp * a[1];
+            result[2] = tmp * a[2];
+            result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_NRM4:        /* 4-component normalization: xyzw / |xyzw| */
+         {
+            GLfloat a[4], result[4];
+            GLfloat tmp;         /* squared length, then reciprocal length */
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
+            if (tmp != 0.0F)     /* leave scale at 0 to avoid dividing by zero */
+               tmp = 1.0F / (GLfloat) sqrt(tmp);  /* 1/length, not 1/length^2 */
+            result[0] = tmp * a[0];
+            result[1] = tmp * a[1];
+            result[2] = tmp * a[2];
+            result[3] = tmp * a[3];
+            store_vector4(inst, machine, result);
+         }
+         break;
       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
          {
             GLfloat a[4], result[4];
@@ -1277,6 +1307,17 @@
             }
          }
          break;
+      case OPCODE_SSG:         /* set sign: each component -> -1, 0 or +1 */
+         {
+            GLfloat src[4], sign[4];
+            int i;
+            fetch_vector4(&inst->SrcReg[0], machine, src);
+            /* (x > 0) - (x < 0) is +1 for positive, -1 for negative, else 0 */
+            for (i = 0; i < 4; i++)
+               sign[i] = (GLfloat) ((src[i] > 0.0F) - (src[i] < 0.0F));
+            store_vector4(inst, machine, sign);
+         }
+         break;
       case OPCODE_STR:         /* set true, operands ignored */
          {
             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
commit	f6ead50827c03017e6b730313c361b39190da92f	[log] [tgz]
author	Brian Paul <brian.paul@tungstengraphics.com>	Fri Nov 07 08:51:31 2008 -0700
committer	Brian Paul <brian.paul@tungstengraphics.com>	Fri Nov 07 09:51:25 2008 -0700
tree	4c2c3ee5869419d5a265b072fd8b6e906ff16f43
parent	4550b0562d5b59890fccb0e7eb0dbef967d1ccf9 [diff] [blame]