blob: cb85e1734ec2e84db053408a6d678dac9d81df7e [file] [log] [blame]
Zack Rusine884c7e2008-03-01 08:04:21 -05001/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/*
 * This file is compiled with clang into LLVM bitcode.
 *
 * Authors:
 *   Zack Rusin zack@tungstengraphics.com
 */
Zack Rusin201ac412008-04-21 00:10:39 -040034typedef __attribute__(( ext_vector_type(4) )) float float4;
Zack Rusine884c7e2008-03-01 08:04:21 -050035
Zack Rusin59766ac2008-05-15 17:46:20 -040036
37extern float fabsf(float val);
38
/* helpers: component-wise float4 utilities used by the instructions below */
40
Zack Rusin02e45b22008-05-16 17:10:52 -040041float4 absvec(float4 vec)
42{
43 float4 res;
44 res.x = fabsf(vec.x);
45 res.y = fabsf(vec.y);
46 res.z = fabsf(vec.z);
47 res.w = fabsf(vec.w);
48
49 return res;
50}
51
Stephane Marchesina6ff2152008-10-01 00:00:58 +020052float4 maxvec(float4 a, float4 b)
53{
54 return (float4){(a.x > b.x) ? a.x : b.x,
55 (a.y > b.y) ? a.y : b.y,
56 (a.z > b.z) ? a.z : b.z,
57 (a.w > b.w) ? a.w : b.w};
58}
59
60float4 minvec(float4 a, float4 b)
61{
62 return (float4){(a.x < b.x) ? a.x : b.x,
63 (a.y < b.y) ? a.y : b.y,
64 (a.z < b.z) ? a.z : b.z,
65 (a.w < b.w) ? a.w : b.w};
66}
67
68extern float powf(float num, float p);
69extern float sqrtf(float x);
70
71float4 powvec(float4 vec, float4 q)
72{
73 float4 p;
74 p.x = powf(vec.x, q.x);
75 p.y = powf(vec.y, q.y);
76 p.z = powf(vec.z, q.z);
77 p.w = powf(vec.w, q.w);
78 return p;
79}
80
81float4 sqrtvec(float4 vec)
82{
83 float4 p;
84 p.x = sqrtf(vec.x);
85 p.y = sqrtf(vec.y);
86 p.z = sqrtf(vec.z);
87 p.w = sqrtf(vec.w);
88 return p;
89}
90
91float4 sltvec(float4 v1, float4 v2)
92{
93 float4 p;
94 p.x = (v1.x < v2.x) ? 1.0 : 0.0;
95 p.y = (v1.y < v2.y) ? 1.0 : 0.0;
96 p.z = (v1.z < v2.z) ? 1.0 : 0.0;
97 p.w = (v1.w < v2.w) ? 1.0 : 0.0;
98 return p;
99}
100
101
/* instructions: each one writes four float4 results through res[0..3] */
103
Zack Rusin59766ac2008-05-15 17:46:20 -0400104void abs(float4 *res,
105 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
106{
Zack Rusin02e45b22008-05-16 17:10:52 -0400107 res[0] = absvec(tmp0x);
108 res[1] = absvec(tmp0y);
109 res[2] = absvec(tmp0z);
110 res[3] = absvec(tmp0w);
Zack Rusin59766ac2008-05-15 17:46:20 -0400111}
112
Zack Rusine884c7e2008-03-01 08:04:21 -0500113void dp3(float4 *res,
114 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
115 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
116{
117 float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
118 (tmp0z * tmp1z);
119
120 res[0] = dot;
121 res[1] = dot;
122 res[2] = dot;
123 res[3] = dot;
124}
125
Zack Rusina9c40f82008-03-01 09:50:41 -0500126void dp4(float4 *res,
127 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
128 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
129{
130 float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
131 (tmp0z * tmp1z) + (tmp0w * tmp1w);
132
133 res[0] = dot;
134 res[1] = dot;
135 res[2] = dot;
136 res[3] = dot;
137}
138
Zack Rusin1d1cf8e2008-05-16 16:06:59 -0400139void lit(float4 *res,
140 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
141{
Zack Rusin0bf82c02008-05-16 17:56:38 -0400142 const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0};
Zack Rusin1d1cf8e2008-05-16 16:06:59 -0400143 const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f};
144 const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f};
145
146 res[0] = (float4){1.0, 1.0, 1.0, 1.0};
147 if (tmp0x.x > 0) {
Zack Rusin1d1cf8e2008-05-16 16:06:59 -0400148 float4 tmpy = maxvec(tmp0y, zerovec);
149 float4 tmpw = minvec(tmp0w, plus128);
150 tmpw = maxvec(tmpw, min128);
Zack Rusin0bf82c02008-05-16 17:56:38 -0400151 res[1] = tmp0x;
Zack Rusin1d1cf8e2008-05-16 16:06:59 -0400152 res[2] = powvec(tmpy, tmpw);
153 } else {
154 res[1] = zerovec;
155 res[2] = zerovec;
156 }
157 res[3] = (float4){1.0, 1.0, 1.0, 1.0};
158}
Zack Rusin02e45b22008-05-16 17:10:52 -0400159
Stephane Marchesina6ff2152008-10-01 00:00:58 +0200160void min(float4 *res,
161 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
162 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
Zack Rusin02e45b22008-05-16 17:10:52 -0400163{
Stephane Marchesina6ff2152008-10-01 00:00:58 +0200164 res[0] = minvec(tmp0x, tmp1x);
165 res[1] = minvec(tmp0y, tmp1y);
166 res[2] = minvec(tmp0z, tmp1z);
167 res[3] = minvec(tmp0w, tmp1w);
168}
169
170
171void max(float4 *res,
172 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
173 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
174{
175 res[0] = maxvec(tmp0x, tmp1x);
176 res[1] = maxvec(tmp0y, tmp1y);
177 res[2] = maxvec(tmp0z, tmp1z);
178 res[3] = maxvec(tmp0w, tmp1w);
179}
180
181void pow(float4 *res,
182 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
183 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
184{
185 res[0] = powvec(tmp0x, tmp1x);
186 res[1] = res[0];
187 res[2] = res[0];
188 res[3] = res[0];
Zack Rusin02e45b22008-05-16 17:10:52 -0400189}
190
191void rsq(float4 *res,
192 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
193{
194 const float4 onevec = (float4) {1., 1., 1., 1.};
195 res[0] = onevec/sqrtvec(absvec(tmp0x));
Zack Rusina7449d42008-05-16 17:35:47 -0400196 res[1] = onevec/sqrtvec(absvec(tmp0y));
197 res[2] = onevec/sqrtvec(absvec(tmp0z));
198 res[3] = onevec/sqrtvec(absvec(tmp0w));
Zack Rusin02e45b22008-05-16 17:10:52 -0400199}
Stephane Marchesina6ff2152008-10-01 00:00:58 +0200200
201void slt(float4 *res,
202 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
203 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
204{
205 res[0] = sltvec(tmp0x, tmp1x);
206 res[1] = sltvec(tmp0y, tmp1y);
207 res[2] = sltvec(tmp0z, tmp1z);
208 res[3] = sltvec(tmp0w, tmp1w);
209}
210