Zack Rusin | e884c7e | 2008-03-01 08:04:21 -0500 | [diff] [blame] | 1 | /************************************************************************** |
| 2 | * |
| 3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
| 4 | * All Rights Reserved. |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 7 | * copy of this software and associated documentation files (the |
| 8 | * "Software"), to deal in the Software without restriction, including |
| 9 | * without limitation the rights to use, copy, modify, merge, publish, |
| 10 | * distribute, sub license, and/or sell copies of the Software, and to |
| 11 | * permit persons to whom the Software is furnished to do so, subject to |
| 12 | * the following conditions: |
| 13 | * |
| 14 | * The above copyright notice and this permission notice (including the |
| 15 | * next paragraph) shall be included in all copies or substantial portions |
| 16 | * of the Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| 21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
| 22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 25 | * |
| 26 | **************************************************************************/ |
| 27 | |
| 28 | /* |
| 29 | * This file is compiled with clang into the LLVM bitcode |
| 30 | * |
| 31 | * Authors: |
| 32 | * Zack Rusin zack@tungstengraphics.com |
| 33 | */ |
Zack Rusin | 201ac41 | 2008-04-21 00:10:39 -0400 | [diff] [blame] | 34 | typedef __attribute__(( ext_vector_type(4) )) float float4; |
Zack Rusin | e884c7e | 2008-03-01 08:04:21 -0500 | [diff] [blame] | 35 | |
Zack Rusin | 59766ac | 2008-05-15 17:46:20 -0400 | [diff] [blame] | 36 | |
| 37 | extern float fabsf(float val); |
| 38 | |
Stephane Marchesin | a6ff215 | 2008-10-01 00:00:58 +0200 | [diff] [blame] | 39 | /* helpers */ |
| 40 | |
Zack Rusin | 02e45b2 | 2008-05-16 17:10:52 -0400 | [diff] [blame] | 41 | float4 absvec(float4 vec) |
| 42 | { |
| 43 | float4 res; |
| 44 | res.x = fabsf(vec.x); |
| 45 | res.y = fabsf(vec.y); |
| 46 | res.z = fabsf(vec.z); |
| 47 | res.w = fabsf(vec.w); |
| 48 | |
| 49 | return res; |
| 50 | } |
| 51 | |
Stephane Marchesin | a6ff215 | 2008-10-01 00:00:58 +0200 | [diff] [blame] | 52 | float4 maxvec(float4 a, float4 b) |
| 53 | { |
| 54 | return (float4){(a.x > b.x) ? a.x : b.x, |
| 55 | (a.y > b.y) ? a.y : b.y, |
| 56 | (a.z > b.z) ? a.z : b.z, |
| 57 | (a.w > b.w) ? a.w : b.w}; |
| 58 | } |
| 59 | |
| 60 | float4 minvec(float4 a, float4 b) |
| 61 | { |
| 62 | return (float4){(a.x < b.x) ? a.x : b.x, |
| 63 | (a.y < b.y) ? a.y : b.y, |
| 64 | (a.z < b.z) ? a.z : b.z, |
| 65 | (a.w < b.w) ? a.w : b.w}; |
| 66 | } |
| 67 | |
| 68 | extern float powf(float num, float p); |
| 69 | extern float sqrtf(float x); |
| 70 | |
| 71 | float4 powvec(float4 vec, float4 q) |
| 72 | { |
| 73 | float4 p; |
| 74 | p.x = powf(vec.x, q.x); |
| 75 | p.y = powf(vec.y, q.y); |
| 76 | p.z = powf(vec.z, q.z); |
| 77 | p.w = powf(vec.w, q.w); |
| 78 | return p; |
| 79 | } |
| 80 | |
| 81 | float4 sqrtvec(float4 vec) |
| 82 | { |
| 83 | float4 p; |
| 84 | p.x = sqrtf(vec.x); |
| 85 | p.y = sqrtf(vec.y); |
| 86 | p.z = sqrtf(vec.z); |
| 87 | p.w = sqrtf(vec.w); |
| 88 | return p; |
| 89 | } |
| 90 | |
| 91 | float4 sltvec(float4 v1, float4 v2) |
| 92 | { |
| 93 | float4 p; |
| 94 | p.x = (v1.x < v2.x) ? 1.0 : 0.0; |
| 95 | p.y = (v1.y < v2.y) ? 1.0 : 0.0; |
| 96 | p.z = (v1.z < v2.z) ? 1.0 : 0.0; |
| 97 | p.w = (v1.w < v2.w) ? 1.0 : 0.0; |
| 98 | return p; |
| 99 | } |
| 100 | |
| 101 | |
| 102 | /* instructions */ |
| 103 | |
Zack Rusin | 59766ac | 2008-05-15 17:46:20 -0400 | [diff] [blame] | 104 | void abs(float4 *res, |
| 105 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) |
| 106 | { |
Zack Rusin | 02e45b2 | 2008-05-16 17:10:52 -0400 | [diff] [blame] | 107 | res[0] = absvec(tmp0x); |
| 108 | res[1] = absvec(tmp0y); |
| 109 | res[2] = absvec(tmp0z); |
| 110 | res[3] = absvec(tmp0w); |
Zack Rusin | 59766ac | 2008-05-15 17:46:20 -0400 | [diff] [blame] | 111 | } |
| 112 | |
Zack Rusin | e884c7e | 2008-03-01 08:04:21 -0500 | [diff] [blame] | 113 | void dp3(float4 *res, |
| 114 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, |
| 115 | float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) |
| 116 | { |
| 117 | float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + |
| 118 | (tmp0z * tmp1z); |
| 119 | |
| 120 | res[0] = dot; |
| 121 | res[1] = dot; |
| 122 | res[2] = dot; |
| 123 | res[3] = dot; |
| 124 | } |
| 125 | |
Zack Rusin | a9c40f8 | 2008-03-01 09:50:41 -0500 | [diff] [blame] | 126 | void dp4(float4 *res, |
| 127 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, |
| 128 | float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) |
| 129 | { |
| 130 | float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + |
| 131 | (tmp0z * tmp1z) + (tmp0w * tmp1w); |
| 132 | |
| 133 | res[0] = dot; |
| 134 | res[1] = dot; |
| 135 | res[2] = dot; |
| 136 | res[3] = dot; |
| 137 | } |
| 138 | |
Zack Rusin | 1d1cf8e | 2008-05-16 16:06:59 -0400 | [diff] [blame] | 139 | void lit(float4 *res, |
| 140 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) |
| 141 | { |
Zack Rusin | 0bf82c0 | 2008-05-16 17:56:38 -0400 | [diff] [blame] | 142 | const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; |
Zack Rusin | 1d1cf8e | 2008-05-16 16:06:59 -0400 | [diff] [blame] | 143 | const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; |
| 144 | const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; |
| 145 | |
| 146 | res[0] = (float4){1.0, 1.0, 1.0, 1.0}; |
| 147 | if (tmp0x.x > 0) { |
Zack Rusin | 1d1cf8e | 2008-05-16 16:06:59 -0400 | [diff] [blame] | 148 | float4 tmpy = maxvec(tmp0y, zerovec); |
| 149 | float4 tmpw = minvec(tmp0w, plus128); |
| 150 | tmpw = maxvec(tmpw, min128); |
Zack Rusin | 0bf82c0 | 2008-05-16 17:56:38 -0400 | [diff] [blame] | 151 | res[1] = tmp0x; |
Zack Rusin | 1d1cf8e | 2008-05-16 16:06:59 -0400 | [diff] [blame] | 152 | res[2] = powvec(tmpy, tmpw); |
| 153 | } else { |
| 154 | res[1] = zerovec; |
| 155 | res[2] = zerovec; |
| 156 | } |
| 157 | res[3] = (float4){1.0, 1.0, 1.0, 1.0}; |
| 158 | } |
Zack Rusin | 02e45b2 | 2008-05-16 17:10:52 -0400 | [diff] [blame] | 159 | |
Stephane Marchesin | a6ff215 | 2008-10-01 00:00:58 +0200 | [diff] [blame] | 160 | void min(float4 *res, |
| 161 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, |
| 162 | float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) |
Zack Rusin | 02e45b2 | 2008-05-16 17:10:52 -0400 | [diff] [blame] | 163 | { |
Stephane Marchesin | a6ff215 | 2008-10-01 00:00:58 +0200 | [diff] [blame] | 164 | res[0] = minvec(tmp0x, tmp1x); |
| 165 | res[1] = minvec(tmp0y, tmp1y); |
| 166 | res[2] = minvec(tmp0z, tmp1z); |
| 167 | res[3] = minvec(tmp0w, tmp1w); |
| 168 | } |
| 169 | |
| 170 | |
| 171 | void max(float4 *res, |
| 172 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, |
| 173 | float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) |
| 174 | { |
| 175 | res[0] = maxvec(tmp0x, tmp1x); |
| 176 | res[1] = maxvec(tmp0y, tmp1y); |
| 177 | res[2] = maxvec(tmp0z, tmp1z); |
| 178 | res[3] = maxvec(tmp0w, tmp1w); |
| 179 | } |
| 180 | |
| 181 | void pow(float4 *res, |
| 182 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, |
| 183 | float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) |
| 184 | { |
| 185 | res[0] = powvec(tmp0x, tmp1x); |
| 186 | res[1] = res[0]; |
| 187 | res[2] = res[0]; |
| 188 | res[3] = res[0]; |
Zack Rusin | 02e45b2 | 2008-05-16 17:10:52 -0400 | [diff] [blame] | 189 | } |
| 190 | |
| 191 | void rsq(float4 *res, |
| 192 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) |
| 193 | { |
| 194 | const float4 onevec = (float4) {1., 1., 1., 1.}; |
| 195 | res[0] = onevec/sqrtvec(absvec(tmp0x)); |
Zack Rusin | a7449d4 | 2008-05-16 17:35:47 -0400 | [diff] [blame] | 196 | res[1] = onevec/sqrtvec(absvec(tmp0y)); |
| 197 | res[2] = onevec/sqrtvec(absvec(tmp0z)); |
| 198 | res[3] = onevec/sqrtvec(absvec(tmp0w)); |
Zack Rusin | 02e45b2 | 2008-05-16 17:10:52 -0400 | [diff] [blame] | 199 | } |
Stephane Marchesin | a6ff215 | 2008-10-01 00:00:58 +0200 | [diff] [blame] | 200 | |
| 201 | void slt(float4 *res, |
| 202 | float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, |
| 203 | float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) |
| 204 | { |
| 205 | res[0] = sltvec(tmp0x, tmp1x); |
| 206 | res[1] = sltvec(tmp0y, tmp1y); |
| 207 | res[2] = sltvec(tmp0z, tmp1z); |
| 208 | res[3] = sltvec(tmp0w, tmp1w); |
| 209 | } |
| 210 | |