/*
 * Clip testing in SPARC assembly
 */
Brian Paul | 7943b34 | 2001-05-23 14:27:03 +0000 | [diff] [blame] | 4 | |
/*
 * Byte offsets of the vector fields accessed through the V4F_* names,
 * plus the pointer-sized load mnemonic.  Pointer fields are 8 bytes wide
 * in the 64-bit layout and 4 bytes wide otherwise, which shifts every
 * field that follows them.
 */
#if __arch64__
#define LDPTR		ldx	/* 64-bit pointer load */
#define V4F_DATA	0x00
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else
#define LDPTR		ld	/* 32-bit pointer load */
#define V4F_DATA	0x00
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif

/* Bit masks OR-ed into the V4F_FLAGS word; VEC_SIZE_n has the low n bits set. */
#define VEC_SIZE_1	1
#define VEC_SIZE_2	3
#define VEC_SIZE_3	7
#define VEC_SIZE_4	15
| 27 | |
#if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
	/*
	 * Solaris requires this for 64-bit: global registers used as plain
	 * scratch by hand-written code must be declared to the assembler.
	 */
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g7, #scratch
#endif
| 34 | |
Brian Paul | 7943b34 | 2001-05-23 14:27:03 +0000 | [diff] [blame] | 35 | .text |
| 36 | .align 64 |
| 37 | |
| 38 | one_dot_zero: |
| 39 | .word 0x3f800000 /* 1.0f */ |
| 40 | |
| 41 | /* This trick is shamelessly stolen from the x86 |
| 42 | * Mesa asm. Very clever, and we can do it too |
| 43 | * since we have the necessary add with carry |
| 44 | * instructions on Sparc. |
| 45 | */ |
| 46 | clip_table: |
| 47 | .byte 0, 1, 0, 2, 4, 5, 4, 6 |
| 48 | .byte 0, 1, 0, 2, 8, 9, 8, 10 |
| 49 | .byte 32, 33, 32, 34, 36, 37, 36, 38 |
| 50 | .byte 32, 33, 32, 34, 40, 41, 40, 42 |
| 51 | .byte 0, 1, 0, 2, 4, 5, 4, 6 |
| 52 | .byte 0, 1, 0, 2, 8, 9, 8, 10 |
| 53 | .byte 16, 17, 16, 18, 20, 21, 20, 22 |
| 54 | .byte 16, 17, 16, 18, 24, 25, 24, 26 |
| 55 | .byte 63, 61, 63, 62, 55, 53, 55, 54 |
| 56 | .byte 63, 61, 63, 62, 59, 57, 59, 58 |
| 57 | .byte 47, 45, 47, 46, 39, 37, 39, 38 |
| 58 | .byte 47, 45, 47, 46, 43, 41, 43, 42 |
| 59 | .byte 63, 61, 63, 62, 55, 53, 55, 54 |
| 60 | .byte 63, 61, 63, 62, 59, 57, 59, 58 |
| 61 | .byte 31, 29, 31, 30, 23, 21, 23, 22 |
| 62 | .byte 31, 29, 31, 30, 27, 25, 27, 26 |
| 63 | |
/* Arguments taken by both clip-test entry points below:
 *   GLvector4f *clip_vec, GLvector4f *proj_vec,
 *   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask
 */
| 66 | |
| 67 | .align 64 |
| 68 | __pc_tramp: |
| 69 | retl |
| 70 | nop |
| 71 | |
/*
 * _mesa_sparc_cliptest_points4
 *
 * In (after save):
 *   %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *   %i3 = orMask,   %i4 = andMask
 * Out:
 *   %o0 = proj_vec
 *
 * For every 4-float vertex of clip_vec a mask byte is looked up in
 * clip_table and written to clipMask[i].  A vertex with a zero mask is
 * scaled by 1/w into proj_vec (x/w, y/w, z/w, 1/w); any other vertex is
 * written as (0, 0, 0, 1) and counted.  *orMask receives the incoming
 * value OR-ed with all masks; *andMask receives the incoming value AND-ed
 * with the masks of the counted vertices, or 0 when fewer than count
 * vertices were counted.  proj_vec flags, size and count are updated
 * before the loop.
 *
 * Clobbers: %g1-%g5, %f0-%f4, %f8, condition codes.
 *
 * The instruction order inside the loop follows the original hand
 * scheduling; do not reorder it casually.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	/*
	 * A 64-bit register window spill writes 16 x 8 = 128 bytes at the
	 * stack pointer, so the 64-byte frame that is enough for 32-bit code
	 * would let a spill of this window run into the caller's save area.
	 * 176 bytes is the minimum V9 frame.
	 */
#if __arch64__
	save	%sp, -176, %sp
#else
	save	%sp, -64, %sp
#endif
	call	__pc_tramp			! %o7 = address of this call
	 sub	%o7, (. - one_dot_zero - 4), %g1	! %g1 = &one_dot_zero
	ld	[%g1 + 0x0], %f4		! %f4 = 1.0f
	add	%g1, 0x4, %g1			! %g1 = clip_table

	/*
	 * The vertex count is kept in %l3.  It must not live in %g7: the
	 * SPARC ABIs reserve that register for the system (thread pointer)
	 * and nothing here would restore it.
	 */
	ld	[%i0 + V4F_STRIDE], %l1		! byte stride between vertices
	ld	[%i0 + V4F_COUNT], %l3		! number of vertices
	LDPTR	[%i0 + V4F_START], %i0		! first source vertex
	LDPTR	[%i1 + V4F_START], %i5		! first destination vertex
	ldub	[%i3], %g2			! incoming or-mask
	ldub	[%i4], %g3			! incoming and-mask
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! %g2 = (and << 8) | or

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]
	/* NOTE(review): size is set to 3 although VEC_SIZE_4 is flagged
	 * above and four components are written per vertex - confirm
	 * against the C reference implementation. */
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]
	clr	%l2				! c = 0

	/*
	 * The loop below tests its exit condition with bne after the body,
	 * so an empty vector has to bypass it entirely.
	 */
	cmp	%l3, 0
	be	4f
	 clr	%l0				! i = 0 (delay slot, always runs)

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 */

1:	ld	[%i0 + 0x0c], %f3		! w as float
	ld	[%i0 + 0x0c], %g5		! w as raw bits
	ld	[%i0 + 0x08], %g4		! z as raw bits
	fdivs	%f4, %f3, %f8			! %f8 = 1.0 / w
	addcc	%g5, %g5, %g5			! carry = sign(w), %g5 = |w| << 1
	addx	%g0, 0x0, %g3			! index = sign(w)
	addcc	%g4, %g4, %g4			! carry = sign(z)
	addx	%g3, %g3, %g3			! shift carry into index
	subcc	%g5, %g4, %g0			! carry = |w| < |z|
	ld	[%i0 + 0x04], %g4		! y as raw bits
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(y)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |y|
	ld	[%i0 + 0x00], %g4		! x as raw bits
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(x)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |x|
	addx	%g3, %g3, %g3			! 7-bit table index complete
	ldub	[%g1 + %g3], %g3		! mask = clip_table[index]
	cmp	%g3, 0
	be	2f				! mask == 0: project the vertex
	 stb	%g3, [%i2]			! clipMask[i] = mask (both paths)

	/* mask != 0: count it, fold it into both masks, emit (0, 0, 0, 1). */
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	st	%g0, [%i5 + 0x00]
	or	%g4, 0xff, %g4			! (mask << 8) | 0xff
	or	%g2, %g3, %g2			! or-mask |= mask
	st	%g0, [%i5 + 0x04]
	and	%g2, %g4, %g2			! and-mask &= mask, or-mask kept
	st	%g0, [%i5 + 0x08]
	b	3f
	 st	%f4, [%i5 + 0x0c]		! w component = 1.0f

	/* mask == 0: emit (x/w, y/w, z/w, 1/w). */
2:	ld	[%i0 + 0x00], %f0
	ld	[%i0 + 0x04], %f1
	ld	[%i0 + 0x08], %f2
	fmuls	%f0, %f8, %f0
	st	%f0, [%i5 + 0x00]
	fmuls	%f1, %f8, %f1
	st	%f1, [%i5 + 0x04]
	fmuls	%f2, %f8, %f2
	st	%f2, [%i5 + 0x08]
	st	%f8, [%i5 + 0x0c]

3:	add	%i5, 0x10, %i5			! next destination vertex
	add	%l0, 1, %l0			! i++
	add	%i2, 1, %i2			! next clipMask byte
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! next source vertex

4:	stb	%g2, [%i3]			! *orMask = low byte
	srl	%g2, 8, %g3			! candidate *andMask
	cmp	%l2, %l3
	bl,a	1f				! c < count: at least one vertex
	 clr	%g3				!   had mask 0, so and-mask is 0
1:	stb	%g3, [%i4]
	ret
	 restore %i1, 0x0, %o0			! return proj_vec
| 167 | |
/*
 * _mesa_sparc_cliptest_points4_np
 *
 * In (after save):
 *   %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *   %i3 = orMask,   %i4 = andMask
 * Out:
 *   %o0 = proj_vec
 *
 * Same clip classification as _mesa_sparc_cliptest_points4, but no vertex
 * data is written: only clipMask[], *orMask and *andMask are produced.
 * proj_vec still has its flags, size and count fields updated and is the
 * value returned.
 *
 * Clobbers: %g1-%g5, condition codes.
 *
 * The instruction order inside the loop follows the original hand
 * scheduling; do not reorder it casually.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	/*
	 * A 64-bit register window spill writes 16 x 8 = 128 bytes at the
	 * stack pointer, so the 64-byte frame that is enough for 32-bit code
	 * would let a spill of this window run into the caller's save area.
	 * 176 bytes is the minimum V9 frame.
	 */
#if __arch64__
	save	%sp, -176, %sp
#else
	save	%sp, -64, %sp
#endif

	call	__pc_tramp			! %o7 = address of this call
	 sub	%o7, (. - one_dot_zero - 4), %g1	! %g1 = &one_dot_zero
	add	%g1, 0x4, %g1			! %g1 = clip_table

	/*
	 * The vertex count is kept in %l3.  It must not live in %g7: the
	 * SPARC ABIs reserve that register for the system (thread pointer)
	 * and nothing here would restore it.
	 *
	 * The destination start pointer is not loaded: this routine never
	 * writes vertex data.
	 */
	ld	[%i0 + V4F_STRIDE], %l1		! byte stride between vertices
	ld	[%i0 + V4F_COUNT], %l3		! number of vertices
	LDPTR	[%i0 + V4F_START], %i0		! first source vertex
	ldub	[%i3], %g2			! incoming or-mask
	ldub	[%i4], %g3			! incoming and-mask
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! %g2 = (and << 8) | or

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]
	/* NOTE(review): size is set to 3 although VEC_SIZE_4 is flagged
	 * above, and proj_vec is updated and returned even though no vertex
	 * data is written to it - confirm against the C reference
	 * implementation. */
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]
	clr	%l2				! c = 0

	/*
	 * The loop below tests its exit condition with bne after the body,
	 * so an empty vector has to bypass it entirely.
	 */
	cmp	%l3, 0
	be	4f
	 clr	%l0				! i = 0 (delay slot, always runs)

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 */

1:	ld	[%i0 + 0x0c], %g5		! w as raw bits
	ld	[%i0 + 0x08], %g4		! z as raw bits
	addcc	%g5, %g5, %g5			! carry = sign(w), %g5 = |w| << 1
	addx	%g0, 0x0, %g3			! index = sign(w)
	addcc	%g4, %g4, %g4			! carry = sign(z)
	addx	%g3, %g3, %g3			! shift carry into index
	subcc	%g5, %g4, %g0			! carry = |w| < |z|
	ld	[%i0 + 0x04], %g4		! y as raw bits
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(y)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |y|
	ld	[%i0 + 0x00], %g4		! x as raw bits
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(x)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |x|
	addx	%g3, %g3, %g3			! 7-bit table index complete
	ldub	[%g1 + %g3], %g3		! mask = clip_table[index]
	cmp	%g3, 0
	be	2f				! mask == 0: nothing to fold in
	 stb	%g3, [%i2]			! clipMask[i] = mask (both paths)

	/* mask != 0: count it and fold it into both masks. */
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	or	%g4, 0xff, %g4			! (mask << 8) | 0xff
	or	%g2, %g3, %g2			! or-mask |= mask
	and	%g2, %g4, %g2			! and-mask &= mask, or-mask kept

2:	add	%l0, 1, %l0			! i++
	add	%i2, 1, %i2			! next clipMask byte
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! next source vertex

4:	stb	%g2, [%i3]			! *orMask = low byte
	srl	%g2, 8, %g3			! candidate *andMask
	cmp	%l2, %l3
	bl,a	1f				! c < count: at least one vertex
	 clr	%g3				!   had mask 0, so and-mask is 0
1:	stb	%g3, [%i4]
	ret
	 restore %i1, 0x0, %o0			! return proj_vec