Chia-I Wu | 28b8996 | 2014-08-18 14:40:49 +0800 | [diff] [blame^] | 1 | /* |
| 2 | * XGL |
| 3 | * |
| 4 | * Copyright (C) 2014 LunarG, Inc. |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 7 | * copy of this software and associated documentation files (the "Software"), |
| 8 | * to deal in the Software without restriction, including without limitation |
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 10 | * and/or sell copies of the Software, and to permit persons to whom the |
| 11 | * Software is furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be included |
| 14 | * in all copies or substantial portions of the Software. |
| 15 | * |
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| 22 | * DEALINGS IN THE SOFTWARE. |
| 23 | */ |
| 24 | |
| 25 | #include "genhw/genhw.h" |
| 26 | #include "dev.h" |
| 27 | #include "sampler.h" |
| 28 | |
| 29 | /** |
| 30 | * Translate a pipe texture filter to the matching hardware mapfilter. |
| 31 | */ |
| 32 | static int translate_tex_filter(XGL_TEX_FILTER filter) |
| 33 | { |
| 34 | switch (filter) { |
| 35 | case XGL_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST; |
| 36 | case XGL_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR; |
| 37 | default: |
| 38 | assert(!"unknown tex filter"); |
| 39 | return GEN6_MAPFILTER_NEAREST; |
| 40 | } |
| 41 | } |
| 42 | |
| 43 | static int translate_tex_mipmap_mode(XGL_TEX_MIPMAP_MODE mode) |
| 44 | { |
| 45 | switch (mode) { |
| 46 | case XGL_TEX_MIPMAP_NEAREST: return GEN6_MIPFILTER_NEAREST; |
| 47 | case XGL_TEX_MIPMAP_LINEAR: return GEN6_MIPFILTER_LINEAR; |
| 48 | case XGL_TEX_MIPMAP_BASE: return GEN6_MIPFILTER_NONE; |
| 49 | default: |
| 50 | assert(!"unknown tex mipmap mode"); |
| 51 | return GEN6_MIPFILTER_NONE; |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | static int translate_tex_addr(XGL_TEX_ADDRESS addr) |
| 56 | { |
| 57 | switch (addr) { |
| 58 | case XGL_TEX_ADDRESS_WRAP: return GEN6_TEXCOORDMODE_WRAP; |
| 59 | case XGL_TEX_ADDRESS_MIRROR: return GEN6_TEXCOORDMODE_MIRROR; |
| 60 | case XGL_TEX_ADDRESS_CLAMP: return GEN6_TEXCOORDMODE_CLAMP; |
| 61 | case XGL_TEX_ADDRESS_MIRROR_ONCE: return GEN6_TEXCOORDMODE_MIRROR_ONCE; |
| 62 | case XGL_TEX_ADDRESS_CLAMP_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER; |
| 63 | default: |
| 64 | assert(!"unknown tex address"); |
| 65 | return GEN6_TEXCOORDMODE_WRAP; |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | static int translate_compare_func(XGL_COMPARE_FUNC func) |
| 70 | { |
| 71 | switch (func) { |
| 72 | case XGL_COMPARE_NEVER: return GEN6_COMPAREFUNCTION_NEVER; |
| 73 | case XGL_COMPARE_LESS: return GEN6_COMPAREFUNCTION_LESS; |
| 74 | case XGL_COMPARE_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL; |
| 75 | case XGL_COMPARE_LESS_EQUAL: return GEN6_COMPAREFUNCTION_LEQUAL; |
| 76 | case XGL_COMPARE_GREATER: return GEN6_COMPAREFUNCTION_GREATER; |
| 77 | case XGL_COMPARE_NOT_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; |
| 78 | case XGL_COMPARE_GREATER_EQUAL: return GEN6_COMPAREFUNCTION_GEQUAL; |
| 79 | case XGL_COMPARE_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS; |
| 80 | default: |
| 81 | assert(!"unknown compare_func"); |
| 82 | return GEN6_COMPAREFUNCTION_NEVER; |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | static void translate_border_color(XGL_BORDER_COLOR_TYPE type, float rgba[4]) |
| 87 | { |
| 88 | switch (type) { |
| 89 | case XGL_BORDER_COLOR_OPAQUE_WHITE: |
| 90 | rgba[0] = 1.0; |
| 91 | rgba[1] = 1.0; |
| 92 | rgba[2] = 1.0; |
| 93 | rgba[3] = 1.0; |
| 94 | break; |
| 95 | case XGL_BORDER_COLOR_TRANSPARENT_BLACK: |
| 96 | default: |
| 97 | rgba[0] = 0.0; |
| 98 | rgba[1] = 0.0; |
| 99 | rgba[2] = 0.0; |
| 100 | rgba[3] = 0.0; |
| 101 | break; |
| 102 | case XGL_BORDER_COLOR_OPAQUE_BLACK: |
| 103 | rgba[0] = 0.0; |
| 104 | rgba[1] = 0.0; |
| 105 | rgba[2] = 0.0; |
| 106 | rgba[3] = 1.0; |
| 107 | break; |
| 108 | } |
| 109 | } |
| 110 | |
/**
 * Fill the 12-dword Gen6 border color state from a float RGBA color.
 *
 * The same color is stored in every format written below:
 *
 *   dw[0]      UNORM8   (R, G, B, A packed low to high)
 *   dw[1..4]   IEEE float, one channel per dword
 *   dw[5..6]   half float, two channels per dword
 *   dw[7..8]   UNORM16, two channels per dword
 *   dw[9..10]  SNORM16, two channels per dword
 *   dw[11]     SNORM8   (R, G, B, A packed low to high)
 *
 * Every channel is narrowed to an unsigned field of its exact width and
 * widened to uint32_t before it is shifted.  This keeps a negative SNORM
 * channel from sign-extending into the neighbouring fields and avoids
 * shifting bits into the sign bit of a promoted int.
 *
 * \param gpu    the GPU; asserted to be Gen6
 * \param color  source RGBA color; not modified
 * \param dw     receives the 12 dwords of border color state
 */
static void
emit_border_color_gen6(const struct intel_gpu *gpu,
                       const float color[4],
                       uint32_t dw[12])
{
    float rgba[4] = { color[0], color[1], color[2], color[3] };

    INTEL_GPU_ASSERT(gpu, 6, 6);

    /*
     * This state is not documented in the Sandy Bridge PRM, but in the
     * Ironlake PRM.  SNORM8 seems to be in DW11 instead of DW1.
     */

    /* IEEE_FP */
    dw[1] = u_fui(rgba[0]);
    dw[2] = u_fui(rgba[1]);
    dw[3] = u_fui(rgba[2]);
    dw[4] = u_fui(rgba[3]);

    /* FLOAT_16 */
    dw[5] = (uint32_t) u_float_to_half(rgba[0]) |
            (uint32_t) u_float_to_half(rgba[1]) << 16;
    dw[6] = (uint32_t) u_float_to_half(rgba[2]) |
            (uint32_t) u_float_to_half(rgba[3]) << 16;

    /* clamp to [-1.0f, 1.0f] */
    rgba[0] = U_CLAMP(rgba[0], -1.0f, 1.0f);
    rgba[1] = U_CLAMP(rgba[1], -1.0f, 1.0f);
    rgba[2] = U_CLAMP(rgba[2], -1.0f, 1.0f);
    rgba[3] = U_CLAMP(rgba[3], -1.0f, 1.0f);

    /* SNORM16: keep only the low 16 bits (two's complement) per channel */
    dw[9] = (uint32_t) (uint16_t) u_iround(rgba[0] * 32767.0f) |
            (uint32_t) (uint16_t) u_iround(rgba[1] * 32767.0f) << 16;
    dw[10] = (uint32_t) (uint16_t) u_iround(rgba[2] * 32767.0f) |
             (uint32_t) (uint16_t) u_iround(rgba[3] * 32767.0f) << 16;

    /* SNORM8: keep only the low 8 bits (two's complement) per channel */
    dw[11] = (uint32_t) (uint8_t) u_iround(rgba[0] * 127.0f) |
             (uint32_t) (uint8_t) u_iround(rgba[1] * 127.0f) << 8 |
             (uint32_t) (uint8_t) u_iround(rgba[2] * 127.0f) << 16 |
             (uint32_t) (uint8_t) u_iround(rgba[3] * 127.0f) << 24;

    /* clamp to [0.0f, 1.0f] */
    rgba[0] = U_CLAMP(rgba[0], 0.0f, 1.0f);
    rgba[1] = U_CLAMP(rgba[1], 0.0f, 1.0f);
    rgba[2] = U_CLAMP(rgba[2], 0.0f, 1.0f);
    rgba[3] = U_CLAMP(rgba[3], 0.0f, 1.0f);

    /* UNORM8 */
    dw[0] = (uint32_t) (uint8_t) u_iround(rgba[0] * 255.0f) |
            (uint32_t) (uint8_t) u_iround(rgba[1] * 255.0f) << 8 |
            (uint32_t) (uint8_t) u_iround(rgba[2] * 255.0f) << 16 |
            (uint32_t) (uint8_t) u_iround(rgba[3] * 255.0f) << 24;

    /* UNORM16 */
    dw[7] = (uint32_t) (uint16_t) u_iround(rgba[0] * 65535.0f) |
            (uint32_t) (uint16_t) u_iround(rgba[1] * 65535.0f) << 16;
    dw[8] = (uint32_t) (uint16_t) u_iround(rgba[2] * 65535.0f) |
            (uint32_t) (uint16_t) u_iround(rgba[3] * 65535.0f) << 16;
}
| 173 | |
| 174 | static void |
| 175 | emit_sampler(const struct intel_gpu *gpu, |
| 176 | const XGL_SAMPLER_CREATE_INFO *info, |
| 177 | uint32_t cmd[15]) |
| 178 | { |
| 179 | int mip_filter, min_filter, mag_filter, max_aniso; |
| 180 | int lod_bias, max_lod, min_lod; |
| 181 | int wrap_s, wrap_t, wrap_r; |
| 182 | uint32_t dw0, dw1, dw3; |
| 183 | float border_color[4]; |
| 184 | |
| 185 | INTEL_GPU_ASSERT(gpu, 6, 7.5); |
| 186 | |
| 187 | mip_filter = translate_tex_mipmap_mode(info->mipMode); |
| 188 | min_filter = translate_tex_filter(info->minFilter); |
| 189 | mag_filter = translate_tex_filter(info->magFilter); |
| 190 | |
| 191 | if (info->maxAnisotropy >= 2 && info->maxAnisotropy <= 16) |
| 192 | max_aniso = info->maxAnisotropy / 2 - 1; |
| 193 | else if (info->maxAnisotropy > 16) |
| 194 | max_aniso = GEN6_ANISORATIO_16; |
| 195 | else |
| 196 | max_aniso = GEN6_ANISORATIO_2; |
| 197 | |
| 198 | /* |
| 199 | * Here is how the hardware calculate per-pixel LOD, from my reading of the |
| 200 | * PRMs: |
| 201 | * |
| 202 | * 1) LOD is set to log2(ratio of texels to pixels) if not specified in |
| 203 | * other ways. The number of texels is measured using level |
| 204 | * SurfMinLod. |
| 205 | * 2) Bias is added to LOD. |
| 206 | * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is |
| 207 | * compared with Base to determine whether magnification or |
| 208 | * minification is needed. (if preclamp is disabled, LOD is compared |
| 209 | * with Base before clamping) |
| 210 | * 4) If magnification is needed, or no mipmapping is requested, LOD is |
| 211 | * set to floor(MinLod). |
| 212 | * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. |
| 213 | * |
| 214 | * With Gallium interface, Base is always zero and |
| 215 | * pipe_sampler_view::u.tex.first_level specifies SurfMinLod. |
| 216 | */ |
| 217 | if (intel_gpu_gen(gpu) >= INTEL_GEN(7)) { |
| 218 | const float scale = 256.0f; |
| 219 | |
| 220 | /* [-16.0, 16.0) in S4.8 */ |
| 221 | lod_bias = (int) |
| 222 | (U_CLAMP(info->mipLodBias, -16.0f, 15.9f) * scale); |
| 223 | lod_bias &= 0x1fff; |
| 224 | |
| 225 | /* [0.0, 14.0] in U4.8 */ |
| 226 | max_lod = (int) (U_CLAMP(info->maxLod, 0.0f, 14.0f) * scale); |
| 227 | min_lod = (int) (U_CLAMP(info->minLod, 0.0f, 14.0f) * scale); |
| 228 | } |
| 229 | else { |
| 230 | const float scale = 64.0f; |
| 231 | |
| 232 | /* [-16.0, 16.0) in S4.6 */ |
| 233 | lod_bias = (int) |
| 234 | (U_CLAMP(info->mipLodBias, -16.0f, 15.9f) * scale); |
| 235 | lod_bias &= 0x7ff; |
| 236 | |
| 237 | /* [0.0, 13.0] in U4.6 */ |
| 238 | max_lod = (int) (U_CLAMP(info->maxLod, 0.0f, 13.0f) * scale); |
| 239 | min_lod = (int) (U_CLAMP(info->minLod, 0.0f, 13.0f) * scale); |
| 240 | } |
| 241 | |
| 242 | /* |
| 243 | * We want LOD to be clamped to determine magnification/minification, and |
| 244 | * get set to zero when it is magnification or when mipmapping is disabled. |
| 245 | * The hardware would set LOD to floor(MinLod) and that is a problem when |
| 246 | * MinLod is greater than or equal to 1.0f. |
| 247 | * |
| 248 | * With Base being zero, it is always minification when MinLod is non-zero. |
| 249 | * To achieve our goal, we just need to set MinLod to zero and set |
| 250 | * MagFilter to MinFilter when mipmapping is disabled. |
| 251 | */ |
| 252 | if (info->mipMode == XGL_TEX_MIPMAP_BASE && min_lod) { |
| 253 | min_lod = 0; |
| 254 | mag_filter = min_filter; |
| 255 | } |
| 256 | |
| 257 | /* determine wrap s/t/r */ |
| 258 | wrap_s = translate_tex_addr(info->addressU); |
| 259 | wrap_t = translate_tex_addr(info->addressV); |
| 260 | wrap_r = translate_tex_addr(info->addressW); |
| 261 | |
| 262 | translate_border_color(info->borderColorType, border_color); |
| 263 | |
| 264 | if (intel_gpu_gen(gpu) >= INTEL_GEN(7)) { |
| 265 | dw0 = 1 << 28 | |
| 266 | mip_filter << 20 | |
| 267 | lod_bias << 1; |
| 268 | |
| 269 | if (info->maxAnisotropy) { |
| 270 | dw0 |= GEN6_MAPFILTER_ANISOTROPIC << 17 | |
| 271 | GEN6_MAPFILTER_ANISOTROPIC << 14 | |
| 272 | 1; |
| 273 | } else { |
| 274 | dw0 |= mag_filter << 17 | |
| 275 | min_filter << 14; |
| 276 | } |
| 277 | |
| 278 | dw1 = min_lod << 20 | |
| 279 | max_lod << 8; |
| 280 | |
| 281 | dw1 |= translate_compare_func(info->compareFunc) << 1; |
| 282 | |
| 283 | dw3 = max_aniso << 19; |
| 284 | |
| 285 | /* round the coordinates for linear filtering */ |
| 286 | if (min_filter != GEN6_MAPFILTER_NEAREST) { |
| 287 | dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | |
| 288 | GEN6_SAMPLER_DW3_V_MIN_ROUND | |
| 289 | GEN6_SAMPLER_DW3_R_MIN_ROUND); |
| 290 | } |
| 291 | if (mag_filter != GEN6_MAPFILTER_NEAREST) { |
| 292 | dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | |
| 293 | GEN6_SAMPLER_DW3_V_MAG_ROUND | |
| 294 | GEN6_SAMPLER_DW3_R_MAG_ROUND); |
| 295 | } |
| 296 | |
| 297 | dw3 |= wrap_s << 6 | |
| 298 | wrap_t << 3 | |
| 299 | wrap_r; |
| 300 | |
| 301 | cmd[0] = dw0; |
| 302 | cmd[1] = dw1; |
| 303 | cmd[2] = dw3; |
| 304 | |
| 305 | memcpy(&cmd[3], &border_color, sizeof(border_color)); |
| 306 | } |
| 307 | else { |
| 308 | dw0 = 1 << 28 | |
| 309 | mip_filter << 20 | |
| 310 | lod_bias << 3; |
| 311 | |
| 312 | dw0 |= translate_compare_func(info->compareFunc); |
| 313 | |
| 314 | if (info->maxAnisotropy) { |
| 315 | dw0 |= GEN6_MAPFILTER_ANISOTROPIC << 17 | |
| 316 | GEN6_MAPFILTER_ANISOTROPIC << 14; |
| 317 | } |
| 318 | else { |
| 319 | dw0 |= (min_filter != mag_filter) << 27 | |
| 320 | mag_filter << 17 | |
| 321 | min_filter << 14; |
| 322 | } |
| 323 | |
| 324 | dw1 = min_lod << 22 | |
| 325 | max_lod << 12; |
| 326 | |
| 327 | dw1 |= wrap_s << 6 | |
| 328 | wrap_t << 3 | |
| 329 | wrap_r; |
| 330 | |
| 331 | dw3 = max_aniso << 19; |
| 332 | |
| 333 | /* round the coordinates for linear filtering */ |
| 334 | if (min_filter != GEN6_MAPFILTER_NEAREST) { |
| 335 | dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | |
| 336 | GEN6_SAMPLER_DW3_V_MIN_ROUND | |
| 337 | GEN6_SAMPLER_DW3_R_MIN_ROUND); |
| 338 | } |
| 339 | if (mag_filter != GEN6_MAPFILTER_NEAREST) { |
| 340 | dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | |
| 341 | GEN6_SAMPLER_DW3_V_MAG_ROUND | |
| 342 | GEN6_SAMPLER_DW3_R_MAG_ROUND); |
| 343 | } |
| 344 | |
| 345 | cmd[0] = dw0; |
| 346 | cmd[1] = dw1; |
| 347 | cmd[2] = dw3; |
| 348 | |
| 349 | emit_border_color_gen6(gpu, border_color, &cmd[3]); |
| 350 | } |
| 351 | } |
| 352 | |
/**
 * Destroy callback installed on sampler objects: converts the generic object
 * back to its sampler and destroys it.
 */
static void sampler_destroy(struct intel_obj *obj)
{
    intel_sampler_destroy(intel_sampler_from_obj(obj));
}
| 359 | |
| 360 | XGL_RESULT intel_sampler_create(struct intel_dev *dev, |
| 361 | const XGL_SAMPLER_CREATE_INFO *info, |
| 362 | struct intel_sampler **sampler_ret) |
| 363 | { |
| 364 | struct intel_sampler *sampler; |
| 365 | |
| 366 | sampler = (struct intel_sampler *) intel_base_create(dev, |
| 367 | sizeof(*sampler), dev->base.dbg, XGL_DBG_OBJECT_SAMPLER, info, 0); |
| 368 | if (!sampler) |
| 369 | return XGL_ERROR_OUT_OF_MEMORY; |
| 370 | |
| 371 | sampler->obj.destroy = sampler_destroy; |
| 372 | |
| 373 | emit_sampler(dev->gpu, info, sampler->cmd); |
| 374 | |
| 375 | *sampler_ret = sampler; |
| 376 | |
| 377 | return XGL_SUCCESS; |
| 378 | } |
| 379 | |
| 380 | void intel_sampler_destroy(struct intel_sampler *sampler) |
| 381 | { |
| 382 | intel_base_destroy(&sampler->obj.base); |
| 383 | } |
| 384 | |
| 385 | XGL_RESULT XGLAPI intelCreateSampler( |
| 386 | XGL_DEVICE device, |
| 387 | const XGL_SAMPLER_CREATE_INFO* pCreateInfo, |
| 388 | XGL_SAMPLER* pSampler) |
| 389 | { |
| 390 | struct intel_dev *dev = intel_dev(device); |
| 391 | |
| 392 | return intel_sampler_create(dev, pCreateInfo, |
| 393 | (struct intel_sampler **) pSampler); |
| 394 | } |