Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 1 | /* |
| 2 | * Copyright © 2019 Intel Corporation |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice (including the next |
| 12 | * paragraph) shall be included in all copies or substantial portions of the |
| 13 | * Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 21 | * IN THE SOFTWARE. |
| 22 | */ |
| 23 | |
| 24 | #include "nir.h" |
| 25 | #include "nir_builder.h" |
| 26 | #include "nir_deref.h" |
| 27 | |
| 28 | /** @file nir_lower_io_to_vector.c |
| 29 | * |
| 30 | * Merges compatible input/output variables residing in different components |
| 31 | * of the same location. It's expected that further passes such as |
| 32 | * nir_lower_io_to_temporaries will combine loads and stores of the merged |
| 33 | * variables, producing vector nir_load_input/nir_store_output instructions |
| 34 | * when all is said and done. |
| 35 | */ |
| 36 | |
/* Upper bound on the number of location slots this pass tracks, across all
 * shader stages.  FRAG_RESULT_MAX+1 instead of just FRAG_RESULT_MAX because
 * of how this pass handles dual source blending: get_slot() folds
 * var->data.index (0 or 1) into the slot number, so fragment outputs can
 * occupy one slot past FRAG_RESULT_MAX. */
#define MAX_SLOTS MAX2(VARYING_SLOT_TESS_MAX, FRAG_RESULT_MAX+1)
| 40 | |
| 41 | static unsigned |
| 42 | get_slot(const nir_variable *var) |
| 43 | { |
| 44 | /* This handling of dual-source blending might not be correct when more than |
| 45 | * one render target is supported, but it seems no driver supports more than |
| 46 | * one. */ |
| 47 | return var->data.location + var->data.index; |
| 48 | } |
| 49 | |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 50 | static const struct glsl_type * |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 51 | get_per_vertex_type(const nir_shader *shader, const nir_variable *var, |
| 52 | unsigned *num_vertices) |
| 53 | { |
| 54 | if (nir_is_per_vertex_io(var, shader->info.stage)) { |
| 55 | assert(glsl_type_is_array(var->type)); |
| 56 | if (num_vertices) |
| 57 | *num_vertices = glsl_get_length(var->type); |
| 58 | return glsl_get_array_element(var->type); |
| 59 | } else { |
| 60 | if (num_vertices) |
| 61 | *num_vertices = 0; |
| 62 | return var->type; |
| 63 | } |
| 64 | } |
| 65 | |
/* Returns a copy of @type whose innermost vector is resized to
 * @num_components channels, preserving any (possibly nested) array structure
 * above it.  The leaf type must be a vector or scalar.
 */
static const struct glsl_type *
resize_array_vec_type(const struct glsl_type *type, unsigned num_components)
{
   if (!glsl_type_is_array(type)) {
      /* Leaf: rebuild the vector with the requested width. */
      assert(glsl_type_is_vector_or_scalar(type));
      return glsl_vector_type(glsl_get_base_type(type), num_components);
   }

   /* Recurse through each array level, keeping its length. */
   const struct glsl_type *resized_elem =
      resize_array_vec_type(glsl_get_array_element(type), num_components);
   return glsl_array_type(resized_elem, glsl_get_length(type), 0);
}
| 78 | |
/* Returns true if variables @a and @b may be merged into one vector variable.
 *
 * @same_array_structure selects between the strict mode used for
 * component-wise merging (array nesting and lengths must match exactly) and
 * the looser mode used for "flat" vec4 merging (arrays are peeled and only
 * the leaf types are compared).
 *
 * Both variables must have the same mode; only their leaf vector/scalar
 * types, interpolation, dual-source index, and XFB state are compared here.
 */
static bool
variables_can_merge(const nir_shader *shader,
                    const nir_variable *a, const nir_variable *b,
                    bool same_array_structure)
{
   /* Compact variables (e.g. clip/cull distances) have their own packing
    * rules and are never merged. */
   if (a->data.compact || b->data.compact)
      return false;

   if (a->data.per_view || b->data.per_view)
      return false;

   const struct glsl_type *a_type_tail = a->type;
   const struct glsl_type *b_type_tail = b->type;

   /* Per-vertex and non-per-vertex I/O interpret the outer array
    * differently, so they can never merge. */
   if (nir_is_per_vertex_io(a, shader->info.stage) !=
       nir_is_per_vertex_io(b, shader->info.stage))
      return false;

   /* They must have the same array structure */
   if (same_array_structure) {
      while (glsl_type_is_array(a_type_tail)) {
         if (!glsl_type_is_array(b_type_tail))
            return false;

         if (glsl_get_length(a_type_tail) != glsl_get_length(b_type_tail))
            return false;

         a_type_tail = glsl_get_array_element(a_type_tail);
         b_type_tail = glsl_get_array_element(b_type_tail);
      }
      if (glsl_type_is_array(b_type_tail))
         return false;
   } else {
      a_type_tail = glsl_without_array(a_type_tail);
      b_type_tail = glsl_without_array(b_type_tail);
   }

   /* Structs (and anything else non-vector) cannot be merged. */
   if (!glsl_type_is_vector_or_scalar(a_type_tail) ||
       !glsl_type_is_vector_or_scalar(b_type_tail))
      return false;

   if (glsl_get_base_type(a_type_tail) != glsl_get_base_type(b_type_tail))
      return false;

   /* TODO: add 64/16bit support ? */
   if (glsl_get_bit_size(a_type_tail) != 32)
      return false;

   assert(a->data.mode == b->data.mode);
   /* FS inputs with different interpolation qualifiers can't share a load. */
   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
       a->data.mode == nir_var_shader_in &&
       a->data.interpolation != b->data.interpolation)
      return false;

   /* Dual-source blend outputs with different indices target different
    * hardware outputs. */
   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
       a->data.mode == nir_var_shader_out &&
       a->data.index != b->data.index)
      return false;

   /* It's tricky to merge XFB-outputs correctly, because we need there
    * to not be any overlaps when we get to
    * nir_gather_xfb_info_with_varyings later on. We'll end up
    * triggering an assert there if we merge here.
    */
   if ((shader->info.stage == MESA_SHADER_VERTEX ||
        shader->info.stage == MESA_SHADER_TESS_EVAL ||
        shader->info.stage == MESA_SHADER_GEOMETRY) &&
       a->data.mode == nir_var_shader_out &&
       (a->data.explicit_xfb_buffer || b->data.explicit_xfb_buffer))
      return false;

   return true;
}
| 152 | |
/* Scans forward from *loc looking for a run of variables that can be merged
 * into a single vec4 (or array-of-vec4) "flat" variable.
 *
 * On success returns the merged type (a vec4 if the run covers one slot,
 * otherwise a vec4 array covering @slots slots), sets *first_var to the
 * first variable found (its data is cloned by the caller), and sets
 * *num_vertices for per-vertex I/O.  Returns NULL if fewer than two
 * variables were found or a variable in the run can't participate.
 *
 * In every case *loc is advanced past the slots that were examined, so the
 * caller can resume scanning from the returned position.
 */
static const struct glsl_type *
get_flat_type(const nir_shader *shader, nir_variable *old_vars[MAX_SLOTS][4],
              unsigned *loc, nir_variable **first_var, unsigned *num_vertices)
{
   /* todo counts the remaining slots the run must still cover; it is raised
    * whenever a variable spans multiple attribute slots. */
   unsigned todo = 1;
   unsigned slots = 0;
   unsigned num_vars = 0;
   enum glsl_base_type base;
   *num_vertices = 0;
   *first_var = NULL;

   while (todo) {
      assert(*loc < MAX_SLOTS);
      for (unsigned frac = 0; frac < 4; frac++) {
         nir_variable *var = old_vars[*loc][frac];
         if (!var)
            continue;
         /* Bail on incompatible or compact variables; *loc is still advanced
          * so the caller skips this slot. */
         if ((*first_var &&
              !variables_can_merge(shader, var, *first_var, false)) ||
             var->data.compact) {
            (*loc)++;
            return NULL;
         }

         if (!*first_var) {
            /* Structs can't be flattened into a vec4. */
            if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type))) {
               (*loc)++;
               return NULL;
            }
            *first_var = var;
            base = glsl_get_base_type(
               glsl_without_array(get_per_vertex_type(shader, var, NULL)));
         }

         bool vs_in = shader->info.stage == MESA_SHADER_VERTEX &&
                      var->data.mode == nir_var_shader_in;
         unsigned var_slots = glsl_count_attribute_slots(
            get_per_vertex_type(shader, var, num_vertices), vs_in);
         /* Extend the run to cover everything this variable spans. */
         todo = MAX2(todo, var_slots);
         num_vars++;
      }
      todo--;
      slots++;
      (*loc)++;
   }

   /* Merging a single variable would be pointless churn. */
   if (num_vars <= 1)
      return NULL;

   if (slots == 1)
      return glsl_vector_type(base, 4);
   else
      return glsl_array_type(glsl_vector_type(base, 4), slots, 0);
}
| 207 | |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 208 | static bool |
Jason Ekstrand | bb3994c | 2020-07-21 11:13:04 -0500 | [diff] [blame] | 209 | create_new_io_vars(nir_shader *shader, nir_variable_mode mode, |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 210 | nir_variable *new_vars[MAX_SLOTS][4], |
| 211 | bool flat_vars[MAX_SLOTS]) |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 212 | { |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 213 | nir_variable *old_vars[MAX_SLOTS][4] = {{0}}; |
| 214 | |
Jason Ekstrand | bb3994c | 2020-07-21 11:13:04 -0500 | [diff] [blame] | 215 | bool has_io_var = false; |
| 216 | nir_foreach_variable_with_modes(var, shader, mode) { |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 217 | unsigned frac = var->data.location_frac; |
| 218 | old_vars[get_slot(var)][frac] = var; |
Jason Ekstrand | bb3994c | 2020-07-21 11:13:04 -0500 | [diff] [blame] | 219 | has_io_var = true; |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 220 | } |
| 221 | |
Jason Ekstrand | bb3994c | 2020-07-21 11:13:04 -0500 | [diff] [blame] | 222 | if (!has_io_var) |
| 223 | return false; |
| 224 | |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 225 | bool merged_any_vars = false; |
| 226 | |
Rhys Perry | 300e758 | 2019-05-17 11:53:32 +0100 | [diff] [blame] | 227 | for (unsigned loc = 0; loc < MAX_SLOTS; loc++) { |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 228 | unsigned frac = 0; |
| 229 | while (frac < 4) { |
| 230 | nir_variable *first_var = old_vars[loc][frac]; |
| 231 | if (!first_var) { |
| 232 | frac++; |
| 233 | continue; |
| 234 | } |
| 235 | |
| 236 | int first = frac; |
| 237 | bool found_merge = false; |
| 238 | |
| 239 | while (frac < 4) { |
| 240 | nir_variable *var = old_vars[loc][frac]; |
| 241 | if (!var) |
| 242 | break; |
| 243 | |
| 244 | if (var != first_var) { |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 245 | if (!variables_can_merge(shader, first_var, var, true)) |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 246 | break; |
| 247 | |
| 248 | found_merge = true; |
| 249 | } |
| 250 | |
| 251 | const unsigned num_components = |
| 252 | glsl_get_components(glsl_without_array(var->type)); |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 253 | if (!num_components) { |
| 254 | assert(frac == 0); |
| 255 | frac++; |
| 256 | break; /* The type was a struct. */ |
| 257 | } |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 258 | |
| 259 | /* We had better not have any overlapping vars */ |
| 260 | for (unsigned i = 1; i < num_components; i++) |
| 261 | assert(old_vars[loc][frac + i] == NULL); |
| 262 | |
| 263 | frac += num_components; |
| 264 | } |
| 265 | |
| 266 | if (!found_merge) |
| 267 | continue; |
| 268 | |
| 269 | merged_any_vars = true; |
| 270 | |
| 271 | nir_variable *var = nir_variable_clone(old_vars[loc][first], shader); |
| 272 | var->data.location_frac = first; |
| 273 | var->type = resize_array_vec_type(var->type, frac - first); |
| 274 | |
| 275 | nir_shader_add_variable(shader, var); |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 276 | for (unsigned i = first; i < frac; i++) { |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 277 | new_vars[loc][i] = var; |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 278 | old_vars[loc][i] = NULL; |
| 279 | } |
| 280 | |
| 281 | old_vars[loc][first] = var; |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 282 | } |
| 283 | } |
| 284 | |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 285 | /* "flat" mode: tries to ensure there is at most one variable per slot by |
| 286 | * merging variables into vec4s |
| 287 | */ |
| 288 | for (unsigned loc = 0; loc < MAX_SLOTS;) { |
| 289 | nir_variable *first_var; |
| 290 | unsigned num_vertices; |
| 291 | unsigned new_loc = loc; |
| 292 | const struct glsl_type *flat_type = |
| 293 | get_flat_type(shader, old_vars, &new_loc, &first_var, &num_vertices); |
| 294 | if (flat_type) { |
| 295 | merged_any_vars = true; |
| 296 | |
| 297 | nir_variable *var = nir_variable_clone(first_var, shader); |
| 298 | var->data.location_frac = 0; |
| 299 | if (num_vertices) |
| 300 | var->type = glsl_array_type(flat_type, num_vertices, 0); |
| 301 | else |
| 302 | var->type = flat_type; |
| 303 | |
| 304 | nir_shader_add_variable(shader, var); |
| 305 | for (unsigned i = 0; i < glsl_get_length(flat_type); i++) { |
| 306 | for (unsigned j = 0; j < 4; j++) |
| 307 | new_vars[loc + i][j] = var; |
| 308 | flat_vars[loc + i] = true; |
| 309 | } |
| 310 | } |
| 311 | loc = new_loc; |
| 312 | } |
| 313 | |
Jason Ekstrand | 5ef2b8f | 2019-03-06 15:21:51 -0600 | [diff] [blame] | 314 | return merged_any_vars; |
| 315 | } |
| 316 | |
| 317 | static nir_deref_instr * |
| 318 | build_array_deref_of_new_var(nir_builder *b, nir_variable *new_var, |
| 319 | nir_deref_instr *leader) |
| 320 | { |
| 321 | if (leader->deref_type == nir_deref_type_var) |
| 322 | return nir_build_deref_var(b, new_var); |
| 323 | |
| 324 | nir_deref_instr *parent = |
| 325 | build_array_deref_of_new_var(b, new_var, nir_deref_instr_parent(leader)); |
| 326 | |
| 327 | return nir_build_deref_follower(b, parent, leader); |
| 328 | } |
| 329 | |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 330 | static nir_ssa_def * |
| 331 | build_array_index(nir_builder *b, nir_deref_instr *deref, nir_ssa_def *base, |
| 332 | bool vs_in) |
| 333 | { |
| 334 | switch (deref->deref_type) { |
| 335 | case nir_deref_type_var: |
| 336 | return base; |
| 337 | case nir_deref_type_array: { |
| 338 | nir_ssa_def *index = nir_i2i(b, deref->arr.index.ssa, |
| 339 | deref->dest.ssa.bit_size); |
| 340 | return nir_iadd( |
| 341 | b, build_array_index(b, nir_deref_instr_parent(deref), base, vs_in), |
Rob Clark | 6320e37 | 2019-09-26 10:32:00 -0700 | [diff] [blame] | 342 | nir_amul_imm(b, index, glsl_count_attribute_slots(deref->type, vs_in))); |
Rhys Perry | bcd1475 | 2019-05-17 15:04:39 +0100 | [diff] [blame] | 343 | } |
| 344 | default: |
| 345 | unreachable("Invalid deref instruction type"); |
| 346 | } |
| 347 | } |
| 348 | |
/* Builds a deref into a "flat"-merged variable that corresponds to the
 * original access @leader.
 *
 * For per-vertex I/O the outer vertex index of @leader is replicated first.
 * The remaining array levels of @leader are then collapsed into a single
 * linear index (see build_array_index) offset by @base, the slot distance
 * between the original variable and the start of the merged variable.
 */
static nir_deref_instr *
build_array_deref_of_new_var_flat(nir_shader *shader,
                                  nir_builder *b, nir_variable *new_var,
                                  nir_deref_instr *leader, unsigned base)
{
   nir_deref_instr *deref = nir_build_deref_var(b, new_var);

   if (nir_is_per_vertex_io(new_var, shader->info.stage)) {
      /* The innermost array step of the leader chain is the vertex index;
       * peel it off and apply it directly. */
      assert(leader->deref_type == nir_deref_type_array);
      nir_ssa_def *index = leader->arr.index.ssa;
      leader = nir_deref_instr_parent(leader);
      deref = nir_build_deref_array(b, deref, index);
   }

   /* Single-slot merged variables are plain vec4s; no indexing needed. */
   if (!glsl_type_is_array(deref->type))
      return deref;

   bool vs_in = shader->info.stage == MESA_SHADER_VERTEX &&
                new_var->data.mode == nir_var_shader_in;
   return nir_build_deref_array(
      b, deref, build_array_index(b, leader, nir_imm_int(b, base), vs_in));
}
| 371 | |
/* Runs the pass on one function: builds the merged I/O variables, then
 * rewrites every load/store/interp deref intrinsic that touches an old
 * variable to go through the corresponding merged variable instead.
 * Returns true if anything changed.
 */
static bool
nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
{
   assert(!(modes & ~(nir_var_shader_in | nir_var_shader_out)));

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_metadata_require(impl, nir_metadata_dominance);

   nir_shader *shader = impl->function->shader;
   /* Per-slot/per-component map from old variable position to the merged
    * replacement variable, plus whether each slot was merged in "flat"
    * vec4 mode (which changes how derefs are rebuilt). */
   nir_variable *new_inputs[MAX_SLOTS][4] = {{0}};
   nir_variable *new_outputs[MAX_SLOTS][4] = {{0}};
   bool flat_inputs[MAX_SLOTS] = {0};
   bool flat_outputs[MAX_SLOTS] = {0};

   if (modes & nir_var_shader_in) {
      /* Vertex shaders support overlapping inputs. We don't do those */
      assert(b.shader->info.stage != MESA_SHADER_VERTEX);

      /* If we don't actually merge any variables, remove that bit from modes
       * so we don't bother doing extra non-work.
       */
      if (!create_new_io_vars(shader, nir_var_shader_in,
                              new_inputs, flat_inputs))
         modes &= ~nir_var_shader_in;
   }

   if (modes & nir_var_shader_out) {
      /* If we don't actually merge any variables, remove that bit from modes
       * so we don't bother doing extra non-work.
       */
      if (!create_new_io_vars(shader, nir_var_shader_out,
                              new_outputs, flat_outputs))
         modes &= ~nir_var_shader_out;
   }

   if (!modes)
      return false;

   bool progress = false;

   /* Actually lower all the IO load/store intrinsics. Load instructions are
    * lowered to a vector load and an ALU instruction to grab the channels we
    * want. Outputs are lowered to a write-masked store of the vector output.
    * For non-TCS outputs, we then run nir_lower_io_to_temporaries at the end
    * to clean up the partial writes.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
         case nir_intrinsic_interp_deref_at_centroid:
         case nir_intrinsic_interp_deref_at_sample:
         case nir_intrinsic_interp_deref_at_offset:
         case nir_intrinsic_interp_deref_at_vertex: {
            nir_deref_instr *old_deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is_one_of(old_deref, modes))
               break;

            /* Output loads only exist in stages that can read back their own
             * outputs (TCS) or for dual-source FS reads. */
            if (nir_deref_mode_is(old_deref, nir_var_shader_out))
               assert(b.shader->info.stage == MESA_SHADER_TESS_CTRL ||
                      b.shader->info.stage == MESA_SHADER_FRAGMENT);

            nir_variable *old_var = nir_deref_instr_get_variable(old_deref);

            const unsigned loc = get_slot(old_var);
            const unsigned old_frac = old_var->data.location_frac;
            nir_variable *new_var = old_var->data.mode == nir_var_shader_in ?
                                    new_inputs[loc][old_frac] :
                                    new_outputs[loc][old_frac];
            bool flat = old_var->data.mode == nir_var_shader_in ?
                        flat_inputs[loc] : flat_outputs[loc];
            /* No merged replacement for this slot/component: leave it alone. */
            if (!new_var)
               break;

            const unsigned new_frac = new_var->data.location_frac;

            /* Mask of the original load's channels, positioned within the
             * full vec4 slot. */
            nir_component_mask_t vec4_comp_mask =
               ((1 << intrin->num_components) - 1) << old_frac;

            b.cursor = nir_before_instr(&intrin->instr);

            /* Rewrite the load to use the new variable and only select a
             * portion of the result.
             */
            nir_deref_instr *new_deref;
            if (flat) {
               new_deref = build_array_deref_of_new_var_flat(
                  shader, &b, new_var, old_deref, loc - get_slot(new_var));
            } else {
               assert(get_slot(new_var) == loc);
               new_deref = build_array_deref_of_new_var(&b, new_var, old_deref);
               assert(glsl_type_is_vector(new_deref->type));
            }
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                                  nir_src_for_ssa(&new_deref->dest.ssa));

            /* Widen the load to the merged variable's full width... */
            intrin->num_components =
               glsl_get_components(new_deref->type);
            intrin->dest.ssa.num_components = intrin->num_components;

            b.cursor = nir_after_instr(&intrin->instr);

            /* ...then swizzle out just the channels the original load
             * produced, and repoint all users at that narrowed value. */
            nir_ssa_def *new_vec = nir_channels(&b, &intrin->dest.ssa,
                                                vec4_comp_mask >> new_frac);
            nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                           nir_src_for_ssa(new_vec),
                                           new_vec->parent_instr);

            progress = true;
            break;
         }

         case nir_intrinsic_store_deref: {
            nir_deref_instr *old_deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is(old_deref, nir_var_shader_out))
               break;

            nir_variable *old_var = nir_deref_instr_get_variable(old_deref);

            const unsigned loc = get_slot(old_var);
            const unsigned old_frac = old_var->data.location_frac;
            nir_variable *new_var = new_outputs[loc][old_frac];
            bool flat = flat_outputs[loc];
            if (!new_var)
               break;

            const unsigned new_frac = new_var->data.location_frac;

            b.cursor = nir_before_instr(&intrin->instr);

            /* Rewrite the store to be a masked store to the new variable */
            nir_deref_instr *new_deref;
            if (flat) {
               new_deref = build_array_deref_of_new_var_flat(
                  shader, &b, new_var, old_deref, loc - get_slot(new_var));
            } else {
               assert(get_slot(new_var) == loc);
               new_deref = build_array_deref_of_new_var(&b, new_var, old_deref);
               assert(glsl_type_is_vector(new_deref->type));
            }
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                                  nir_src_for_ssa(&new_deref->dest.ssa));

            intrin->num_components =
               glsl_get_components(new_deref->type);

            nir_component_mask_t old_wrmask = nir_intrinsic_write_mask(intrin);

            assert(intrin->src[1].is_ssa);
            nir_ssa_def *old_value = intrin->src[1].ssa;
            /* Rebuild the stored value at the merged variable's width:
             * channels covered by the original write mask come from the old
             * value; everything else is undef (masked off below). */
            nir_ssa_def *comps[4];
            for (unsigned c = 0; c < intrin->num_components; c++) {
               if (new_frac + c >= old_frac &&
                   (old_wrmask & 1 << (new_frac + c - old_frac))) {
                  comps[c] = nir_channel(&b, old_value,
                                         new_frac + c - old_frac);
               } else {
                  comps[c] = nir_ssa_undef(&b, old_value->num_components,
                                           old_value->bit_size);
               }
            }
            nir_ssa_def *new_value = nir_vec(&b, comps, intrin->num_components);
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                  nir_src_for_ssa(new_value));

            /* Shift the write mask into the merged variable's component
             * space. */
            nir_intrinsic_set_write_mask(intrin,
                                         old_wrmask << (old_frac - new_frac));

            progress = true;
            break;
         }

         default:
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}
| 564 | |
| 565 | bool |
| 566 | nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode modes) |
| 567 | { |
| 568 | bool progress = false; |
| 569 | |
| 570 | nir_foreach_function(function, shader) { |
| 571 | if (function->impl) |
| 572 | progress |= nir_lower_io_to_vector_impl(function->impl, modes); |
| 573 | } |
| 574 | |
| 575 | return progress; |
| 576 | } |