Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2018 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | * |
| 7 | */ |
| 8 | |
| 9 | // |
| 10 | // |
| 11 | // |
| 12 | |
| 13 | #include "transpose.h" |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 14 | #include "common/macros.h" |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 15 | |
| 16 | // |
| 17 | // Rows must be an even number. This is enforced elsewhere. |
| 18 | // |
| 19 | // The transpose requires (cols_log2 * rows/2) row-pair blends. |
| 20 | // |
| 21 | void |
| 22 | hsg_transpose(uint32_t const cols_log2, |
| 23 | uint32_t const rows, |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 24 | void (*pfn_blend)(uint32_t const cols_log2, |
| 25 | uint32_t const row_ll, // lower-left |
| 26 | uint32_t const row_ur, // upper-right |
| 27 | void * blend), |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 28 | void * blend, |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 29 | void (*pfn_remap)(uint32_t const row_from, |
| 30 | uint32_t const row_to, |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 31 | void * remap), |
| 32 | void * remap) |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 33 | { |
| 34 | // get mapping array |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 35 | uint32_t * map_curr = ALLOCA_MACRO(rows * sizeof(*map_curr)); |
| 36 | uint32_t * map_next = ALLOCA_MACRO(rows * sizeof(*map_next)); |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 37 | |
| 38 | // init the mapping array |
| 39 | for (uint32_t ii=0; ii<rows; ii++) |
| 40 | map_curr[ii] = ii; |
| 41 | |
| 42 | // successively transpose rows using blends |
| 43 | for (uint32_t cc=1; cc<=cols_log2; cc++) |
| 44 | { |
| 45 | uint32_t const mask = BITS_TO_MASK(cc); |
| 46 | |
| 47 | for (uint32_t ii=0; ii<rows; ii++) |
| 48 | { |
| 49 | uint32_t const left = map_curr[ii]; |
| 50 | uint32_t const stay = left & ~mask; |
| 51 | |
| 52 | if (left != stay) // will be swapped away |
| 53 | { |
| 54 | for (uint32_t jj=0; jj<rows; jj++) |
| 55 | { |
Hal Canary | 1419534 | 2018-07-11 16:10:14 -0400 | [diff] [blame] | 56 | if (map_curr[jj] == stay) |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 57 | { |
| 58 | map_next[jj] = stay; |
| 59 | map_next[ii] = stay + (rows << (cc-1)); |
| 60 | |
| 61 | pfn_blend(cc,ii,jj,blend); // log2,left,right,payload |
| 62 | |
| 63 | break; |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | uint32_t * tmp = map_curr; |
| 70 | |
| 71 | map_curr = map_next; |
| 72 | map_next = tmp; |
| 73 | } |
| 74 | |
| 75 | // write out the remapping |
| 76 | for (uint32_t ii=0; ii<rows; ii++) |
| 77 | pfn_remap(ii,map_curr[ii] >> cols_log2,remap); |
| 78 | } |
| 79 | |
| 80 | // |
| 81 | // test it! |
| 82 | // |
| 83 | |
| 84 | #ifdef HS_TRANSPOSE_DEBUG |
| 85 | |
| 86 | #include <stdio.h> |
| 87 | |
| 88 | static uint32_t cols; // implicit on SIMD/GPU |
| 89 | |
| 90 | static |
Hal Canary | 1419534 | 2018-07-11 16:10:14 -0400 | [diff] [blame] | 91 | void |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 92 | hsg_debug_blend(uint32_t const cols_log2, |
| 93 | uint32_t const row_ll, // lower-left |
| 94 | uint32_t const row_ur, // upper-right |
| 95 | uint32_t * b) |
| 96 | { |
| 97 | fprintf(stdout,"BLEND( %u, %3u, %3u )\n",cols_log2,row_ll,row_ur); |
| 98 | |
Allan MacKinnon | e65dd58 | 2018-08-17 09:30:44 -0700 | [diff] [blame] | 99 | uint32_t * const ll = ALLOCA_MACRO(cols * sizeof(*b)); |
| 100 | uint32_t * const ur = ALLOCA_MACRO(cols * sizeof(*b)); |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 101 | |
| 102 | memcpy(ll,b+row_ll*cols,cols * sizeof(*b)); |
| 103 | memcpy(ur,b+row_ur*cols,cols * sizeof(*b)); |
| 104 | |
| 105 | for (uint32_t ii=0; ii<cols; ii++) |
| 106 | b[row_ll*cols+ii] = ((ii >> cols_log2-1) & 1) ? ll[ii] : ur[ii^(1<<cols_log2-1)]; |
| 107 | |
| 108 | for (uint32_t ii=0; ii<cols; ii++) |
| 109 | b[row_ur*cols+ii] = ((ii >> cols_log2-1) & 1) ? ll[ii^(1<<cols_log2-1)] : ur[ii]; |
| 110 | } |
| 111 | |
| 112 | static |
| 113 | void |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 114 | hsg_debug_remap(uint32_t const row_from, |
| 115 | uint32_t const row_to, |
| 116 | uint32_t * const r) |
| 117 | { |
| 118 | fprintf(stdout,"REMAP( %3u, %3u )\n",row_from,row_to); |
| 119 | |
| 120 | r[row_to] = row_from; |
| 121 | } |
| 122 | |
| 123 | static |
Hal Canary | 1419534 | 2018-07-11 16:10:14 -0400 | [diff] [blame] | 124 | void |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 125 | hsg_debug_print(uint32_t const rows, |
| 126 | uint32_t const * const m, |
| 127 | uint32_t const * const r) |
| 128 | { |
| 129 | for (uint32_t rr=0; rr<rows; rr++) { |
| 130 | for (uint32_t cc=0; cc<cols; cc++) |
| 131 | fprintf(stdout,"%4u ",m[r[rr]*cols + cc]); |
| 132 | fprintf(stdout,"\n"); |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | int |
| 137 | main(int argc, char * argv[]) |
| 138 | { |
| 139 | uint32_t const cols_log2 = (argc <= 1) ? 3 : strtoul(argv[1],NULL,0); |
| 140 | uint32_t const rows = (argc <= 2) ? 6 : strtoul(argv[2],NULL,0); |
| 141 | |
| 142 | if (rows & 1) |
| 143 | return; |
| 144 | |
| 145 | cols = 1 << cols_log2; |
| 146 | |
Allan MacKinnon | e65dd58 | 2018-08-17 09:30:44 -0700 | [diff] [blame] | 147 | uint32_t * const b = ALLOCA_MACRO(cols * rows * sizeof(*b)); |
| 148 | uint32_t * const r = ALLOCA_MACRO( rows * sizeof(*r)); |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 149 | |
| 150 | for (uint32_t rr=0; rr<rows; rr++) { |
| 151 | r[rr] = rr; |
| 152 | for (uint32_t cc=0; cc<cols; cc++) |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 153 | b[rr*cols+cc] = cc*rows+rr; |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 154 | } |
| 155 | |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 156 | hsg_debug_print(rows,b,r); |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 157 | |
| 158 | hsg_transpose(cols_log2,rows, |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 159 | hsg_debug_blend,b, |
| 160 | hsg_debug_remap,r); |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 161 | |
Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 162 | hsg_debug_print(rows,b,r); |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 163 | |
Allan MacKinnon | 5cd7777 | 2018-09-13 15:00:16 -0700 | [diff] [blame] | 164 | return EXIT_SUCCESS; |
Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 165 | } |
| 166 | |
| 167 | #endif |
| 168 | |
| 169 | // |
| 170 | // |
| 171 | // |