| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright 2018 Google Inc. | 
 | 3 |  * | 
 | 4 |  * Use of this source code is governed by a BSD-style license that can be | 
 | 5 |  * found in the LICENSE file. | 
 | 6 |  * | 
 | 7 |  */ | 
 | 8 |  | 
 | 9 | // | 
 | 10 | // | 
 | 11 | // | 
 | 12 |  | 
 | 13 | #include "transpose.h" | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 14 | #include "common/macros.h" | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 15 |  | 
 | 16 | // | 
 | 17 | // Rows must be an even number.  This is enforced elsewhere. | 
 | 18 | // | 
 | 19 | // The transpose requires (cols_log2 * rows/2) row-pair blends. | 
 | 20 | // | 
 | 21 | void | 
 | 22 | hsg_transpose(uint32_t                   const cols_log2, | 
 | 23 |               uint32_t                   const rows, | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 24 |               void (*pfn_blend)(uint32_t const cols_log2, | 
 | 25 |                                 uint32_t const row_ll, // lower-left | 
 | 26 |                                 uint32_t const row_ur, // upper-right | 
 | 27 |                                 void *         blend), | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 28 |               void *                           blend, | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 29 |               void (*pfn_remap)(uint32_t const row_from, | 
 | 30 |                                 uint32_t const row_to, | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 31 |                                 void *         remap), | 
 | 32 |               void *                           remap) | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 33 | { | 
 | 34 |   // get mapping array | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 35 |   uint32_t * map_curr = ALLOCA_MACRO(rows * sizeof(*map_curr)); | 
 | 36 |   uint32_t * map_next = ALLOCA_MACRO(rows * sizeof(*map_next)); | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 37 |  | 
 | 38 |   // init the mapping array | 
 | 39 |   for (uint32_t ii=0; ii<rows; ii++) | 
 | 40 |     map_curr[ii] = ii; | 
 | 41 |  | 
 | 42 |   // successively transpose rows using blends | 
 | 43 |   for (uint32_t cc=1; cc<=cols_log2; cc++) | 
 | 44 |     { | 
 | 45 |       uint32_t const mask = BITS_TO_MASK(cc); | 
 | 46 |  | 
 | 47 |       for (uint32_t ii=0; ii<rows; ii++) | 
 | 48 |         { | 
 | 49 |           uint32_t const left = map_curr[ii]; | 
 | 50 |           uint32_t const stay = left & ~mask; | 
 | 51 |  | 
 | 52 |           if (left != stay) // will be swapped away | 
 | 53 |             { | 
 | 54 |               for (uint32_t jj=0; jj<rows; jj++) | 
 | 55 |                 { | 
| Hal Canary | 1419534 | 2018-07-11 16:10:14 -0400 | [diff] [blame] | 56 |                   if (map_curr[jj] == stay) | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 57 |                     { | 
 | 58 |                       map_next[jj] = stay; | 
 | 59 |                       map_next[ii] = stay + (rows << (cc-1)); | 
 | 60 |  | 
 | 61 |                       pfn_blend(cc,ii,jj,blend); // log2,left,right,payload | 
 | 62 |  | 
 | 63 |                       break; | 
 | 64 |                     } | 
 | 65 |                 } | 
 | 66 |             } | 
 | 67 |         } | 
 | 68 |  | 
 | 69 |       uint32_t * tmp = map_curr; | 
 | 70 |  | 
 | 71 |       map_curr = map_next; | 
 | 72 |       map_next = tmp; | 
 | 73 |     } | 
 | 74 |  | 
 | 75 |   // write out the remapping | 
 | 76 |   for (uint32_t ii=0; ii<rows; ii++) | 
 | 77 |     pfn_remap(ii,map_curr[ii] >> cols_log2,remap); | 
 | 78 | } | 
 | 79 |  | 
 | 80 | // | 
 | 81 | // test it! | 
 | 82 | // | 
 | 83 |  | 
 | 84 | #ifdef HS_TRANSPOSE_DEBUG | 
 | 85 |  | 
 | 86 | #include <stdio.h> | 
 | 87 |  | 
 | 88 | static uint32_t cols; // implicit on SIMD/GPU | 
 | 89 |  | 
 | 90 | static | 
| Hal Canary | 1419534 | 2018-07-11 16:10:14 -0400 | [diff] [blame] | 91 | void | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 92 | hsg_debug_blend(uint32_t const cols_log2, | 
 | 93 |                 uint32_t const row_ll, // lower-left | 
 | 94 |                 uint32_t const row_ur, // upper-right | 
 | 95 |                 uint32_t *     b) | 
 | 96 | { | 
 | 97 |   fprintf(stdout,"BLEND( %u, %3u, %3u )\n",cols_log2,row_ll,row_ur); | 
 | 98 |  | 
| Allan MacKinnon | e65dd58 | 2018-08-17 09:30:44 -0700 | [diff] [blame] | 99 |   uint32_t * const ll = ALLOCA_MACRO(cols * sizeof(*b)); | 
 | 100 |   uint32_t * const ur = ALLOCA_MACRO(cols * sizeof(*b)); | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 101 |  | 
 | 102 |   memcpy(ll,b+row_ll*cols,cols * sizeof(*b)); | 
 | 103 |   memcpy(ur,b+row_ur*cols,cols * sizeof(*b)); | 
 | 104 |  | 
 | 105 |   for (uint32_t ii=0; ii<cols; ii++) | 
 | 106 |     b[row_ll*cols+ii] = ((ii >> cols_log2-1) & 1) ? ll[ii] : ur[ii^(1<<cols_log2-1)]; | 
 | 107 |  | 
 | 108 |   for (uint32_t ii=0; ii<cols; ii++) | 
 | 109 |     b[row_ur*cols+ii] = ((ii >> cols_log2-1) & 1) ? ll[ii^(1<<cols_log2-1)] : ur[ii]; | 
 | 110 | } | 
 | 111 |  | 
 | 112 | static | 
 | 113 | void | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 114 | hsg_debug_remap(uint32_t   const row_from, | 
 | 115 |                 uint32_t   const row_to, | 
 | 116 |                 uint32_t * const r) | 
 | 117 | { | 
 | 118 |   fprintf(stdout,"REMAP( %3u, %3u )\n",row_from,row_to); | 
 | 119 |  | 
 | 120 |   r[row_to] = row_from; | 
 | 121 | } | 
 | 122 |  | 
 | 123 | static | 
| Hal Canary | 1419534 | 2018-07-11 16:10:14 -0400 | [diff] [blame] | 124 | void | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 125 | hsg_debug_print(uint32_t         const rows, | 
 | 126 |                 uint32_t const * const m, | 
 | 127 |                 uint32_t const * const r) | 
 | 128 | { | 
 | 129 |   for (uint32_t rr=0; rr<rows; rr++) { | 
 | 130 |     for (uint32_t cc=0; cc<cols; cc++) | 
 | 131 |       fprintf(stdout,"%4u ",m[r[rr]*cols + cc]); | 
 | 132 |     fprintf(stdout,"\n"); | 
 | 133 |   } | 
 | 134 | } | 
 | 135 |  | 
 | 136 | int | 
 | 137 | main(int argc, char * argv[]) | 
 | 138 | { | 
 | 139 |   uint32_t const cols_log2 = (argc <= 1) ? 3 : strtoul(argv[1],NULL,0); | 
 | 140 |   uint32_t const rows      = (argc <= 2) ? 6 : strtoul(argv[2],NULL,0); | 
 | 141 |  | 
 | 142 |   if (rows & 1) | 
 | 143 |     return; | 
 | 144 |  | 
 | 145 |   cols = 1 << cols_log2; | 
 | 146 |  | 
| Allan MacKinnon | e65dd58 | 2018-08-17 09:30:44 -0700 | [diff] [blame] | 147 |   uint32_t * const b = ALLOCA_MACRO(cols * rows * sizeof(*b)); | 
 | 148 |   uint32_t * const r = ALLOCA_MACRO(       rows * sizeof(*r)); | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 149 |  | 
 | 150 |   for (uint32_t rr=0; rr<rows; rr++) { | 
 | 151 |     r[rr] = rr; | 
 | 152 |     for (uint32_t cc=0; cc<cols; cc++) | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 153 |       b[rr*cols+cc] = cc*rows+rr; | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 154 |   } | 
 | 155 |  | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 156 |   hsg_debug_print(rows,b,r); | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 157 |  | 
 | 158 |   hsg_transpose(cols_log2,rows, | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 159 |                 hsg_debug_blend,b, | 
 | 160 |                 hsg_debug_remap,r); | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 161 |  | 
| Allan MacKinnon | 9e0d7e4 | 2018-07-16 15:57:05 -0700 | [diff] [blame] | 162 |   hsg_debug_print(rows,b,r); | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 163 |  | 
| Allan MacKinnon | 5cd7777 | 2018-09-13 15:00:16 -0700 | [diff] [blame] | 164 |   return EXIT_SUCCESS; | 
| Allan MacKinnon | 4359d52 | 2018-06-19 13:57:04 -0700 | [diff] [blame] | 165 | } | 
 | 166 |  | 
 | 167 | #endif | 
 | 168 |  | 
 | 169 | // | 
 | 170 | // | 
 | 171 | // |