blob: 156aaeb2701c0d9b67eee0fa6842f37099620377 [file] [log] [blame]
Allan MacKinnon4359d522018-06-19 13:57:04 -07001/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 *
7 */
8
9//
10//
11//
12
13#include "transpose.h"
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -070014#include "common/macros.h"
Allan MacKinnon4359d522018-06-19 13:57:04 -070015
16//
17// Rows must be an even number. This is enforced elsewhere.
18//
19// The transpose requires (cols_log2 * rows/2) row-pair blends.
20//
21void
22hsg_transpose(uint32_t const cols_log2,
23 uint32_t const rows,
Allan MacKinnon4359d522018-06-19 13:57:04 -070024 void (*pfn_blend)(uint32_t const cols_log2,
25 uint32_t const row_ll, // lower-left
26 uint32_t const row_ur, // upper-right
27 void * blend),
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -070028 void * blend,
Allan MacKinnon4359d522018-06-19 13:57:04 -070029 void (*pfn_remap)(uint32_t const row_from,
30 uint32_t const row_to,
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -070031 void * remap),
32 void * remap)
Allan MacKinnon4359d522018-06-19 13:57:04 -070033{
34 // get mapping array
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -070035 uint32_t * map_curr = ALLOCA_MACRO(rows * sizeof(*map_curr));
36 uint32_t * map_next = ALLOCA_MACRO(rows * sizeof(*map_next));
Allan MacKinnon4359d522018-06-19 13:57:04 -070037
38 // init the mapping array
39 for (uint32_t ii=0; ii<rows; ii++)
40 map_curr[ii] = ii;
41
42 // successively transpose rows using blends
43 for (uint32_t cc=1; cc<=cols_log2; cc++)
44 {
45 uint32_t const mask = BITS_TO_MASK(cc);
46
47 for (uint32_t ii=0; ii<rows; ii++)
48 {
49 uint32_t const left = map_curr[ii];
50 uint32_t const stay = left & ~mask;
51
52 if (left != stay) // will be swapped away
53 {
54 for (uint32_t jj=0; jj<rows; jj++)
55 {
Hal Canary14195342018-07-11 16:10:14 -040056 if (map_curr[jj] == stay)
Allan MacKinnon4359d522018-06-19 13:57:04 -070057 {
58 map_next[jj] = stay;
59 map_next[ii] = stay + (rows << (cc-1));
60
61 pfn_blend(cc,ii,jj,blend); // log2,left,right,payload
62
63 break;
64 }
65 }
66 }
67 }
68
69 uint32_t * tmp = map_curr;
70
71 map_curr = map_next;
72 map_next = tmp;
73 }
74
75 // write out the remapping
76 for (uint32_t ii=0; ii<rows; ii++)
77 pfn_remap(ii,map_curr[ii] >> cols_log2,remap);
78}
79
80//
81// test it!
82//
83
84#ifdef HS_TRANSPOSE_DEBUG
85
86#include <stdio.h>
87
88static uint32_t cols; // implicit on SIMD/GPU
89
90static
Hal Canary14195342018-07-11 16:10:14 -040091void
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -070092hsg_debug_blend(uint32_t const cols_log2,
93 uint32_t const row_ll, // lower-left
94 uint32_t const row_ur, // upper-right
95 uint32_t * b)
96{
97 fprintf(stdout,"BLEND( %u, %3u, %3u )\n",cols_log2,row_ll,row_ur);
98
Allan MacKinnone65dd582018-08-17 09:30:44 -070099 uint32_t * const ll = ALLOCA_MACRO(cols * sizeof(*b));
100 uint32_t * const ur = ALLOCA_MACRO(cols * sizeof(*b));
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -0700101
102 memcpy(ll,b+row_ll*cols,cols * sizeof(*b));
103 memcpy(ur,b+row_ur*cols,cols * sizeof(*b));
104
105 for (uint32_t ii=0; ii<cols; ii++)
106 b[row_ll*cols+ii] = ((ii >> cols_log2-1) & 1) ? ll[ii] : ur[ii^(1<<cols_log2-1)];
107
108 for (uint32_t ii=0; ii<cols; ii++)
109 b[row_ur*cols+ii] = ((ii >> cols_log2-1) & 1) ? ll[ii^(1<<cols_log2-1)] : ur[ii];
110}
111
112static
113void
Allan MacKinnon4359d522018-06-19 13:57:04 -0700114hsg_debug_remap(uint32_t const row_from,
115 uint32_t const row_to,
116 uint32_t * const r)
117{
118 fprintf(stdout,"REMAP( %3u, %3u )\n",row_from,row_to);
119
120 r[row_to] = row_from;
121}
122
123static
Hal Canary14195342018-07-11 16:10:14 -0400124void
Allan MacKinnon4359d522018-06-19 13:57:04 -0700125hsg_debug_print(uint32_t const rows,
126 uint32_t const * const m,
127 uint32_t const * const r)
128{
129 for (uint32_t rr=0; rr<rows; rr++) {
130 for (uint32_t cc=0; cc<cols; cc++)
131 fprintf(stdout,"%4u ",m[r[rr]*cols + cc]);
132 fprintf(stdout,"\n");
133 }
134}
135
136int
137main(int argc, char * argv[])
138{
139 uint32_t const cols_log2 = (argc <= 1) ? 3 : strtoul(argv[1],NULL,0);
140 uint32_t const rows = (argc <= 2) ? 6 : strtoul(argv[2],NULL,0);
141
142 if (rows & 1)
143 return;
144
145 cols = 1 << cols_log2;
146
Allan MacKinnone65dd582018-08-17 09:30:44 -0700147 uint32_t * const b = ALLOCA_MACRO(cols * rows * sizeof(*b));
148 uint32_t * const r = ALLOCA_MACRO( rows * sizeof(*r));
Allan MacKinnon4359d522018-06-19 13:57:04 -0700149
150 for (uint32_t rr=0; rr<rows; rr++) {
151 r[rr] = rr;
152 for (uint32_t cc=0; cc<cols; cc++)
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -0700153 b[rr*cols+cc] = cc*rows+rr;
Allan MacKinnon4359d522018-06-19 13:57:04 -0700154 }
155
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -0700156 hsg_debug_print(rows,b,r);
Allan MacKinnon4359d522018-06-19 13:57:04 -0700157
158 hsg_transpose(cols_log2,rows,
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -0700159 hsg_debug_blend,b,
160 hsg_debug_remap,r);
Allan MacKinnon4359d522018-06-19 13:57:04 -0700161
Allan MacKinnon9e0d7e42018-07-16 15:57:05 -0700162 hsg_debug_print(rows,b,r);
Allan MacKinnon4359d522018-06-19 13:57:04 -0700163
Allan MacKinnon5cd77772018-09-13 15:00:16 -0700164 return EXIT_SUCCESS;
Allan MacKinnon4359d522018-06-19 13:57:04 -0700165}
166
167#endif
168
169//
170//
171//