blob: 18e8bb6870f10af288d66f634788105ef68a9e34 [file] [log] [blame]
bart25de6162008-03-09 13:41:26 +00001/** Compute the matrix inverse via Gauss-Jordan elimination.
2 * This program uses OpenMP separate computation steps but no
3 * mutexes. It is an example of a race-free program on which no data races
4 * are reported by the happens-before algorithm (drd), but a lot of data races
5 * (all false positives) are reported by the Eraser-algorithm (helgrind).
6 */
7
8
9#define _GNU_SOURCE
10
11/***********************/
12/* Include directives. */
13/***********************/
14
15#include <assert.h>
16#include <math.h>
bartb6c2ff42008-03-10 19:17:46 +000017#include <omp.h>
bart25de6162008-03-09 13:41:26 +000018#include <stdio.h>
bartb6c2ff42008-03-10 19:17:46 +000019#include <stdlib.h>
bart25de6162008-03-09 13:41:26 +000020
21
22/*********************/
23/* Type definitions. */
24/*********************/
25
26typedef double elem_t;
27
28
bartd5526492008-03-11 20:06:04 +000029/********************/
30/* Local variables. */
31/********************/
32
33static int s_trigger_race;
34
35
bart25de6162008-03-09 13:41:26 +000036/*************************/
37/* Function definitions. */
38/*************************/
39
40/** Allocate memory for a matrix with the specified number of rows and
41 * columns.
42 */
43static elem_t* new_matrix(const int rows, const int cols)
44{
45 assert(rows > 0);
46 assert(cols > 0);
47 return malloc(rows * cols * sizeof(elem_t));
48}
49
50/** Free the memory that was allocated for a matrix. */
51static void delete_matrix(elem_t* const a)
52{
53 free(a);
54}
55
56/** Fill in some numbers in a matrix.
57 * @note It is important not to call srand() in this program, such that
58 * the results of a run are reproducible.
59 */
60static void init_matrix(elem_t* const a, const int rows, const int cols)
61{
62 int i, j;
63 for (i = 0; i < rows; i++)
64 {
65 for (j = 0; j < rows; j++)
66 {
67 a[i * cols + j] = rand() * 1.0 / RAND_MAX;
68 }
69 }
70}
71
72/** Print all elements of a matrix. */
73void print_matrix(const char* const label,
74 const elem_t* const a, const int rows, const int cols)
75{
76 int i, j;
77 printf("%s:\n", label);
78 for (i = 0; i < rows; i++)
79 {
80 for (j = 0; j < cols; j++)
81 {
82 printf("%g ", a[i * cols + j]);
83 }
84 printf("\n");
85 }
86}
87
88/** Copy a subset of the elements of a matrix into another matrix. */
89static void copy_matrix(const elem_t* const from,
90 const int from_rows,
91 const int from_cols,
92 const int from_row_first,
93 const int from_row_last,
94 const int from_col_first,
95 const int from_col_last,
96 elem_t* const to,
97 const int to_rows,
98 const int to_cols,
99 const int to_row_first,
100 const int to_row_last,
101 const int to_col_first,
102 const int to_col_last)
103{
104 int i, j;
105
106 assert(from_row_last - from_row_first == to_row_last - to_row_first);
107 assert(from_col_last - from_col_first == to_col_last - to_col_first);
108
109 for (i = from_row_first; i < from_row_last; i++)
110 {
111 assert(i < from_rows);
112 assert(i - from_row_first + to_row_first < to_rows);
113 for (j = from_col_first; j < from_col_last; j++)
114 {
115 assert(j < from_cols);
116 assert(j - from_col_first + to_col_first < to_cols);
117 to[(i - from_row_first + to_col_first) * to_cols
118 + (j - from_col_first + to_col_first)]
119 = from[i * from_cols + j];
120 }
121 }
122}
123
124/** Compute the matrix product of a1 and a2. */
125static elem_t* multiply_matrices(const elem_t* const a1,
126 const int rows1,
127 const int cols1,
128 const elem_t* const a2,
129 const int rows2,
130 const int cols2)
131{
132 int i, j, k;
133 elem_t* prod;
134
135 assert(cols1 == rows2);
136
137 prod = new_matrix(rows1, cols2);
138 for (i = 0; i < rows1; i++)
139 {
140 for (j = 0; j < cols2; j++)
141 {
142 prod[i * cols2 + j] = 0;
143 for (k = 0; k < cols1; k++)
144 {
145 prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
146 }
147 }
148 }
149 return prod;
150}
151
152/** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
153 * at row r0 and up to but not including row r1. It is assumed that as many
154 * threads execute this function concurrently as the count barrier p->b was
155 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a
156 * has at least as many columns as rows, the result of this algorithm is that
157 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
158 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
159 */
160static void gj(elem_t* const a, const int rows, const int cols)
161{
162 int i, j, k;
163
164 for (i = 0; i < rows; i++)
165 {
166 {
167 // Pivoting.
168 j = i;
169 for (k = i + 1; k < rows; k++)
170 {
171 if (a[k * cols + i] > a[j * cols + i])
172 {
173 j = k;
174 }
175 }
176 if (j != i)
177 {
178 for (k = 0; k < cols; k++)
179 {
180 const elem_t t = a[i * cols + k];
181 a[i * cols + k] = a[j * cols + k];
182 a[j * cols + k] = t;
183 }
184 }
185 // Normalize row i.
186 if (a[i * cols + i] != 0)
187 {
188 for (k = cols - 1; k >= 0; k--)
189 {
190 a[i * cols + k] /= a[i * cols + i];
191 }
192 }
193 }
194
195 // Reduce all rows j != i.
bartd5526492008-03-11 20:06:04 +0000196
197 if (s_trigger_race)
bart25de6162008-03-09 13:41:26 +0000198 {
bartd5526492008-03-11 20:06:04 +0000199# pragma omp parallel for
200 for (j = 0; j < rows; j++)
bart25de6162008-03-09 13:41:26 +0000201 {
bartd5526492008-03-11 20:06:04 +0000202 if (i != j)
bart25de6162008-03-09 13:41:26 +0000203 {
bartd5526492008-03-11 20:06:04 +0000204 const elem_t factor = a[j * cols + i];
205 for (k = 0; k < cols; k++)
206 {
207 a[j * cols + k] -= a[i * cols + k] * factor;
208 }
209 }
210 }
211 }
212 else
213 {
214# pragma omp parallel for private(j, k)
215 for (j = 0; j < rows; j++)
216 {
217 if (i != j)
218 {
219 const elem_t factor = a[j * cols + i];
220 for (k = 0; k < cols; k++)
221 {
222 a[j * cols + k] -= a[i * cols + k] * factor;
223 }
bart25de6162008-03-09 13:41:26 +0000224 }
225 }
226 }
227 }
228}
229
230/** Matrix inversion via the Gauss-Jordan algorithm. */
231static elem_t* invert_matrix(const elem_t* const a, const int n)
232{
233 int i, j;
234 elem_t* const inv = new_matrix(n, n);
235 elem_t* const tmp = new_matrix(n, 2*n);
236 copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
237 for (i = 0; i < n; i++)
238 for (j = 0; j < n; j++)
239 tmp[i * 2 * n + n + j] = (i == j);
240 gj(tmp, n, 2*n);
241 copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
242 delete_matrix(tmp);
243 return inv;
244}
245
246/** Compute the average square error between the identity matrix and the
247 * product of matrix a with its inverse matrix.
248 */
249static double identity_error(const elem_t* const a, const int n)
250{
251 int i, j;
252 elem_t e = 0;
253 for (i = 0; i < n; i++)
254 {
255 for (j = 0; j < n; j++)
256 {
257 const elem_t d = a[i * n + j] - (i == j);
258 e += d * d;
259 }
260 }
261 return sqrt(e / (n * n));
262}
263
264/** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
265 * smallest number for which the sum of one and that number is different of
266 * one. It is assumed that the underlying representation of elem_t uses
267 * base two.
268 */
269static elem_t epsilon()
270{
271 elem_t eps;
272 for (eps = 1; 1 + eps != 1; eps /= 2)
273 ;
274 return 2 * eps;
275}
276
277int main(int argc, char** argv)
278{
279 int matrix_size;
bartb6c2ff42008-03-10 19:17:46 +0000280 int nthread;
bart25de6162008-03-09 13:41:26 +0000281 int silent;
282 elem_t *a, *inv, *prod;
283 elem_t eps;
284 double error;
285 double ratio;
286
bartd5526492008-03-11 20:06:04 +0000287 matrix_size = (argc > 1) ? atoi(argv[1]) : 3;
288 nthread = (argc > 2) ? atoi(argv[2]) : 3;
289 silent = (argc > 3) ? atoi(argv[3]) : 0;
290 s_trigger_race = (argc > 4) ? atoi(argv[4]) : 0;
bartb6c2ff42008-03-10 19:17:46 +0000291
292 omp_set_num_threads(nthread);
293 omp_set_dynamic(0);
bart25de6162008-03-09 13:41:26 +0000294
295 eps = epsilon();
296 a = new_matrix(matrix_size, matrix_size);
297 init_matrix(a, matrix_size, matrix_size);
298 inv = invert_matrix(a, matrix_size);
299 prod = multiply_matrices(a, matrix_size, matrix_size,
300 inv, matrix_size, matrix_size);
301 error = identity_error(prod, matrix_size);
302 ratio = error / (eps * matrix_size);
303 if (! silent)
304 {
305 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
306 error, eps, ratio);
307 }
bart5e1952a2008-03-09 20:04:31 +0000308 if (isfinite(ratio) && ratio < 100)
bart25de6162008-03-09 13:41:26 +0000309 printf("Error within bounds.\n");
310 else
311 printf("Error out of bounds.\n");
312 delete_matrix(prod);
313 delete_matrix(inv);
314 delete_matrix(a);
315
316 return 0;
317}