bart | bf72be5 | 2008-03-12 16:46:36 +0000 | [diff] [blame] | 1 | /** Compute the matrix inverse via Gauss-Jordan elimination. |
| 2 | * This program uses only barriers to separate computation steps but no |
| 3 | * mutexes. It is an example of a race-free program on which no data races |
| 4 | * are reported by the happens-before algorithm (drd), but a lot of data races |
| 5 | * (all false positives) are reported by the Eraser-algorithm (helgrind). |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 6 | */ |
| 7 | |
| 8 | |
sewardj | 347eeba | 2008-01-21 14:19:07 +0000 | [diff] [blame] | 9 | #define _GNU_SOURCE |
| 10 | |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 11 | /***********************/ |
| 12 | /* Include directives. */ |
| 13 | /***********************/ |
| 14 | |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 15 | #include <assert.h> |
| 16 | #include <math.h> |
bart | 28aa0ce | 2009-01-13 08:18:59 +0000 | [diff] [blame] | 17 | #include <limits.h> // PTHREAD_STACK_MIN |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 18 | #include <pthread.h> |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 19 | #include <stdio.h> |
bart | bf72be5 | 2008-03-12 16:46:36 +0000 | [diff] [blame] | 20 | #include <stdlib.h> |
| 21 | #include <unistd.h> // getopt() |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 22 | |
| 23 | |
| 24 | /*********************/ |
| 25 | /* Type definitions. */ |
| 26 | /*********************/ |
| 27 | |
| 28 | typedef double elem_t; |
| 29 | |
| 30 | struct gj_threadinfo |
| 31 | { |
| 32 | pthread_barrier_t* b; |
| 33 | pthread_t tid; |
| 34 | elem_t* a; |
| 35 | int rows; |
| 36 | int cols; |
| 37 | int r0; |
| 38 | int r1; |
| 39 | }; |
| 40 | |
| 41 | |
| 42 | /********************/ |
| 43 | /* Local variables. */ |
| 44 | /********************/ |
| 45 | |
bart | 4703687 | 2008-03-13 17:32:41 +0000 | [diff] [blame] | 46 | static int s_nthread = 1; |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 47 | |
| 48 | |
| 49 | /*************************/ |
| 50 | /* Function definitions. */ |
| 51 | /*************************/ |
| 52 | |
| 53 | /** Allocate memory for a matrix with the specified number of rows and |
| 54 | * columns. |
| 55 | */ |
| 56 | static elem_t* new_matrix(const int rows, const int cols) |
| 57 | { |
| 58 | assert(rows > 0); |
| 59 | assert(cols > 0); |
| 60 | return malloc(rows * cols * sizeof(elem_t)); |
| 61 | } |
| 62 | |
| 63 | /** Free the memory that was allocated for a matrix. */ |
| 64 | static void delete_matrix(elem_t* const a) |
| 65 | { |
| 66 | free(a); |
| 67 | } |
| 68 | |
bart | bf72be5 | 2008-03-12 16:46:36 +0000 | [diff] [blame] | 69 | /** Fill in some numbers in a matrix. */ |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 70 | static void init_matrix(elem_t* const a, const int rows, const int cols) |
| 71 | { |
| 72 | int i, j; |
| 73 | for (i = 0; i < rows; i++) |
| 74 | { |
| 75 | for (j = 0; j < rows; j++) |
| 76 | { |
bart | bf72be5 | 2008-03-12 16:46:36 +0000 | [diff] [blame] | 77 | a[i * cols + j] = 1.0 / (1 + abs(i-j)); |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 78 | } |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | /** Print all elements of a matrix. */ |
| 83 | void print_matrix(const char* const label, |
| 84 | const elem_t* const a, const int rows, const int cols) |
| 85 | { |
| 86 | int i, j; |
| 87 | printf("%s:\n", label); |
| 88 | for (i = 0; i < rows; i++) |
| 89 | { |
| 90 | for (j = 0; j < cols; j++) |
| 91 | { |
| 92 | printf("%g ", a[i * cols + j]); |
| 93 | } |
| 94 | printf("\n"); |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | /** Copy a subset of the elements of a matrix into another matrix. */ |
| 99 | static void copy_matrix(const elem_t* const from, |
| 100 | const int from_rows, |
| 101 | const int from_cols, |
| 102 | const int from_row_first, |
| 103 | const int from_row_last, |
| 104 | const int from_col_first, |
| 105 | const int from_col_last, |
| 106 | elem_t* const to, |
| 107 | const int to_rows, |
| 108 | const int to_cols, |
| 109 | const int to_row_first, |
| 110 | const int to_row_last, |
| 111 | const int to_col_first, |
| 112 | const int to_col_last) |
| 113 | { |
| 114 | int i, j; |
| 115 | |
| 116 | assert(from_row_last - from_row_first == to_row_last - to_row_first); |
| 117 | assert(from_col_last - from_col_first == to_col_last - to_col_first); |
| 118 | |
| 119 | for (i = from_row_first; i < from_row_last; i++) |
| 120 | { |
| 121 | assert(i < from_rows); |
| 122 | assert(i - from_row_first + to_row_first < to_rows); |
| 123 | for (j = from_col_first; j < from_col_last; j++) |
| 124 | { |
| 125 | assert(j < from_cols); |
| 126 | assert(j - from_col_first + to_col_first < to_cols); |
| 127 | to[(i - from_row_first + to_col_first) * to_cols |
| 128 | + (j - from_col_first + to_col_first)] |
| 129 | = from[i * from_cols + j]; |
| 130 | } |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | /** Compute the matrix product of a1 and a2. */ |
| 135 | static elem_t* multiply_matrices(const elem_t* const a1, |
| 136 | const int rows1, |
| 137 | const int cols1, |
| 138 | const elem_t* const a2, |
| 139 | const int rows2, |
| 140 | const int cols2) |
| 141 | { |
| 142 | int i, j, k; |
| 143 | elem_t* prod; |
| 144 | |
| 145 | assert(cols1 == rows2); |
| 146 | |
| 147 | prod = new_matrix(rows1, cols2); |
| 148 | for (i = 0; i < rows1; i++) |
| 149 | { |
| 150 | for (j = 0; j < cols2; j++) |
| 151 | { |
| 152 | prod[i * cols2 + j] = 0; |
| 153 | for (k = 0; k < cols1; k++) |
| 154 | { |
| 155 | prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j]; |
| 156 | } |
| 157 | } |
| 158 | } |
| 159 | return prod; |
| 160 | } |
| 161 | |
| 162 | /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting |
| 163 | * at row r0 and up to but not including row r1. It is assumed that as many |
bart | 31b983d | 2010-02-21 14:52:59 +0000 | [diff] [blame] | 164 | * threads execute this function concurrently as the count barrier p->b was |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 165 | * initialized with. If the matrix p->a is nonsingular, and if matrix p->a |
| 166 | * has at least as many columns as rows, the result of this algorithm is that |
| 167 | * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix. |
| 168 | * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination |
| 169 | */ |
| 170 | static void gj_threadfunc(struct gj_threadinfo* p) |
| 171 | { |
| 172 | int i, j, k; |
| 173 | elem_t* const a = p->a; |
| 174 | const int rows = p->rows; |
| 175 | const int cols = p->cols; |
bart | a4cd017 | 2011-07-28 17:40:49 +0000 | [diff] [blame] | 176 | elem_t aii; |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 177 | |
| 178 | for (i = 0; i < p->rows; i++) |
| 179 | { |
| 180 | if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD) |
| 181 | { |
| 182 | // Pivoting. |
| 183 | j = i; |
| 184 | for (k = i + 1; k < rows; k++) |
| 185 | { |
| 186 | if (a[k * cols + i] > a[j * cols + i]) |
| 187 | { |
| 188 | j = k; |
| 189 | } |
| 190 | } |
| 191 | if (j != i) |
| 192 | { |
| 193 | for (k = 0; k < cols; k++) |
| 194 | { |
| 195 | const elem_t t = a[i * cols + k]; |
| 196 | a[i * cols + k] = a[j * cols + k]; |
| 197 | a[j * cols + k] = t; |
| 198 | } |
| 199 | } |
| 200 | // Normalize row i. |
bart | a4cd017 | 2011-07-28 17:40:49 +0000 | [diff] [blame] | 201 | aii = a[i * cols + i]; |
| 202 | if (aii != 0) |
| 203 | for (k = i; k < cols; k++) |
| 204 | a[i * cols + k] /= aii; |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 205 | } |
| 206 | pthread_barrier_wait(p->b); |
| 207 | // Reduce all rows j != i. |
| 208 | for (j = p->r0; j < p->r1; j++) |
| 209 | { |
| 210 | if (i != j) |
| 211 | { |
| 212 | const elem_t factor = a[j * cols + i]; |
| 213 | for (k = 0; k < cols; k++) |
| 214 | { |
| 215 | a[j * cols + k] -= a[i * cols + k] * factor; |
| 216 | } |
| 217 | } |
| 218 | } |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | /** Multithreaded Gauss-Jordan algorithm. */ |
| 223 | static void gj(elem_t* const a, const int rows, const int cols) |
| 224 | { |
| 225 | int i; |
| 226 | struct gj_threadinfo* t; |
| 227 | pthread_barrier_t b; |
bart | a45570d | 2008-03-29 09:28:12 +0000 | [diff] [blame] | 228 | pthread_attr_t attr; |
| 229 | int err; |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 230 | |
| 231 | assert(rows <= cols); |
| 232 | |
| 233 | t = malloc(sizeof(struct gj_threadinfo) * s_nthread); |
| 234 | |
| 235 | pthread_barrier_init(&b, 0, s_nthread); |
| 236 | |
bart | a45570d | 2008-03-29 09:28:12 +0000 | [diff] [blame] | 237 | pthread_attr_init(&attr); |
bart | 28aa0ce | 2009-01-13 08:18:59 +0000 | [diff] [blame] | 238 | err = pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN + 4096); |
bart | a45570d | 2008-03-29 09:28:12 +0000 | [diff] [blame] | 239 | assert(err == 0); |
| 240 | |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 241 | for (i = 0; i < s_nthread; i++) |
| 242 | { |
| 243 | t[i].b = &b; |
| 244 | t[i].a = a; |
| 245 | t[i].rows = rows; |
| 246 | t[i].cols = cols; |
| 247 | t[i].r0 = i * rows / s_nthread; |
| 248 | t[i].r1 = (i+1) * rows / s_nthread; |
bart | a45570d | 2008-03-29 09:28:12 +0000 | [diff] [blame] | 249 | pthread_create(&t[i].tid, &attr, (void*(*)(void*))gj_threadfunc, &t[i]); |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 250 | } |
| 251 | |
bart | a45570d | 2008-03-29 09:28:12 +0000 | [diff] [blame] | 252 | pthread_attr_destroy(&attr); |
| 253 | |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 254 | for (i = 0; i < s_nthread; i++) |
| 255 | { |
| 256 | pthread_join(t[i].tid, 0); |
| 257 | } |
| 258 | |
| 259 | pthread_barrier_destroy(&b); |
| 260 | |
| 261 | free(t); |
| 262 | } |
| 263 | |
| 264 | /** Matrix inversion via the Gauss-Jordan algorithm. */ |
| 265 | static elem_t* invert_matrix(const elem_t* const a, const int n) |
| 266 | { |
| 267 | int i, j; |
| 268 | elem_t* const inv = new_matrix(n, n); |
| 269 | elem_t* const tmp = new_matrix(n, 2*n); |
| 270 | copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n); |
| 271 | for (i = 0; i < n; i++) |
| 272 | for (j = 0; j < n; j++) |
| 273 | tmp[i * 2 * n + n + j] = (i == j); |
| 274 | gj(tmp, n, 2*n); |
| 275 | copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n); |
| 276 | delete_matrix(tmp); |
| 277 | return inv; |
| 278 | } |
| 279 | |
| 280 | /** Compute the average square error between the identity matrix and the |
| 281 | * product of matrix a with its inverse matrix. |
| 282 | */ |
| 283 | static double identity_error(const elem_t* const a, const int n) |
| 284 | { |
| 285 | int i, j; |
| 286 | elem_t e = 0; |
| 287 | for (i = 0; i < n; i++) |
| 288 | { |
| 289 | for (j = 0; j < n; j++) |
| 290 | { |
| 291 | const elem_t d = a[i * n + j] - (i == j); |
| 292 | e += d * d; |
| 293 | } |
| 294 | } |
| 295 | return sqrt(e / (n * n)); |
| 296 | } |
| 297 | |
| 298 | /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the |
| 299 | * smallest number for which the sum of one and that number is different of |
| 300 | * one. It is assumed that the underlying representation of elem_t uses |
| 301 | * base two. |
| 302 | */ |
| 303 | static elem_t epsilon() |
| 304 | { |
| 305 | elem_t eps; |
| 306 | for (eps = 1; 1 + eps != 1; eps /= 2) |
| 307 | ; |
| 308 | return 2 * eps; |
| 309 | } |
| 310 | |
| 311 | int main(int argc, char** argv) |
| 312 | { |
| 313 | int matrix_size; |
bart | bf72be5 | 2008-03-12 16:46:36 +0000 | [diff] [blame] | 314 | int silent = 0; |
| 315 | int optchar; |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 316 | elem_t *a, *inv, *prod; |
| 317 | elem_t eps; |
| 318 | double error; |
| 319 | double ratio; |
| 320 | |
bart | bf72be5 | 2008-03-12 16:46:36 +0000 | [diff] [blame] | 321 | while ((optchar = getopt(argc, argv, "qt:")) != EOF) |
| 322 | { |
| 323 | switch (optchar) |
| 324 | { |
| 325 | case 'q': silent = 1; break; |
| 326 | case 't': s_nthread = atoi(optarg); break; |
| 327 | default: |
| 328 | fprintf(stderr, "Error: unknown option '%c'.\n", optchar); |
| 329 | return 1; |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | if (optind + 1 != argc) |
| 334 | { |
| 335 | fprintf(stderr, "Error: wrong number of arguments.\n"); |
bart | 20d702d | 2011-07-11 16:25:37 +0000 | [diff] [blame] | 336 | return 1; |
bart | bf72be5 | 2008-03-12 16:46:36 +0000 | [diff] [blame] | 337 | } |
| 338 | matrix_size = atoi(argv[optind]); |
| 339 | |
| 340 | /* Error checking. */ |
| 341 | assert(matrix_size >= 1); |
| 342 | assert(s_nthread >= 1); |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 343 | |
| 344 | eps = epsilon(); |
| 345 | a = new_matrix(matrix_size, matrix_size); |
| 346 | init_matrix(a, matrix_size, matrix_size); |
| 347 | inv = invert_matrix(a, matrix_size); |
| 348 | prod = multiply_matrices(a, matrix_size, matrix_size, |
| 349 | inv, matrix_size, matrix_size); |
| 350 | error = identity_error(prod, matrix_size); |
| 351 | ratio = error / (eps * matrix_size); |
| 352 | if (! silent) |
| 353 | { |
| 354 | printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n", |
| 355 | error, eps, ratio); |
| 356 | } |
bart | bf72be5 | 2008-03-12 16:46:36 +0000 | [diff] [blame] | 357 | if (isfinite(ratio) && ratio < 100) |
sewardj | 8564292 | 2008-01-14 11:54:56 +0000 | [diff] [blame] | 358 | printf("Error within bounds.\n"); |
| 359 | else |
| 360 | printf("Error out of bounds.\n"); |
| 361 | delete_matrix(prod); |
| 362 | delete_matrix(inv); |
| 363 | delete_matrix(a); |
| 364 | |
| 365 | return 0; |
| 366 | } |