exp-drd/tests/matinv.c - platform/external/valgrind - Gitiles

 /** Compute the matrix inverse via Gauss-Jordan elimination.
  *  This program uses only barriers to separate computation steps but no
  *  mutexes. It is an example of a race-free program on which no data races
  *  are reported by the happens-before algorithm (drd), but a lot of data races
  *  (all false positives) are reported by the Eraser-algorithm (helgrind).
  */


 #define _GNU_SOURCE

 /***********************/
 /* Include directives. */
 /***********************/

 #include <assert.h>
 #include <math.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>  // getopt()


 /*********************/
 /* Type definitions. */
 /*********************/

 typedef double elem_t;

 struct gj_threadinfo
 {
   pthread_barrier_t* b;
   pthread_t tid;
   elem_t* a;
   int rows;
   int cols;
   int r0;
   int r1;
 };


 /********************/
 /* Local variables. */
 /********************/

 static int s_nthread;


 /*************************/
 /* Function definitions. */
 /*************************/

 /** Allocate memory for a matrix with the specified number of rows and
  *  columns.
  */
 static elem_t* new_matrix(const int rows, const int cols)
 {
   assert(rows > 0);
   assert(cols > 0);
   return malloc(rows * cols * sizeof(elem_t));
 }

 /** Free the memory that was allocated for a matrix. */
 static void delete_matrix(elem_t* const a)
 {
   free(a);
 }

 /** Fill in some numbers in a matrix. */
 static void init_matrix(elem_t* const a, const int rows, const int cols)
 {
   int i, j;
   for (i = 0; i < rows; i++)
   {
     for (j = 0; j < rows; j++)
     {
       a[i * cols + j] = 1.0 / (1 + abs(i-j));
     }
   }
 }

 /** Print all elements of a matrix. */
 void print_matrix(const char* const label,
                   const elem_t* const a, const int rows, const int cols)
 {
   int i, j;
   printf("%s:\n", label);
   for (i = 0; i < rows; i++)
   {
     for (j = 0; j < cols; j++)
     {
       printf("%g ", a[i * cols + j]);
     }
     printf("\n");
   }
 }

 /** Copy a subset of the elements of a matrix into another matrix. */
 static void copy_matrix(const elem_t* const from,
                         const int from_rows,
                         const int from_cols,
                         const int from_row_first,
                         const int from_row_last,
                         const int from_col_first,
                         const int from_col_last,
                         elem_t* const to,
                         const int to_rows,
                         const int to_cols,
                         const int to_row_first,
                         const int to_row_last,
                         const int to_col_first,
                         const int to_col_last)
 {
   int i, j;

   assert(from_row_last - from_row_first == to_row_last - to_row_first);
   assert(from_col_last - from_col_first == to_col_last - to_col_first);

   for (i = from_row_first; i < from_row_last; i++)
   {
     assert(i < from_rows);
     assert(i - from_row_first + to_row_first < to_rows);
     for (j = from_col_first; j < from_col_last; j++)
     {
       assert(j < from_cols);
       assert(j - from_col_first + to_col_first < to_cols);
       to[(i - from_row_first + to_col_first) * to_cols
          + (j - from_col_first + to_col_first)]
         = from[i * from_cols + j];
     }
   }
 }

 /** Compute the matrix product of a1 and a2. */
 static elem_t* multiply_matrices(const elem_t* const a1,
                                  const int rows1,
                                  const int cols1,
                                  const elem_t* const a2,
                                  const int rows2,
                                  const int cols2)
 {
   int i, j, k;
   elem_t* prod;

   assert(cols1 == rows2);

   prod = new_matrix(rows1, cols2);
   for (i = 0; i < rows1; i++)
   {
     for (j = 0; j < cols2; j++)
     {
       prod[i * cols2 + j] = 0;
       for (k = 0; k < cols1; k++)
       {
         prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
       }
     }
   }
   return prod;
 }

 /** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
  *  at row r0 and up to but not including row r1. It is assumed that as many
  *  threads execute this function concurrently as the count barrier p->b was
  *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
  *  has at least as many columns as rows, the result of this algorithm is that
  *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
  * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
  */
 static void gj_threadfunc(struct gj_threadinfo* p)
 {
   int i, j, k;
   elem_t* const a = p->a;
   const int rows = p->rows;
   const int cols = p->cols;

   for (i = 0; i < p->rows; i++)
   {
     if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD)
     {
       // Pivoting.
       j = i;
       for (k = i + 1; k < rows; k++)
       {
         if (a[k * cols + i] > a[j * cols + i])
         {
           j = k;
         }
       }
       if (j != i)
       {
         for (k = 0; k < cols; k++)
         {
           const elem_t t = a[i * cols + k];
           a[i * cols + k] = a[j * cols + k];
           a[j * cols + k] = t;
         }
       }
       // Normalize row i.
       if (a[i * cols + i] != 0)
       {
         for (k = cols - 1; k >= 0; k--)
         {
           a[i * cols + k] /= a[i * cols + i];
         }
       }
     }
     pthread_barrier_wait(p->b);
     // Reduce all rows j != i.
     for (j = p->r0; j < p->r1; j++)
     {
       if (i != j)
       {
         const elem_t factor = a[j * cols + i];
         for (k = 0; k < cols; k++)
         {
           a[j * cols + k] -= a[i * cols + k] * factor;
         }
       }
     }
   }
 }

 /** Multithreaded Gauss-Jordan algorithm. */
 static void gj(elem_t* const a, const int rows, const int cols)
 {
   int i;
   struct gj_threadinfo* t;
   pthread_barrier_t b;

   assert(rows <= cols);

   t = malloc(sizeof(struct gj_threadinfo) * s_nthread);

   pthread_barrier_init(&b, 0, s_nthread);

   for (i = 0; i < s_nthread; i++)
   {
     t[i].b = &b;
     t[i].a = a;
     t[i].rows = rows;
     t[i].cols = cols;
     t[i].r0 = i * rows / s_nthread;
     t[i].r1 = (i+1) * rows / s_nthread;
     pthread_create(&t[i].tid, 0, (void*(*)(void*))gj_threadfunc, &t[i]);
   }

   for (i = 0; i < s_nthread; i++)
   {
     pthread_join(t[i].tid, 0);
   }

   pthread_barrier_destroy(&b);

   free(t);
 }

 /** Matrix inversion via the Gauss-Jordan algorithm. */
 static elem_t* invert_matrix(const elem_t* const a, const int n)
 {
   int i, j;
   elem_t* const inv = new_matrix(n, n);
   elem_t* const tmp = new_matrix(n, 2*n);
   copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
   for (i = 0; i < n; i++)
     for (j = 0; j < n; j++)
       tmp[i * 2 * n + n + j] = (i == j);
   gj(tmp, n, 2*n);
   copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
   delete_matrix(tmp);
   return inv;
 }

 /** Compute the average square error between the identity matrix and the
  * product of matrix a with its inverse matrix.
  */
 static double identity_error(const elem_t* const a, const int n)
 {
   int i, j;
   elem_t e = 0;
   for (i = 0; i < n; i++)
   {
     for (j = 0; j < n; j++)
     {
       const elem_t d = a[i * n + j] - (i == j);
       e += d * d;
     }
   }
   return sqrt(e / (n * n));
 }

 /** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
  *  smallest number for which the sum of one and that number is different of
  *  one. It is assumed that the underlying representation of elem_t uses
  *  base two.
  */
 static elem_t epsilon()
 {
   elem_t eps;
   for (eps = 1; 1 + eps != 1; eps /= 2)
     ;
   return 2 * eps;
 }

 int main(int argc, char** argv)
 {
   int matrix_size;
   int silent = 0;
   int optchar;
   elem_t *a, *inv, *prod;
   elem_t eps;
   double error;
   double ratio;

   while ((optchar = getopt(argc, argv, "qt:")) != EOF)
   {
     switch (optchar)
     {
     case 'q': silent = 1; break;
     case 't': s_nthread = atoi(optarg); break;
     default:
       fprintf(stderr, "Error: unknown option '%c'.\n", optchar);
       return 1;
     }
   }

   if (optind + 1 != argc)
   {
     fprintf(stderr, "Error: wrong number of arguments.\n");
   }
   matrix_size = atoi(argv[optind]);

   /* Error checking. */
   assert(matrix_size >= 1);
   assert(s_nthread >= 1);

   eps = epsilon();
   a = new_matrix(matrix_size, matrix_size);
   init_matrix(a, matrix_size, matrix_size);
   inv = invert_matrix(a, matrix_size);
   prod = multiply_matrices(a, matrix_size, matrix_size,
                            inv, matrix_size, matrix_size);
   error = identity_error(prod, matrix_size);
   ratio = error / (eps * matrix_size);
   if (! silent)
   {
     printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
            error, eps, ratio);
   }
   if (isfinite(ratio) && ratio < 100)
     printf("Error within bounds.\n");
   else
     printf("Error out of bounds.\n");
   delete_matrix(prod);
   delete_matrix(inv);
   delete_matrix(a);

   return 0;
 }
	/** Compute the matrix inverse via Gauss-Jordan elimination.
	* This program uses only barriers to separate computation steps but no
	* mutexes. It is an example of a race-free program on which no data races
	* are reported by the happens-before algorithm (drd), but a lot of data races
	* (all false positives) are reported by the Eraser-algorithm (helgrind).
	*/


	#define _GNU_SOURCE

	/***********************/
	/* Include directives. */
	/***********************/

	#include <assert.h>
	#include <math.h>
	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h> // getopt()


	/*********************/
	/* Type definitions. */
	/*********************/

	typedef double elem_t;

	struct gj_threadinfo
	{
	pthread_barrier_t* b;
	pthread_t tid;
	elem_t* a;
	int rows;
	int cols;
	int r0;
	int r1;
	};


	/********************/
	/* Local variables. */
	/********************/

	static int s_nthread;


	/*************************/
	/* Function definitions. */
	/*************************/

	/** Allocate memory for a matrix with the specified number of rows and
	* columns.
	*/
	static elem_t* new_matrix(const int rows, const int cols)
	{
	assert(rows > 0);
	assert(cols > 0);
	return malloc(rows * cols * sizeof(elem_t));
	}

	/** Free the memory that was allocated for a matrix. */
	static void delete_matrix(elem_t* const a)
	{
	free(a);
	}

	/** Fill in some numbers in a matrix. */
	static void init_matrix(elem_t* const a, const int rows, const int cols)
	{
	int i, j;
	for (i = 0; i < rows; i++)
	{
	for (j = 0; j < rows; j++)
	{
	a[i * cols + j] = 1.0 / (1 + abs(i-j));
	}
	}
	}

	/** Print all elements of a matrix. */
	void print_matrix(const char* const label,
	const elem_t* const a, const int rows, const int cols)
	{
	int i, j;
	printf("%s:\n", label);
	for (i = 0; i < rows; i++)
	{
	for (j = 0; j < cols; j++)
	{
	printf("%g ", a[i * cols + j]);
	}
	printf("\n");
	}
	}

	/** Copy a subset of the elements of a matrix into another matrix. */
	static void copy_matrix(const elem_t* const from,
	const int from_rows,
	const int from_cols,
	const int from_row_first,
	const int from_row_last,
	const int from_col_first,
	const int from_col_last,
	elem_t* const to,
	const int to_rows,
	const int to_cols,
	const int to_row_first,
	const int to_row_last,
	const int to_col_first,
	const int to_col_last)
	{
	int i, j;

	assert(from_row_last - from_row_first == to_row_last - to_row_first);
	assert(from_col_last - from_col_first == to_col_last - to_col_first);

	for (i = from_row_first; i < from_row_last; i++)
	{
	assert(i < from_rows);
	assert(i - from_row_first + to_row_first < to_rows);
	for (j = from_col_first; j < from_col_last; j++)
	{
	assert(j < from_cols);
	assert(j - from_col_first + to_col_first < to_cols);
	to[(i - from_row_first + to_col_first) * to_cols
	+ (j - from_col_first + to_col_first)]
	= from[i * from_cols + j];
	}
	}
	}

	/** Compute the matrix product of a1 and a2. */
	static elem_t* multiply_matrices(const elem_t* const a1,
	const int rows1,
	const int cols1,
	const elem_t* const a2,
	const int rows2,
	const int cols2)
	{
	int i, j, k;
	elem_t* prod;

	assert(cols1 == rows2);

	prod = new_matrix(rows1, cols2);
	for (i = 0; i < rows1; i++)
	{
	for (j = 0; j < cols2; j++)
	{
	prod[i * cols2 + j] = 0;
	for (k = 0; k < cols1; k++)
	{
	prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
	}
	}
	}
	return prod;
	}

	/** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
	* at row r0 and up to but not including row r1. It is assumed that as many
	* threads execute this function concurrently as the count barrier p->b was
	* initialized with. If the matrix p->a is nonsingular, and if matrix p->a
	* has at least as many columns as rows, the result of this algorithm is that
	* submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
	* @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
	*/
	static void gj_threadfunc(struct gj_threadinfo* p)
	{
	int i, j, k;
	elem_t* const a = p->a;
	const int rows = p->rows;
	const int cols = p->cols;

	for (i = 0; i < p->rows; i++)
	{
	if (pthread_barrier_wait(p->b) == PTHREAD_BARRIER_SERIAL_THREAD)
	{
	// Pivoting.
	j = i;
	for (k = i + 1; k < rows; k++)
	{
	if (a[k * cols + i] > a[j * cols + i])
	{
	j = k;
	}
	}
	if (j != i)
	{
	for (k = 0; k < cols; k++)
	{
	const elem_t t = a[i * cols + k];
	a[i * cols + k] = a[j * cols + k];
	a[j * cols + k] = t;
	}
	}
	// Normalize row i.
	if (a[i * cols + i] != 0)
	{
	for (k = cols - 1; k >= 0; k--)
	{
	a[i * cols + k] /= a[i * cols + i];
	}
	}
	}
	pthread_barrier_wait(p->b);
	// Reduce all rows j != i.
	for (j = p->r0; j < p->r1; j++)
	{
	if (i != j)
	{
	const elem_t factor = a[j * cols + i];
	for (k = 0; k < cols; k++)
	{
	a[j * cols + k] -= a[i * cols + k] * factor;
	}
	}
	}
	}
	}

	/** Multithreaded Gauss-Jordan algorithm. */
	static void gj(elem_t* const a, const int rows, const int cols)
	{
	int i;
	struct gj_threadinfo* t;
	pthread_barrier_t b;

	assert(rows <= cols);

	t = malloc(sizeof(struct gj_threadinfo) * s_nthread);

	pthread_barrier_init(&b, 0, s_nthread);

	for (i = 0; i < s_nthread; i++)
	{
	t[i].b = &b;
	t[i].a = a;
	t[i].rows = rows;
	t[i].cols = cols;
	t[i].r0 = i * rows / s_nthread;
	t[i].r1 = (i+1) * rows / s_nthread;
	pthread_create(&t[i].tid, 0, (void()(void*))gj_threadfunc, &t[i]);
	}

	for (i = 0; i < s_nthread; i++)
	{
	pthread_join(t[i].tid, 0);
	}

	pthread_barrier_destroy(&b);

	free(t);
	}

	/** Matrix inversion via the Gauss-Jordan algorithm. */
	static elem_t* invert_matrix(const elem_t* const a, const int n)
	{
	int i, j;
	elem_t* const inv = new_matrix(n, n);
	elem_t* const tmp = new_matrix(n, 2*n);
	copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
	for (i = 0; i < n; i++)
	for (j = 0; j < n; j++)
	tmp[i * 2 * n + n + j] = (i == j);
	gj(tmp, n, 2*n);
	copy_matrix(tmp, n, 2n, 0, n, n, 2n, inv, n, n, 0, n, 0, n);
	delete_matrix(tmp);
	return inv;
	}

	/** Compute the average square error between the identity matrix and the
	* product of matrix a with its inverse matrix.
	*/
	static double identity_error(const elem_t* const a, const int n)
	{
	int i, j;
	elem_t e = 0;
	for (i = 0; i < n; i++)
	{
	for (j = 0; j < n; j++)
	{
	const elem_t d = a[i * n + j] - (i == j);
	e += d * d;
	}
	}
	return sqrt(e / (n * n));
	}

	/** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
	* smallest number for which the sum of one and that number is different of
	* one. It is assumed that the underlying representation of elem_t uses
	* base two.
	*/
	static elem_t epsilon()
	{
	elem_t eps;
	for (eps = 1; 1 + eps != 1; eps /= 2)
	;
	return 2 * eps;
	}

	int main(int argc, char** argv)
	{
	int matrix_size;
	int silent = 0;
	int optchar;
	elem_t a, inv, *prod;
	elem_t eps;
	double error;
	double ratio;

	while ((optchar = getopt(argc, argv, "qt:")) != EOF)
	{
	switch (optchar)
	{
	case 'q': silent = 1; break;
	case 't': s_nthread = atoi(optarg); break;
	default:
	fprintf(stderr, "Error: unknown option '%c'.\n", optchar);
	return 1;
	}
	}

	if (optind + 1 != argc)
	{
	fprintf(stderr, "Error: wrong number of arguments.\n");
	}
	matrix_size = atoi(argv[optind]);

	/* Error checking. */
	assert(matrix_size >= 1);
	assert(s_nthread >= 1);

	eps = epsilon();
	a = new_matrix(matrix_size, matrix_size);
	init_matrix(a, matrix_size, matrix_size);
	inv = invert_matrix(a, matrix_size);
	prod = multiply_matrices(a, matrix_size, matrix_size,
	inv, matrix_size, matrix_size);
	error = identity_error(prod, matrix_size);
	ratio = error / (eps * matrix_size);
	if (! silent)
	{
	printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
	error, eps, ratio);
	}
	if (isfinite(ratio) && ratio < 100)
	printf("Error within bounds.\n");
	else
	printf("Error out of bounds.\n");
	delete_matrix(prod);
	delete_matrix(inv);
	delete_matrix(a);

	return 0;
	}