testcases/realtime/func/matrix_mult/matrix_mult.c - platform/external/ltp - Gitiles

 /******************************************************************************
  *
  *   Copyright © International Business Machines  Corp., 2007, 2008
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
  *   the Free Software Foundation; either version 2 of the License, or
  *   (at your option) any later version.
  *
  *   This program is distributed in the hope that it will be useful,
  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  *   the GNU General Public License for more details.
  *
  *   You should have received a copy of the GNU General Public License
  *   along with this program;  if not, write to the Free Software
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  *
  * NAME
  *      matrix_mult.c
  *
  * DESCRIPTION
  *      Compare running sequential matrix multiplication routines
  *      to running them in parallel to judge multiprocessor
  *      performance
  *
  * USAGE:
  *      Use run_auto.sh script in current directory to build and run test.
  *
  * AUTHOR
  *      Darren Hart <dvhltc@us.ibm.com>
  *
  * HISTORY
  *      2007-Mar-09:  Initial version by Darren Hart <dvhltc@us.ibm.com>
  *      2008-Feb-26:  Closely emulate jvm Dinakar Guniguntala <dino@in.ibm.com>
  *
  *****************************************************************************/

 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <librttest.h>
 #include <libstats.h>

 /* Upper bound on the CPU ids probed by set_affinity() */
 #define MAX_CPUS	8192
 /* Priority passed to set_priority() and create_fifo_thread() */
 #define PRIO		43
 /* Dimension of the square matrices that are multiplied */
 #define MATRIX_SIZE	100
 #define DEF_OPS		8		/* the higher the number, the more CPU intensive */
 					/* (and therefore SMP performance goes up) */
 #define PASS_CRITERIA	0.75		/* Avg concurrent time * pass criteria < avg seq time - */
 					/* for every addition of a cpu */
 /* Default number of iterations (overridden by -i) */
 #define ITERATIONS	128
 /* Size passed to stats_container_init() for the histogram containers */
 #define HIST_BUCKETS	100

 /* Thread state values; not referenced by the code visible in this file */
 #define THREAD_WAIT	1
 #define THREAD_WORK	2
 #define THREAD_DONE	3

 /* Parenthesized so the macro stays one value inside larger expressions */
 #define THREAD_SLEEP	(1 * NS_PER_US)

 /* Run parameters: set from the command line in parse_args() or derived in main() */
 static int ops = DEF_OPS;		/* multiplications per recorded iteration (-l) */
 static int numcpus;			/* online CPU count, set in main() */
 static float criteria;			/* pass_criteria * numcpus, set in main() */
 static int *tids;			/* values returned by create_fifo_thread() */
 static int online_cpu_id = -1;		/* last CPU id tried by set_affinity() */
 static int iterations = ITERATIONS;	/* total iterations (-i), rounded up in main() */
 static int iterations_percpu;		/* iterations / numcpus */

 /*
  * sdat/cdat receive the per-iteration runtimes of the sequential and
  * concurrent phases, shist/chist the matching histograms.  curdat selects
  * the container matrix_mult_record() writes to.
  */
 stats_container_t sdat, cdat, *curdat;
 stats_container_t shist, chist;
 static pthread_barrier_t mult_start;
 /* Serializes set_affinity(); statically initialized because it is never pthread_mutex_init()ed */
 static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER;


 /*
  * Print the help text: calls rt_help() (defined outside this file,
  * presumably the generic option help) and then lists the options that
  * parse_args() handles for this test.
  */
 void usage(void)
 {
 	rt_help();
 	printf("matrix_mult specific options:\n"
 	       "  -l#           #: number of multiplications per iteration (load)\n"
 	       "  -i#           #: number of iterations\n");
 }

 /*
  * Option callback handed to rt_init().
  *
  * c: option character, v: its argument string.
  * 'i' stores atoi(v) in iterations, 'l' stores atoi(v) in ops, 'h' prints
  * usage() and exits with status 0.
  *
  * Returns 1 when the option was handled here, 0 otherwise.
  */
 int parse_args(int c, char *v)
 {
 	if (c == 'i') {
 		iterations = atoi(v);
 		return 1;
 	}

 	if (c == 'l') {
 		ops = atoi(v);
 		return 1;
 	}

 	if (c == 'h') {
 		usage();
 		exit(0);
 	}

 	/* not one of ours */
 	return 0;
 }

 /*
  * Fill the two input matrices with deterministic values:
  * A[r][c] = r * c and B[r][c] = (r * c) % 10.
  */
 void matrix_init(double  A[MATRIX_SIZE][MATRIX_SIZE], double  B[MATRIX_SIZE][MATRIX_SIZE])
 {
 	int row, col;

 	for (row = 0; row < MATRIX_SIZE; row++) {
 		for (col = 0; col < MATRIX_SIZE; col++) {
 			int product = row * col;

 			A[row][col] = (double) product;
 			B[row][col] = (double) (product % 10);
 		}
 	}
 }

 /*
  * Perform one m_size x m_size multiplication on stack-allocated matrices.
  *
  * The operands are filled by matrix_init(), which is declared for
  * MATRIX_SIZE x MATRIX_SIZE arrays, so m_size is expected to equal
  * MATRIX_SIZE (every caller in this file passes MATRIX_SIZE).  The result
  * matrix C is local and discarded; the function exists only to burn CPU.
  */
 void matrix_mult(int m_size)
 {
 	double A[m_size][m_size];
 	double B[m_size][m_size];
 	double C[m_size][m_size];
 	int i, j, k;

 	matrix_init(A, B);
 	for (i = 0; i < m_size; i++) {
 		/*
 		 * Walk the rows of A in reverse.  The "- 1" keeps the index in
 		 * 0..m_size-1; without it the first pass read A[m_size][..],
 		 * one row past the end of the array.
 		 */
 		int i_m = m_size - 1 - i;
 		for (j = 0; j < m_size; j++) {
 			double sum = A[i_m][j] * B[j][i];
 			for (k = 0; k < m_size; k++)
 				sum += A[i_m][k]*B[k][j];
 			C[i][j] = sum;
 		}
 	}
 }

 void matrix_mult_record(int m_size, int index)
 {
 	nsec_t start, end, delta;
 	int i;

 	start = rt_gettime();
 	for (i = 0; i < ops; i++)
 		matrix_mult(MATRIX_SIZE);
 	end = rt_gettime();
 	delta = (long)((end - start)/NS_PER_US);
 	curdat->records[index].x = index;
 	curdat->records[index].y = delta;
 }

 /*
  * Pin the calling thread to the next CPU id that sched_setaffinity()
  * accepts.
  *
  * Ids are handed out from the shared counter online_cpu_id under
  * mutex_cpu, so concurrent callers each get a different id.  Ids that are
  * rejected are skipped.
  *
  * Returns the CPU id the caller is now bound to, or -1 when every id below
  * MAX_CPUS has been tried without success.
  */
 int set_affinity(void)
 {
 	cpu_set_t mask;
 	int cpuid = -1;

 	pthread_mutex_lock(&mutex_cpu);
 	/*
 	 * Test the bound before incrementing so the probed id never reaches
 	 * MAX_CPUS (the old do/while tried one id too many).
 	 */
 	while (online_cpu_id + 1 < MAX_CPUS) {
 		++online_cpu_id;
 		CPU_ZERO(&mask);
 		CPU_SET(online_cpu_id, &mask);

 		if (!sched_setaffinity(0, sizeof(mask), &mask)) {
 			cpuid = online_cpu_id; /* Save this value before unlocking mutex */
 			break;
 		}
 	}
 	pthread_mutex_unlock(&mutex_cpu);

 	return cpuid;
 }

 /*
  * Worker body for the concurrent phase.
  *
  * thread: pointer to this worker's struct thread; its id field selects the
  *         slice of the stats array the worker writes to.
  *         NOTE(review): presumably supplied by create_fifo_thread() - confirm
  *         in librttest.
  *
  * The worker pins itself to a CPU, waits on the mult_start barrier and then
  * records iterations_percpu timed iterations.  Exits the process with
  * status 1 when no CPU can be claimed.  Always returns NULL.
  */
 void *concurrent_thread(void *thread)
 {
 	struct thread *self = (struct thread *)thread;
 	int thread_id = (intptr_t)self->id;
 	int slot;
 	int remaining;

 	if (set_affinity() == -1) {
 		fprintf(stderr, "Thread %d: Can't set affinity.\n", thread_id);
 		exit(1);
 	}

 	/* Each worker owns a contiguous range of records, so ranges never overlap */
 	slot = iterations_percpu * thread_id;

 	pthread_barrier_wait(&mult_start);
 	for (remaining = iterations_percpu; remaining > 0; remaining--)
 		matrix_mult_record(MATRIX_SIZE, slot++);

 	return NULL;
 }


 /*
  * Run the benchmark and print the verdict.
  *
  * Phase 1 records `iterations` timed iterations on the calling thread.
  * Phase 2 starts numcpus FIFO threads running concurrent_thread(), which
  * split the same number of iterations between them.  For each phase the
  * min/max/average/stddev are printed and scatter and histogram data are
  * saved.  The result is PASS when savg > cavg * criteria.
  *
  * Exits the process with status 1 on any setup failure.
  */
 void main_thread(void)
 {
 	int ret, i, j;
 	nsec_t start, end;
 	long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
 	float savg, cavg;
 	int cpuid;

 	if (stats_container_init(&sdat, iterations) ||
 	    stats_container_init(&shist, HIST_BUCKETS) ||
 	    stats_container_init(&cdat, iterations) ||
 	    stats_container_init(&chist, HIST_BUCKETS)) {
 		fprintf(stderr, "Cannot init stats container\n");
 		exit(1);
 	}

 	/*
 	 * calloc() zeroes the whole array; the former malloc() +
 	 * memset(tids, 0, numcpus) cleared only numcpus bytes.
 	 */
 	tids = calloc(numcpus, sizeof(*tids));
 	if (!tids) {
 		perror("calloc");
 		exit(1);
 	}

 	cpuid = set_affinity();
 	if (cpuid == -1) {
 		fprintf(stderr, "Main thread: Can't set affinity.\n");
 		exit(1);
 	}

 	/* run matrix mult operation sequentially */
 	curdat = &sdat;
 	printf("\nRunning sequential operations\n");
 	start = rt_gettime();
 	for (i = 0; i < iterations; i++)
 		matrix_mult_record(MATRIX_SIZE, i);
 	end = rt_gettime();
 	delta = (long)((end - start)/NS_PER_US);

 	/*
 	 * don't use the stats record, use the total time recorded.
 	 * Divide in floating point so the fractional part is kept.
 	 */
 	savg = (float)delta / iterations;
 	smin = stats_min(&sdat);
 	smax = stats_max(&sdat);

 	printf("Min: %ld us\n", smin);
 	printf("Max: %ld us\n", smax);
 	printf("Avg: %.4f us\n", savg);
 	printf("StdDev: %.4f us\n", stats_stddev(&sdat));

 	if (stats_hist(&shist, &sdat) ||
 	    stats_container_save("sequential", "Matrix Multiplication Sequential Execution Runtime Scatter Plot",
 				"Iteration", "Runtime (us)", &sdat, "points") ||
 	    stats_container_save("sequential_hist", "Matrix Multiplicatoin Sequential Execution Runtime Histogram",
 				"Runtime (us)", "Samples", &shist, "steps")) {
 		fprintf(stderr, "Warning: could not save sequential mults stats\n");
 	}

 	/* numcpus workers plus this thread meet at the barrier */
 	if (pthread_barrier_init(&mult_start, NULL, numcpus+1)) {
 		fprintf(stderr, "Cannot init barrier\n");
 		exit(1);
 	}
 	set_priority(PRIO);
 	curdat = &cdat;
 	online_cpu_id = -1; /* Redispatch cpus */
 	/* Create numcpus concurrent threads */
 	for (j = 0; j < numcpus; j++) {
 		tids[j] = create_fifo_thread(concurrent_thread, NULL, PRIO);
 		if (tids[j] == -1) {
 			printf("Thread creation failed (max threads exceeded?)\n");
 			exit(1);
 		}
 	}

 	/* run matrix mult operation concurrently */
 	printf("\nRunning concurrent operations\n");
 	pthread_barrier_wait(&mult_start);
 	start = rt_gettime();
 	join_threads();
 	end = rt_gettime();

 	delta = (long)((end - start)/NS_PER_US);

 	/* same as above: wall-clock total, floating-point division */
 	cavg = (float)delta / iterations;
 	cmin = stats_min(&cdat);
 	cmax = stats_max(&cdat);

 	printf("Min: %ld us\n", cmin);
 	printf("Max: %ld us\n", cmax);
 	printf("Avg: %.4f us\n", cavg);
 	printf("StdDev: %.4f us\n", stats_stddev(&cdat));

 	/* Histogram axes now match the sequential histogram (runtime vs. samples) */
 	if (stats_hist(&chist, &cdat) ||
 	    stats_container_save("concurrent", "Matrix Multiplication Concurrent Execution Runtime Scatter Plot",
 				"Iteration", "Runtime (us)", &cdat, "points") ||
 	    stats_container_save("concurrent_hist", "Matrix Multiplication Concurrent Execution Runtime Histogram",
 				"Runtime (us)", "Samples", &chist, "steps")) {
 		fprintf(stderr, "Warning: could not save concurrent mults stats\n");
 	}

 	printf("\nConcurrent Multipliers:\n");
 	printf("Min: %.4f\n", (float)smin/cmin);
 	printf("Max: %.4f\n", (float)smax/cmax);
 	printf("Avg: %.4f\n", (float)savg/cavg);

 	ret = 1;
 	if (savg > (cavg * criteria))
 		ret = 0;
 	printf("\nCriteria: %.2f * average concurrent time < average sequential time\n",
 		criteria);
 	printf("Result: %s\n", ret ? "FAIL" : "PASS");

 	return;
 }

 /*
  * Entry point: parse the options, size the run to the number of online
  * CPUs and hand over to main_thread().
  *
  * Returns 0 once main_thread() has returned.  Exits with status 1 when the
  * CPU count cannot be determined or the iteration count is not positive.
  */
 int main(int argc, char *argv[])
 {
 	int new_iterations;

 	setup();
 	pass_criteria = PASS_CRITERIA;
 	rt_init("l:i:h", parse_args, argc, argv);

 	numcpus = sysconf(_SC_NPROCESSORS_ONLN);
 	if (numcpus < 1) {
 		/* sysconf() returns -1 on error and numcpus is a divisor below */
 		fprintf(stderr, "Cannot determine the number of online CPUs\n");
 		exit(1);
 	}
 	/* the minimum avg concurrent multiplier to pass */
 	criteria = pass_criteria * numcpus;

 	if (iterations <= 0) {
 		fprintf(stderr, "iterations must be greater than zero\n");
 		exit(1);
 	}

 	printf("\n---------------------------------------\n");
 	printf("Matrix Multiplication (SMP Performance)\n");
 	printf("---------------------------------------\n\n");

 	/* Line below rounds up iterations to a multiple of numcpus.
 	 * Without this, having iterations not a multiple of numcpus causes
 	 * stats to segfault (overflow stats array).
 	 */
 	new_iterations = (int) ( (iterations + numcpus - 1) / numcpus) * numcpus;
 	if (new_iterations != iterations)
 		printf("Rounding up iterations value to nearest multiple of total online CPUs\n");

 	iterations = new_iterations;
 	iterations_percpu = iterations / numcpus;

 	printf("Running %d iterations\n", iterations);
 	printf("Matrix Dimensions: %dx%d\n", MATRIX_SIZE, MATRIX_SIZE);
 	printf("Calculations per iteration: %d\n", ops);
 	/* numcpus is an int, so print it with %d */
 	printf("Number of CPUs: %d\n", numcpus);

 	set_priority(PRIO);
 	main_thread();

 	return 0;
 }
	/******************************************************************************
	*
	* Copyright © International Business Machines Corp., 2007, 2008
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation; either version 2 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
	* the GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, write to the Free Software
	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	*
	* NAME
	* matrix_mult.c
	*
	* DESCRIPTION
	* Compare running sequential matrix multiplication routines
	* to running them in parallel to judge multiprocessor
	* performance
	*
	* USAGE:
	* Use run_auto.sh script in current directory to build and run test.
	*
	* AUTHOR
	* Darren Hart <dvhltc@us.ibm.com>
	*
	* HISTORY
	* 2007-Mar-09: Initial version by Darren Hart <dvhltc@us.ibm.com>
	* 2008-Feb-26: Closely emulate jvm Dinakar Guniguntala <dino@in.ibm.com>
	*
	*****************************************************************************/

	#include <stdio.h>
	#include <stdlib.h>
	#include <math.h>
	#include <librttest.h>
	#include <libstats.h>

	/* Upper bound on the CPU ids probed by set_affinity() */
	#define MAX_CPUS 8192
	/* Priority passed to set_priority() and create_fifo_thread() */
	#define PRIO 43
	/* Dimension of the square matrices that are multiplied */
	#define MATRIX_SIZE 100
	#define DEF_OPS 8 /* the higher the number, the more CPU intensive */
	/* (and therefore SMP performance goes up) */
	#define PASS_CRITERIA 0.75 /* Avg concurrent time * pass criteria < avg seq time - */
	/* for every addition of a cpu */
	/* Default number of iterations (overridden by -i) */
	#define ITERATIONS 128
	/* Size passed to stats_container_init() for the histogram containers */
	#define HIST_BUCKETS 100

	/* Thread state values; not referenced by the code visible in this file */
	#define THREAD_WAIT 1
	#define THREAD_WORK 2
	#define THREAD_DONE 3

	/* Parenthesized so the macro stays one value inside larger expressions */
	#define THREAD_SLEEP (1 * NS_PER_US)

	/* Run parameters: set from the command line in parse_args() or derived in main() */
	static int ops = DEF_OPS;		/* multiplications per recorded iteration (-l) */
	static int numcpus;			/* online CPU count, set in main() */
	static float criteria;			/* pass_criteria * numcpus, set in main() */
	static int *tids;			/* values returned by create_fifo_thread() */
	static int online_cpu_id = -1;		/* last CPU id tried by set_affinity() */
	static int iterations = ITERATIONS;	/* total iterations (-i), rounded up in main() */
	static int iterations_percpu;		/* iterations / numcpus */

	/*
	 * sdat/cdat receive the per-iteration runtimes of the sequential and
	 * concurrent phases, shist/chist the matching histograms.  curdat selects
	 * the container matrix_mult_record() writes to.
	 */
	stats_container_t sdat, cdat, *curdat;
	stats_container_t shist, chist;
	static pthread_barrier_t mult_start;
	/* Serializes set_affinity(); statically initialized because it is never pthread_mutex_init()ed */
	static pthread_mutex_t mutex_cpu = PTHREAD_MUTEX_INITIALIZER;


	/*
	 * Print the help text: calls rt_help() (defined outside this file,
	 * presumably the generic option help) and then lists the options that
	 * parse_args() handles for this test.
	 */
	void usage(void)
	{
		rt_help();
		printf("matrix_mult specific options:\n"
		       " -l# #: number of multiplications per iteration (load)\n"
		       " -i# #: number of iterations\n");
	}

	/*
	 * Option callback handed to rt_init().
	 *
	 * c: option character, v: its argument string.
	 * 'i' stores atoi(v) in iterations, 'l' stores atoi(v) in ops, 'h' prints
	 * usage() and exits with status 0.
	 *
	 * Returns 1 when the option was handled here, 0 otherwise.
	 */
	int parse_args(int c, char *v)
	{
		if (c == 'i') {
			iterations = atoi(v);
			return 1;
		}

		if (c == 'l') {
			ops = atoi(v);
			return 1;
		}

		if (c == 'h') {
			usage();
			exit(0);
		}

		/* not one of ours */
		return 0;
	}

	/*
	 * Fill the two input matrices with deterministic values:
	 * A[r][c] = r * c and B[r][c] = (r * c) % 10.
	 */
	void matrix_init(double A[MATRIX_SIZE][MATRIX_SIZE], double B[MATRIX_SIZE][MATRIX_SIZE])
	{
		int row, col;

		for (row = 0; row < MATRIX_SIZE; row++) {
			for (col = 0; col < MATRIX_SIZE; col++) {
				int product = row * col;

				A[row][col] = (double) product;
				B[row][col] = (double) (product % 10);
			}
		}
	}

	/*
	 * Perform one m_size x m_size multiplication on stack-allocated matrices.
	 *
	 * The operands are filled by matrix_init(), which is declared for
	 * MATRIX_SIZE x MATRIX_SIZE arrays, so m_size is expected to equal
	 * MATRIX_SIZE (every caller in this file passes MATRIX_SIZE).  The result
	 * matrix C is local and discarded; the function exists only to burn CPU.
	 */
	void matrix_mult(int m_size)
	{
		double A[m_size][m_size];
		double B[m_size][m_size];
		double C[m_size][m_size];
		int i, j, k;

		matrix_init(A, B);
		for (i = 0; i < m_size; i++) {
			/*
			 * Walk the rows of A in reverse.  The "- 1" keeps the index in
			 * 0..m_size-1; without it the first pass read A[m_size][..],
			 * one row past the end of the array.
			 */
			int i_m = m_size - 1 - i;
			for (j = 0; j < m_size; j++) {
				double sum = A[i_m][j] * B[j][i];
				for (k = 0; k < m_size; k++)
					sum += A[i_m][k]*B[k][j];
				C[i][j] = sum;
			}
		}
	}

	void matrix_mult_record(int m_size, int index)
	{
	nsec_t start, end, delta;
	int i;

	start = rt_gettime();
	for (i = 0; i < ops; i++)
	matrix_mult(MATRIX_SIZE);
	end = rt_gettime();
	delta = (long)((end - start)/NS_PER_US);
	curdat->records[index].x = index;
	curdat->records[index].y = delta;
	}

	/*
	 * Pin the calling thread to the next CPU id that sched_setaffinity()
	 * accepts.
	 *
	 * Ids are handed out from the shared counter online_cpu_id under
	 * mutex_cpu, so concurrent callers each get a different id.  Ids that are
	 * rejected are skipped.
	 *
	 * Returns the CPU id the caller is now bound to, or -1 when every id below
	 * MAX_CPUS has been tried without success.
	 */
	int set_affinity(void)
	{
		cpu_set_t mask;
		int cpuid = -1;

		pthread_mutex_lock(&mutex_cpu);
		/*
		 * Test the bound before incrementing so the probed id never reaches
		 * MAX_CPUS (the old do/while tried one id too many).
		 */
		while (online_cpu_id + 1 < MAX_CPUS) {
			++online_cpu_id;
			CPU_ZERO(&mask);
			CPU_SET(online_cpu_id, &mask);

			if (!sched_setaffinity(0, sizeof(mask), &mask)) {
				cpuid = online_cpu_id; /* Save this value before unlocking mutex */
				break;
			}
		}
		pthread_mutex_unlock(&mutex_cpu);

		return cpuid;
	}

	void concurrent_thread(void thread)
	{
	struct thread t = (struct thread )thread;
	int thread_id = (intptr_t)t->id;
	int cpuid;
	int i;
	int index;

	cpuid = set_affinity();
	if (cpuid == -1) {
	fprintf(stderr, "Thread %d: Can't set affinity.\n", thread_id);
	exit(1);
	}

	index = iterations_percpu * thread_id; /* To avoid stats overlapping */
	pthread_barrier_wait(&mult_start);
	for (i=0; i < iterations_percpu; i++)
	matrix_mult_record(MATRIX_SIZE, index++);

	return NULL;
	}


	void main_thread(void)
	{
	int ret, i, j;
	nsec_t start, end;
	long smin = 0, smax = 0, cmin = 0, cmax = 0, delta = 0;
	float savg, cavg;
	int cpuid;

	if ( stats_container_init(&sdat, iterations) \|\|
	stats_container_init(&shist, HIST_BUCKETS) \|\|
	stats_container_init(&cdat, iterations) \|\|
	stats_container_init(&chist, HIST_BUCKETS)
	)
	{
	fprintf (stderr, "Cannot init stats container\n");
	exit(1);
	}

	tids = malloc(sizeof(int) * numcpus);
	if (!tids) {
	perror("malloc");
	exit(1);
	}
	memset(tids, 0, numcpus);

	cpuid = set_affinity();
	if (cpuid == -1) {
	fprintf(stderr, "Main thread: Can't set affinity.\n");
	exit(1);
	}


	/* run matrix mult operation sequentially */
	curdat = &sdat;
	printf("\nRunning sequential operations\n");
	start = rt_gettime();
	for (i = 0; i < iterations; i++)
	matrix_mult_record(MATRIX_SIZE, i);
	end = rt_gettime();
	delta = (long)((end - start)/NS_PER_US);

	savg = delta/iterations; /* don't use the stats record, use the total time recorded */
	smin = stats_min(&sdat);
	smax = stats_max(&sdat);

	printf("Min: %ld us\n", smin);
	printf("Max: %ld us\n", smax);
	printf("Avg: %.4f us\n", savg);
	printf("StdDev: %.4f us\n", stats_stddev(&sdat));

	if (
	stats_hist(&shist, &sdat) \|\|

	stats_container_save("sequential", "Matrix Multiplication Sequential Execution Runtime Scatter Plot",
	"Iteration", "Runtime (us)", &sdat, "points") \|\|
	stats_container_save("sequential_hist", "Matrix Multiplicatoin Sequential Execution Runtime Histogram",
	"Runtime (us)", "Samples", &shist, "steps")
	) {
	fprintf(stderr, "Warning: could not save sequential mults stats\n");
	}

	pthread_barrier_init(&mult_start, NULL, numcpus+1);
	set_priority(PRIO);
	curdat = &cdat;
	online_cpu_id = -1; /* Redispatch cpus */
	/* Create numcpus-1 concurrent threads */
	for (j = 0; j < numcpus; j++) {
	tids[j] = create_fifo_thread(concurrent_thread, NULL, PRIO);
	if (tids[j] == -1) {
	printf("Thread creation failed (max threads exceeded?)\n");
	exit(1);
	}
	}


	/* run matrix mult operation concurrently */
	printf("\nRunning concurrent operations\n");
	pthread_barrier_wait(&mult_start);
	start = rt_gettime();
	join_threads();
	end = rt_gettime();

	delta = (long)((end - start)/NS_PER_US);

	cavg = delta/iterations; /* don't use the stats record, use the total time recorded */
	cmin = stats_min(&cdat);
	cmax = stats_max(&cdat);

	printf("Min: %ld us\n", cmin);
	printf("Max: %ld us\n", cmax);
	printf("Avg: %.4f us\n", cavg);
	printf("StdDev: %.4f us\n", stats_stddev(&cdat));

	if (
	stats_hist(&chist, &cdat) \|\|

	stats_container_save("concurrent", "Matrix Multiplication Concurrent Execution Runtime Scatter Plot",
	"Iteration", "Runtime (us)", &cdat, "points") \|\|
	stats_container_save("concurrent_hist", "Matrix Multiplication Concurrent Execution Runtime Histogram",
	"Iteration", "Runtime (us)", &chist, "steps")
	) {
	fprintf(stderr, "Warning: could not save concurrent mults stats\n");
	}

	printf("\nConcurrent Multipliers:\n");
	printf("Min: %.4f\n", (float)smin/cmin);
	printf("Max: %.4f\n", (float)smax/cmax);
	printf("Avg: %.4f\n", (float)savg/cavg);

	ret = 1;
	if (savg > (cavg * criteria))
	ret = 0;
	printf("\nCriteria: %.2f * average concurrent time < average sequential time\n",
	criteria);
	printf("Result: %s\n", ret ? "FAIL" : "PASS");

	return;
	}

	/*
	 * Entry point: parse the options, size the run to the number of online
	 * CPUs and hand over to main_thread().
	 *
	 * Returns 0 once main_thread() has returned.  Exits with status 1 when the
	 * CPU count cannot be determined or the iteration count is not positive.
	 */
	int main(int argc, char *argv[])
	{
		int new_iterations;

		setup();
		pass_criteria = PASS_CRITERIA;
		rt_init("l:i:h", parse_args, argc, argv);

		numcpus = sysconf(_SC_NPROCESSORS_ONLN);
		if (numcpus < 1) {
			/* sysconf() returns -1 on error and numcpus is a divisor below */
			fprintf(stderr, "Cannot determine the number of online CPUs\n");
			exit(1);
		}
		/* the minimum avg concurrent multiplier to pass */
		criteria = pass_criteria * numcpus;

		if (iterations <= 0) {
			fprintf(stderr, "iterations must be greater than zero\n");
			exit(1);
		}

		printf("\n---------------------------------------\n");
		printf("Matrix Multiplication (SMP Performance)\n");
		printf("---------------------------------------\n\n");

		/* Line below rounds up iterations to a multiple of numcpus.
		 * Without this, having iterations not a multiple of numcpus causes
		 * stats to segfault (overflow stats array).
		 */
		new_iterations = (int) ( (iterations + numcpus - 1) / numcpus) * numcpus;
		if (new_iterations != iterations)
			printf("Rounding up iterations value to nearest multiple of total online CPUs\n");

		iterations = new_iterations;
		iterations_percpu = iterations / numcpus;

		printf("Running %d iterations\n", iterations);
		printf("Matrix Dimensions: %dx%d\n", MATRIX_SIZE, MATRIX_SIZE);
		printf("Calculations per iteration: %d\n", ops);
		/* numcpus is an int, so print it with %d */
		printf("Number of CPUs: %d\n", numcpus);

		set_priority(PRIO);
		main_thread();

		return 0;
	}