blob: d9f976f3f9f68878dfc7d69eef145d1118b1de28 [file] [log] [blame]
bart8785c122008-05-29 08:34:27 +00001#!/bin/bash
2
3########################
4# Function definitions #
5########################
6
bartf33ce892008-06-07 11:40:14 +00007source "$(dirname $0)/measurement-functions"
bart8785c122008-05-29 08:34:27 +00008
#######################################
# Benchmark one program natively and under several Valgrind tools.
#
# The command line "$@" is first run through `measure_runtime | avgstddev`
# with -p1, -p2 and -p4. The four fields read back for -p1 and -p4
# (average time, its deviation, VSZ, its deviation) are then passed to
# print_runtime_ratio together with the same command line prefixed by
# "$VG" and, in turn:
#   --tool=none (-p1 and -p4),
#   --tool=drd with --check-stack-var=yes and =no (-p4),
#   --tool=helgrind (-p4).
# measure_runtime, avgstddev and print_runtime_ratio are defined outside
# this function (presumably in the sourced measurement-functions file).
#
# Globals:
#   VG          (read) launcher put in front of the instrumented runs.
#   psep        (read) text inserted between "-p" and the thread count.
#               The expansion is deliberately left unquoted, so psep=' '
#               yields the two words "-p" "4" and psep='' yields "-p4".
#   test_args   (read) always appended as ONE trailing argument, even
#               when it is empty.
#   test_output (set in the environment of each helper invocation only).
# Arguments:
#   $1    program to run; also the prefix of the test_output file names.
#   $2..  additional program arguments.
# Outputs:
#   One "Average time" line per native run, whatever print_runtime_ratio
#   prints, and a final empty line, all on stdout.
# Returns:
#   Exits the shell with status 1 if avgstddev emits more than four
#   fields, or if no temporary file can be created.
#######################################
function run_test {
  local tmp p avg stddev vsz vszdev rest
  local avg1 stddev1 vsz1 vszdev1
  local avg4 stddev4 vsz4 vszdev4

  # Use an unpredictable name instead of /tmp/test-timing.$$ so the file
  # cannot be pre-created or symlinked by another user.
  tmp="$(mktemp "${TMPDIR:-/tmp}/test-timing.XXXXXX")" || {
    echo "Internal error (mktemp failed)"
    exit 1
  }

  for p in 1 2 4; do
    # shellcheck disable=SC2086  # word splitting of psep is intentional
    test_output="${1}-p${p}.out" \
      measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "${tmp}"

    rest=""
    read -r avg stddev vsz vszdev rest < "${tmp}"
    echo "Average time: ${avg} +/- ${stddev} seconds." \
         " VSZ: ${vsz} +/- ${vszdev} KB"

    # Anything beyond the four expected fields means the helper output
    # format is not what this script understands.
    if [ "${rest}" != "" ]; then
      rm -f "${tmp}"
      echo "Internal error ($rest)"
      exit 1
    fi

    # Only the -p1 and -p4 results serve as baselines further down.
    case "${p}" in
      1) avg1="${avg}" stddev1="${stddev}" vsz1="${vsz}" vszdev1="${vszdev}" ;;
      4) avg4="${avg}" stddev4="${stddev}" vsz4="${vsz}" vszdev4="${vszdev}" ;;
    esac
  done
  rm -f "${tmp}"

  p=1
  # shellcheck disable=SC2086
  test_output="/dev/null" \
    print_runtime_ratio "${avg1}" "${stddev1}" "${vsz1}" "${vszdev1}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  p=4
  # shellcheck disable=SC2086
  test_output="/dev/null" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  # shellcheck disable=SC2086
  test_output="${1}-drd-with-stack-var-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=yes \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  # shellcheck disable=SC2086
  test_output="${1}-drd-without-stack-var-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=no \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  # shellcheck disable=SC2086
  test_output="${1}-helgrind-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=helgrind "$@" -p${psep}${p} "${test_args}"

  echo ''
}
77
78
bart9cdd1782008-06-08 11:22:23 +000079########################
80# Script body #
81########################
bart8785c122008-05-29 08:34:27 +000082
# Directory containing this script, made absolute so it stays valid after
# the `cd` calls performed for the individual benchmarks. "$0" is quoted
# so that a script path containing whitespace is handled correctly.
DRD_SCRIPTS_DIR="$(dirname "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

# The SPLASH-2 tree is expected next to the scripts directory.
SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})."
  exit 1
fi

# VG may be preset by the caller; otherwise fall back to the in-tree
# vg-in-place launcher two levels above the scripts directory.
if [ "$VG" = "" ]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [ ! -e "$VG" ]; then
  echo "Could not find $VG."
  exit 1
fi
102
bartee17ad62008-06-18 13:31:05 +0000103######################################################################################################################
# Meaning of the different columns:
105# 1. SPLASH2 test name.
106# 2. Execution time in seconds for native run with argument -p1.
107# 3. Virtual memory size in KB for the native run with argument -p1.
108# 4. Execution time in seconds for native run with argument -p2.
109# 5. Virtual memory size in KB for the native run with argument -p2.
110# 6. Execution time in seconds for native run with argument -p4.
111# 7. Virtual memory size in KB for the native run with argument -p4.
112# 8. Execution time ratio for --tool=none -p1 versus -p1.
113# 9. Virtual memory size ratio for --tool=none -p1 versus -p1.
114# 10. Execution time ratio for --tool=none -p4 versus -p4.
115# 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
bartef1b9722008-07-04 15:34:23 +0000116# 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
117# 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
118# 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
119# 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
barta5a95dd2008-06-18 14:15:11 +0000120# 16. Execution time ratio for --tool=helgrind -p4 versus -p4.
121# 17. Virtual memory size ratio for --tool=helgrind -p4 versus -p4.
bart2d5d5292008-11-22 16:19:11 +0000122# 18. Execution time ratio for Intel Thread Checker -p4 versus -p4.
# 19. Execution time ratio for Intel Thread Checker with filtering (-p4+f) versus -p4.
barta5a95dd2008-06-18 14:15:11 +0000124#
barta5bf2312008-11-21 19:18:47 +0000125# Notes:
126# - Both Helgrind and DRD use a granularity of one byte for data race detection.
127# - Helgrind does detect data races on stack variables. DRD only detects
128# data races on stack variables with --check-stack-var=yes.
bart2d5d5292008-11-22 16:19:11 +0000129# - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
130# likely running a 32-bit OS. Not yet clear to me: which OS ? Which
131# granularity does ITC use ? And which m4 macro's have been used by ITC as
132# implementation of the synchronization primitives ?
bart3b7e2e32008-07-10 14:07:22 +0000133#
bartc243b2c2009-05-10 10:36:18 +0000134# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
135############################################################################################################################
136# Results: native native native none none DRD DRD HG ITC ITC
137# -p1 -p2 -p4 -p1 -p4 -p4 -p4+f -p4 -p4 -p4+f
138# ..........................................................................................................................
bart74c2b682009-06-20 10:55:12 +0000139# Cholesky 0.14 12016 0.07 22016 0.62 41328 7.9 4.60 2.1 2.13 20 2.39 11 2.59 23 6.09 239 82
140# FFT 0.02 6692 0.02 15571 0.02 32304 17.0 7.92 18.0 2.41 154 2.81 56 3.06 105 5.42 90 41
141# LU, contiguous 0.07 4100 0.04 12304 0.05 28712 12.7 12.29 18.0 2.62 145 2.73 71 3.16 143 5.53 428 128
142# Ocean, contiguous 0.22 16848 0.18 25384 0.15 42528 6.5 3.75 10.4 2.09 117 2.52 85 2.78 272 5.95 90 28
143# Radix 0.20 15136 0.13 23336 0.11 40411 12.9 4.06 23.7 2.13 69 2.57 46 2.77 111 5.94 222 56
144# Raytrace 0.64 207104 0.48 215637 0.48 232704 8.7 1.22 11.5 1.20 378 1.31 87 1.36 213 3.79 172 53
145# Water-n2 0.17 10696 0.09 27072 0.10 59832 13.1 5.40 22.7 1.79 4422 2.82 1018 2.95 104 3.54 189 39
146# Water-sp 0.19 4444 0.11 13536 0.10 29928 11.0 11.42 22.1 2.52 382 3.03 55 3.30 93 4.76 183 34
bartc243b2c2009-05-10 10:36:18 +0000147# ..........................................................................................................................
bart74c2b682009-06-20 10:55:12 +0000148# geometric mean 0.14 13024 0.09 25818 0.12 47843 10.8 5.17 13.3 2.06 197 2.46 73 2.67 110 5.03 180 51
bartc243b2c2009-05-10 10:36:18 +0000149# ..........................................................................................................................
bart2d5d5292008-11-22 16:19:11 +0000150# Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.
bart74c2b682009-06-20 10:55:12 +0000151# Software: openSUSE 11.0 (64-bit edition), runlevel 3, gcc 4.3.1, 32 bit executables, valgrind trunk r10339.
bartc243b2c2009-05-10 10:36:18 +0000152############################################################################################################################
bart3b7e2e32008-07-10 14:07:22 +0000153
154####
bart37630e52008-07-12 16:14:46 +0000155# Notes:
156# - The ITC performance numbers in the above table originate from table 1 in
157# the following paper:
158# Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
159# Accurate and efficient filtering for the Intel thread checker race
160# detector, Proceedings of the 1st workshop on Architectural and system
161# support for improving software dependability, San Jose, California,
162# 2006. Pages: 34 - 41.
163# - The input parameters for benchmarks below originate from table 1 in the
164# following paper:
165# The SPLASH-2 programs: characterization and methodological considerations
166# Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
167# 1995. Proceedings of the 22nd Annual International Symposium on Computer
168# Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
169# ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
bart3b7e2e32008-07-10 14:07:22 +0000170####
bart8785c122008-05-29 08:34:27 +0000171
# Cache size as reported by get_cache_size and the value log2 derives from
# it; both helpers are defined outside this part of the script.
cache_size=$(get_cache_size)
log2_cache_size=$(log2 "${cache_size}")

# Cholesky
(
  # Run in a subshell so the directory change does not affect later
  # benchmarks; give up on this benchmark if the directory is missing.
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Unpack every compressed input. Matching "*.Z" (not "*Z") guarantees
  # the output name differs from the input name, so no input can be
  # truncated by its own redirection.
  for f in *.Z
  do
    [ -e "$f" ] || continue   # glob matched nothing
    gzip -cd <"$f" >"${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY -C$((cache_size))
)
bart32811502008-06-03 15:12:59 +0000184
# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t -l$((log2_cache_size/2)) -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test "${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU" -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test "${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN" -n258

# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
# The command substitution is quoted so that an empty `uname -p` result
# compares as a string instead of producing a `[` syntax error.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" -n$((2**20)) -r1024
bart868d73a2008-06-04 13:02:22 +0000205
# Raytrace
(
  # Abort this benchmark if the inputs directory is missing: the rm below
  # must never run in whatever directory the script was started from.
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  # Remove previously unpacked inputs before regenerating them.
  rm -f -- *.env *.geo *.rl
  # Matching "*.Z" (not "*Z") guarantees the output name differs from the
  # input name, so no input can be truncated by its own redirection.
  for f in *.Z
  do
    [ -e "$f" ] || continue   # glob matched nothing
    gzip -cd <"$f" >"${f%.Z}"
  done
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)
bart334db5e2008-06-05 10:14:53 +0000217
# Water-n2
(
  # Subshell keeps the directory change local; skip the benchmark if the
  # directory does not exist instead of running it from the wrong place.
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  # test_input is exported to run_test's environment; presumably the
  # measurement helpers feed this file to the program -- confirm there.
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-SPATIAL
)
bart868d73a2008-06-04 13:02:22 +0000229
230
bartc4a174f2008-06-03 11:41:19 +0000231
232# Local variables:
233# compile-command: "./run-splash2"
234# End: