#!/bin/bash

########################
# Function definitions #
########################

# Pull in the helper library that sits in the same directory as this script.
# The helpers used below but not defined in this file (measure_runtime,
# avgstddev, print_runtime_ratio, get_cache_size, log2) are presumably
# provided by it.
# "$0" is quoted so that a script path containing whitespace still resolves.
source "$(dirname "$0")/measurement-functions"
bart8785c122008-05-29 08:34:27 +00008
#######################################
# Benchmark one program natively and under several Valgrind tools.
#
# The program is first run without Valgrind with thread counts 1, 2 and 4;
# for each run the figures produced by "measure_runtime ... | avgstddev"
# are printed as "Average time" (seconds) and "VSZ" (KB). The -p1 and -p4
# figures are then passed as baseline to print_runtime_ratio for:
#   --tool=none (-p1 and -p4), --tool=drd with --check-stack-var=yes and
#   --check-stack-var=no (-p4), and --tool=helgrind (-p4).
#
# Globals:
#   VG          (read) Valgrind launcher put in front of every tool run.
#   psep        (read) text between "-p" and the thread count. Expanded
#               unquoted on purpose: psep=' ' yields the two words "-p" "4",
#               an empty/unset psep yields the single word "-p4".
#   test_args   (read) extra trailing argument for the program. It is passed
#               quoted, so an empty/unset value still results in one empty
#               argument (behaviour kept from the original implementation).
#   test_output (set in the environment of each helper call) output file
#               name derived from $1, or /dev/null for the --tool=none runs.
# Arguments:
#   $1    path of the program under test (also the prefix of output files)
#   $2..  options passed to the program before the -p option
# Outputs:
#   Three "Average time ... VSZ ..." lines, whatever print_runtime_ratio
#   prints, and a terminating empty line, all on stdout.
# Returns:
#   1 if the scratch file cannot be created, otherwise the status of the
#   final cleanup.
#######################################
function run_test {
  # Every result variable is local so that nothing leaks into the caller's
  # scope. Because bash scoping is dynamic, helpers called from here still
  # see (and may modify) these variables exactly as before.
  local tmp p
  local avg1 stddev1 vsz1 vszdev1
  local avg2 stddev2 vsz2 vszdev2
  local avg4 stddev4 vsz4 vszdev4

  # Unpredictable scratch file instead of /tmp/test-timing.$$.
  tmp="$(mktemp "${TMPDIR:-/tmp}/test-timing.XXXXXX")" || {
    echo "Error: could not create a temporary file." >&2
    return 1
  }

  # The pipeline runs in a subshell, hence the detour through a file to get
  # the four numbers back into this shell.
  p=1
  test_output="${1}-p${p}.out" measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "$tmp"
  read -r avg1 stddev1 vsz1 vszdev1 < "$tmp"
  echo "Average time: ${avg1} +/- ${stddev1} seconds." \
       " VSZ: ${vsz1} +/- ${vszdev1} KB"

  p=2
  test_output="${1}-p${p}.out" measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "$tmp"
  read -r avg2 stddev2 vsz2 vszdev2 < "$tmp"
  echo "Average time: ${avg2} +/- ${stddev2} seconds." \
       " VSZ: ${vsz2} +/- ${vszdev2} KB"

  p=4
  test_output="${1}-p${p}.out" measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "$tmp"
  read -r avg4 stddev4 vsz4 vszdev4 < "$tmp"
  echo "Average time: ${avg4} +/- ${stddev4} seconds." \
       " VSZ: ${vsz4} +/- ${vszdev4} KB"

  # Valgrind without instrumentation, one thread, relative to native -p1.
  p=1
  test_output="/dev/null" \
    print_runtime_ratio "${avg1}" "${stddev1}" "${vsz1}" "${vszdev1}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  # All remaining runs use four threads and the native -p4 baseline.
  p=4
  test_output="/dev/null" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-with-stack-var-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=drd --check-stack-var=yes "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-without-stack-var-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=drd --check-stack-var=no "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-helgrind-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "$VG" --tool=helgrind "$@" -p${psep}${p} "${test_args}"

  echo ''

  rm -f "$tmp"
}
59
60
########################
# Script body          #
########################

# Directory containing this script, made absolute because the benchmark
# subshells further down change directory before referring to files
# relative to it.
DRD_SCRIPTS_DIR="$(dirname "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

# Location of the SPLASH2 tree, expected next to the scripts directory.
SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})." >&2
  exit 1
fi

# Valgrind launcher: honour a caller-supplied VG, otherwise fall back to the
# vg-in-place script two directories above the scripts directory.
if [ "$VG" = "" ]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [ ! -e "$VG" ]; then
  echo "Could not find $VG." >&2
  exit 1
fi
84
bartee17ad62008-06-18 13:31:05 +000085######################################################################################################################
# Meaning of the different columns:
87# 1. SPLASH2 test name.
88# 2. Execution time in seconds for native run with argument -p1.
89# 3. Virtual memory size in KB for the native run with argument -p1.
90# 4. Execution time in seconds for native run with argument -p2.
91# 5. Virtual memory size in KB for the native run with argument -p2.
92# 6. Execution time in seconds for native run with argument -p4.
93# 7. Virtual memory size in KB for the native run with argument -p4.
94# 8. Execution time ratio for --tool=none -p1 versus -p1.
95# 9. Virtual memory size ratio for --tool=none -p1 versus -p1.
96# 10. Execution time ratio for --tool=none -p4 versus -p4.
97# 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
bartef1b9722008-07-04 15:34:23 +000098# 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
99# 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
100# 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
101# 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
barta5a95dd2008-06-18 14:15:11 +0000102# 16. Execution time ratio for --tool=helgrind -p4 versus -p4.
103# 17. Virtual memory size ratio for --tool=helgrind -p4 versus -p4.
# 18. Execution time ratio for Intel Thread Checker -p4 versus -p4.
# 19. Execution time ratio for Intel Thread Checker -p4+f versus -p4.
barta5a95dd2008-06-18 14:15:11 +0000106#
barta5bf2312008-11-21 19:18:47 +0000107# Notes:
108# - Both Helgrind and DRD use a granularity of one byte for data race detection.
109# - Helgrind does detect data races on stack variables. DRD only detects
110# data races on stack variables with --check-stack-var=yes.
# - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
#   likely running a 32-bit OS. Not yet clear to me: which OS? Which
#   granularity does ITC use? And which m4 macros have been used by ITC as
#   implementation of the synchronization primitives?
bart3b7e2e32008-07-10 14:07:22 +0000115#
bart2d5d5292008-11-22 16:19:11 +0000116# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
117###########################################################################################################################
118# Results: native native native none none DRD DRD HG ITC ITC
119# -p1 -p2 -p4 -p1 -p4 -p4 -p4+f -p4 -p4 -p4+f
120# .........................................................................................................................
121# Cholesky 0.09 11880 0.07 21888 0.65 41883 15.2 5.08 3.9 2.15 17 2.41 13 2.60 50 5.86 239 82
122# FFT 0.03 6560 0.02 15101 0.02 32176 10.7 8.36 17.0 2.48 92 2.88 113 3.13 156 5.43 90 41
123# LU, contiguous 0.07 3968 0.05 12176 0.06 28584 12.4 13.17 14.8 2.70 89 2.95 80 3.24 135 5.68 428 128
124# Ocean, contiguous 0.19 23208 0.18 31744 0.13 48888 4.8 3.45 7.6 2.17 59 2.58 50 2.75 77 4.01 90 28
125# Radix 0.20 15008 0.11 23200 0.11 39592 13.1 4.22 23.6 2.23 59 2.67 54 2.87 282 6.23 222 56
126# Raytrace 0.66 206976 0.47 215168 0.47 232235 8.2 1.23 11.5 1.21 262 1.54 101 1.57 291 3.57 172 53
127# Water-n2 0.19 10560 0.09 26944 0.10 59704 11.4 5.65 22.0 1.83 3194 2.92 467 2.99 108 3.49 189 39
128# Water-sp 0.21 4312 0.10 13400 0.10 29496 9.8 12.20 20.7 2.63 254 3.14 64 3.42 99 5.13 183 34
129# .........................................................................................................................
130# geometric mean 0.41 13360 0.09 26319 0.12 48476 10.2 5.37 13.2 2.12 134 2.58 75 2.76 128 4.81 180 51
131# .........................................................................................................................
132# Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.
133# Software: openSUSE 11.0 (64-bit edition), runlevel 3, gcc 4.3.1, 32 bit executables, valgrind trunk r8795.
134###########################################################################################################################
bart3b7e2e32008-07-10 14:07:22 +0000135
136####
bart37630e52008-07-12 16:14:46 +0000137# Notes:
138# - The ITC performance numbers in the above table originate from table 1 in
139# the following paper:
140# Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
141# Accurate and efficient filtering for the Intel thread checker race
142# detector, Proceedings of the 1st workshop on Architectural and system
143# support for improving software dependability, San Jose, California,
144# 2006. Pages: 34 - 41.
145# - The input parameters for benchmarks below originate from table 1 in the
146# following paper:
147# The SPLASH-2 programs: characterization and methodological considerations
148# Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
149# 1995. Proceedings of the 22nd Annual International Symposium on Computer
150# Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
151# ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
bart3b7e2e32008-07-10 14:07:22 +0000152####
bart8785c122008-05-29 08:34:27 +0000153
# Cache parameters used to size the Cholesky and FFT runs. get_cache_size
# and log2 are not defined in this file (presumably they come from
# measurement-functions).
cache_size=$(get_cache_size)
log2_cache_size=$(log2 "${cache_size}")

# Each benchmark that needs a specific working directory runs in a subshell,
# so the directory change does not affect later benchmarks. A failed "cd"
# aborts only that subshell ("exit 1" inside parentheses), which keeps the
# following rm/gzip/run_test commands from operating on the wrong directory.

# Cholesky
(
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Unpack the compressed input files. Only names ending in ".Z" are
  # processed: for any other name "${f%.Z}" would equal "$f" and the
  # redirection would truncate the input file.
  for f in *.Z; do
    [ -e "$f" ] || continue  # glob matched nothing
    gzip -cd < "$f" > "${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY "-C$((cache_size))"
)

# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t "-l$((log2_cache_size/2))" -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test ${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test ${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN -n258

# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
# The command substitution is quoted so that an empty "uname -p" result does
# not turn the test into a syntax error.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" "-n$((2**20))" -r1024

# Raytrace
(
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  # Remove previously unpacked inputs, then unpack them again.
  rm -f -- *.env *.geo *.rl
  for f in *.Z; do
    [ -e "$f" ] || continue  # glob matched nothing
    gzip -cd < "$f" > "${f%.Z}"
  done
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)

# Water-n2
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  # test_input is placed in the environment of run_test; presumably the
  # measurement helpers feed that file to the program - confirm there.
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-SPATIAL
)
bart868d73a2008-06-04 13:02:22 +0000211
212
bartc4a174f2008-06-03 11:41:19 +0000213
214# Local variables:
215# compile-command: "./run-splash2"
216# End: