bart | 8785c12 | 2008-05-29 08:34:27 +0000 | [diff] [blame] | 1 | #!/bin/bash |
| 2 | |
| 3 | ######################## |
| 4 | # Function definitions # |
| 5 | ######################## |
| 6 | |
bart | f33ce89 | 2008-06-07 11:40:14 +0000 | [diff] [blame] | 7 | source "$(dirname $0)/measurement-functions" |
bart | 8785c12 | 2008-05-29 08:34:27 +0000 | [diff] [blame] | 8 | |
# Benchmark one SPLASH2 program natively and under several Valgrind tools.
#
# Arguments:
#   $1   - benchmark executable; also used as prefix for the output file names.
#   $2.. - additional arguments passed to the benchmark.
# Variables read (usually set by the caller as a command prefix):
#   psep      - text inserted between "-p" and the thread count. It is expanded
#               unquoted on purpose, so psep=' ' yields the two words "-p 4"
#               while an empty or unset psep yields the single word "-p4".
#   test_args - trailing argument handed to the benchmark. It is always passed,
#               so an unset test_args results in an empty trailing argument.
#   VG        - Valgrind launcher used as the command for print_runtime_ratio.
# Relies on measure_runtime, avgstddev and print_runtime_ratio, which are not
# defined in this file (presumably provided by the sourced
# measurement-functions file -- TODO confirm).
# Outputs:
#   One "Average time" line per native run (1, 2 and 4 threads), followed by
#   the output of print_runtime_ratio for --tool=none (1 and 4 threads),
#   --tool=drd with and without --check-stack-var, and --tool=helgrind
#   (4 threads each), and finally an empty line.
# Exits the current (sub)shell with status 1 when avgstddev reports more than
# the four expected fields.
function run_test {
  local p stats rest
  local t_avg t_stddev t_vsz t_vszdev
  local -a avg stddev vsz vszdev

  # Native runs. The statistics are captured through a command substitution
  # instead of a predictable file in /tmp, so nothing is left behind on any
  # exit path.
  for p in 1 2 4; do
    # shellcheck disable=SC2086 # word splitting of ${psep} is intentional
    stats=$(test_output="${1}-p${p}.out" \
      measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev)
    rest=""
    read -r t_avg t_stddev t_vsz t_vszdev rest <<< "${stats}"
    echo "Average time: ${t_avg} +/- ${t_stddev} seconds." \
      " VSZ: ${t_vsz} +/- ${t_vszdev} KB"

    if [ "${rest}" != "" ]; then
      echo "Internal error ($rest)" >&2
      exit 1
    fi

    avg[$p]="${t_avg}"
    stddev[$p]="${t_stddev}"
    vsz[$p]="${t_vsz}"
    vszdev[$p]="${t_vszdev}"
  done

  # --tool=none, compared against the native run with the same thread count.
  for p in 1 4; do
    # shellcheck disable=SC2086 # word splitting of ${psep} is intentional
    test_output="/dev/null" \
      print_runtime_ratio "${avg[$p]}" "${stddev[$p]}" "${vsz[$p]}" "${vszdev[$p]}" \
      "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"
  done

  # The data race detectors are only run with four threads.
  p=4

  # shellcheck disable=SC2086 # word splitting of ${psep} is intentional
  test_output="${1}-drd-with-stack-var-4.out" \
    print_runtime_ratio "${avg[4]}" "${stddev[4]}" "${vsz[4]}" "${vszdev[4]}" \
    "$VG" --tool=drd --check-stack-var=yes "$@" -p${psep}${p} "${test_args}"

  # shellcheck disable=SC2086 # word splitting of ${psep} is intentional
  test_output="${1}-drd-without-stack-var-4.out" \
    print_runtime_ratio "${avg[4]}" "${stddev[4]}" "${vsz[4]}" "${vszdev[4]}" \
    "$VG" --tool=drd --check-stack-var=no "$@" -p${psep}${p} "${test_args}"

  # shellcheck disable=SC2086 # word splitting of ${psep} is intentional
  test_output="${1}-helgrind-4.out" \
    print_runtime_ratio "${avg[4]}" "${stddev[4]}" "${vsz[4]}" "${vszdev[4]}" \
    "$VG" --tool=helgrind "$@" -p${psep}${p} "${test_args}"

  echo ''
}
| 75 | |
| 76 | |
bart | 9cdd178 | 2008-06-08 11:22:23 +0000 | [diff] [blame] | 77 | ######################## |
| 78 | # Script body # |
| 79 | ######################## |
bart | 8785c12 | 2008-05-29 08:34:27 +0000 | [diff] [blame] | 80 | |
# Directory that contains this script, made absolute when the script was
# invoked through a relative path. "$0" is quoted so that a path containing
# whitespace survives.
DRD_SCRIPTS_DIR="$(dirname -- "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

# The SPLASH2 sources are expected next to the scripts directory.
SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})." >&2
  exit 1
fi

# Allow the caller to override the Valgrind launcher through $VG.
if [ "$VG" = "" ]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [ ! -e "$VG" ]; then
  echo "Could not find $VG." >&2
  exit 1
fi
| 100 | |
bart | ee17ad6 | 2008-06-18 13:31:05 +0000 | [diff] [blame] | 101 | ###################################################################################################################### |
bart | a5a95dd | 2008-06-18 14:15:11 +0000 | [diff] [blame] | 102 | # Meaning of the different columns: |
| 103 | # 1. SPLASH2 test name. |
| 104 | # 2. Execution time in seconds for native run with argument -p1. |
| 105 | # 3. Virtual memory size in KB for the native run with argument -p1. |
| 106 | # 4. Execution time in seconds for native run with argument -p2. |
| 107 | # 5. Virtual memory size in KB for the native run with argument -p2. |
| 108 | # 6. Execution time in seconds for native run with argument -p4. |
| 109 | # 7. Virtual memory size in KB for the native run with argument -p4. |
| 110 | # 8. Execution time ratio for --tool=none -p1 versus -p1. |
| 111 | # 9. Virtual memory size ratio for --tool=none -p1 versus -p1. |
| 112 | # 10. Execution time ratio for --tool=none -p4 versus -p4. |
| 113 | # 11. Virtual memory size ratio for --tool=none -p4 versus -p4. |
bart | ef1b972 | 2008-07-04 15:34:23 +0000 | [diff] [blame] | 114 | # 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4. |
| 115 | # 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4. |
| 116 | # 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4. |
| 117 | # 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4. |
bart | a5a95dd | 2008-06-18 14:15:11 +0000 | [diff] [blame] | 118 | # 16. Execution time ratio for --tool=helgrind -p4 versus -p4. |
| 119 | # 17. Virtual memory size ratio for --tool=helgrind -p4 versus -p4. |
bart | 2d5d529 | 2008-11-22 16:19:11 +0000 | [diff] [blame] | 120 | # 18. Execution time ratio for Intel Thread Checker -p4 versus -p4. |
| 121 | # 19. Execution time ratio for Intel Thread Checker with filtering (column "ITC -p4+f") -p4 versus -p4. |
bart | a5a95dd | 2008-06-18 14:15:11 +0000 | [diff] [blame] | 122 | # |
bart | a5bf231 | 2008-11-21 19:18:47 +0000 | [diff] [blame] | 123 | # Notes: |
| 124 | # - Both Helgrind and DRD use a granularity of one byte for data race detection. |
| 125 | # - Helgrind does detect data races on stack variables. DRD only detects |
| 126 | # data races on stack variables with --check-stack-var=yes. |
bart | 2d5d529 | 2008-11-22 16:19:11 +0000 | [diff] [blame] | 127 | # - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most |
| 128 | # likely running a 32-bit OS. Not yet clear to me: which OS ? Which |
| 129 | # granularity does ITC use ? And which m4 macros have been used by ITC as |
| 130 | # implementation of the synchronization primitives ? |
bart | 3b7e2e3 | 2008-07-10 14:07:22 +0000 | [diff] [blame] | 131 | # |
bart | 2d5d529 | 2008-11-22 16:19:11 +0000 | [diff] [blame] | 132 | # 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
| 133 | ########################################################################################################################### |
| 134 | # Results: native native native none none DRD DRD HG ITC ITC |
| 135 | # -p1 -p2 -p4 -p1 -p4 -p4 -p4+f -p4 -p4 -p4+f |
| 136 | # ......................................................................................................................... |
bart | e984fe9 | 2009-04-25 15:10:44 +0000 | [diff] [blame] | 137 | # Cholesky 0.09 11880 0.07 21888 0.65 41883 13.2 4.86 2.5 2.08 19 2.34 13 2.53 29 6.04 239 82 |
| 138 | # FFT 0.03 6560 0.02 15101 0.02 32176 21.0 7.92 20.5 2.41 118 2.81 53 3.06 120 5.42 90 41 |
| 139 | # LU, contiguous 0.07 3968 0.05 12176 0.06 28584 14.8 12.29 21.0 2.62 124 2.87 74 3.16 157 5.53 428 128 |
| 140 | # Ocean, contiguous 0.19 23208 0.18 31744 0.13 48888 6.7 3.75 10.6 2.09 69 2.52 43 2.71 264 5.95 90 28 |
| 141 | # Radix 0.20 15008 0.11 23200 0.11 39592 13.1 4.06 22.6 2.17 56 2.61 41 2.82 111 6.11 222 56 |
| 142 | # Raytrace 0.66 206976 0.47 215168 0.47 232235 8.4 1.22 10.8 1.20 272 1.53 88 1.56 211 3.79 172 53 |
| 143 | # Water-n2 0.19 10560 0.09 26944 0.10 59704 12.9 5.40 24.0 1.79 3793 2.92 621 2.95 109 3.54 189 39 |
| 144 | # Water-sp 0.21 4312 0.10 13400 0.10 29496 10.2 11.41 22.0 2.52 279 3.03 55 5.55 97 4.76 183 34 |
bart | 2d5d529 | 2008-11-22 16:19:11 +0000 | [diff] [blame] | 145 | # ......................................................................................................................... |
bart | e984fe9 | 2009-04-25 15:10:44 +0000 | [diff] [blame] | 146 | # geometric mean 0.14 13360 0.09 26319 0.12 48476 11.9 5.21 14.0 2.06 154 2.53 64 2.88 117 5.05 180 51 |
bart | 2d5d529 | 2008-11-22 16:19:11 +0000 | [diff] [blame] | 147 | # ......................................................................................................................... |
| 148 | # Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM. |
bart | e984fe9 | 2009-04-25 15:10:44 +0000 | [diff] [blame] | 149 | # Software: openSUSE 11.0 (64-bit edition), runlevel 3, gcc 4.3.1, 32 bit executables, valgrind trunk r9629. |
bart | 2d5d529 | 2008-11-22 16:19:11 +0000 | [diff] [blame] | 150 | ########################################################################################################################### |
bart | 3b7e2e3 | 2008-07-10 14:07:22 +0000 | [diff] [blame] | 151 | |
| 152 | #### |
bart | 37630e5 | 2008-07-12 16:14:46 +0000 | [diff] [blame] | 153 | # Notes: |
| 154 | # - The ITC performance numbers in the above table originate from table 1 in |
| 155 | # the following paper: |
| 156 | # Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas, |
| 157 | # Accurate and efficient filtering for the Intel thread checker race |
| 158 | # detector, Proceedings of the 1st workshop on Architectural and system |
| 159 | # support for improving software dependability, San Jose, California, |
| 160 | # 2006. Pages: 34 - 41. |
| 161 | # - The input parameters for benchmarks below originate from table 1 in the |
| 162 | # following paper: |
| 163 | # The SPLASH-2 programs: characterization and methodological considerations |
| 164 | # Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A. |
| 165 | # 1995. Proceedings of the 22nd Annual International Symposium on Computer |
| 166 | # Architecture, 22-24 Jun 1995, Page(s): 24 - 36. |
| 167 | # ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z |
bart | 3b7e2e3 | 2008-07-10 14:07:22 +0000 | [diff] [blame] | 168 | #### |
bart | 8785c12 | 2008-05-29 08:34:27 +0000 | [diff] [blame] | 169 | |
# Cache size as reported by get_cache_size, and the value log2 computes from
# it. Both helpers are defined outside this file (presumably in the sourced
# measurement-functions file -- TODO confirm). The values feed the -C option
# of CHOLESKY and the -l option of FFT below.
cache_size="$(get_cache_size)"
log2_cache_size="$(log2 ${cache_size})"
| 172 | |
# Cholesky
# Runs in a subshell so that the directory change does not affect the rest of
# the script.
(
  # Skip this benchmark instead of unpacking files into the wrong directory.
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Decompress every *.Z input next to its archive. Matching "*.Z" rather
  # than "*Z" guarantees that the output name differs from the input name.
  for f in *.Z; do
    [ -e "$f" ] || continue
    gzip -cd < "$f" > "${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY -C$((cache_size))
)
bart | 3281150 | 2008-06-03 15:12:59 +0000 | [diff] [blame] | 182 | |
# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t -l$((log2_cache_size/2)) -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test "${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU" -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test "${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN" -n258
bart | 868d73a | 2008-06-04 13:02:22 +0000 | [diff] [blame] | 195 | |
# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
# The output of uname is quoted because it may be empty or consist of several
# words, which would otherwise make the test fail with a syntax error.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" -n$((2**20)) -r1024
bart | 868d73a | 2008-06-04 13:02:22 +0000 | [diff] [blame] | 203 | |
# Raytrace
# Runs in a subshell so that the directory changes do not affect the rest of
# the script.
(
  # Never run the rm below outside the inputs directory.
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  # Remove previously unpacked inputs, then unpack them again.
  rm -f -- *.env *.geo *.rl
  for f in *.Z; do
    [ -e "$f" ] || continue
    gzip -cd < "$f" > "${f%.Z}"
  done
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)
bart | 334db5e | 2008-06-05 10:14:53 +0000 | [diff] [blame] | 215 | |
# Water-n2
# Each Water benchmark runs in its own subshell from within its source
# directory; the benchmark is skipped when that directory is missing.
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-SPATIAL
)
bart | 868d73a | 2008-06-04 13:02:22 +0000 | [diff] [blame] | 227 | |
| 228 | |
bart | c4a174f | 2008-06-03 11:41:19 +0000 | [diff] [blame] | 229 | |
| 230 | # Local variables: |
| 231 | # compile-command: "./run-splash2" |
| 232 | # End: |