| #!/bin/bash |
| |
| ######################## |
| # Function definitions # |
| ######################## |
| |
| source "$(dirname $0)/measurement-functions" |
| |
#######################################
# Benchmark one SPLASH2 program natively and under several Valgrind tools.
#
# The program is first run natively with 1, 2 and 4 threads; for each run
# the four values produced by the "measure_runtime | avgstddev" pipeline are
# printed as average time and VSZ.  The -p1 and -p4 results are then handed
# to print_runtime_ratio as the baseline for --tool=none (-p1 and -p4), DRD
# with and without --check-stack-var, and Helgrind (all three with -p4).
#
# Globals (read):
#   VG         - path of the Valgrind launcher.
#   psep       - text placed between "-p" and the thread count; a blank
#                turns the count into a separate argument.
#   test_args  - argument appended after the -p option.
#                NOTE(review): it is passed even when empty, so the program
#                then receives an empty trailing argument.
# Arguments:
#   $1    - program to run; also the prefix of the *.out file names passed
#           to the helpers through test_output.
#   $2... - options placed before the -p option.
# Outputs:
#   One "Average time" line per native run and a final empty line, plus
#   whatever measure_runtime and print_runtime_ratio print themselves.
#######################################
function run_test {
  local p avg stddev vsz vszdev
  local avg1 stddev1 vsz1 vszdev1
  local avg4 stddev4 vsz4 vszdev4

  # Native runs.
  for p in 1 2 4; do
    # The statistics are read directly from the pipeline, so no scratch file
    # with a predictable name is needed in /tmp.
    # -p${psep}${p} is intentionally unquoted: a blank psep must split it
    # into two separate words.
    read -r avg stddev vsz vszdev < <(
      test_output="${1}-p${p}.out" \
        measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev
    )
    echo "Average time: ${avg} +/- ${stddev} seconds." \
      " VSZ: ${vsz} +/- ${vszdev} KB"

    # Only the -p1 and -p4 results serve as a baseline further down.
    case "${p}" in
      1) avg1=${avg} stddev1=${stddev} vsz1=${vsz} vszdev1=${vszdev} ;;
      4) avg4=${avg} stddev4=${stddev} vsz4=${vsz} vszdev4=${vszdev} ;;
    esac
  done

  # The statistics are quoted so that an empty value (failed measurement)
  # cannot shift the remaining print_runtime_ratio arguments.

  # --tool=none, one thread.
  p=1
  test_output="/dev/null" \
    print_runtime_ratio "${avg1}" "${stddev1}" "${vsz1}" "${vszdev1}" \
    "${VG}" --tool=none "$@" -p${psep}${p} "${test_args}"

  # All remaining runs use four threads.
  p=4

  # --tool=none.
  test_output="/dev/null" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "${VG}" --tool=none "$@" -p${psep}${p} "${test_args}"

  # DRD, checking stack variables.
  test_output="${1}-drd-with-stack-var-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "${VG}" --tool=drd --check-stack-var=yes "$@" -p${psep}${p} "${test_args}"

  # DRD, not checking stack variables.
  test_output="${1}-drd-without-stack-var-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "${VG}" --tool=drd --check-stack-var=no "$@" -p${psep}${p} "${test_args}"

  # Helgrind.
  test_output="${1}-helgrind-4.out" \
    print_runtime_ratio "${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}" \
    "${VG}" --tool=helgrind "$@" -p${psep}${p} "${test_args}"

  echo ''
}
| |
| |
| ######################## |
| # Script body # |
| ######################## |
| |
# Directory that contains this script, made absolute so that it stays valid
# after the benchmark sections further down change directory.
# "$0" is quoted so that a script path containing whitespace is handled.
DRD_SCRIPTS_DIR="$(dirname -- "$0")"
if [[ "${DRD_SCRIPTS_DIR}" != /* ]]; then
  DRD_SCRIPTS_DIR="${PWD}/${DRD_SCRIPTS_DIR}"
fi

# Location of the SPLASH2 tree, a sibling of the scripts directory.
SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [[ ! -e "${SPLASH2}" ]]; then
  # Diagnostics go to stderr so they do not mix with the results on stdout.
  echo "Error: splash2 directory not found (${SPLASH2})." >&2
  exit 1
fi

# Valgrind launcher: honour a VG value supplied by the caller, otherwise
# fall back to vg-in-place two levels above the scripts directory.
if [[ -z "${VG}" ]]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [[ ! -e "${VG}" ]]; then
  echo "Could not find ${VG}." >&2
  exit 1
fi
| |
| ###################################################################################################################### |
| # Meaning of the different columns: |
| # 1. SPLASH2 test name. |
| # 2. Execution time in seconds for native run with argument -p1. |
| # 3. Virtual memory size in KB for the native run with argument -p1. |
| # 4. Execution time in seconds for native run with argument -p2. |
| # 5. Virtual memory size in KB for the native run with argument -p2. |
| # 6. Execution time in seconds for native run with argument -p4. |
| # 7. Virtual memory size in KB for the native run with argument -p4. |
| # 8. Execution time ratio for --tool=none -p1 versus -p1. |
| # 9. Virtual memory size ratio for --tool=none -p1 versus -p1. |
| # 10. Execution time ratio for --tool=none -p4 versus -p4. |
| # 11. Virtual memory size ratio for --tool=none -p4 versus -p4. |
| # 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4. |
| # 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4. |
| # 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4. |
| # 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4. |
| # 16. Execution time ratio for --tool=helgrind -p4 versus -p4. |
| # 17. Virtual memory size ratio for --tool=helgrind -p4 versus -p4. |
| # 18. Execution time ratio for Intel Thread Checker -p4 versus -p4. |
| # 19. Execution time ratio for Intel Thread Checker -p4 versus -p4 (the "ITC -p4+f" column below, presumably ITC with filtering). |
| # |
| # Notes: |
| # - Both Helgrind and DRD use a granularity of one byte for data race detection. |
| # - Helgrind does detect data races on stack variables. DRD only detects |
| # data races on stack variables with --check-stack-var=yes. |
| # - The ITC tests were run on a 4-way 2.5 GHz Pentium 4 workstation, most |
| # likely running a 32-bit OS. Not yet clear to me: which OS? Which |
| # granularity does ITC use? And which m4 macros were used by ITC to |
| # implement the synchronization primitives? |
| # |
| # 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
| ########################################################################################################################### |
| # Results: native native native none none DRD DRD HG ITC ITC |
| # -p1 -p2 -p4 -p1 -p4 -p4 -p4+f -p4 -p4 -p4+f |
| # ......................................................................................................................... |
| # Cholesky 0.09 11880 0.07 21888 0.65 41883 15.2 5.08 3.9 2.15 17 2.41 13 2.60 50 5.86 239 82 |
| # FFT 0.03 6560 0.02 15101 0.02 32176 10.7 8.36 17.0 2.48 92 2.88 113 3.13 156 5.43 90 41 |
| # LU, contiguous 0.07 3968 0.05 12176 0.06 28584 12.4 13.17 14.8 2.70 89 2.95 80 3.24 135 5.68 428 128 |
| # Ocean, contiguous 0.19 23208 0.18 31744 0.13 48888 4.8 3.45 7.6 2.17 59 2.58 50 2.75 77 4.01 90 28 |
| # Radix 0.20 15008 0.11 23200 0.11 39592 13.1 4.22 23.6 2.23 59 2.67 54 2.87 282 6.23 222 56 |
| # Raytrace 0.66 206976 0.47 215168 0.47 232235 8.2 1.23 11.5 1.21 262 1.54 101 1.57 291 3.57 172 53 |
| # Water-n2 0.19 10560 0.09 26944 0.10 59704 11.4 5.65 22.0 1.83 3194 2.92 467 2.99 108 3.49 189 39 |
| # Water-sp 0.21 4312 0.10 13400 0.10 29496 9.8 12.20 20.7 2.63 254 3.14 64 3.42 99 5.13 183 34 |
| # ......................................................................................................................... |
| # geometric mean 0.41 13360 0.09 26319 0.12 48476 10.2 5.37 13.2 2.12 134 2.58 75 2.76 128 4.81 180 51 |
| # ......................................................................................................................... |
| # Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM. |
| # Software: openSUSE 11.0 (64-bit edition), runlevel 3, gcc 4.3.1, 32 bit executables, valgrind trunk r8795. |
| ########################################################################################################################### |
| |
| #### |
| # Notes: |
| # - The ITC performance numbers in the above table originate from table 1 in |
| # the following paper: |
| # Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas, |
| # Accurate and efficient filtering for the Intel thread checker race |
| # detector, Proceedings of the 1st workshop on Architectural and system |
| # support for improving software dependability, San Jose, California, |
| # 2006. Pages: 34 - 41. |
| # - The input parameters for benchmarks below originate from table 1 in the |
| # following paper: |
| # The SPLASH-2 programs: characterization and methodological considerations |
| # Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A. |
| # 1995. Proceedings of the 22nd Annual International Symposium on Computer |
| # Architecture, 22-24 Jun 1995, Page(s): 24 - 36. |
| # ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z |
| #### |
| |
# Cache size and its base-2 logarithm; both are used below to size the
# Cholesky (-C) and FFT (-l) runs.  get_cache_size and log2 are not defined
# in this file -- presumably they come from the sourced measurement-functions.
cache_size=$(get_cache_size)
# Quoted so that the value always reaches log2 as exactly one argument.
log2_cache_size=$(log2 "${cache_size}")
| |
# Cholesky: unpack the compressed input files, then benchmark ../CHOLESKY
# from within the inputs directory.  Runs in a subshell so that the change
# of directory does not affect the rest of the script.
(
  # Give up on this benchmark if the directory is missing, rather than
  # unpacking and running in whatever directory happens to be current.
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Match only names ending in ".Z": for any other name ending in "Z" the
  # suffix removal is a no-op and gzip would overwrite its own input file.
  for f in *.Z; do
    # An unmatched pattern is left in place literally; skip it.
    [[ -e "${f}" ]] || continue
    gzip -cd <"${f}" >"${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY -C$((cache_size))
)
| |
# The program paths are quoted so that a SPLASH2 location containing
# whitespace still yields a single argument.

# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t -l$((log2_cache_size/2)) -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test ${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test ${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN -n258

# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
# The uname output is quoted: an empty or multi-word processor string would
# otherwise make the test itself fail with a syntax error.
if [[ "$(uname -p)" == "i686" ]]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" -n$((2**20)) -r1024
| |
# Raytrace: regenerate the input files from their compressed copies, then
# benchmark ./RAYTRACE from the parent directory.  Runs in a subshell so that
# the changes of directory do not affect the rest of the script.
(
  # Must not continue if the directory is missing: the rm below would
  # otherwise delete files from whatever directory happens to be current.
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  rm -f -- *.env *.geo *.rl
  # Match only names ending in ".Z": for any other name ending in "Z" the
  # suffix removal is a no-op and gzip would overwrite its own input file.
  for f in *.Z; do
    # An unmatched pattern is left in place literally; skip it.
    [[ -e "${f}" ]] || continue
    gzip -cd <"${f}" >"${f%.Z}"
  done
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)
| |
# Both Water benchmarks run from their own directory, inside a subshell so
# that the change of directory does not affect the rest of the script.  If the
# directory is missing the benchmark is skipped instead of being started from
# the wrong place.
# test_input is not referenced anywhere else in this script; presumably the
# sourced measurement helpers consume it -- TODO confirm.

# Water-n2
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-SPATIAL
)
| |
| |
| |
| # Local variables: |
| # compile-command: "./run-splash2" |
| # End: |