Callgrind new feature: count global bus lock events "Ge"

To count global bus lock events, use "--collect-bus=yes".
On x86, this counts the number of executed instructions
with a lock prefix; on architectures with LL/SC
(load-linked/store-conditional), it counts the number of
executed SC (store-conditional) instructions.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11167 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/callgrind/sim.c b/callgrind/sim.c
index 61377d1..4282456 100644
--- a/callgrind/sim.c
+++ b/callgrind/sim.c
@@ -1782,6 +1782,9 @@
 	CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "D2mw", "I2dmw");
     }
 
+    if (CLG_(clo).collect_bus)
+	CLG_(register_event_group)(EG_BUS, "Ge");
+
     if (CLG_(clo).collect_alloc)
 	CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");
 
@@ -1793,6 +1796,7 @@
 
     // event set comprising all event groups, used for inclusive cost
     CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
+    CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
     CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);
 
     CLG_DEBUGIF(1) {
@@ -1819,6 +1823,7 @@
     CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
     CLG_(append_event)(CLG_(dumpmap), "AcCost2");
     CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
+    CLG_(append_event)(CLG_(dumpmap), "Ge");
     CLG_(append_event)(CLG_(dumpmap), "allocCount");
     CLG_(append_event)(CLG_(dumpmap), "allocSize");
     CLG_(append_event)(CLG_(dumpmap), "sysCount");
@@ -1832,7 +1837,8 @@
 {
     if (!CLG_(clo).simulate_cache)
 	cost[ fullOffset(EG_IR) ] += exe_count;
-    else
+
+    if (ii->eventset)
 	CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
 				  ii->eventset, bbcc->cost + ii->cost_offset);
 }