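Even more optimizations: added size-specific (1, 2, 4 and 8 byte) access-recording and conflict-check functions to drd_bitmap.c, with fast paths for naturally aligned addresses.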

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@7678 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/exp-drd/drd_bitmap.c b/exp-drd/drd_bitmap.c
index 9481830..5131ebc 100644
--- a/exp-drd/drd_bitmap.c
+++ b/exp-drd/drd_bitmap.c
@@ -77,7 +77,7 @@
  * Record an access of type access_type at addresses a .. a + size - 1 in
  * bitmap bm.
  */
-static inline
+static
 void bm_access_range(struct bitmap* const bm,
                      const Addr a1, const Addr a2,
                      const BmAccessTypeT access_type)
@@ -134,12 +134,122 @@
    }
 }
 
+static inline  /* Record a load of bytes a1 .. a2 - 1 in bm by setting bits in bm0_r. */
+void bm_access_aligned_load(struct bitmap* const bm,
+                            const Addr a1, const Addr a2)
+{
+   struct bitmap2* bm2;
+
+#if 0
+   /* Commented out the statements below because of performance reasons. */
+   tl_assert(bm);
+   tl_assert(a1 < a2);
+   tl_assert((a2 - a1) == 1 || (a2 - a1) == 2  /* access size is 1, 2, 4 or 8 bytes */
+             || (a2 - a1) == 4 || (a2 - a1) == 8);
+   tl_assert((a1 & (a2 - a1 - 1)) == 0);  /* a1 is a multiple of the access size */
+#endif
+
+   bm2 = bm2_lookup_or_insert(bm, a1 >> ADDR0_BITS);  /* one lookup only: assumes a1 .. a2 - 1 falls in a single bitmap2 (TODO confirm ADDR0_BITS >= 3) */
+   tl_assert(bm2);
+
+   bm0_set_range(bm2->bm1.bm0_r, a1 & ADDR0_MASK, (a2 - 1) & ADDR0_MASK);  /* passes the masked offsets of the first and the last byte */
+}
+
+static inline  /* Record a store to bytes a1 .. a2 - 1 in bm by setting bits in bm0_w. */
+void bm_access_aligned_store(struct bitmap* const bm,
+                             const Addr a1, const Addr a2)
+{
+   struct bitmap2* bm2;
+
+#if 0
+   /* Commented out the statements below because of performance reasons. */
+   tl_assert(bm);
+   tl_assert(a1 < a2);
+   tl_assert((a2 - a1) == 1 || (a2 - a1) == 2  /* access size is 1, 2, 4 or 8 bytes */
+             || (a2 - a1) == 4 || (a2 - a1) == 8);
+   tl_assert((a1 & (a2 - a1 - 1)) == 0);  /* a1 is a multiple of the access size */
+#endif
+
+   bm2 = bm2_lookup_or_insert(bm, a1 >> ADDR0_BITS);  /* one lookup only: assumes a1 .. a2 - 1 falls in a single bitmap2 (TODO confirm ADDR0_BITS >= 3) */
+   tl_assert(bm2);
+
+   bm0_set_range(bm2->bm1.bm0_w, a1 & ADDR0_MASK, (a2 - 1) & ADDR0_MASK);  /* passes the masked offsets of the first and the last byte */
+}
+
 void bm_access_range_load(struct bitmap* const bm,
                           const Addr a1, const Addr a2)
 {
    bm_access_range(bm, a1, a2, eLoad);
 }
 
+void bm_access_load_1(struct bitmap* const bm, const Addr a1)
+{  /* Record a 1-byte load at a1; no alignment test is needed for a single byte. */
+   bm_access_aligned_load(bm, a1, a1 + 1);
+}
+
+void bm_access_load_2(struct bitmap* const bm, const Addr a1)
+{  /* Record a 2-byte load at a1 in bm. */
+   if ((a1 & 1) == 0)  /* 2-byte aligned: use the aligned fast path */
+      bm_access_aligned_load(bm, a1, a1 + 2);
+   else  /* misaligned: fall back to the generic range code */
+      bm_access_range(bm, a1, a1 + 2, eLoad);
+}
+
+void bm_access_load_4(struct bitmap* const bm, const Addr a1)
+{  /* Record a 4-byte load at a1 in bm. */
+   if ((a1 & 3) == 0)  /* 4-byte aligned: use the aligned fast path */
+      bm_access_aligned_load(bm, a1, a1 + 4);
+   else  /* misaligned: fall back to the generic range code */
+      bm_access_range(bm, a1, a1 + 4, eLoad);
+}
+
+void bm_access_load_8(struct bitmap* const bm, const Addr a1)
+{  /* Record an 8-byte load at a1 in bm. */
+   if ((a1 & 7) == 0)  /* 8-byte aligned: one aligned fast-path call */
+      bm_access_aligned_load(bm, a1, a1 + 8);
+   else if ((a1 & 3) == 0)  /* only 4-byte aligned: record as two aligned 4-byte halves */
+   {
+      bm_access_aligned_load(bm, a1 + 0, a1 + 4);
+      bm_access_aligned_load(bm, a1 + 4, a1 + 8);  /* separate call, so the second half gets its own bitmap2 lookup */
+   }
+   else  /* otherwise: fall back to the generic range code */
+      bm_access_range(bm, a1, a1 + 8, eLoad);
+}
+
+void bm_access_store_1(struct bitmap* const bm, const Addr a1)
+{  /* Record a 1-byte store at a1; no alignment test is needed for a single byte. */
+   bm_access_aligned_store(bm, a1, a1 + 1);
+}
+
+void bm_access_store_2(struct bitmap* const bm, const Addr a1)
+{  /* Record a 2-byte store at a1 in bm. */
+   if ((a1 & 1) == 0)  /* 2-byte aligned: use the aligned fast path */
+      bm_access_aligned_store(bm, a1, a1 + 2);
+   else  /* misaligned: fall back to the generic range code */
+      bm_access_range(bm, a1, a1 + 2, eStore);
+}
+
+void bm_access_store_4(struct bitmap* const bm, const Addr a1)
+{  /* Record a 4-byte store at a1 in bm. */
+   if ((a1 & 3) == 0)  /* 4-byte aligned: use the aligned fast path */
+      bm_access_aligned_store(bm, a1, a1 + 4);
+   else  /* misaligned: fall back to the generic range code */
+      bm_access_range(bm, a1, a1 + 4, eStore);
+}
+
+void bm_access_store_8(struct bitmap* const bm, const Addr a1)
+{  /* Record an 8-byte store at a1 in bm. */
+   if ((a1 & 7) == 0)  /* 8-byte aligned: one aligned fast-path call */
+      bm_access_aligned_store(bm, a1, a1 + 8);
+   else if ((a1 & 3) == 0)  /* only 4-byte aligned: record as two aligned 4-byte halves */
+   {
+      bm_access_aligned_store(bm, a1 + 0, a1 + 4);
+      bm_access_aligned_store(bm, a1 + 4, a1 + 8);  /* separate call, so the second half gets its own bitmap2 lookup */
+   }
+   else  /* otherwise: fall back to the generic range code */
+      bm_access_range(bm, a1, a1 + 8, eStore);
+}
+
 void bm_access_range_store(struct bitmap* const bm,
                            const Addr a1, const Addr a2)
 {
@@ -269,7 +379,7 @@
    UWord mask;
 
 #if 0
-   // Commented out the assert statements below because of performance reasons.
+   /* Commented out the statements below because of performance reasons. */
    tl_assert(a1);
    tl_assert(a1 <= a2);
    tl_assert(UWORD_MSB(a1) == UWORD_MSB(a2)
@@ -353,7 +463,6 @@
    }
 }
 
-inline
 Bool bm_has_conflict_with(const struct bitmap* const bm,
                           const Addr a1, const Addr a2,
                           const BmAccessTypeT access_type)
@@ -420,12 +529,125 @@
    return False;
 }
 
+static inline  /* True iff bm_lookup() finds a bitmap2 for a1 and bm0_is_any_set() reports a hit in bm0_w for a1 .. a2 - 1. */
+Bool bm_aligned_load_has_conflict_with(const struct bitmap* const bm,
+                                       const Addr a1, const Addr a2)
+{
+   struct bitmap2* bm2;
+
+#if 0
+   /* Commented out the statements below because of performance reasons. */
+   tl_assert(bm);
+   tl_assert(a1 < a2);
+   tl_assert((a2 - a1) == 1 || (a2 - a1) == 2  /* access size is 1, 2, 4 or 8 bytes */
+             || (a2 - a1) == 4 || (a2 - a1) == 8);
+   tl_assert((a1 & (a2 - a1 - 1)) == 0);  /* a1 is a multiple of the access size */
+#endif
+
+   bm2 = bm_lookup(bm, a1);  /* lookup only, given the full address (no >> ADDR0_BITS here); may yield NULL, see test below */
+
+   if (bm2  /* a load is checked against the bm0_w bits only */
+       && bm0_is_any_set(bm2->bm1.bm0_w, a1 & ADDR0_MASK, (a2-1) & ADDR0_MASK))
+   {
+      return True;
+   }
+   return False;  /* no bitmap2 found for a1, or no hit in the range */
+}
+
+static inline  /* True iff bm_lookup() finds a bitmap2 for a1 and bm0_is_any_set() reports a hit in bm0_r or bm0_w for a1 .. a2 - 1. */
+Bool bm_aligned_store_has_conflict_with(const struct bitmap* const bm,
+                                        const Addr a1, const Addr a2)
+{
+   struct bitmap2* bm2;
+
+#if 0
+   /* Commented out the statements below because of performance reasons. */
+   tl_assert(bm);
+   tl_assert(a1 < a2);
+   tl_assert((a2 - a1) == 1 || (a2 - a1) == 2  /* access size is 1, 2, 4 or 8 bytes */
+             || (a2 - a1) == 4 || (a2 - a1) == 8);
+   tl_assert((a1 & (a2 - a1 - 1)) == 0);  /* a1 is a multiple of the access size */
+#endif
+
+   bm2 = bm_lookup(bm, a1);  /* lookup only, given the full address (no >> ADDR0_BITS here); may yield NULL, see test below */
+
+   if (bm2)
+   {
+      const struct bitmap1* const p1 = &bm2->bm1;
+
+      if (bm0_is_any_set(p1->bm0_r, a1 & ADDR0_MASK, (a2-1) & ADDR0_MASK)  /* a store is checked against both bm0_r and bm0_w */
+          | bm0_is_any_set(p1->bm0_w, a1 & ADDR0_MASK, (a2-1) & ADDR0_MASK))  /* bitwise |, not ||: both calls are always evaluated (presumably to avoid a branch -- confirm) */
+      {
+         return True;
+      }
+   }
+   return False;  /* no bitmap2 found for a1, or no hit in the range */
+}
+
 Bool bm_load_has_conflict_with(const struct bitmap* const bm,
                                const Addr a1, const Addr a2)
 {
    return bm_has_conflict_with(bm, a1, a2, eLoad);
 }
 
+Bool bm_load_1_has_conflict_with(const struct bitmap* const bm, const Addr a1)
+{  /* Conflict check for a 1-byte load at a1; no alignment test is needed for a single byte. */
+   return bm_aligned_load_has_conflict_with(bm, a1, a1 + 1);
+}
+
+Bool bm_load_2_has_conflict_with(const struct bitmap* const bm, const Addr a1)
+{  /* Conflict check for a 2-byte load at a1 against bm. */
+   if ((a1 & 1) == 0)  /* 2-byte aligned: use the aligned fast path */
+      return bm_aligned_load_has_conflict_with(bm, a1, a1 + 2);
+   else  /* misaligned: fall back to the generic range check */
+      return bm_has_conflict_with(bm, a1, a1 + 2, eLoad);
+}
+
+Bool bm_load_4_has_conflict_with(const struct bitmap* const bm, const Addr a1)
+{  /* Conflict check for a 4-byte load at a1 against bm. */
+   if ((a1 & 3) == 0)  /* 4-byte aligned: use the aligned fast path */
+      return bm_aligned_load_has_conflict_with(bm, a1, a1 + 4);
+   else  /* misaligned: fall back to the generic range check */
+      return bm_has_conflict_with(bm, a1, a1 + 4, eLoad);
+}
+
+Bool bm_load_8_has_conflict_with(const struct bitmap* const bm, const Addr a1)
+{  /* Conflict check for an 8-byte load at a1 against bm. */
+   if ((a1 & 7) == 0)  /* 8-byte aligned: use the aligned fast path */
+      return bm_aligned_load_has_conflict_with(bm, a1, a1 + 8);
+   else  /* anything else, including 4-byte-aligned addresses, uses the generic range check (no two-halves split as in bm_access_load_8) */
+      return bm_has_conflict_with(bm, a1, a1 + 8, eLoad);
+}
+
+Bool bm_store_1_has_conflict_with(const struct bitmap* const bm, const Addr a1)
+{  /* Conflict check for a 1-byte store at a1; no alignment test is needed for a single byte. */
+   return bm_aligned_store_has_conflict_with(bm, a1, a1 + 1);
+}
+
+Bool bm_store_2_has_conflict_with(const struct bitmap* const bm, const Addr a1)
+{  /* Conflict check for a 2-byte store at a1 against bm. */
+   if ((a1 & 1) == 0)  /* 2-byte aligned: use the aligned fast path */
+      return bm_aligned_store_has_conflict_with(bm, a1, a1 + 2);
+   else  /* misaligned: fall back to the generic range check */
+      return bm_has_conflict_with(bm, a1, a1 + 2, eStore);
+}
+
+Bool bm_store_4_has_conflict_with(const struct bitmap* const bm, const Addr a1)
+{  /* Conflict check for a 4-byte store at a1 against bm. */
+   if ((a1 & 3) == 0)  /* 4-byte aligned: use the aligned fast path */
+      return bm_aligned_store_has_conflict_with(bm, a1, a1 + 4);
+   else  /* misaligned: fall back to the generic range check */
+      return bm_has_conflict_with(bm, a1, a1 + 4, eStore);
+}
+
+Bool bm_store_8_has_conflict_with(const struct bitmap* const bm, const Addr a1)
+{  /* Conflict check for an 8-byte store at a1 against bm. */
+   if ((a1 & 7) == 0)  /* 8-byte aligned: use the aligned fast path */
+      return bm_aligned_store_has_conflict_with(bm, a1, a1 + 8);
+   else  /* anything else, including 4-byte-aligned addresses, uses the generic range check (no two-halves split as in bm_access_store_8) */
+      return bm_has_conflict_with(bm, a1, a1 + 8, eStore);
+}
+
 Bool bm_store_has_conflict_with(const struct bitmap* const bm,
                                 const Addr a1, const Addr a2)
 {