/*
| 2 | * Copyright (C) 2008 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | /* |
| 17 | * Mutex-free cache for key1+key2=value. |
| 18 | */ |
| 19 | #ifndef _DALVIK_ATOMICCACHE |
| 20 | #define _DALVIK_ATOMICCACHE |
| 21 | |
| 22 | /* |
| 23 | * If set to "1", gather some stats on our caching success rate. |
| 24 | */ |
| 25 | #define CALC_CACHE_STATS 0 |
| 26 | |
| 27 | |
| 28 | /* |
| 29 | * One entry in the cache. We store two keys (e.g. the classes that are |
| 30 | * arguments to "instanceof") and one result (e.g. a boolean value). |
| 31 | * |
| 32 | * Must be exactly 16 bytes. |
| 33 | */ |
typedef struct AtomicCacheEntry {
    u4 key1;                /* first lookup key (e.g. class #1 for "instanceof") */
    u4 key2;                /* second lookup key */
    u4 value;               /* cached result computed from (key1, key2) */
    volatile u4 version;    /* seqlock-style counter; odd value means an
                               update is in progress (also used as lock flag) */
} AtomicCacheEntry;
| 40 | |
| 41 | /* |
| 42 | * One cache. |
| 43 | * |
| 44 | * Thought: we might be able to save a few cycles by storing the cache |
| 45 | * struct and "entries" separately, avoiding an indirection. (We already |
| 46 | * handle "numEntries" separately in ATOMIC_CACHE_LOOKUP.) |
| 47 | */ |
typedef struct AtomicCache {
    AtomicCacheEntry* entries;  /* array of entries, indexed by hash of the keys */
    int numEntries;             /* #of entries; must be a power of 2 because
                                   ATOMIC_CACHE_LOOKUP masks with numEntries-1 */

    void* entryAlloc;           /* raw allocation backing "entries" (kept so the
                                   cache can be freed; "entries" may be an
                                   aligned pointer into it -- confirm in .c) */

    /* cache stats; note we don't guarantee atomic increments for these,
       so the counts are approximate under contention */
    int trivial;                /* cache access not required */
    int fail;                   /* contention failure (writer raced a reader) */
    int hits;                   /* found entry in cache */
    int misses;                 /* entry was for other keys */
    int fills;                  /* entry was empty */
} AtomicCache;
| 61 | |
| 62 | /* |
| 63 | * Do a cache lookup. We need to be able to read and write entries |
| 64 | * atomically. There are a couple of ways to do this: |
| 65 | * (1) Have a global lock. A mutex is too heavy, so instead we would use |
| 66 | * an atomic flag. If the flag is set, we could sit and spin, but |
| 67 | * if we're a high-priority thread that may cause a lockup. Better |
| 68 | * to just ignore the cache and do the full computation. |
| 69 | * (2) Have a "version" that gets incremented atomically when a write |
| 70 | * begins and again when it completes. Compare the version before |
| 71 | * and after doing reads. So long as "version" is volatile the |
| 72 | * compiler will do the right thing, allowing us to skip atomic |
| 73 | * ops in the common read case. The table updates are expensive, |
| 74 | * requiring two volatile writes and (for correctness on |
| 75 | * multiprocessor systems) memory barriers. We also need some |
| 76 | * sort of lock to ensure that nobody else tries to start an |
| 77 | * update while we're in the middle of one. |
| 78 | * |
| 79 | * We expect a 95+% hit rate for the things we use this for, so #2 is |
| 80 | * much better than #1. |
| 81 | * |
| 82 | * _cache is an AtomicCache* |
 * _cacheSize is _cache->numEntries (can save a cycle avoiding the field load)
| 84 | * _key1, _key2 are the keys |
| 85 | * |
| 86 | * Define a function ATOMIC_CACHE_CALC that returns a 32-bit value. This |
| 87 | * will be invoked when we need to compute the value. |
| 88 | * |
| 89 | * Returns the value. |
| 90 | */ |
#if CALC_CACHE_STATS > 0
/* stats builds pass the cache pointer through to dvmUpdateAtomicCache */
# define CACHE_XARG(_value) ,_value
#else
# define CACHE_XARG(_value)
#endif
/*
 * Look up (_key1, _key2) in "_cache"; on a miss (or on contention with a
 * writer) recompute via ATOMIC_CACHE_CALC.  Evaluates to the u4 value.
 * Uses a GCC statement expression, so the whole macro yields "value".
 *
 * NOTE(review): the read side relies solely on the volatile "version"
 * reads for ordering; there is no explicit memory barrier between the
 * version reads and the key/value field reads.  On weakly-ordered SMP
 * hardware that may permit stale reads -- confirm against the target's
 * memory model (later AOSP revisions added explicit barriers here).
 */
#define ATOMIC_CACHE_LOOKUP(_cache, _cacheSize, _key1, _key2) ({ \
    AtomicCacheEntry* pEntry; \
    int hash; \
    u4 firstVersion; \
    u4 value; \
    \
    /* simple hash function; _cacheSize must be a power of 2 */ \
    hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1); \
    pEntry = (_cache)->entries + hash; \
    \
    /* volatile read; must happen before the key comparison below */ \
    firstVersion = pEntry->version; \
    \
    if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) { \
        /* \
         * The fields match.  Get the value, then read the version a \
         * second time to verify that we didn't catch a partial update. \
         * We're also hosed if "firstVersion" was odd, indicating that \
         * an update was in progress before we got here. \
         */ \
        value = pEntry->value; /* must grab before next check */ \
        \
        if ((firstVersion & 0x01) != 0 || firstVersion != pEntry->version) \
        { \
            /* \
             * We clashed with another thread.  Instead of sitting and \
             * spinning, which might not complete if we're a high priority \
             * thread, just do the regular computation. \
             */ \
            if (CALC_CACHE_STATS) \
                (_cache)->fail++; \
            value = (u4) ATOMIC_CACHE_CALC; \
        } else { \
            /* all good: version was even and unchanged across the reads */ \
            if (CALC_CACHE_STATS) \
                (_cache)->hits++; \
        } \
    } else { \
        /* \
         * Keys don't match (miss, or the slot is empty).  Compute the \
         * result and update the cache.  We really want the update to \
         * happen in a different method -- it makes the ARM frame setup \
         * for this method simpler, which gives us a ~10% speed boost. \
         */ \
        value = (u4) ATOMIC_CACHE_CALC; \
        dvmUpdateAtomicCache((u4) (_key1), (u4) (_key2), value, pEntry, \
            firstVersion CACHE_XARG(_cache) ); \
    } \
    value; \
})
| 146 | |
/*
 * Allocate a cache and the storage for its entries.
 *
 * "numEntries" must be a power of 2 (ATOMIC_CACHE_LOOKUP masks the hash
 * with numEntries-1).  Presumably returns NULL on allocation failure --
 * confirm against the implementation.
 */
AtomicCache* dvmAllocAtomicCache(int numEntries);

/*
 * Free a cache, including the entry storage it owns (entryAlloc).
 */
void dvmFreeAtomicCache(AtomicCache* cache);

/*
 * Update a cache entry with a freshly computed value.  Called from
 * ATOMIC_CACHE_LOOKUP on a miss; "firstVersion" is the version observed
 * before the computation, letting the writer detect concurrent updates.
 *
 * Making the last argument optional, instead of merely unused, saves us
 * a few percent in the ATOMIC_CACHE_LOOKUP time.
 */
void dvmUpdateAtomicCache(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry,
    u4 firstVersion
#if CALC_CACHE_STATS > 0
    , AtomicCache* pCache
#endif
    );

/*
 * Debugging: dump the stats counters (hits/misses/fills/etc.) for "pCache".
 */
void dvmDumpAtomicCacheStats(const AtomicCache* pCache);
| 174 | |
| 175 | #endif /*_DALVIK_ATOMICCACHE*/ |