/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Mutex-free cache for key1+key2=value.
 */
#ifndef _DALVIK_ATOMICCACHE
#define _DALVIK_ATOMICCACHE

/*
 * If set to "1", gather some stats on our caching success rate.
 */
#define CALC_CACHE_STATS 0


/*
 * One entry in the cache. We store two keys (e.g. the classes that are
 * arguments to "instanceof") and one result (e.g. a boolean value).
 *
 * Must be exactly 16 bytes.
 */
typedef struct AtomicCacheEntry {
    u4          key1;
    u4          key2;
    u4          value;
    volatile u4 version;        /* version and lock flag */
} AtomicCacheEntry;
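/*
 * A minimal sketch of how the 16-byte requirement above could be enforced
 * at compile time. This check is not part of the original header; the
 * typedef name is purely illustrative (a negative array size fails the
 * build if the size is wrong).
 */
#if 0   /* illustrative only, not compiled */
typedef char AtomicCacheEntrySizeCheck[(sizeof(AtomicCacheEntry) == 16) ? 1 : -1];
#endif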

/*
 * One cache.
 *
 * Thought: we might be able to save a few cycles by storing the cache
 * struct and "entries" separately, avoiding an indirection. (We already
 * handle "numEntries" separately in ATOMIC_CACHE_LOOKUP.)
 */
typedef struct AtomicCache {
    AtomicCacheEntry* entries;  /* array of entries */
    int numEntries;             /* #of entries, must be power of 2 */

    void* entryAlloc;           /* memory allocated for entries */

    /* cache stats; note we don't guarantee atomic increments for these */
    int trivial;                /* cache access not required */
    int fail;                   /* contention failure */
    int hits;                   /* found entry in cache */
    int misses;                 /* entry was for other keys */
    int fills;                  /* entry was empty */
} AtomicCache;

/*
 * Do a cache lookup. We need to be able to read and write entries
 * atomically. There are a couple of ways to do this:
 *  (1) Have a global lock. A mutex is too heavy, so instead we would use
 *      an atomic flag. If the flag is set, we could sit and spin, but
 *      if we're a high-priority thread that may cause a lockup. Better
 *      to just ignore the cache and do the full computation.
 *  (2) Have a "version" that gets incremented atomically when a write
 *      begins and again when it completes. Compare the version before
 *      and after doing reads. So long as "version" is volatile the
 *      compiler will do the right thing, allowing us to skip atomic
 *      ops in the common read case. The table updates are expensive,
 *      requiring two volatile writes and (for correctness on
 *      multiprocessor systems) memory barriers. We also need some
 *      sort of lock to ensure that nobody else tries to start an
 *      update while we're in the middle of one.
 *
 * We expect a 95+% hit rate for the things we use this for, so #2 is
 * much better than #1.
 *
 * _cache is an AtomicCache*
 * _cacheSize is _cache->numEntries (can save a cycle avoiding the lookup)
 * _key1, _key2 are the keys
 *
 * Define a function ATOMIC_CACHE_CALC that returns a 32-bit value. This
 * will be invoked when we need to compute the value.
 *
 * Returns the value.
 */
#if CALC_CACHE_STATS > 0
# define CACHE_XARG(_value) ,_value
#else
# define CACHE_XARG(_value)
#endif
#define ATOMIC_CACHE_LOOKUP(_cache, _cacheSize, _key1, _key2) ({ \
    AtomicCacheEntry* pEntry; \
    int hash; \
    u4 firstVersion; \
    u4 value; \
    \
    /* simple hash function */ \
    hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1); \
    pEntry = (_cache)->entries + hash; \
    \
    /* volatile read */ \
    firstVersion = pEntry->version; \
    \
    if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) { \
        /* \
         * The fields match. Get the value, then read the version a \
         * second time to verify that we didn't catch a partial update. \
         * We're also hosed if "firstVersion" was odd, indicating that \
         * an update was in progress before we got here. \
         */ \
        value = pEntry->value;      /* must grab before next check */ \
        \
        if ((firstVersion & 0x01) != 0 || firstVersion != pEntry->version) \
        { \
            /* \
             * We clashed with another thread. Instead of sitting and \
             * spinning, which might not complete if we're a high priority \
             * thread, just do the regular computation. \
             */ \
            if (CALC_CACHE_STATS) \
                (_cache)->fail++; \
            value = (u4) ATOMIC_CACHE_CALC; \
        } else { \
            /* all good */ \
            if (CALC_CACHE_STATS) \
                (_cache)->hits++; \
        } \
    } else { \
        /* \
         * Compute the result and update the cache. We really want this \
         * to happen in a different method -- it makes the ARM frame \
         * setup for this method simpler, which gives us a ~10% speed \
         * boost. \
         */ \
        value = (u4) ATOMIC_CACHE_CALC; \
        dvmUpdateAtomicCache((u4) (_key1), (u4) (_key2), value, pEntry, \
            firstVersion CACHE_XARG(_cache)); \
    } \
    value; \
})
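
/*
 * Usage sketch (illustrative, not part of the original header): the caller
 * defines ATOMIC_CACHE_CALC before expanding the macro. "computeResult",
 * "EXAMPLE_CACHE_SIZE", and the surrounding function are hypothetical.
 */
#if 0   /* illustrative only, not compiled */
#define EXAMPLE_CACHE_SIZE  256     /* must equal cache->numEntries, power of 2 */
#define ATOMIC_CACHE_CALC   computeResult(key1, key2)
static u4 cachedComputeResult(AtomicCache* pCache, u4 key1, u4 key2)
{
    /* hit: returns the cached value; miss or contention: calls
     * ATOMIC_CACHE_CALC, and on a miss stores the result via
     * dvmUpdateAtomicCache */
    return ATOMIC_CACHE_LOOKUP(pCache, EXAMPLE_CACHE_SIZE, key1, key2);
}
#undef ATOMIC_CACHE_CALC
#endif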

/*
 * Allocate a cache.
 */
AtomicCache* dvmAllocAtomicCache(int numEntries);

/*
 * Free a cache.
 */
void dvmFreeAtomicCache(AtomicCache* cache);
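
/*
 * Lifecycle sketch (illustrative, not part of the original header). The
 * entry count is an arbitrary example and must be a power of 2.
 */
#if 0   /* illustrative only, not compiled */
static void exampleLifecycle(void)
{
    AtomicCache* pCache = dvmAllocAtomicCache(256);
    if (pCache != NULL) {
        /* ... perform lookups with ATOMIC_CACHE_LOOKUP ... */
        dvmFreeAtomicCache(pCache);
    }
}
#endif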

/*
 * Update a cache entry.
 *
 * Making the last argument optional, instead of merely unused, saves us
 * a few percent in the ATOMIC_CACHE_LOOKUP time.
 */
void dvmUpdateAtomicCache(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry,
    u4 firstVersion
#if CALC_CACHE_STATS > 0
    , AtomicCache* pCache
#endif
    );
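
/*
 * Sketch of the update protocol implied by the "version" comments above:
 * bump the version to an odd value to claim the entry, write the fields,
 * then bump it back to even. This is NOT the actual dvmUpdateAtomicCache
 * implementation; it assumes GCC's __sync builtins for the compare-and-swap
 * and memory barrier, which the real VM does not necessarily use.
 */
#if 0   /* illustrative only, not compiled */
static void updateSketch(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry,
    u4 firstVersion)
{
    /* bail if an update was already in progress (odd version), or if we
     * lose the race to move the version from even to odd */
    if ((firstVersion & 0x01) != 0 ||
        !__sync_bool_compare_and_swap(&pEntry->version, firstVersion,
            firstVersion + 1))
        return;

    pEntry->key1 = key1;
    pEntry->key2 = key2;
    pEntry->value = value;

    __sync_synchronize();       /* barrier before publishing (SMP) */
    pEntry->version++;          /* back to even: update complete */
}
#endif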

/*
 * Debugging.
 */
void dvmDumpAtomicCacheStats(const AtomicCache* pCache);

#endif /*_DALVIK_ATOMICCACHE*/