/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_SRC_INDIRECT_REFERENCE_TABLE_H_
#define ART_SRC_INDIRECT_REFERENCE_TABLE_H_

#include "logging.h"

#include <iosfwd>
#include <stdint.h>
#include <string>

namespace art {

class Object;

/*
 * Maintain a table of indirect references. Used for local/global JNI
 * references.
 *
 * The table contains object references that are part of the GC root set.
 * When an object is added we return an IndirectRef that is not a valid
 * pointer but can be used to find the original value in O(1) time.
 * Conversions to and from indirect refs are performed on JNI method calls
 * in and out of the VM, so they need to be very fast.
 *
 * To be efficient for JNI local variable storage, we need to provide
 * operations that allow us to operate on segments of the table, where
 * segments are pushed and popped as if on a stack. For example, deletion
 * of an entry should only succeed if it appears in the current segment,
 * and we want to be able to strip off the current segment quickly when
 * a method returns. Additions to the table must be made in the current
 * segment even if space is available in an earlier area.
 *
 * A new segment is created when we call into native code from interpreted
 * code, or when we handle the JNI PushLocalFrame function.
 *
 * The GC must be able to scan the entire table quickly.
 *
 * In summary, these must be very fast:
 * - adding or removing a segment
 * - adding references to a new segment
 * - converting an indirect reference back to an Object
 * These can be a little slower, but must still be pretty quick:
 * - adding references to a "mature" segment
 * - removing individual references
 * - scanning the entire table straight through
 *
 * If there's more than one segment, we don't guarantee that the table
 * will fill completely before we fail due to lack of space. We do ensure
 * that the current segment will pack tightly, which should satisfy JNI
 * requirements (e.g. EnsureLocalCapacity).
 *
 * To make everything fit nicely in 32-bit integers, the maximum size of
 * the table is capped at 64K.
 *
 * None of the table functions are synchronized.
 */

/*
 * Indirect reference definition. This must be interchangeable with JNI's
 * jobject, and it's convenient to let null be null, so we use void*.
 *
 * We need a 16-bit table index and a 2-bit reference type (global, local,
 * weak global). Real object pointers will have zeroes in the low 2 or 3
 * bits (4- or 8-byte alignment), so it's useful to put the ref type
 * in the low bits and reserve zero as an invalid value.
 *
 * The remaining 14 bits can be used to detect stale indirect references.
 * For example, if objects don't move, we can use a hash of the original
 * Object* to make sure the entry hasn't been re-used. (If the Object*
 * we find there doesn't match because of heap movement, we could do a
 * secondary check on the preserved hash value; this implies that creating
 * a global/local ref queries the hash value and forces it to be saved.)
 *
 * A more rigorous approach would be to put a serial number in the extra
 * bits, and keep a copy of the serial number in a parallel table. This is
 * easier when objects can move, but requires 2x the memory and additional
 * memory accesses on add/get. It will catch additional problems, e.g.:
 * create iref1 for obj, delete iref1, create iref2 for same obj, lookup
 * iref1. A pattern based on object bits will miss this.
 */
typedef void* IndirectRef;
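
/*
 * Worked example (illustrative, matching the encoding used by ToIndirectRef()
 * and ExtractIndex() below): with serial number 3, table index 5, and kind
 * kLocal (1), the packed value is
 *
 *   (3 << 20) | (5 << 2) | 1  ==  0x00300015
 *
 * GetIndirectRefKind() recovers kLocal from the low two bits, and
 * ExtractIndex() recovers 5 from bits 2..17.
 */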

/* magic failure values; must not be valid heap addresses */
static Object* const kInvalidIndirectRefObject = reinterpret_cast<Object*>(0xdead4321);
static Object* const kClearedJniWeakGlobal = reinterpret_cast<Object*>(0xdead1234);

/*
 * Indirect reference kind, used as the two low bits of IndirectRef.
 *
 * For convenience these match up with enum jobjectRefType from jni.h.
 */
enum IndirectRefKind {
  kInvalid = 0,
  kLocal = 1,
  kGlobal = 2,
  kWeakGlobal = 3
};
std::ostream& operator<<(std::ostream& os, IndirectRefKind rhs);

/*
 * Determine what kind of indirect reference this is.
 */
static inline IndirectRefKind GetIndirectRefKind(IndirectRef iref) {
  return static_cast<IndirectRefKind>(reinterpret_cast<uintptr_t>(iref) & 0x03);
}

/*
 * Extended debugging structure. We keep a parallel array of these, one
 * per slot in the table.
 */
static const size_t kIRTPrevCount = 4;
struct IndirectRefSlot {
  uint32_t serial;
  Object* previous[kIRTPrevCount];
};

/* use as initial value for "cookie", and when table has only one segment */
static const uint32_t IRT_FIRST_SEGMENT = 0;

/*
 * Table definition.
 *
 * For the global reference table, the expected common operations are
 * adding a new entry and removing a recently-added entry (usually the
 * most-recently-added entry). For JNI local references, the common
 * operations are adding a new entry and removing an entire table segment.
 *
 * If "alloc_entries_" is not equal to "max_entries_", the table may expand
 * when entries are added, which means the memory may move. If you want
 * to keep pointers into "table" rather than offsets, you must use a
 * fixed-size table.
 *
 * If we delete entries from the middle of the list, we will be left with
 * "holes". We track the number of holes so that, when adding new elements,
 * we can quickly decide to do a trivial append or go slot-hunting.
 *
 * When the top-most entry is removed, any holes immediately below it are
 * also removed. Thus, deletion of an entry may reduce "topIndex" by more
 * than one.
 *
 * To get the desired behavior for JNI locals, we need to know the bottom
 * and top of the current "segment". The top is managed internally, and
 * the bottom is passed in as a function argument (the VM keeps it in a
 * slot in the interpreted stack frame). When we call a native method or
 * push a local frame, the current top index gets pushed on, and serves
 * as the new bottom. When we pop a frame off, the value from the stack
 * becomes the new top index, and the value stored in the previous frame
 * becomes the new bottom.
 *
 * To avoid having to re-scan the table after a pop, we want to push the
 * number of holes in the table onto the stack. Because of our 64K-entry
 * cap, we can combine the two into a single unsigned 32-bit value.
 * Instead of a "bottom" argument we take a "cookie", which includes the
 * bottom index and the count of holes below the bottom.
 *
 * We need to minimize method call/return overhead. If we store the
 * "cookie" externally, on the interpreted call stack, the VM can handle
 * pushes and pops with a single 4-byte load and store. (We could also
 * store it internally in a public structure, but the local JNI refs are
 * logically tied to interpreted stack frames anyway.)
 *
 * Common alternative implementation: make IndirectRef a pointer to the
 * actual reference slot. Instead of getting a table and doing a lookup,
 * the lookup can be done instantly. Operations like determining the
 * type and deleting the reference are more expensive because the table
 * must be hunted for (i.e. you have to do a pointer comparison to see
 * which table it's in), you can't move the table when expanding it (so
 * realloc() is out), and tricks like serial number checking to detect
 * stale references aren't possible (though we may be able to get similar
 * benefits with other approaches).
 *
 * TODO: consider a "lastDeleteIndex" for quick hole-filling when an
 * add immediately follows a delete; must invalidate after segment pop
 * (which could increase the cost/complexity of method call/return).
 * Might be worth only using it for JNI globals.
 *
 * TODO: may want completely different add/remove algorithms for global
 * and local refs to improve performance. A large circular buffer might
 * reduce the amortized cost of adding global references.
 *
 * TODO: if we can guarantee that the underlying storage doesn't move,
 * e.g. by using oversized mmap regions to handle expanding tables, we may
 * be able to avoid having to synchronize lookups. Might make sense to
 * add a "synchronized lookup" call that takes the mutex as an argument,
 * and either locks or doesn't lock based on internal details.
 */
union IRTSegmentState {
  uint32_t all;
  struct {
    uint32_t topIndex:16;   /* index of first unused entry */
    uint32_t numHoles:16;   /* #of holes in entire table */
  } parts;
};
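
/*
 * Illustrative sketch (not part of the API): how a caller might pack and
 * restore the segment state as the single 32-bit "cookie" described above.
 * The field values are made up for the example.
 *
 *   IRTSegmentState state;
 *   state.parts.topIndex = 12;    // first unused entry
 *   state.parts.numHoles = 2;     // holes in the entire table
 *   uint32_t cookie = state.all;  // one 4-byte store saves both fields
 *   // ... push a segment, add and remove local refs ...
 *   state.all = cookie;           // one 4-byte load restores both fields
 */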

class IrtIterator {
 public:
  explicit IrtIterator(Object** table, size_t i, size_t capacity)
      : table_(table), i_(i), capacity_(capacity) {
    SkipNullsAndTombstones();
  }

  IrtIterator& operator++() {
    ++i_;
    SkipNullsAndTombstones();
    return *this;
  }

  Object** operator*() {
    return &table_[i_];
  }

  bool equals(const IrtIterator& rhs) const {
    return (i_ == rhs.i_ && table_ == rhs.table_);
  }

 private:
  void SkipNullsAndTombstones() {
    // We skip NULLs and tombstones. Clients don't want to see implementation details.
    while (i_ < capacity_ && (table_[i_] == NULL || table_[i_] == kClearedJniWeakGlobal)) {
      ++i_;
    }
  }

  Object** table_;
  size_t i_;
  size_t capacity_;
};

inline bool operator!=(const IrtIterator& lhs, const IrtIterator& rhs) {
  return !lhs.equals(rhs);
}
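
/*
 * Illustrative sketch (not part of the API): visiting the live entries of a
 * raw slot array with IrtIterator. "obj1" and "obj2" stand in for valid
 * Object pointers; NULL slots and cleared weak-global tombstones are skipped.
 *
 *   Object* slots[4] = { obj1, NULL, kClearedJniWeakGlobal, obj2 };
 *   IrtIterator it(slots, 0, 4);
 *   IrtIterator end(slots, 4, 4);
 *   while (it != end) {
 *     Object** entry = *it;  // yields &slots[0], then &slots[3]
 *     ++it;
 *   }
 *
 * IndirectReferenceTable::begin()/end() below construct these for you.
 */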

class IndirectReferenceTable {
 public:
  typedef IrtIterator iterator;

  IndirectReferenceTable(size_t initialCount, size_t maxCount, IndirectRefKind kind);

  ~IndirectReferenceTable();

  /*
   * Add a new entry. "obj" must be a valid non-NULL object reference
   * (though it's okay if it's not fully-formed, e.g. the result from
   * dvmMalloc doesn't have obj->clazz set).
   *
   * Returns NULL if the table is full (max entries reached, or alloc
   * failed during expansion).
   */
  IndirectRef Add(uint32_t cookie, Object* obj);

  /*
   * Given an IndirectRef in the table, return the Object it refers to.
   *
   * Returns kInvalidIndirectRefObject if iref is invalid.
   */
  Object* Get(IndirectRef iref) const {
    if (!GetChecked(iref)) {
      return kInvalidIndirectRefObject;
    }
    return table_[ExtractIndex(iref)];
  }

  // TODO: only used for workAroundAppJniBugs support.
  bool Contains(IndirectRef iref) const;

  /*
   * Remove an existing entry.
   *
   * If the entry is not between the current top index and the bottom index
   * specified by the cookie, we don't remove anything. This is the behavior
   * required by JNI's DeleteLocalRef function.
   *
   * Returns "false" if nothing was removed.
   */
  bool Remove(uint32_t cookie, IndirectRef iref);

  void Dump() const;

  /*
   * Return the #of entries in the entire table. This includes holes, and
   * so may be larger than the actual number of "live" entries.
   */
  size_t Capacity() const {
    return segmentState.parts.topIndex;
  }

  iterator begin() {
    return iterator(table_, 0, Capacity());
  }

  iterator end() {
    return iterator(table_, Capacity(), Capacity());
  }

 private:
  /*
   * Extract the table index from an indirect reference.
   */
  static uint32_t ExtractIndex(IndirectRef iref) {
    uintptr_t uref = reinterpret_cast<uintptr_t>(iref);
    return static_cast<uint32_t>((uref >> 2) & 0xffff);
  }

  /*
   * The object pointer itself is subject to relocation in some GC
   * implementations, so we shouldn't really be using it here.
   */
  IndirectRef ToIndirectRef(Object* obj, uint32_t tableIndex) const {
    DCHECK_LT(tableIndex, 65536U);
    uint32_t serialChunk = slot_data_[tableIndex].serial;
    uint32_t uref = (serialChunk << 20) | (tableIndex << 2) | kind_;
    return reinterpret_cast<IndirectRef>(uref);
  }

  /*
   * Update extended debug info when an entry is added.
   *
   * We advance the serial number, invalidating any outstanding references to
   * this slot.
   */
  void UpdateSlotAdd(Object* obj, int slot) {
    if (slot_data_ != NULL) {
      IndirectRefSlot* pSlot = &slot_data_[slot];
      pSlot->serial++;
      pSlot->previous[pSlot->serial % kIRTPrevCount] = obj;
    }
  }

  /* extra debugging checks */
  bool GetChecked(IndirectRef) const;
  bool CheckEntry(const char*, IndirectRef, int) const;

  /* semi-public - read/write by interpreter in native call handler */
  IRTSegmentState segmentState;

  /* bottom of the stack */
  Object** table_;
  /* bit mask, ORed into all irefs */
  IndirectRefKind kind_;
  /* extended debugging info */
  IndirectRefSlot* slot_data_;
  /* #of entries we have space for */
  size_t alloc_entries_;
  /* max #of entries allowed */
  size_t max_entries_;
};
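
/*
 * Illustrative sketch (not part of this header): typical use of a table that
 * still has only its first segment, so the cookie is IRT_FIRST_SEGMENT.
 * "initial_count", "max_count", and "obj" are made-up values for the example.
 *
 *   IndirectReferenceTable locals(initial_count, max_count, kLocal);
 *   uint32_t cookie = IRT_FIRST_SEGMENT;
 *   IndirectRef iref = locals.Add(cookie, obj);  // NULL if the table is full
 *   Object* resolved = locals.Get(iref);         // kInvalidIndirectRefObject if stale
 *   bool removed = locals.Remove(cookie, iref);  // false if not in the current segment
 *
 *   // The GC can walk the live entries; holes and cleared weak globals are
 *   // skipped by the iterator:
 *   for (IndirectReferenceTable::iterator it = locals.begin(); it != locals.end(); ++it) {
 *     Object** root = *it;
 *   }
 */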

}  // namespace art

#endif  // ART_SRC_INDIRECT_REFERENCE_TABLE_H_