Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2015 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "type_lookup_table.h" |
| 18 | |
Andreas Gampe | 8cf9cb3 | 2017-07-19 09:28:38 -0700 | [diff] [blame] | 19 | #include <cstring> |
| 20 | #include <memory> |
| 21 | |
Vladimir Marko | 9bdf108 | 2016-01-21 12:15:52 +0000 | [diff] [blame] | 22 | #include "base/bit_utils.h" |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 23 | #include "base/leb128.h" |
David Sehr | 9e734c7 | 2018-01-04 17:56:19 -0800 | [diff] [blame] | 24 | #include "dex/dex_file-inl.h" |
David Sehr | 0225f8e | 2018-01-31 08:52:24 +0000 | [diff] [blame] | 25 | #include "dex/utf-inl.h" |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 26 | |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 27 | namespace art { |
| 28 | |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 29 | static inline bool ModifiedUtf8StringEquals(const char* lhs, const char* rhs) { |
| 30 | return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(lhs, rhs) == 0; |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 31 | } |
| 32 | |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 33 | TypeLookupTable TypeLookupTable::Create(const DexFile& dex_file) { |
| 34 | uint32_t num_class_defs = dex_file.NumClassDefs(); |
| 35 | if (UNLIKELY(!SupportedSize(num_class_defs))) { |
| 36 | return TypeLookupTable(); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 37 | } |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 38 | size_t mask_bits = CalculateMaskBits(num_class_defs); |
| 39 | size_t size = 1u << mask_bits; |
| 40 | std::unique_ptr<Entry[]> owned_entries(new Entry[size]); |
| 41 | Entry* entries = owned_entries.get(); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 42 | |
Vladimir Marko | 9bdf108 | 2016-01-21 12:15:52 +0000 | [diff] [blame] | 43 | static_assert(alignof(Entry) == 4u, "Expecting Entry to be 4-byte aligned."); |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 44 | const uint32_t mask = Entry::GetMask(mask_bits); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 45 | std::vector<uint16_t> conflict_class_defs; |
| 46 | // The first stage. Put elements on their initial positions. If an initial position is already |
| 47 | // occupied then delay the insertion of the element to the second stage to reduce probing |
| 48 | // distance. |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 49 | for (size_t class_def_idx = 0; class_def_idx < dex_file.NumClassDefs(); ++class_def_idx) { |
| 50 | const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 51 | const DexFile::TypeId& type_id = dex_file.GetTypeId(class_def.class_idx_); |
| 52 | const DexFile::StringId& str_id = dex_file.GetStringId(type_id.descriptor_idx_); |
| 53 | const uint32_t hash = ComputeModifiedUtf8Hash(dex_file.GetStringData(str_id)); |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 54 | const uint32_t pos = hash & mask; |
| 55 | if (entries[pos].IsEmpty()) { |
| 56 | entries[pos] = Entry(str_id.string_data_off_, hash, class_def_idx, mask_bits); |
| 57 | DCHECK(entries[pos].IsLast(mask_bits)); |
| 58 | } else { |
| 59 | conflict_class_defs.push_back(class_def_idx); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 60 | } |
| 61 | } |
| 62 | // The second stage. The initial position of these elements had a collision. Put these elements |
| 63 | // into the nearest free cells and link them together by updating next_pos_delta. |
| 64 | for (uint16_t class_def_idx : conflict_class_defs) { |
| 65 | const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx); |
| 66 | const DexFile::TypeId& type_id = dex_file.GetTypeId(class_def.class_idx_); |
| 67 | const DexFile::StringId& str_id = dex_file.GetStringId(type_id.descriptor_idx_); |
| 68 | const uint32_t hash = ComputeModifiedUtf8Hash(dex_file.GetStringData(str_id)); |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 69 | // Find the last entry in the chain. |
| 70 | uint32_t tail_pos = hash & mask; |
| 71 | DCHECK(!entries[tail_pos].IsEmpty()); |
| 72 | while (!entries[tail_pos].IsLast(mask_bits)) { |
| 73 | tail_pos = (tail_pos + entries[tail_pos].GetNextPosDelta(mask_bits)) & mask; |
| 74 | DCHECK(!entries[tail_pos].IsEmpty()); |
| 75 | } |
| 76 | // Find an empty entry for insertion. |
| 77 | uint32_t insert_pos = tail_pos; |
| 78 | do { |
| 79 | insert_pos = (insert_pos + 1) & mask; |
| 80 | } while (!entries[insert_pos].IsEmpty()); |
| 81 | // Insert and chain the new entry. |
| 82 | entries[insert_pos] = Entry(str_id.string_data_off_, hash, class_def_idx, mask_bits); |
| 83 | entries[tail_pos].SetNextPosDelta((insert_pos - tail_pos) & mask, mask_bits); |
| 84 | DCHECK(entries[insert_pos].IsLast(mask_bits)); |
| 85 | DCHECK(!entries[tail_pos].IsLast(mask_bits)); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 86 | } |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 87 | |
| 88 | return TypeLookupTable(dex_file.DataBegin(), mask_bits, entries, std::move(owned_entries)); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 89 | } |
| 90 | |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 91 | TypeLookupTable TypeLookupTable::Open(const uint8_t* dex_data_pointer, |
| 92 | const uint8_t* raw_data, |
| 93 | uint32_t num_class_defs) { |
| 94 | DCHECK_ALIGNED(raw_data, alignof(Entry)); |
| 95 | const Entry* entries = reinterpret_cast<const Entry*>(raw_data); |
| 96 | size_t mask_bits = CalculateMaskBits(num_class_defs); |
| 97 | return TypeLookupTable(dex_data_pointer, mask_bits, entries, /* owned_entries */ nullptr); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 98 | } |
| 99 | |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 100 | uint32_t TypeLookupTable::Lookup(const char* str, uint32_t hash) const { |
| 101 | uint32_t mask = Entry::GetMask(mask_bits_); |
| 102 | uint32_t pos = hash & mask; |
| 103 | // Thanks to special insertion algorithm, the element at position pos can be empty |
| 104 | // or start of the right bucket, or anywhere in the wrong bucket's chain. |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 105 | const Entry* entry = &entries_[pos]; |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 106 | if (entry->IsEmpty()) { |
| 107 | return dex::kDexNoIndex; |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 108 | } |
Vladimir Marko | ea341d2 | 2018-05-11 10:33:37 +0100 | [diff] [blame^] | 109 | // Look for the partial hash match first, even if traversing the wrong bucket's chain. |
| 110 | uint32_t compared_hash_bits = (hash << mask_bits_) >> (2 * mask_bits_); |
| 111 | while (compared_hash_bits != entry->GetHashBits(mask_bits_)) { |
| 112 | if (entry->IsLast(mask_bits_)) { |
| 113 | return dex::kDexNoIndex; |
| 114 | } |
| 115 | pos = (pos + entry->GetNextPosDelta(mask_bits_)) & mask; |
| 116 | entry = &entries_[pos]; |
| 117 | DCHECK(!entry->IsEmpty()); |
| 118 | } |
| 119 | // Found partial hash match, compare strings (expecting this to succeed). |
| 120 | const char* first_checked_str = GetStringData(*entry); |
| 121 | if (ModifiedUtf8StringEquals(str, first_checked_str)) { |
| 122 | return entry->GetClassDefIdx(mask_bits_); |
| 123 | } |
| 124 | // If we're at the end of the chain, return before doing further expensive work. |
| 125 | if (entry->IsLast(mask_bits_)) { |
| 126 | return dex::kDexNoIndex; |
| 127 | } |
| 128 | // Check if we're traversing the right bucket. This is important if the compared |
| 129 | // partial hash has only a few bits (i.e. it can match frequently). |
| 130 | if (((ComputeModifiedUtf8Hash(first_checked_str) ^ hash) & mask) != 0u) { |
| 131 | return dex::kDexNoIndex; // Low hash bits mismatch. |
| 132 | } |
| 133 | // Continue looking for the string in the rest of the chain. |
| 134 | do { |
| 135 | pos = (pos + entry->GetNextPosDelta(mask_bits_)) & mask; |
| 136 | entry = &entries_[pos]; |
| 137 | DCHECK(!entry->IsEmpty()); |
| 138 | if (compared_hash_bits == entry->GetHashBits(mask_bits_) && |
| 139 | ModifiedUtf8StringEquals(str, GetStringData(*entry))) { |
| 140 | return entry->GetClassDefIdx(mask_bits_); |
| 141 | } |
| 142 | } while (!entry->IsLast(mask_bits_)); |
| 143 | // Not found. |
| 144 | return dex::kDexNoIndex; |
| 145 | } |
| 146 | |
| 147 | uint32_t TypeLookupTable::RawDataLength(uint32_t num_class_defs) { |
| 148 | return SupportedSize(num_class_defs) ? RoundUpToPowerOfTwo(num_class_defs) * sizeof(Entry) : 0u; |
| 149 | } |
| 150 | |
| 151 | uint32_t TypeLookupTable::CalculateMaskBits(uint32_t num_class_defs) { |
| 152 | return SupportedSize(num_class_defs) ? MinimumBitsToStore(num_class_defs - 1u) : 0u; |
| 153 | } |
| 154 | |
| 155 | bool TypeLookupTable::SupportedSize(uint32_t num_class_defs) { |
| 156 | return num_class_defs != 0u && num_class_defs <= std::numeric_limits<uint16_t>::max(); |
| 157 | } |
| 158 | |
| 159 | TypeLookupTable::TypeLookupTable(const uint8_t* dex_data_pointer, |
| 160 | uint32_t mask_bits, |
| 161 | const Entry* entries, |
| 162 | std::unique_ptr<Entry[]> owned_entries) |
| 163 | : dex_data_begin_(dex_data_pointer), |
| 164 | mask_bits_(mask_bits), |
| 165 | entries_(entries), |
| 166 | owned_entries_(std::move(owned_entries)) {} |
| 167 | |
| 168 | const char* TypeLookupTable::GetStringData(const Entry& entry) const { |
| 169 | DCHECK(dex_data_begin_ != nullptr); |
| 170 | const uint8_t* ptr = dex_data_begin_ + entry.GetStringOffset(); |
| 171 | // Skip string length. |
| 172 | DecodeUnsignedLeb128(&ptr); |
| 173 | return reinterpret_cast<const char*>(ptr); |
Artem Udovichenko | d9786b0 | 2015-10-14 16:36:55 +0300 | [diff] [blame] | 174 | } |
| 175 | |
| 176 | } // namespace art |