Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 1 | // Copyright 2017 The Bazel Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package skylark |
| 6 | |
| 7 | import "fmt" |
| 8 | |
| 9 | // hashtable is used to represent Skylark dict and set values. |
| 10 | // It is a hash table whose key/value entries form a doubly-linked list |
| 11 | // in the order the entries were inserted. |
| 12 | type hashtable struct { |
| 13 | table []bucket // len is zero or a power of two |
| 14 | bucket0 [1]bucket // inline allocation for small maps. |
| 15 | len uint32 |
| 16 | itercount uint32 // number of active iterators (ignored if frozen) |
| 17 | head *entry // insertion order doubly-linked list; may be nil |
| 18 | tailLink **entry // address of nil link at end of list (perhaps &head) |
| 19 | frozen bool |
| 20 | } |
| 21 | |
| 22 | const bucketSize = 8 |
| 23 | |
| 24 | type bucket struct { |
| 25 | entries [bucketSize]entry |
| 26 | next *bucket // linked list of buckets |
| 27 | } |
| 28 | |
| 29 | type entry struct { |
| 30 | hash uint32 // nonzero => in use |
| 31 | key, value Value |
| 32 | next *entry // insertion order doubly-linked list; may be nil |
| 33 | prevLink **entry // address of link to this entry (perhaps &head) |
| 34 | } |
| 35 | |
| 36 | func (ht *hashtable) freeze() { |
| 37 | if !ht.frozen { |
| 38 | ht.frozen = true |
| 39 | for i := range ht.table { |
| 40 | for p := &ht.table[i]; p != nil; p = p.next { |
| 41 | for i := range p.entries { |
| 42 | e := &p.entries[i] |
| 43 | if e.hash != 0 { |
| 44 | e.key.Freeze() |
| 45 | e.value.Freeze() |
| 46 | } |
| 47 | } |
| 48 | } |
| 49 | } |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | func (ht *hashtable) insert(k, v Value) error { |
| 54 | if ht.frozen { |
| 55 | return fmt.Errorf("cannot insert into frozen hash table") |
| 56 | } |
| 57 | if ht.itercount > 0 { |
| 58 | return fmt.Errorf("cannot insert into hash table during iteration") |
| 59 | } |
| 60 | if ht.table == nil { |
| 61 | ht.table = ht.bucket0[:1] |
| 62 | ht.tailLink = &ht.head |
| 63 | } |
| 64 | h, err := k.Hash() |
| 65 | if err != nil { |
| 66 | return err |
| 67 | } |
| 68 | if h == 0 { |
| 69 | h = 1 // zero is reserved |
| 70 | } |
| 71 | |
| 72 | retry: |
| 73 | var insert *entry |
| 74 | |
| 75 | // Inspect each bucket in the bucket list. |
| 76 | p := &ht.table[h&(uint32(len(ht.table)-1))] |
| 77 | for { |
| 78 | for i := range p.entries { |
| 79 | e := &p.entries[i] |
| 80 | if e.hash != h { |
| 81 | if e.hash == 0 { |
| 82 | // Found empty entry; make a note. |
| 83 | insert = e |
| 84 | } |
| 85 | continue |
| 86 | } |
| 87 | if eq, err := Equal(k, e.key); err != nil { |
| 88 | return err // e.g. excessively recursive tuple |
| 89 | } else if !eq { |
| 90 | continue |
| 91 | } |
| 92 | // Key already present; update value. |
| 93 | e.value = v |
| 94 | return nil |
| 95 | } |
| 96 | if p.next == nil { |
| 97 | break |
| 98 | } |
| 99 | p = p.next |
| 100 | } |
| 101 | |
| 102 | // Key not found. p points to the last bucket. |
| 103 | |
| 104 | // Does the number of elements exceed the buckets' load factor? |
| 105 | if overloaded(int(ht.len), len(ht.table)) { |
| 106 | ht.grow() |
| 107 | goto retry |
| 108 | } |
| 109 | |
| 110 | if insert == nil { |
| 111 | // No space in existing buckets. Add a new one to the bucket list. |
| 112 | b := new(bucket) |
| 113 | p.next = b |
| 114 | insert = &b.entries[0] |
| 115 | } |
| 116 | |
| 117 | // Insert key/value pair. |
| 118 | insert.hash = h |
| 119 | insert.key = k |
| 120 | insert.value = v |
| 121 | |
| 122 | // Append entry to doubly-linked list. |
| 123 | insert.prevLink = ht.tailLink |
| 124 | *ht.tailLink = insert |
| 125 | ht.tailLink = &insert.next |
| 126 | |
| 127 | ht.len++ |
| 128 | |
| 129 | return nil |
| 130 | } |
| 131 | |
| 132 | func overloaded(elems, buckets int) bool { |
| 133 | const loadFactor = 6.5 // just a guess |
| 134 | return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets) |
| 135 | } |
| 136 | |
| 137 | func (ht *hashtable) grow() { |
| 138 | // Double the number of buckets and rehash. |
| 139 | // TODO(adonovan): opt: |
| 140 | // - avoid reentrant calls to ht.insert, and specialize it. |
| 141 | // e.g. we know the calls to Equals will return false since |
| 142 | // there are no duplicates among the old keys. |
| 143 | // - saving the entire hash in the bucket would avoid the need to |
| 144 | // recompute the hash. |
| 145 | // - save the old buckets on a free list. |
| 146 | ht.table = make([]bucket, len(ht.table)<<1) |
| 147 | oldhead := ht.head |
| 148 | ht.head = nil |
| 149 | ht.tailLink = &ht.head |
| 150 | ht.len = 0 |
| 151 | for e := oldhead; e != nil; e = e.next { |
| 152 | ht.insert(e.key, e.value) |
| 153 | } |
| 154 | ht.bucket0[0] = bucket{} // clear out unused initial bucket |
| 155 | } |
| 156 | |
| 157 | func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) { |
| 158 | h, err := k.Hash() |
| 159 | if err != nil { |
| 160 | return nil, false, err // unhashable |
| 161 | } |
| 162 | if h == 0 { |
| 163 | h = 1 // zero is reserved |
| 164 | } |
| 165 | if ht.table == nil { |
| 166 | return None, false, nil // empty |
| 167 | } |
| 168 | |
| 169 | // Inspect each bucket in the bucket list. |
| 170 | for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { |
| 171 | for i := range p.entries { |
| 172 | e := &p.entries[i] |
| 173 | if e.hash == h { |
| 174 | if eq, err := Equal(k, e.key); err != nil { |
| 175 | return nil, false, err // e.g. excessively recursive tuple |
| 176 | } else if eq { |
| 177 | return e.value, true, nil // found |
| 178 | } |
| 179 | } |
| 180 | } |
| 181 | } |
| 182 | return None, false, nil // not found |
| 183 | } |
| 184 | |
| 185 | // Items returns all the items in the map (as key/value pairs) in insertion order. |
| 186 | func (ht *hashtable) items() []Tuple { |
| 187 | items := make([]Tuple, 0, ht.len) |
| 188 | array := make([]Value, ht.len*2) // allocate a single backing array |
| 189 | for e := ht.head; e != nil; e = e.next { |
| 190 | pair := Tuple(array[:2:2]) |
| 191 | array = array[2:] |
| 192 | pair[0] = e.key |
| 193 | pair[1] = e.value |
| 194 | items = append(items, pair) |
| 195 | } |
| 196 | return items |
| 197 | } |
| 198 | |
| 199 | func (ht *hashtable) first() (Value, bool) { |
| 200 | if ht.head != nil { |
| 201 | return ht.head.key, true |
| 202 | } |
| 203 | return None, false |
| 204 | } |
| 205 | |
| 206 | func (ht *hashtable) keys() []Value { |
| 207 | keys := make([]Value, 0, ht.len) |
| 208 | for e := ht.head; e != nil; e = e.next { |
| 209 | keys = append(keys, e.key) |
| 210 | } |
| 211 | return keys |
| 212 | } |
| 213 | |
| 214 | func (ht *hashtable) delete(k Value) (v Value, found bool, err error) { |
| 215 | if ht.frozen { |
| 216 | return nil, false, fmt.Errorf("cannot delete from frozen hash table") |
| 217 | } |
| 218 | if ht.itercount > 0 { |
| 219 | return nil, false, fmt.Errorf("cannot delete from hash table during iteration") |
| 220 | } |
| 221 | if ht.table == nil { |
| 222 | return None, false, nil // empty |
| 223 | } |
| 224 | h, err := k.Hash() |
| 225 | if err != nil { |
| 226 | return nil, false, err // unhashable |
| 227 | } |
| 228 | if h == 0 { |
| 229 | h = 1 // zero is reserved |
| 230 | } |
| 231 | |
| 232 | // Inspect each bucket in the bucket list. |
| 233 | for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { |
| 234 | for i := range p.entries { |
| 235 | e := &p.entries[i] |
| 236 | if e.hash == h { |
| 237 | if eq, err := Equal(k, e.key); err != nil { |
| 238 | return nil, false, err |
| 239 | } else if eq { |
| 240 | // Remove e from doubly-linked list. |
| 241 | *e.prevLink = e.next |
| 242 | if e.next == nil { |
| 243 | ht.tailLink = e.prevLink // deletion of last entry |
| 244 | } else { |
| 245 | e.next.prevLink = e.prevLink |
| 246 | } |
| 247 | |
| 248 | v := e.value |
| 249 | *e = entry{} |
| 250 | ht.len-- |
| 251 | return v, true, nil // found |
| 252 | } |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | // TODO(adonovan): opt: remove completely empty bucket from bucket list. |
| 258 | |
| 259 | return None, false, nil // not found |
| 260 | } |
| 261 | |
| 262 | func (ht *hashtable) clear() error { |
| 263 | if ht.frozen { |
| 264 | return fmt.Errorf("cannot clear frozen hash table") |
| 265 | } |
| 266 | if ht.itercount > 0 { |
| 267 | return fmt.Errorf("cannot clear hash table during iteration") |
| 268 | } |
| 269 | if ht.table != nil { |
| 270 | for i := range ht.table { |
| 271 | ht.table[i] = bucket{} |
| 272 | } |
| 273 | } |
| 274 | ht.head = nil |
| 275 | ht.tailLink = &ht.head |
| 276 | ht.len = 0 |
| 277 | return nil |
| 278 | } |
| 279 | |
| 280 | // dump is provided as an aid to debugging. |
| 281 | func (ht *hashtable) dump() { |
| 282 | fmt.Printf("hashtable %p len=%d head=%p tailLink=%p", |
| 283 | ht, ht.len, ht.head, ht.tailLink) |
| 284 | if ht.tailLink != nil { |
| 285 | fmt.Printf(" *tailLink=%p", *ht.tailLink) |
| 286 | } |
| 287 | fmt.Println() |
| 288 | for j := range ht.table { |
| 289 | fmt.Printf("bucket chain %d\n", j) |
| 290 | for p := &ht.table[j]; p != nil; p = p.next { |
| 291 | fmt.Printf("bucket %p\n", p) |
| 292 | for i := range p.entries { |
| 293 | e := &p.entries[i] |
| 294 | fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n", |
| 295 | i, e, e.hash, e.key, e.value) |
| 296 | fmt.Printf("\t\tnext=%p &next=%p prev=%p", |
| 297 | e.next, &e.next, e.prevLink) |
| 298 | if e.prevLink != nil { |
| 299 | fmt.Printf(" *prev=%p", *e.prevLink) |
| 300 | } |
| 301 | fmt.Println() |
| 302 | } |
| 303 | } |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | func (ht *hashtable) iterate() *keyIterator { |
| 308 | if !ht.frozen { |
| 309 | ht.itercount++ |
| 310 | } |
| 311 | return &keyIterator{ht: ht, e: ht.head} |
| 312 | } |
| 313 | |
| 314 | type keyIterator struct { |
| 315 | ht *hashtable |
| 316 | e *entry |
| 317 | } |
| 318 | |
| 319 | func (it *keyIterator) Next(k *Value) bool { |
| 320 | if it.e != nil { |
| 321 | *k = it.e.key |
| 322 | it.e = it.e.next |
| 323 | return true |
| 324 | } |
| 325 | return false |
| 326 | } |
| 327 | |
| 328 | func (it *keyIterator) Done() { |
| 329 | if !it.ht.frozen { |
| 330 | it.ht.itercount-- |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | // hashString computes the FNV hash of s. |
| 335 | func hashString(s string) uint32 { |
| 336 | var h uint32 |
| 337 | for i := 0; i < len(s); i++ { |
| 338 | h ^= uint32(s[i]) |
| 339 | h *= 16777619 |
| 340 | } |
| 341 | return h |
| 342 | } |