Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 1 | // Copyright 2017 The Bazel Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 5 | package starlark |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 6 | |
alandonovan | f94b021 | 2018-10-12 15:31:48 -0400 | [diff] [blame] | 7 | import ( |
| 8 | "fmt" |
| 9 | _ "unsafe" // for go:linkname hack |
| 10 | ) |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 11 | |
Alan Donovan | e3deafe | 2018-10-23 11:05:09 -0400 | [diff] [blame] | 12 | // hashtable is used to represent Starlark dict and set values. |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 13 | // It is a hash table whose key/value entries form a doubly-linked list |
| 14 | // in the order the entries were inserted. |
| 15 | type hashtable struct { |
| 16 | table []bucket // len is zero or a power of two |
| 17 | bucket0 [1]bucket // inline allocation for small maps. |
| 18 | len uint32 |
| 19 | itercount uint32 // number of active iterators (ignored if frozen) |
| 20 | head *entry // insertion order doubly-linked list; may be nil |
| 21 | tailLink **entry // address of nil link at end of list (perhaps &head) |
| 22 | frozen bool |
| 23 | } |
| 24 | |
| 25 | const bucketSize = 8 |
| 26 | |
| 27 | type bucket struct { |
| 28 | entries [bucketSize]entry |
| 29 | next *bucket // linked list of buckets |
| 30 | } |
| 31 | |
| 32 | type entry struct { |
| 33 | hash uint32 // nonzero => in use |
| 34 | key, value Value |
| 35 | next *entry // insertion order doubly-linked list; may be nil |
| 36 | prevLink **entry // address of link to this entry (perhaps &head) |
| 37 | } |
| 38 | |
alandonovan | f83458d | 2019-04-02 20:34:11 -0400 | [diff] [blame] | 39 | func (ht *hashtable) init(size int) { |
| 40 | if size < 0 { |
| 41 | panic("size < 0") |
| 42 | } |
| 43 | nb := 1 |
| 44 | for overloaded(size, nb) { |
| 45 | nb = nb << 1 |
| 46 | } |
| 47 | if nb < 2 { |
| 48 | ht.table = ht.bucket0[:1] |
| 49 | } else { |
| 50 | ht.table = make([]bucket, nb) |
| 51 | } |
| 52 | ht.tailLink = &ht.head |
| 53 | } |
| 54 | |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 55 | func (ht *hashtable) freeze() { |
| 56 | if !ht.frozen { |
| 57 | ht.frozen = true |
| 58 | for i := range ht.table { |
| 59 | for p := &ht.table[i]; p != nil; p = p.next { |
| 60 | for i := range p.entries { |
| 61 | e := &p.entries[i] |
| 62 | if e.hash != 0 { |
| 63 | e.key.Freeze() |
| 64 | e.value.Freeze() |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | func (ht *hashtable) insert(k, v Value) error { |
| 73 | if ht.frozen { |
| 74 | return fmt.Errorf("cannot insert into frozen hash table") |
| 75 | } |
| 76 | if ht.itercount > 0 { |
| 77 | return fmt.Errorf("cannot insert into hash table during iteration") |
| 78 | } |
| 79 | if ht.table == nil { |
alandonovan | f83458d | 2019-04-02 20:34:11 -0400 | [diff] [blame] | 80 | ht.init(1) |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 81 | } |
| 82 | h, err := k.Hash() |
| 83 | if err != nil { |
| 84 | return err |
| 85 | } |
| 86 | if h == 0 { |
| 87 | h = 1 // zero is reserved |
| 88 | } |
| 89 | |
| 90 | retry: |
| 91 | var insert *entry |
| 92 | |
| 93 | // Inspect each bucket in the bucket list. |
| 94 | p := &ht.table[h&(uint32(len(ht.table)-1))] |
| 95 | for { |
| 96 | for i := range p.entries { |
| 97 | e := &p.entries[i] |
| 98 | if e.hash != h { |
| 99 | if e.hash == 0 { |
| 100 | // Found empty entry; make a note. |
| 101 | insert = e |
| 102 | } |
| 103 | continue |
| 104 | } |
| 105 | if eq, err := Equal(k, e.key); err != nil { |
| 106 | return err // e.g. excessively recursive tuple |
| 107 | } else if !eq { |
| 108 | continue |
| 109 | } |
| 110 | // Key already present; update value. |
| 111 | e.value = v |
| 112 | return nil |
| 113 | } |
| 114 | if p.next == nil { |
| 115 | break |
| 116 | } |
| 117 | p = p.next |
| 118 | } |
| 119 | |
| 120 | // Key not found. p points to the last bucket. |
| 121 | |
| 122 | // Does the number of elements exceed the buckets' load factor? |
| 123 | if overloaded(int(ht.len), len(ht.table)) { |
| 124 | ht.grow() |
| 125 | goto retry |
| 126 | } |
| 127 | |
| 128 | if insert == nil { |
| 129 | // No space in existing buckets. Add a new one to the bucket list. |
| 130 | b := new(bucket) |
| 131 | p.next = b |
| 132 | insert = &b.entries[0] |
| 133 | } |
| 134 | |
| 135 | // Insert key/value pair. |
| 136 | insert.hash = h |
| 137 | insert.key = k |
| 138 | insert.value = v |
| 139 | |
| 140 | // Append entry to doubly-linked list. |
| 141 | insert.prevLink = ht.tailLink |
| 142 | *ht.tailLink = insert |
| 143 | ht.tailLink = &insert.next |
| 144 | |
| 145 | ht.len++ |
| 146 | |
| 147 | return nil |
| 148 | } |
| 149 | |
| 150 | func overloaded(elems, buckets int) bool { |
| 151 | const loadFactor = 6.5 // just a guess |
| 152 | return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets) |
| 153 | } |
| 154 | |
| 155 | func (ht *hashtable) grow() { |
| 156 | // Double the number of buckets and rehash. |
| 157 | // TODO(adonovan): opt: |
| 158 | // - avoid reentrant calls to ht.insert, and specialize it. |
| 159 | // e.g. we know the calls to Equals will return false since |
| 160 | // there are no duplicates among the old keys. |
| 161 | // - saving the entire hash in the bucket would avoid the need to |
| 162 | // recompute the hash. |
| 163 | // - save the old buckets on a free list. |
| 164 | ht.table = make([]bucket, len(ht.table)<<1) |
| 165 | oldhead := ht.head |
| 166 | ht.head = nil |
| 167 | ht.tailLink = &ht.head |
| 168 | ht.len = 0 |
| 169 | for e := oldhead; e != nil; e = e.next { |
| 170 | ht.insert(e.key, e.value) |
| 171 | } |
| 172 | ht.bucket0[0] = bucket{} // clear out unused initial bucket |
| 173 | } |
| 174 | |
| 175 | func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) { |
| 176 | h, err := k.Hash() |
| 177 | if err != nil { |
| 178 | return nil, false, err // unhashable |
| 179 | } |
| 180 | if h == 0 { |
| 181 | h = 1 // zero is reserved |
| 182 | } |
| 183 | if ht.table == nil { |
| 184 | return None, false, nil // empty |
| 185 | } |
| 186 | |
| 187 | // Inspect each bucket in the bucket list. |
| 188 | for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { |
| 189 | for i := range p.entries { |
| 190 | e := &p.entries[i] |
| 191 | if e.hash == h { |
| 192 | if eq, err := Equal(k, e.key); err != nil { |
| 193 | return nil, false, err // e.g. excessively recursive tuple |
| 194 | } else if eq { |
| 195 | return e.value, true, nil // found |
| 196 | } |
| 197 | } |
| 198 | } |
| 199 | } |
| 200 | return None, false, nil // not found |
| 201 | } |
| 202 | |
| 203 | // Items returns all the items in the map (as key/value pairs) in insertion order. |
| 204 | func (ht *hashtable) items() []Tuple { |
| 205 | items := make([]Tuple, 0, ht.len) |
| 206 | array := make([]Value, ht.len*2) // allocate a single backing array |
| 207 | for e := ht.head; e != nil; e = e.next { |
| 208 | pair := Tuple(array[:2:2]) |
| 209 | array = array[2:] |
| 210 | pair[0] = e.key |
| 211 | pair[1] = e.value |
| 212 | items = append(items, pair) |
| 213 | } |
| 214 | return items |
| 215 | } |
| 216 | |
| 217 | func (ht *hashtable) first() (Value, bool) { |
| 218 | if ht.head != nil { |
| 219 | return ht.head.key, true |
| 220 | } |
| 221 | return None, false |
| 222 | } |
| 223 | |
| 224 | func (ht *hashtable) keys() []Value { |
| 225 | keys := make([]Value, 0, ht.len) |
| 226 | for e := ht.head; e != nil; e = e.next { |
| 227 | keys = append(keys, e.key) |
| 228 | } |
| 229 | return keys |
| 230 | } |
| 231 | |
| 232 | func (ht *hashtable) delete(k Value) (v Value, found bool, err error) { |
| 233 | if ht.frozen { |
| 234 | return nil, false, fmt.Errorf("cannot delete from frozen hash table") |
| 235 | } |
| 236 | if ht.itercount > 0 { |
| 237 | return nil, false, fmt.Errorf("cannot delete from hash table during iteration") |
| 238 | } |
| 239 | if ht.table == nil { |
| 240 | return None, false, nil // empty |
| 241 | } |
| 242 | h, err := k.Hash() |
| 243 | if err != nil { |
| 244 | return nil, false, err // unhashable |
| 245 | } |
| 246 | if h == 0 { |
| 247 | h = 1 // zero is reserved |
| 248 | } |
| 249 | |
| 250 | // Inspect each bucket in the bucket list. |
| 251 | for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { |
| 252 | for i := range p.entries { |
| 253 | e := &p.entries[i] |
| 254 | if e.hash == h { |
| 255 | if eq, err := Equal(k, e.key); err != nil { |
| 256 | return nil, false, err |
| 257 | } else if eq { |
| 258 | // Remove e from doubly-linked list. |
| 259 | *e.prevLink = e.next |
| 260 | if e.next == nil { |
| 261 | ht.tailLink = e.prevLink // deletion of last entry |
| 262 | } else { |
| 263 | e.next.prevLink = e.prevLink |
| 264 | } |
| 265 | |
| 266 | v := e.value |
| 267 | *e = entry{} |
| 268 | ht.len-- |
| 269 | return v, true, nil // found |
| 270 | } |
| 271 | } |
| 272 | } |
| 273 | } |
| 274 | |
| 275 | // TODO(adonovan): opt: remove completely empty bucket from bucket list. |
| 276 | |
| 277 | return None, false, nil // not found |
| 278 | } |
| 279 | |
| 280 | func (ht *hashtable) clear() error { |
| 281 | if ht.frozen { |
| 282 | return fmt.Errorf("cannot clear frozen hash table") |
| 283 | } |
| 284 | if ht.itercount > 0 { |
| 285 | return fmt.Errorf("cannot clear hash table during iteration") |
| 286 | } |
| 287 | if ht.table != nil { |
| 288 | for i := range ht.table { |
| 289 | ht.table[i] = bucket{} |
| 290 | } |
| 291 | } |
| 292 | ht.head = nil |
| 293 | ht.tailLink = &ht.head |
| 294 | ht.len = 0 |
| 295 | return nil |
| 296 | } |
| 297 | |
| 298 | // dump is provided as an aid to debugging. |
| 299 | func (ht *hashtable) dump() { |
| 300 | fmt.Printf("hashtable %p len=%d head=%p tailLink=%p", |
| 301 | ht, ht.len, ht.head, ht.tailLink) |
| 302 | if ht.tailLink != nil { |
| 303 | fmt.Printf(" *tailLink=%p", *ht.tailLink) |
| 304 | } |
| 305 | fmt.Println() |
| 306 | for j := range ht.table { |
| 307 | fmt.Printf("bucket chain %d\n", j) |
| 308 | for p := &ht.table[j]; p != nil; p = p.next { |
| 309 | fmt.Printf("bucket %p\n", p) |
| 310 | for i := range p.entries { |
| 311 | e := &p.entries[i] |
| 312 | fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n", |
| 313 | i, e, e.hash, e.key, e.value) |
| 314 | fmt.Printf("\t\tnext=%p &next=%p prev=%p", |
| 315 | e.next, &e.next, e.prevLink) |
| 316 | if e.prevLink != nil { |
| 317 | fmt.Printf(" *prev=%p", *e.prevLink) |
| 318 | } |
| 319 | fmt.Println() |
| 320 | } |
| 321 | } |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | func (ht *hashtable) iterate() *keyIterator { |
| 326 | if !ht.frozen { |
| 327 | ht.itercount++ |
| 328 | } |
| 329 | return &keyIterator{ht: ht, e: ht.head} |
| 330 | } |
| 331 | |
| 332 | type keyIterator struct { |
| 333 | ht *hashtable |
| 334 | e *entry |
| 335 | } |
| 336 | |
| 337 | func (it *keyIterator) Next(k *Value) bool { |
| 338 | if it.e != nil { |
| 339 | *k = it.e.key |
| 340 | it.e = it.e.next |
| 341 | return true |
| 342 | } |
| 343 | return false |
| 344 | } |
| 345 | |
| 346 | func (it *keyIterator) Done() { |
| 347 | if !it.ht.frozen { |
| 348 | it.ht.itercount-- |
| 349 | } |
| 350 | } |
| 351 | |
alandonovan | f94b021 | 2018-10-12 15:31:48 -0400 | [diff] [blame] | 352 | // hashString computes the hash of s. |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 353 | func hashString(s string) uint32 { |
alandonovan | f94b021 | 2018-10-12 15:31:48 -0400 | [diff] [blame] | 354 | if len(s) >= 12 { |
| 355 | // Call the Go runtime's optimized hash implementation, |
| 356 | // which uses the AESENC instruction on amd64 machines. |
| 357 | return uint32(goStringHash(s, 0)) |
| 358 | } |
| 359 | return softHashString(s) |
| 360 | } |
| 361 | |
| 362 | //go:linkname goStringHash runtime.stringHash |
| 363 | func goStringHash(s string, seed uintptr) uintptr |
| 364 | |
alandonovan | ebe61bd | 2021-02-12 16:57:32 -0500 | [diff] [blame] | 365 | // softHashString computes the 32-bit FNV-1a hash of s in software. |
alandonovan | f94b021 | 2018-10-12 15:31:48 -0400 | [diff] [blame] | 366 | func softHashString(s string) uint32 { |
alandonovan | ebe61bd | 2021-02-12 16:57:32 -0500 | [diff] [blame] | 367 | var h uint32 = 2166136261 |
Alan Donovan | 312d1a5 | 2017-10-02 10:10:28 -0400 | [diff] [blame] | 368 | for i := 0; i < len(s); i++ { |
| 369 | h ^= uint32(s[i]) |
| 370 | h *= 16777619 |
| 371 | } |
| 372 | return h |
| 373 | } |