blob: 27990b5496dc326c55e807686fc1a7c0dc9d8b0d [file] [log] [blame]
Alan Donovan312d1a52017-10-02 10:10:28 -04001// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Alan Donovane3deafe2018-10-23 11:05:09 -04005package starlark
Alan Donovan312d1a52017-10-02 10:10:28 -04006
alandonovanf94b0212018-10-12 15:31:48 -04007import (
8 "fmt"
9 _ "unsafe" // for go:linkname hack
10)
Alan Donovan312d1a52017-10-02 10:10:28 -040011
Alan Donovane3deafe2018-10-23 11:05:09 -040012// hashtable is used to represent Starlark dict and set values.
Alan Donovan312d1a52017-10-02 10:10:28 -040013// It is a hash table whose key/value entries form a doubly-linked list
14// in the order the entries were inserted.
15type hashtable struct {
16 table []bucket // len is zero or a power of two
17 bucket0 [1]bucket // inline allocation for small maps.
18 len uint32
19 itercount uint32 // number of active iterators (ignored if frozen)
20 head *entry // insertion order doubly-linked list; may be nil
21 tailLink **entry // address of nil link at end of list (perhaps &head)
22 frozen bool
23}
24
25const bucketSize = 8
26
27type bucket struct {
28 entries [bucketSize]entry
29 next *bucket // linked list of buckets
30}
31
32type entry struct {
33 hash uint32 // nonzero => in use
34 key, value Value
35 next *entry // insertion order doubly-linked list; may be nil
36 prevLink **entry // address of link to this entry (perhaps &head)
37}
38
alandonovanf83458d2019-04-02 20:34:11 -040039func (ht *hashtable) init(size int) {
40 if size < 0 {
41 panic("size < 0")
42 }
43 nb := 1
44 for overloaded(size, nb) {
45 nb = nb << 1
46 }
47 if nb < 2 {
48 ht.table = ht.bucket0[:1]
49 } else {
50 ht.table = make([]bucket, nb)
51 }
52 ht.tailLink = &ht.head
53}
54
Alan Donovan312d1a52017-10-02 10:10:28 -040055func (ht *hashtable) freeze() {
56 if !ht.frozen {
57 ht.frozen = true
58 for i := range ht.table {
59 for p := &ht.table[i]; p != nil; p = p.next {
60 for i := range p.entries {
61 e := &p.entries[i]
62 if e.hash != 0 {
63 e.key.Freeze()
64 e.value.Freeze()
65 }
66 }
67 }
68 }
69 }
70}
71
72func (ht *hashtable) insert(k, v Value) error {
73 if ht.frozen {
74 return fmt.Errorf("cannot insert into frozen hash table")
75 }
76 if ht.itercount > 0 {
77 return fmt.Errorf("cannot insert into hash table during iteration")
78 }
79 if ht.table == nil {
alandonovanf83458d2019-04-02 20:34:11 -040080 ht.init(1)
Alan Donovan312d1a52017-10-02 10:10:28 -040081 }
82 h, err := k.Hash()
83 if err != nil {
84 return err
85 }
86 if h == 0 {
87 h = 1 // zero is reserved
88 }
89
90retry:
91 var insert *entry
92
93 // Inspect each bucket in the bucket list.
94 p := &ht.table[h&(uint32(len(ht.table)-1))]
95 for {
96 for i := range p.entries {
97 e := &p.entries[i]
98 if e.hash != h {
99 if e.hash == 0 {
100 // Found empty entry; make a note.
101 insert = e
102 }
103 continue
104 }
105 if eq, err := Equal(k, e.key); err != nil {
106 return err // e.g. excessively recursive tuple
107 } else if !eq {
108 continue
109 }
110 // Key already present; update value.
111 e.value = v
112 return nil
113 }
114 if p.next == nil {
115 break
116 }
117 p = p.next
118 }
119
120 // Key not found. p points to the last bucket.
121
122 // Does the number of elements exceed the buckets' load factor?
123 if overloaded(int(ht.len), len(ht.table)) {
124 ht.grow()
125 goto retry
126 }
127
128 if insert == nil {
129 // No space in existing buckets. Add a new one to the bucket list.
130 b := new(bucket)
131 p.next = b
132 insert = &b.entries[0]
133 }
134
135 // Insert key/value pair.
136 insert.hash = h
137 insert.key = k
138 insert.value = v
139
140 // Append entry to doubly-linked list.
141 insert.prevLink = ht.tailLink
142 *ht.tailLink = insert
143 ht.tailLink = &insert.next
144
145 ht.len++
146
147 return nil
148}
149
150func overloaded(elems, buckets int) bool {
151 const loadFactor = 6.5 // just a guess
152 return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets)
153}
154
155func (ht *hashtable) grow() {
156 // Double the number of buckets and rehash.
157 // TODO(adonovan): opt:
158 // - avoid reentrant calls to ht.insert, and specialize it.
159 // e.g. we know the calls to Equals will return false since
160 // there are no duplicates among the old keys.
161 // - saving the entire hash in the bucket would avoid the need to
162 // recompute the hash.
163 // - save the old buckets on a free list.
164 ht.table = make([]bucket, len(ht.table)<<1)
165 oldhead := ht.head
166 ht.head = nil
167 ht.tailLink = &ht.head
168 ht.len = 0
169 for e := oldhead; e != nil; e = e.next {
170 ht.insert(e.key, e.value)
171 }
172 ht.bucket0[0] = bucket{} // clear out unused initial bucket
173}
174
175func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
176 h, err := k.Hash()
177 if err != nil {
178 return nil, false, err // unhashable
179 }
180 if h == 0 {
181 h = 1 // zero is reserved
182 }
183 if ht.table == nil {
184 return None, false, nil // empty
185 }
186
187 // Inspect each bucket in the bucket list.
188 for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
189 for i := range p.entries {
190 e := &p.entries[i]
191 if e.hash == h {
192 if eq, err := Equal(k, e.key); err != nil {
193 return nil, false, err // e.g. excessively recursive tuple
194 } else if eq {
195 return e.value, true, nil // found
196 }
197 }
198 }
199 }
200 return None, false, nil // not found
201}
202
203// Items returns all the items in the map (as key/value pairs) in insertion order.
204func (ht *hashtable) items() []Tuple {
205 items := make([]Tuple, 0, ht.len)
206 array := make([]Value, ht.len*2) // allocate a single backing array
207 for e := ht.head; e != nil; e = e.next {
208 pair := Tuple(array[:2:2])
209 array = array[2:]
210 pair[0] = e.key
211 pair[1] = e.value
212 items = append(items, pair)
213 }
214 return items
215}
216
217func (ht *hashtable) first() (Value, bool) {
218 if ht.head != nil {
219 return ht.head.key, true
220 }
221 return None, false
222}
223
224func (ht *hashtable) keys() []Value {
225 keys := make([]Value, 0, ht.len)
226 for e := ht.head; e != nil; e = e.next {
227 keys = append(keys, e.key)
228 }
229 return keys
230}
231
232func (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
233 if ht.frozen {
234 return nil, false, fmt.Errorf("cannot delete from frozen hash table")
235 }
236 if ht.itercount > 0 {
237 return nil, false, fmt.Errorf("cannot delete from hash table during iteration")
238 }
239 if ht.table == nil {
240 return None, false, nil // empty
241 }
242 h, err := k.Hash()
243 if err != nil {
244 return nil, false, err // unhashable
245 }
246 if h == 0 {
247 h = 1 // zero is reserved
248 }
249
250 // Inspect each bucket in the bucket list.
251 for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
252 for i := range p.entries {
253 e := &p.entries[i]
254 if e.hash == h {
255 if eq, err := Equal(k, e.key); err != nil {
256 return nil, false, err
257 } else if eq {
258 // Remove e from doubly-linked list.
259 *e.prevLink = e.next
260 if e.next == nil {
261 ht.tailLink = e.prevLink // deletion of last entry
262 } else {
263 e.next.prevLink = e.prevLink
264 }
265
266 v := e.value
267 *e = entry{}
268 ht.len--
269 return v, true, nil // found
270 }
271 }
272 }
273 }
274
275 // TODO(adonovan): opt: remove completely empty bucket from bucket list.
276
277 return None, false, nil // not found
278}
279
280func (ht *hashtable) clear() error {
281 if ht.frozen {
282 return fmt.Errorf("cannot clear frozen hash table")
283 }
284 if ht.itercount > 0 {
285 return fmt.Errorf("cannot clear hash table during iteration")
286 }
287 if ht.table != nil {
288 for i := range ht.table {
289 ht.table[i] = bucket{}
290 }
291 }
292 ht.head = nil
293 ht.tailLink = &ht.head
294 ht.len = 0
295 return nil
296}
297
298// dump is provided as an aid to debugging.
299func (ht *hashtable) dump() {
300 fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
301 ht, ht.len, ht.head, ht.tailLink)
302 if ht.tailLink != nil {
303 fmt.Printf(" *tailLink=%p", *ht.tailLink)
304 }
305 fmt.Println()
306 for j := range ht.table {
307 fmt.Printf("bucket chain %d\n", j)
308 for p := &ht.table[j]; p != nil; p = p.next {
309 fmt.Printf("bucket %p\n", p)
310 for i := range p.entries {
311 e := &p.entries[i]
312 fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
313 i, e, e.hash, e.key, e.value)
314 fmt.Printf("\t\tnext=%p &next=%p prev=%p",
315 e.next, &e.next, e.prevLink)
316 if e.prevLink != nil {
317 fmt.Printf(" *prev=%p", *e.prevLink)
318 }
319 fmt.Println()
320 }
321 }
322 }
323}
324
325func (ht *hashtable) iterate() *keyIterator {
326 if !ht.frozen {
327 ht.itercount++
328 }
329 return &keyIterator{ht: ht, e: ht.head}
330}
331
332type keyIterator struct {
333 ht *hashtable
334 e *entry
335}
336
337func (it *keyIterator) Next(k *Value) bool {
338 if it.e != nil {
339 *k = it.e.key
340 it.e = it.e.next
341 return true
342 }
343 return false
344}
345
346func (it *keyIterator) Done() {
347 if !it.ht.frozen {
348 it.ht.itercount--
349 }
350}
351
alandonovanf94b0212018-10-12 15:31:48 -0400352// hashString computes the hash of s.
Alan Donovan312d1a52017-10-02 10:10:28 -0400353func hashString(s string) uint32 {
alandonovanf94b0212018-10-12 15:31:48 -0400354 if len(s) >= 12 {
355 // Call the Go runtime's optimized hash implementation,
356 // which uses the AESENC instruction on amd64 machines.
357 return uint32(goStringHash(s, 0))
358 }
359 return softHashString(s)
360}
361
362//go:linkname goStringHash runtime.stringHash
363func goStringHash(s string, seed uintptr) uintptr
364
alandonovanebe61bd2021-02-12 16:57:32 -0500365// softHashString computes the 32-bit FNV-1a hash of s in software.
alandonovanf94b0212018-10-12 15:31:48 -0400366func softHashString(s string) uint32 {
alandonovanebe61bd2021-02-12 16:57:32 -0500367 var h uint32 = 2166136261
Alan Donovan312d1a52017-10-02 10:10:28 -0400368 for i := 0; i < len(s); i++ {
369 h ^= uint32(s[i])
370 h *= 16777619
371 }
372 return h
373}