// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are described
// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
// to be visited during GC. The bits in each byte are consumed starting with
// the low bit: 1<<0, 1<<1, and so on.
//
// Heap bitmap
//
// The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used.
// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
// stored in bytes indexed backward in memory from start.
// That is, the byte at address start-1 holds the 2-bit entries for the four words
// start through start+3*ptrSize, the byte at start-2 holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described
// in its allocated object. In all words *except* the second word, the
// high bit indicates that the object is still being described. In
// these words, if a bit pair with a high bit 0 is encountered, the
// low bit can also be assumed to be 0, and the object description is
// over. This 00 is called the ``dead'' encoding: it signals that the
// rest of the words in the object are uninteresting to the garbage
// collector.
//
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
// bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out.
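//
// For example (an illustrative walk-through of this encoding, not bits from
// a real heap): consider a three-word object whose first two words are
// pointers, described by words w0..w3 of one bitmap byte. Its pointer bits
// are 1,1,0,0, giving the low nibble 0011, and its high bits are 1 for w0
// (object still being described), 1 for w1 (the checkmark, set in normal
// operation), and 0,0 for w2,w3 (the ``dead'' encoding), giving the high
// nibble 0011, so the byte as a whole is 0x33.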
//
// The code makes use of the fact that the zero value for a heap bitmap
// has no live pointer bit set and is (depending on position) not used,
// not checkmarked, and the dead encoding.
// These properties must be preserved when modifying the encoding.
//
// Checkmarks
//
// In a concurrent garbage collector, one worries about failing to mark
// a live object due to mutations without write barriers or bugs in the
// collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked.
// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
// for the second word of the object holds the checkmark bit.
// When not in checkmark mode, this bit is set to 1.
//
// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
// means every allocated object has two words, so there is room for the
// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
// just one word, so the second bit pair is not available for encoding the
// checkmark. However, because non-pointer allocations are combined
// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
// must be a pointer, so the type bit in the first word is not actually needed.
// It is still used in general, except that in checkmark mode the type bit is
// repurposed as the checkmark bit and then reinitialized (to 1) as the type
// bit when finished.
//

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	bitPointer = 1 << 0
	bitMarked  = 1 << 4 // TODO: Rename bitScan.

	heapBitsShift   = 1                     // shift offset between successive bitPointer or bitMarked entries
	heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte

	// all mark/pointer bits in a byte
	bitMarkedAll  = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
	bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)
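
// Worked out (illustrative, following the constants above): with
// heapBitsShift == 1, bitPointerAll is 1|2|4|8 == 0x0f (the low nibble)
// and bitMarkedAll is 0x10|0x20|0x40|0x80 == 0xf0 (the high nibble), so
// initSpan's bitPointerAll|bitMarkedAll fill below writes 0xff bytes.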

// addb returns the byte pointer p+n.
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
// subtractb is typically used when traversing the pointer tables referred to by hbits,
// which are arranged in reverse order.
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
// subtract1 is typically used when traversing the pointer tables referred to by hbits,
// which are arranged in reverse order.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// mapBits is called each time arena_used is extended.
// It maps any additional bitmap memory needed for the new arena memory.
// It must be called with the expected new value of arena_used,
// *before* h.arena_used has been updated.
// Waiting to update arena_used until after the memory has been mapped
// avoids faults when other threads try to access the bitmap immediately
// after observing the change to arena_used.
//
//go:nowritebarrier
func (h *mheap) mapBits(arena_used uintptr) {
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	const bitmapChunk = 8192

	n := (arena_used - mheap_.arena_start) / heapBitmapScale
	n = round(n, bitmapChunk)
	n = round(n, sys.PhysPageSize)
	if h.bitmap_mapped >= n {
		return
	}

	sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
	h.bitmap_mapped = n
}

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	bitp  *uint8
	shift uint32
}

// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.bytep&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
	bytep *uint8
	mask  uint8
	index uintptr
}

//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
	whichByte := allocBitIndex / 8
	whichBit := allocBitIndex % 8
	bytePtr := addb(s.allocBits, whichByte)
	return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex}
}

// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64-bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uintptr) {
	bytes := (*[8]uint8)(unsafe.Pointer(addb(s.allocBits, whichByte)))
	aCache := uint64(0)
	aCache |= uint64(bytes[0])
	aCache |= uint64(bytes[1]) << (1 * 8)
	aCache |= uint64(bytes[2]) << (2 * 8)
	aCache |= uint64(bytes[3]) << (3 * 8)
	aCache |= uint64(bytes[4]) << (4 * 8)
	aCache |= uint64(bytes[5]) << (5 * 8)
	aCache |= uint64(bytes[6]) << (6 * 8)
	aCache |= uint64(bytes[7]) << (7 * 8)
	s.allocCache = ^aCache
}
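
// To see why the negation helps (an illustrative example using 8 bits
// instead of 64 for brevity): if the loaded alloc bits are 11010111,
// where 1 means allocated, the negated cache is 00101000, and a single
// count-trailing-zeros instruction yields 3, the index of the first
// free object, with no loop over the bits.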

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.Ctz64(aCache)
	for bitIndex == 64 {
		// Nothing was available in the cached bits:
		// move the index to the start of the next cached bits,
		// grab the next 8 bytes, and try again.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.Ctz64(aCache)
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= (bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

func (s *mspan) isFree(index uintptr) bool {
	whichByte := index / 8
	whichBit := index % 8
	byteVal := *addb(s.allocBits, whichByte)
	return byteVal&uint8(1<<whichBit) == 0
}

func (s *mspan) objIndex(p uintptr) uintptr {
	byteOffset := p - s.base()
	if byteOffset == 0 {
		return 0
	}
	if s.baseMask != 0 {
		// s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift
		return byteOffset >> s.divShift
	}
	return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
}
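
// The multiply-shift in objIndex avoids a hardware divide. As an
// illustrative sketch (the constants below are hypothetical examples
// chosen for elemsize 48, not values taken from the runtime's generated
// size-class tables), divMagicSketch verifies by brute force that
// byteOffset/48 == (byteOffset>>4)*2731>>13 for all offsets below 64 KiB.
// It is not called by the runtime.
func divMagicSketch() bool {
	const divShift, divMul, divShift2 = 4, 2731, 13
	for off := uintptr(0); off < 1<<16; off++ {
		if off/48 != (off>>divShift)*divMul>>divShift2 {
			return false
		}
	}
	return true
}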

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	whichByte := objIndex / 8
	bitMask := uint8(1 << (objIndex % 8)) // low 3 bits hold the bit index
	bytePtr := addb(s.gcmarkBits, whichByte)
	return markBits{bytePtr, bitMask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{s.gcmarkBits, uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically. Some compilers
// are not able to inline the atomic.Or8 function, so if it appears as a hot
// spot consider inlining it manually.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// clearMarkedNonAtomic clears the marked bit non-atomically.
func (m markBits) clearMarkedNonAtomic() {
	*m.bytep ^= m.mask
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	if base < mheap_.arena_start || base >= mheap_.arena_used {
		throw("markBitsForSpan: base out of range")
	}
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}

// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) heapBits {
	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
	off := (addr - mheap_.arena_start) / sys.PtrSize
	return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)}
}
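
// For example (illustrative numbers): on a 64-bit system, for
// addr == mheap_.arena_start+100 the word offset is off == 12, so the
// 2-bit entry lives in the byte at mheap_.bitmap - 12/4 - 1 ==
// mheap_.bitmap - 4, at shift 12&3 == 0.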

// heapBitsForSpan returns the heapBits for the span base address base.
func heapBitsForSpan(base uintptr) (hbits heapBits) {
	if base < mheap_.arena_start || base >= mheap_.arena_used {
		throw("heapBitsForSpan: base out of range")
	}
	return heapBitsForAddr(base)
}

// heapBitsForObject returns the base address for the heap object
// containing the address p, the heapBits for base,
// the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0;
// otherwise it returns the base of the object.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) {
	arenaStart := mheap_.arena_start
	if p < arenaStart || p >= mheap_.arena_used {
		return
	}
	off := p - arenaStart
	idx := off >> _PageShift
	// p points into the heap, but possibly to the middle of an object.
	// Consult the span table to find the block beginning.
	s = h_spans[idx]
	if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
		if s == nil || s.state == _MSpanStack {
			// If s is nil, the virtual address has never been part of the heap.
			// This pointer may be to some mmap'd region, so we allow it.
			// Pointers into stacks are also ok, the runtime manages these explicitly.
			return
		}

		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			// Typically this indicates an incorrect use
			// of unsafe or cgo to store a bad pointer in
			// the Go heap. It may also indicate a runtime
			// bug.
			//
			// TODO(austin): We could be more aggressive
			// and detect pointers to unallocated objects
			// in allocated spans.
			printlock()
			print("runtime: pointer ", hex(p))
			if s.state != mSpanInUse {
				print(" to unallocated span")
			} else {
				print(" to unused region of span")
			}
			print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
			if refBase != 0 {
				print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
				gcDumpObject("object", refBase, refOff)
			}
			throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
		}
		return
	}
	// If this span holds objects of a power-of-2 size, just mask off the bits to
	// the interior of the object. Otherwise use the size to get the base.
	if s.baseMask != 0 {
		// optimize for power-of-2 sized objects.
		base = s.base()
		base = base + (p-base)&s.baseMask
		objIndex = (base - s.base()) >> s.divShift
		// base = p & s.baseMask is faster for small spans,
		// but doesn't work for large spans.
		// Overall, it's faster to use the more general computation above.
	} else {
		base = s.base()
		if p-base >= s.elemsize {
			// n := (p - base) / s.elemsize, using division by multiplication
			objIndex = uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
			base += objIndex * s.elemsize
		}
	}
	// Now that we know the actual base, compute heapBits to return to caller.
	hbits = heapBitsForAddr(base)
	return
}

// prefetch the bits.
func (h heapBits) prefetch() {
	prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
}

// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) next() heapBits {
	if h.shift < 3*heapBitsShift {
		return heapBits{h.bitp, h.shift + heapBitsShift}
	}
	return heapBits{subtract1(h.bitp), 0}
}

// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
func (h heapBits) forward(n uintptr) heapBits {
	n += uintptr(h.shift) / heapBitsShift
	return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
}
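
// For example (illustrative): if h has shift 3, the last word described
// by its bitmap byte, then h.forward(2) computes n == 5 and lands one
// byte back (n/4 == 1; bitmap bytes grow toward lower addresses) at
// shift 5%4 == 1.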

// bits returns the heap bits for the current word.
// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
func (h heapBits) bits() uint32 {
	// The (shift & 31) eliminates a test and conditional branch
	// from the generated code.
	return uint32(*h.bitp) >> (h.shift & 31)
}

// morePointers reports whether this word and all remaining words in this
// object may contain pointers; if it returns false, this word and all
// remaining words are scalars and the object description is over.
// h must not describe the second word of the object.
func (h heapBits) morePointers() bool {
	return h.bits()&bitMarked != 0
}

// isPointer reports whether the heap bits describe a pointer word.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
	return h.bits()&bitPointer != 0
}

// hasPointers reports whether the given object has any pointers.
// It must be told how large the object at h is for efficiency.
// h must describe the initial word of the object.
func (h heapBits) hasPointers(size uintptr) bool {
	if size == sys.PtrSize { // 1-word objects are always pointers
		return true
	}
	return (*h.bitp>>h.shift)&bitMarked != 0
}

// isCheckmarked reports whether the heap bits have the checkmarked bit set.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) isCheckmarked(size uintptr) bool {
	if size == sys.PtrSize {
		return (*h.bitp>>h.shift)&bitPointer != 0
	}
	// All multiword objects are 2-word aligned,
	// so we know that the initial word's 2-bit pair
	// and the second word's 2-bit pair are in the
	// same heap bitmap byte, *h.bitp.
	return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0
}

// setCheckmarked sets the checkmarked bit.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) setCheckmarked(size uintptr) {
	if size == sys.PtrSize {
		atomic.Or8(h.bitp, bitPointer<<h.shift)
		return
	}
	atomic.Or8(h.bitp, bitMarked<<(heapBitsShift+h.shift))
}

// heapBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the heap, data, or BSS bitmap to locate those pointer slots.
// This executes the write barriers necessary after a memmove.
// Both p and size must be pointer-aligned.
// The range [p, p+size) must lie within a single object.
//
// Callers should call heapBitsBulkBarrier immediately after
// calling memmove(p, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
//
// The heap bitmap is not maintained for allocations containing
// no pointers at all; any caller of heapBitsBulkBarrier must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.kind&kindNoPointers.
//
//go:nosplit
func heapBitsBulkBarrier(p, size uintptr) {
	if (p|size)&(sys.PtrSize-1) != 0 {
		throw("heapBitsBulkBarrier: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	if !inheap(p) {
		// If p is on the stack and in a higher frame than the
		// caller, we either need to execute write barriers on
		// it (which is what happens for normal stack writes
		// through pointers to higher frames), or we need to
		// force the mark termination stack scan to scan the
		// frame containing p.
		//
		// Executing write barriers on p is complicated in the
		// general case because we either need to unwind the
		// stack to get the stack map, or we need the type's
		// bitmap, which may be a GC program.
		//
		// Hence, we opt for forcing the re-scan to scan the
		// frame containing p, which we can do by simply
		// unwinding the stack barriers between the current SP
		// and p's frame.
		gp := getg().m.curg
		if gp != nil && gp.stack.lo <= p && p < gp.stack.hi {
			// Run on the system stack to give it more
			// stack space.
			systemstack(func() {
				gcUnwindBarriers(gp, p)
			})
			return
		}

		// If p is a global, use the data or BSS bitmaps to
		// execute write barriers.
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			if datap.data <= p && p < datap.edata {
				bulkBarrierBitmap(p, size, p-datap.data, datap.gcdatamask.bytedata)
				return
			}
		}
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			if datap.bss <= p && p < datap.ebss {
				bulkBarrierBitmap(p, size, p-datap.bss, datap.gcbssmask.bytedata)
				return
			}
		}
		return
	}

	h := heapBitsForAddr(p)
	for i := uintptr(0); i < size; i += sys.PtrSize {
		if h.isPointer() {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
		h = h.next()
	}
}

// bulkBarrierBitmap executes write barriers for [p, p+size) using a
// 1-bit pointer bitmap. p is assumed to start maskOffset bytes into
// the data covered by the bitmap in bits.
//
// This is used by heapBitsBulkBarrier for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(p, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / sys.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	for i := uintptr(0); i < size; i += sys.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * sys.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
		mask <<= 1
	}
}
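
// For example (illustrative): with maskOffset == 72 on a 64-bit system,
// word == 9, so the walk starts at the second bitmap byte (9/8 == 1)
// with mask 1<<1. When mask overflows to 0 past bit 7, the loop loads
// the next byte; an all-zero byte skips eight pointer-free words at
// once (i advances by 7*sys.PtrSize plus the loop's own increment).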

// typeBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the type bitmap to locate those pointer slots.
// The type typ must correspond exactly to [p, p+size).
// This executes the write barriers necessary after a copy.
// Both p and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right after memmove,
// and the GC must not complete between those two.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.size != size {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.kind&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.needed {
		return
	}
	ptrmask := typ.gcdata
	var bits uint32
	for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
		if i&(sys.PtrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
	}
}

// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.

// initSpan initializes the heap bitmap for a span.
// It clears all checkmark bits.
// If this is a span of pointer-sized objects, it initializes all
// words to pointer/scan.
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(s *mspan) {
	size, n, total := s.layout()

	// Init the markbit structures
	s.freeindex = 0
	s.allocCache = ^uint64(0) // all 1s indicating all free.
	s.nelems = n
	s.allocBits = nil
	s.gcmarkBits = nil
	s.gcmarkBits = newMarkBits(s.nelems)
	s.allocBits = newAllocBits(s.nelems)

	// Clear bits corresponding to objects.
	if total%heapBitmapScale != 0 {
		throw("initSpan: unaligned length")
	}
	nbyte := total / heapBitmapScale
	if sys.PtrSize == 8 && size == sys.PtrSize {
		end := h.bitp
		bitp := subtractb(end, nbyte-1)
		for {
			*bitp = bitPointerAll | bitMarkedAll
			if bitp == end {
				break
			}
			bitp = add1(bitp)
		}
		return
	}
	memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}

// initCheckmarkSpan initializes a span for being checkmarked.
// It clears the checkmark bits, which are set to 1 in normal operation.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
	// The sys.PtrSize == 8 test is a compile-time constant, false on 32-bit
	// systems, and eliminates this code entirely there.
	if sys.PtrSize == 8 && size == sys.PtrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must clear type bit (checkmark bit) of every word.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			*bitp &^= bitPointerAll
			bitp = subtract1(bitp)
		}
		return
	}
	for i := uintptr(0); i < n; i++ {
		*h.bitp &^= bitMarked << (heapBitsShift + h.shift)
		h = h.forward(size / sys.PtrSize)
	}
}

// clearCheckmarkSpan undoes all the checkmarking in a span.
// The actual checkmark bits are ignored, so the only work to do
// is to fix the pointer bits. (Pointer bits are ignored by scanobject
// but consulted by typedmemmove.)
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
	// The sys.PtrSize == 8 test is a compile-time constant, false on 32-bit
	// systems, and eliminates this code entirely there.
	if sys.PtrSize == 8 && size == sys.PtrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must set type bit (checkmark bit) of every word back to 1.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			*bitp |= bitPointerAll
			bitp = subtract1(bitp)
		}
	}
}

// oneBitCount is indexed by byte and produces the
// number of 1 bits in that byte. For example 128 has 1 bit set
// and oneBitCount[128] holds 1.
var oneBitCount = [256]uint8{
	0, 1, 1, 2, 1, 2, 2, 3,
	1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4,
	2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4,
	2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4,
	2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6,
	4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4,
	2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6,
	4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6,
	4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6,
	4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7,
	5, 6, 6, 7, 6, 7, 7, 8}

// countFree runs through the mark bits in a span and counts the number of free objects
// in the span.
// TODO(rlh): Use popcount intrinsic.
func (s *mspan) countFree() int {
	count := 0
	maxIndex := s.nelems / 8
	for i := uintptr(0); i < maxIndex; i++ {
		mrkBits := *addb(s.gcmarkBits, i)
		count += int(oneBitCount[mrkBits])
	}
	if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
		mrkBits := *addb(s.gcmarkBits, maxIndex)
		mask := uint8((1 << bitsInLastByte) - 1)
		bits := mrkBits & mask
		count += int(oneBitCount[bits])
	}
	return int(s.nelems) - count
}
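
// popCountSketch is an illustrative cross-check (not used by the
// runtime): it computes the same population count as the oneBitCount
// table using the classic clear-lowest-set-bit loop, so
// int(oneBitCount[x]) == popCountSketch(x) for every byte value x.
func popCountSketch(x uint8) int {
	n := 0
	for ; x != 0; x &= x - 1 { // x &= x-1 clears the lowest set bit of x
		n++
	}
	return n
}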
840
841// heapBitsSetType records that the new allocation [x, x+size)
842// holds in [x, x+dataSize) one or more values of type typ.
843// (The number of values is given by dataSize / typ.size.)
844// If dataSize < size, the fragment [x+dataSize, x+size) is
845// recorded as non-pointer data.
846// It is known that the type has pointers somewhere;
847// malloc does not call heapBitsSetType when there are no pointers,
848// because all free objects are marked as noscan during
849// heapBitsSweepSpan.
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700850//
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700851// There can only be one allocation from a given span active at a time,
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700852// and the bitmap for a span always falls on byte boundaries,
853// so there are no write-write races for access to the heap bitmap.
854// Hence, heapBitsSetType can access the bitmap without atomics.
855//
856// There can be read-write races between heapBitsSetType and things
857// that read the heap bitmap like scanobject. However, since
858// heapBitsSetType is only used for objects that have not yet been
859// made reachable, readers will ignore bits being modified by this
860// function. This does mean this function cannot transiently modify
861// bits that belong to neighboring objects. Also, on weakly-ordered
862// machines, callers must execute a store/store (publication) barrier
863// between calling this function and making the object reachable.
864//
865// TODO: This still has atomic accesses left over from when it could
866// race with GC accessing mark bits in the bitmap. Remove these.
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700867func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
868 const doubleCheck = false // slow but helpful; enable to test modifications to this code
869
870 // dataSize is always size rounded up to the next malloc size class,
871 // except in the case of allocating a defer block, in which case
872 // size is sizeof(_defer{}) (at least 6 words) and dataSize may be
873 // arbitrarily larger.
874 //
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700875 // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700876 // assume that dataSize == size without checking it explicitly.
877
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700878 if sys.PtrSize == 8 && size == sys.PtrSize {
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700879 // It's one word and it has pointers, it must be a pointer.
880 // In general we'd need an atomic update here if the
881 // concurrent GC were marking objects in this span,
882 // because each bitmap byte describes 3 other objects
883 // in addition to the one being allocated.
884 // However, since all allocated one-word objects are pointers
885 // (non-pointers are aggregated into tinySize allocations),
886 // initSpan sets the pointer bits for us. Nothing to do here.
887 if doubleCheck {
888 h := heapBitsForAddr(x)
889 if !h.isPointer() {
890 throw("heapBitsSetType: pointer bit missing")
891 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700892 if !h.morePointers() {
893 throw("heapBitsSetType: scan bit missing")
894 }
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700895 }
896 return
897 }
898
899 h := heapBitsForAddr(x)
900 ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
901
902 // Heap bitmap bits for 2-word object are only 4 bits,
903 // so also shared with objects next to it; use atomic updates.
904 // This is called out as a special case primarily for 32-bit systems,
905 // so that on 32-bit systems the code below can assume all objects
906 // are 4-word aligned (because they're all 16-byte aligned).
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700907 if size == 2*sys.PtrSize {
908 if typ.size == sys.PtrSize {
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700909 // We're allocating a block big enough to hold two pointers.
910 // On 64-bit, that means the actual object must be two pointers,
911 // or else we'd have used the one-pointer-sized block.
912 // On 32-bit, however, this is the 8-byte block, the smallest one.
913 // So it could be that we're allocating one pointer and this was
914 // just the smallest block available. Distinguish by checking dataSize.
915 // (In general the number of instances of typ being allocated is
916 // dataSize/typ.size.)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700917 if sys.PtrSize == 4 && dataSize == sys.PtrSize {
918 // 1 pointer object. On 32-bit machines clear the bit for the
919 // unused second word.
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700920 if gcphase == _GCoff {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700921 *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
922 *h.bitp |= (bitPointer | bitMarked) << h.shift
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700923 } else {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700924 atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
925 atomic.Or8(h.bitp, (bitPointer|bitMarked)<<h.shift)
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700926 }
927 } else {
928 // 2-element slice of pointer.
929 if gcphase == _GCoff {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700930 *h.bitp |= (bitPointer | bitMarked | bitPointer<<heapBitsShift) << h.shift
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700931 } else {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700932 atomic.Or8(h.bitp, (bitPointer|bitMarked|bitPointer<<heapBitsShift)<<h.shift)
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700933 }
934 }
935 return
936 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700937 // Otherwise typ.size must be 2*sys.PtrSize,
938 // and typ.kind&kindGCProg == 0.
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700939 if doubleCheck {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700940 if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700941 print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
942 throw("heapBitsSetType")
943 }
944 }
945 b := uint32(*ptrmask)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700946 hb := (b & 3) | bitMarked
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700947 if gcphase == _GCoff {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700948 // bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
949 // 110011 is shifted h.shift and complemented.
950 // This clears out the bits that are about to be
951 // ored into *h.hbitp in the next instructions.
952 *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700953 *h.bitp |= uint8(hb << h.shift)
954 } else {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700955 // TODO:(rlh) since the GC is not concurrently setting the
956 // mark bits in the heap map anymore and malloc
957 // owns the span we are allocating in why does this have
958 // to be atomic?
959
960 atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
961 atomic.Or8(h.bitp, uint8(hb<<h.shift))
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700962 }
963 return
964 }
965
966 // Copy from 1-bit ptrmask into 2-bit bitmap.
967 // The basic approach is to use a single uintptr as a bit buffer,
968 // alternating between reloading the buffer and writing bitmap bytes.
969 // In general, one load can supply two bitmap byte writes.
970 // This is a lot of lines of code, but it compiles into relatively few
971 // machine instructions.
972
973 var (
974 // Ptrmask input.
975 p *byte // last ptrmask byte read
976 b uintptr // ptrmask bits already loaded
977 nb uintptr // number of bits in b at next read
978 endp *byte // final ptrmask byte to read (then repeat)
979 endnb uintptr // number of valid bits in *endp
980 pbits uintptr // alternate source of bits
981
982 // Heap bitmap output.
983 w uintptr // words processed
984 nw uintptr // number of words to process
985 hbitp *byte // next heap bitmap byte to write
986 hb uintptr // bits being prepared for *hbitp
987 )
988
989 hbitp = h.bitp
990
991 // Handle GC program. Delayed until this part of the code
992 // so that we can use the same double-checking mechanism
993 // as the 1-bit case. Nothing above could have encountered
994 // GC programs: the cases were all too small.
995 if typ.kind&kindGCProg != 0 {
996 heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
997 if doubleCheck {
998 // Double-check the heap bits written by GC program
999 // by running the GC program to create a 1-bit pointer mask
1000 // and then jumping to the double-check code below.
1001 // This doesn't catch bugs shared between the 1-bit and 4-bit
1002 // GC program execution, but it does catch mistakes specific
1003 // to just one of those and bugs in heapBitsSetTypeGCProg's
1004 // implementation of arrays.
1005 lock(&debugPtrmask.lock)
1006 if debugPtrmask.data == nil {
1007 debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
1008 }
1009 ptrmask = debugPtrmask.data
1010 runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
1011 goto Phase4
1012 }
1013 return
1014 }
1015
1016 // Note about sizes:
1017 //
1018 // typ.size is the number of words in the object,
1019 // and typ.ptrdata is the number of words in the prefix
1020 // of the object that contains pointers. That is, the final
1021 // typ.size - typ.ptrdata words contain no pointers.
1022 // This allows optimization of a common pattern where
1023 // an object has a small header followed by a large scalar
1024 // buffer. If we know the pointers are over, we don't have
1025 // to scan the buffer's heap bitmap at all.
1026 // The 1-bit ptrmasks are sized to contain only bits for
1027 // the typ.ptrdata prefix, zero padded out to a full byte
1028 // of bitmap. This code sets nw (below) so that heap bitmap
1029 // bits are only written for the typ.ptrdata prefix; if there is
1030 // more room in the allocated object, the next heap bitmap
1031 // entry is a 00, indicating that there are no more pointers
1032 // to scan. So only the ptrmask for the ptrdata bytes is needed.
1033 //
1034 // Replicated copies are not as nice: if there is an array of
1035 // objects with scalar tails, all but the last tail does have to
1036 // be initialized, because there is no way to say "skip forward".
1037 // However, because of the possibility of a repeated type with
1038 // size not a multiple of 4 pointers (one heap bitmap byte),
1039 // the code already must handle the last ptrmask byte specially
1040 // by treating it as containing only the bits for endnb pointers,
1041 // where endnb <= 4. We represent large scalar tails that must
1042 // be expanded in the replication by setting endnb larger than 4.
1043 // This will have the effect of reading many bits out of b,
1044 // but once the real bits are shifted out, b will supply as many
1045 // zero bits as we try to read, which is exactly what we need.
1046
1047 p = ptrmask
1048 if typ.size < dataSize {
1049 // Filling in bits for an array of typ.
1050 // Set up for repetition of ptrmask during main loop.
1051 // Note that ptrmask describes only a prefix of
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001052 const maxBits = sys.PtrSize*8 - 7
1053 if typ.ptrdata/sys.PtrSize <= maxBits {
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001054 // Entire ptrmask fits in uintptr with room for a byte fragment.
1055 // Load into pbits and never read from ptrmask again.
1056 // This is especially important when the ptrmask has
1057 // fewer than 8 bits in it; otherwise the reload in the middle
1058 // of the Phase 2 loop would itself need to loop to gather
1059 // at least 8 bits.
1060
1061 // Accumulate ptrmask into b.
1062 // ptrmask is sized to describe only typ.ptrdata, but we record
1063 // it as describing typ.size bytes, since all the high bits are zero.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001064 nb = typ.ptrdata / sys.PtrSize
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001065 for i := uintptr(0); i < nb; i += 8 {
1066 b |= uintptr(*p) << i
1067 p = add1(p)
1068 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001069 nb = typ.size / sys.PtrSize
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001070
1071 // Replicate ptrmask to fill entire pbits uintptr.
1072 // Doubling and truncating is fewer steps than
1073 // iterating by nb each time. (nb could be 1.)
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001074 // Since we loaded typ.ptrdata/sys.PtrSize bits
1075 // but are pretending to have typ.size/sys.PtrSize,
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001076 // there might be no replication necessary/possible.
1077 pbits = b
1078 endnb = nb
1079 if nb+nb <= maxBits {
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001080 for endnb <= sys.PtrSize*8 {
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001081 pbits |= pbits << endnb
1082 endnb += endnb
1083 }
1084 // Truncate to a multiple of original ptrmask.
1085 endnb = maxBits / nb * nb
1086 pbits &= 1<<endnb - 1
1087 b = pbits
1088 nb = endnb
1089 }
1090
1091 // Clear p and endp as sentinel for using pbits.
1092 // Checked during Phase 2 loop.
1093 p = nil
1094 endp = nil
1095 } else {
1096 // Ptrmask is larger. Read it multiple times.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001097 n := (typ.ptrdata/sys.PtrSize+7)/8 - 1
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001098 endp = addb(ptrmask, n)
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001099 endnb = typ.size/sys.PtrSize - n*8
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001100 }
1101 }
1102 if p != nil {
1103 b = uintptr(*p)
1104 p = add1(p)
1105 nb = 8
1106 }
1107
1108 if typ.size == dataSize {
1109 // Single entry: can stop once we reach the non-pointer data.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001110 nw = typ.ptrdata / sys.PtrSize
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001111 } else {
1112 // Repeated instances of typ in an array.
1113 // Have to process first N-1 entries in full, but can stop
1114 // once we reach the non-pointer data in the final entry.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001115 nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001116 }
1117 if nw == 0 {
1118 // No pointers! Caller was supposed to check.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001119 println("runtime: invalid type ", typ.string())
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001120 throw("heapBitsSetType: called with non-pointer type")
1121 return
1122 }
1123 if nw < 2 {
1124 // Must write at least 2 words, because the "no scan"
1125 // encoding doesn't take effect until the third word.
1126 nw = 2
1127 }
1128
1129 // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001130 // The leading byte is special because it contains the bits for word 1,
1131 // which does not have the marked bits set.
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001132 // The leading half-byte is special because it's a half a byte and must be
1133 // manipulated atomically.
1134 switch {
1135 default:
1136 throw("heapBitsSetType: unexpected shift")
1137
1138 case h.shift == 0:
1139 // Ptrmask and heap bitmap are aligned.
1140 // Handle first byte of bitmap specially.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001141 //
1142 // The first byte we write out covers the first four
1143 // words of the object. The scan/dead bit on the first
1144 // word must be set to scan since there are pointers
1145 // somewhere in the object. The scan/dead bit on the
1146 // second word is the checkmark, so we don't set it.
1147 // In all following words, we set the scan/dead
1148 // appropriately to indicate that the object contains
1149 // to the next 2-bit entry in the bitmap.
1150 //
1151 // TODO: It doesn't matter if we set the checkmark, so
1152 // maybe this case isn't needed any more.
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001153 hb = b & bitPointerAll
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001154 hb |= bitMarked | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001155 if w += 4; w >= nw {
1156 goto Phase3
1157 }
1158 *hbitp = uint8(hb)
1159 hbitp = subtract1(hbitp)
1160 b >>= 4
1161 nb -= 4
1162
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001163 case sys.PtrSize == 8 && h.shift == 2:
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001164 // Ptrmask and heap bitmap are misaligned.
1165 // The bits for the first two words are in a byte shared with another object
1166 // and must be updated atomically.
1167 // NOTE(rsc): The atomic here may not be necessary.
1168 // We took care of 1-word and 2-word objects above,
1169 // so this is at least a 6-word object, so our start bits
1170 // are shared only with the type bits of another object,
1171 // not with its mark bit. Since there is only one allocation
1172 // from a given span at a time, we should be able to set
1173 // these bits non-atomically. Not worth the risk right now.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001174 hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
1175 // This is not noscan, so set the scan bit in the
1176 // first word.
1177 hb |= bitMarked << (2 * heapBitsShift)
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001178 b >>= 2
1179 nb -= 2
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001180 // Note: no bitMarker for second word because that's
1181 // the checkmark.
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001182 if gcphase == _GCoff {
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001183 *hbitp &^= uint8((bitPointer | bitMarked | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001184 *hbitp |= uint8(hb)
1185 } else {
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001186 atomic.And8(hbitp, ^(uint8(bitPointer|bitMarked|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
1187 atomic.Or8(hbitp, uint8(hb))
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001188 }
1189 hbitp = subtract1(hbitp)
1190 if w += 2; w >= nw {
1191 // We know that there is more data, because we handled 2-word objects above.
1192 // This must be at least a 6-word object. If we're out of pointer words,
1193 // mark no scan in next bitmap byte and finish.
1194 hb = 0
1195 w += 4
1196 goto Phase3
1197 }
1198 }
1199
1200 // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
1201 // The loop computes the bits for that last write but does not execute the write;
1202 // it leaves the bits in hb for processing by phase 3.
1203 // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
1204 // use in the first half of the loop right now, and then we only adjust nb explicitly
1205 // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
	nb -= 4
	for {
		// Emit bitmap byte.
		// b has at least nb+4 bits, with one exception:
		// if w+4 >= nw, then b has only nw-w bits,
		// but we'll stop at the break and then truncate
		// appropriately in Phase 3.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4

		// Load more bits. b has nb right now.
		if p != endp {
			// Fast path: keep reading from ptrmask.
			// nb unmodified: we just loaded 8 bits,
			// and the next iteration will consume 8 bits,
			// leaving us with the same nb the next time we're here.
			if nb < 8 {
				b |= uintptr(*p) << nb
				p = add1(p)
			} else {
				// Reduce the number of bits in b.
				// This is important if we skipped
				// over a scalar tail, since nb could
				// be larger than the bit width of b.
				nb -= 8
			}
		} else if p == nil {
			// Almost as fast path: track bit count and refill from pbits.
			// For short repetitions.
			if nb < 8 {
				b |= pbits << nb
				nb += endnb
			}
			nb -= 8 // for next iteration
		} else {
			// Slow path: reached end of ptrmask.
			// Process final partial byte and rewind to start.
			b |= uintptr(*p) << nb
			nb += endnb
			if nb < 8 {
				b |= uintptr(*ptrmask) << nb
				p = add1(ptrmask)
			} else {
				nb -= 8
				p = ptrmask
			}
		}

		// Emit bitmap byte.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4
	}

Phase3:
	// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
	if w > nw {
		// Counting the 4 entries in hb not yet written to memory,
		// there are more entries than possible pointer slots.
		// Discard the excess entries (can't be more than 3).
		mask := uintptr(1)<<(4-(w-nw)) - 1
		hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits
	}
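	// For example: if w-nw == 3, mask is 1<<1 - 1 == 1, so only the lowest
	// pointer bit and its mark bit survive; the other three entries in hb
	// are cleared.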

	// Change nw from counting possibly-pointer words to total words in allocation.
	nw = size / sys.PtrSize

	// Write whole bitmap bytes.
	// The first is hb, the rest are zero.
	if w <= nw {
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		hb = 0 // for possible final half-byte below
		for w += 4; w <= nw; w += 4 {
			*hbitp = 0
			hbitp = subtract1(hbitp)
		}
	}

	// Write final partial bitmap byte if any.
	// We know w > nw, or else we'd still be in the loop above.
	// It can be bigger only due to the 4 entries in hb that it counts.
	// If w == nw+4 then there's nothing left to do: we wrote all nw entries
	// and can discard the 4 sitting in hb.
	// But if w == nw+2, we need to write the first two entries in hb.
	// The byte is shared with the next object, so we may need an atomic.
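	// For example, an allocation of 6 words (nw == 6) whose bitmap ends
	// mid-byte reaches here with w == nw+2 == 8: the two live entries in hb
	// occupy the low half of a byte whose high half belongs to the next
	// object's entries.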
	if w == nw+2 {
		if gcphase == _GCoff {
			*hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb)
		} else {
			atomic.And8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift))
			atomic.Or8(hbitp, uint8(hb))
		}
	}

Phase4:
	// Phase 4: all done, but perhaps double check.
	if doubleCheck {
		end := heapBitsForAddr(x + size)
		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
			println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
			print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
			h0 := heapBitsForAddr(x)
			print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
			print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
			throw("bad heapBitsSetType")
		}

		// Double-check that bits to be written were written correctly.
		// Does not check that other bits were not written, unfortunately.
		h := heapBitsForAddr(x)
		nptr := typ.ptrdata / sys.PtrSize
		ndata := typ.size / sys.PtrSize
		count := dataSize / typ.size
		totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize
		for i := uintptr(0); i < size/sys.PtrSize; i++ {
			j := i % ndata
			var have, want uint8
			have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
			if i >= totalptr {
				want = 0 // dead marker
				if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
					want = bitMarked
				}
			} else {
				if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
					want |= bitPointer
				}
				if i != 1 {
					want |= bitMarked
				} else {
					have &^= bitMarked
				}
			}
			if have != want {
				println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
				print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
				h0 := heapBitsForAddr(x)
				print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
				print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
				print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
				println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want)
				if typ.kind&kindGCProg != 0 {
					println("GC program:")
					dumpGCProg(addb(typ.gcdata, 4))
				}
				throw("bad heapBitsSetType")
			}
			h = h.next()
		}
		if ptrmask == debugPtrmask.data {
			unlock(&debugPtrmask.lock)
		}
	}
}

// heapBitsSetTypeNoScan marks x as noscan by setting the first word
// of x in the heap bitmap to scalar/dead.
func heapBitsSetTypeNoScan(x uintptr) {
	h := heapBitsForAddr(uintptr(x))
	*h.bitp &^= (bitPointer | bitMarked) << h.shift
}

var debugPtrmask struct {
	lock mutex
	data *byte
}

// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
// progSize is the size of the memory described by the program.
// elemSize is the size of the element that the GC program describes (a prefix of).
// dataSize is the total size of the intended data, a multiple of elemSize.
// allocSize is the total size of the allocated memory.
//
// GC programs are only used for large allocations.
// heapBitsSetType requires that allocSize is a multiple of 4 words,
// so that the relevant bitmap bytes are not shared with surrounding
// objects and need not be accessed with atomic instructions.
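//
// For example (an illustrative sketch): for an allocation holding four
// elements of a type T whose layout is encoded as a GC program, elemSize
// would be T's size, dataSize four times that, progSize the number of bytes
// of T actually described by the program (its ptrdata), and allocSize the
// rounded-up size of the block actually allocated.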
func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
	if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 {
		// Alignment will be wrong.
		throw("heapBitsSetTypeGCProg: small allocation")
	}
	var totalBits uintptr
	if elemSize == dataSize {
		totalBits = runGCProg(prog, nil, h.bitp, 2)
		if totalBits*sys.PtrSize != progSize {
			println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
			throw("heapBitsSetTypeGCProg: unexpected bit count")
		}
	} else {
		count := dataSize / elemSize

		// Piece together program trailer to run after prog that does:
		//	literal(0)
		//	repeat(1, elemSize-progSize-1) // zeros to fill element size
		//	repeat(elemSize, count-1) // repeat that element for count
		// This zero-pads the data remaining in the first element and then
		// repeats that first element to fill the array.
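		// For example (illustrative numbers): with elemSize of 16 words and
		// progSize of 2 words, the trailer built below would be
		//	0x01 0x00             literal(0)
		//	0x81 0x0d             repeat(1, 13)
		//	0x80 0x10 <c-1> 0x00  repeat(16, count-1), then stop
		// where <c-1> is count-1 encoded as a varint.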
		var trailer [40]byte // 3 varints (max 10 each) + some bytes
		i := 0
		if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 {
			// literal(0)
			trailer[i] = 0x01
			i++
			trailer[i] = 0
			i++
			if n > 1 {
				// repeat(1, n-1)
				trailer[i] = 0x81
				i++
				n--
				for ; n >= 0x80; n >>= 7 {
					trailer[i] = byte(n | 0x80)
					i++
				}
				trailer[i] = byte(n)
				i++
			}
		}
		// repeat(elemSize/ptrSize, count-1)
		trailer[i] = 0x80
		i++
		n := elemSize / sys.PtrSize
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		n = count - 1
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		trailer[i] = 0
		i++

		runGCProg(prog, &trailer[0], h.bitp, 2)

		// Even though we filled in the full array just now,
		// record that we only filled in up to the ptrdata of the
		// last element. This will cause the code below to
		// memclr the dead section of the final array element,
		// so that scanobject can stop early in the final element.
		totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
	}
	endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
	endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
	memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
}

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/sys.PtrSize bits.
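//
// For example (illustrative): a prog that emits the three bits 1, 0, 1 for a
// 24-byte region on a 64-bit system yields a bitvector with n == 3 whose
// first byte is 0b101: words 0 and 2 hold pointers, word 1 does not.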
func progToPointerMask(prog *byte, size uintptr) bitvector {
	n := (size/sys.PtrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, nil, &x[0], 1)
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
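//
// For example (illustrative), the program
//	0x03 0x05  emit 3 bits from one byte: 1, 0, 1
//	0x81 0x02  repeat the previous 1 bit twice: 1, 1
//	0x00       stop
// describes the 5-bit pointer mask 1 0 1 1 1.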

// runGCProg executes the GC program prog, and then trailer if non-nil,
// writing to dst with entries of the given size.
// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
// starting at dst (because the heap bitmap does). In this case, the caller guarantees
// that only whole bytes in dst need to be written.
//
// runGCProg returns the number of 1- or 2-bit entries written to memory.
func runGCProg(prog, trailer, dst *byte, size int) uintptr {
	dstStart := dst

	// Bits waiting to be written to memory.
	var bits uintptr
	var nbits uintptr

	p := prog
Run:
	for {
		// Flush accumulated full bytes.
		// The rest of the loop assumes that nbits <= 7.
		for ; nbits >= 8; nbits -= 8 {
			if size == 1 {
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			} else {
				v := bits&bitPointerAll | bitMarkedAll
				*dst = uint8(v)
				dst = subtract1(dst)
				bits >>= 4
				v = bits&bitPointerAll | bitMarkedAll
				*dst = uint8(v)
				dst = subtract1(dst)
				bits >>= 4
			}
		}

		// Process one instruction.
		inst := uintptr(*p)
		p = add1(p)
		n := inst & 0x7F
		if inst&0x80 == 0 {
			// Literal bits; n == 0 means end of program.
			if n == 0 {
				// Program is over; continue in trailer if present.
				if trailer != nil {
					//println("trailer")
					p = trailer
					trailer = nil
					continue
				}
				//println("done")
				break Run
			}
			//println("lit", n, dst)
			nbyte := n / 8
			for i := uintptr(0); i < nbyte; i++ {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				if size == 1 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
				} else {
					v := bits&0xf | bitMarkedAll
					*dst = uint8(v)
					dst = subtract1(dst)
					bits >>= 4
					v = bits&0xf | bitMarkedAll
					*dst = uint8(v)
					dst = subtract1(dst)
					bits >>= 4
				}
			}
			if n %= 8; n > 0 {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				nbits += n
			}
			continue Run
		}

		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
		if n == 0 {
			for off := uint(0); ; off += 7 {
				x := uintptr(*p)
				p = add1(p)
				n |= (x & 0x7F) << off
				if x&0x80 == 0 {
					break
				}
			}
		}
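		// For example (illustrative): the varint bytes 0x85 0x01 decode to
		// n = 0x05 | 0x01<<7 = 133. The high bit of each byte marks
		// continuation; payloads accumulate 7 bits at a time, low bits first.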

		// Count is encoded in a varint in the next bytes.
		c := uintptr(0)
		for off := uint(0); ; off += 7 {
			x := uintptr(*p)
			p = add1(p)
			c |= (x & 0x7F) << off
			if x&0x80 == 0 {
				break
			}
		}
		c *= n // now total number of bits to copy

		// If the number of bits being repeated is small, load them
		// into a register and use that register for the entire loop
		// instead of repeatedly reading from memory.
		// Handling fewer than 8 bits here makes the general loop simpler.
		// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
		// it will not overflow.
		src := dst
		const maxBits = sys.PtrSize*8 - 7
		if n <= maxBits {
			// Start with bits in output buffer.
			pattern := bits
			npattern := nbits

			// If we need more bits, fetch them from memory.
			if size == 1 {
				src = subtract1(src)
				for npattern < n {
					pattern <<= 8
					pattern |= uintptr(*src)
					src = subtract1(src)
					npattern += 8
				}
			} else {
				src = add1(src)
				for npattern < n {
					pattern <<= 4
					pattern |= uintptr(*src) & 0xf
					src = add1(src)
					npattern += 4
				}
			}

			// We started with the whole bit output buffer,
			// and then we loaded bits from whole bytes.
			// Either way, we might now have too many instead of too few.
			// Discard the extra.
			if npattern > n {
				pattern >>= npattern - n
				npattern = n
			}

			// Replicate pattern to at most maxBits.
			if npattern == 1 {
				// One bit being repeated.
				// If the bit is 1, make the pattern all 1s.
				// If the bit is 0, the pattern is already all 0s,
				// but we can claim that the number of bits
				// in the word is equal to the number we need (c),
				// because right shift of bits will zero fill.
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= sys.PtrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}
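			// For example (illustrative, on a 64-bit system where maxBits
			// is 57): a 3-bit pattern 0b101 is doubled until it fills the
			// word, then trimmed to nb = 57/3*3 = 57 bits, i.e. 19 whole
			// copies of the original pattern.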

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				if size == 1 {
					for nbits >= 8 {
						*dst = uint8(bits)
						dst = add1(dst)
						bits >>= 8
						nbits -= 8
					}
				} else {
					for nbits >= 4 {
						*dst = uint8(bits&0xf | bitMarkedAll)
						dst = subtract1(dst)
						bits >>= 4
						nbits -= 4
					}
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		if size == 1 {
			// Leading src fragment.
			src = subtractb(src, (off+7)/8)
			if frag := off & 7; frag != 0 {
				bits |= uintptr(*src) >> (8 - frag) << nbits
				src = add1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 8; i > 0; i-- {
				bits |= uintptr(*src) << nbits
				src = add1(src)
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			}
			// Final src fragment.
			if c %= 8; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		} else {
			// Leading src fragment.
			src = addb(src, (off+3)/4)
			if frag := off & 3; frag != 0 {
				bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
				src = subtract1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 4; i > 0; i-- {
				bits |= (uintptr(*src) & 0xf) << nbits
				src = subtract1(src)
				*dst = uint8(bits&0xf | bitMarkedAll)
				dst = subtract1(dst)
				bits >>= 4
			}
			// Final src fragment.
			if c %= 4; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	var totalBits uintptr
	if size == 1 {
		totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
		nbits += -nbits & 7
		for ; nbits > 0; nbits -= 8 {
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
	} else {
		totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
		nbits += -nbits & 3
		for ; nbits > 0; nbits -= 4 {
			v := bits&0xf | bitMarkedAll
			*dst = uint8(v)
			dst = subtract1(dst)
			bits >>= 4
		}
	}
	return totalBits
}

func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// Testing.

func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
	target := (*stkframe)(ctxt)
	if frame.sp <= target.sp && target.sp < frame.varp {
		*target = *frame
		return false
	}
	return true
}

// gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x interface{}) []byte {
	ret := getgcmask(x)
	typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
	nptr := typ.ptrdata / sys.PtrSize
	for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
		ret = ret[:len(ret)-1]
	}
	return ret
}

// Returns GC type info for object p for testing.
func getgcmask(ep interface{}) (mask []byte) {
	e := *efaceOf(&ep)
	p := e.data
	t := e._type
	// data or bss
	for datap := &firstmoduledata; datap != nil; datap = datap.next {
		// data
		if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
			bitmap := datap.gcdatamask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/sys.PtrSize)
			for i := uintptr(0); i < n; i += sys.PtrSize {
				off := (uintptr(p) + i - datap.data) / sys.PtrSize
				mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}

		// bss
		if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
			bitmap := datap.gcbssmask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/sys.PtrSize)
			for i := uintptr(0); i < n; i += sys.PtrSize {
				off := (uintptr(p) + i - datap.bss) / sys.PtrSize
				mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}
	}

	// heap
	var n uintptr
	var base uintptr
	if mlookup(uintptr(p), &base, &n, nil) != 0 {
		mask = make([]byte, n/sys.PtrSize)
		for i := uintptr(0); i < n; i += sys.PtrSize {
			hbits := heapBitsForAddr(base + i)
			if hbits.isPointer() {
				mask[i/sys.PtrSize] = 1
			}
			if i != 1*sys.PtrSize && !hbits.morePointers() {
				mask = mask[:i/sys.PtrSize]
				break
			}
		}
		return
	}

	// stack
	if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
		var frame stkframe
		frame.sp = uintptr(p)
		_g_ := getg()
		gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
		if frame.fn != nil {
			f := frame.fn
			targetpc := frame.continpc
			if targetpc == 0 {
				return
			}
			if targetpc != f.entry {
				targetpc--
			}
			pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil)
			if pcdata == -1 {
				return
			}
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
			if stkmap == nil || stkmap.n <= 0 {
				return
			}
			bv := stackmapdata(stkmap, pcdata)
			size := uintptr(bv.n) * sys.PtrSize
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/sys.PtrSize)
			for i := uintptr(0); i < n; i += sys.PtrSize {
				bitmap := bv.bytedata
				off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
				mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
		}
		return
	}

	// Otherwise, not something the GC knows about.
	// Possibly read-only data, like malloc(0).
	// Must not have pointers.
	return
}