// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are described
// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
// to be visited during GC. The bits in each byte are consumed starting with
// the low bit: 1<<0, 1<<1, and so on.
//
// Heap bitmap
//
// The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used.
// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
// stored in bytes indexed backward in memory from start.
// That is, the byte at address start-1 holds the 2-bit entries for the four words
// start through start+3*ptrSize, the byte at start-2 holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
// The meaning of the high bit depends on the position of the word being described
// in its allocated object. In all words *except* the second word, the
// high bit indicates that the object is still being described. In
// these words, if a bit pair with a high bit 0 is encountered, the
// low bit can also be assumed to be 0, and the object description is
// over. This 00 is called the ``dead'' encoding: it signals that the
// rest of the words in the object are uninteresting to the garbage
// collector.
//
// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
// bits.
// This form allows a copy from the 1-bit to the 4-bit form to keep the
// pointer bits contiguous, instead of having to space them out.
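//
// For example (an illustrative walk-through of this encoding, not bits from
// a real heap): consider a three-word object whose first two words are
// pointers, described by words w0..w3 of one bitmap byte. Its pointer bits
// are 1,1,0,0, giving the low nibble 0011, and its high bits are 1 for w0
// (object still being described), 1 for w1 (the checkmark, set in normal
// operation), and 0,0 for w2,w3 (the ``dead'' encoding), giving the high
// nibble 0011, so the byte as a whole is 0x33.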
//
// The code makes use of the fact that the zero value for a heap bitmap
// has no live pointer bit set and is (depending on position) not used,
// not checkmarked, and the dead encoding.
// These properties must be preserved when modifying the encoding.
//
// Checkmarks
//
// In a concurrent garbage collector, one worries about failing to mark
// a live object due to mutations without write barriers or bugs in the
// collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked.
// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
// for the second word of the object holds the checkmark bit.
// When not in checkmark mode, this bit is set to 1.
//
// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
// means every allocated object has two words, so there is room for the
// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
// just one word, so the second bit pair is not available for encoding the
// checkmark. However, because non-pointer allocations are combined
// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
// must be a pointer, so the type bit in the first word is not actually needed.
// It is still used in general, except that in checkmark mode the type bit is
// repurposed as the checkmark bit and then reinitialized (to 1) as the type
// bit when finished.
//

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	bitPointer = 1 << 0
	bitMarked  = 1 << 4 // TODO: Rename bitScan.

	heapBitsShift   = 1                     // shift offset between successive bitPointer or bitMarked entries
	heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte

	// all mark/pointer bits in a byte
	bitMarkedAll  = bitMarked | bitMarked<<heapBitsShift | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
	bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)
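
// Worked out (illustrative, following the constants above): with
// heapBitsShift == 1, bitPointerAll is 1|2|4|8 == 0x0f (the low nibble)
// and bitMarkedAll is 0x10|0x20|0x40|0x80 == 0xf0 (the high nibble), so
// initSpan's bitPointerAll|bitMarkedAll fill below writes 0xff bytes.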

// addb returns the byte pointer p+n.
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
// subtractb is typically used when traversing the pointer tables referred to by hbits,
// which are arranged in reverse order.
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
// subtract1 is typically used when traversing the pointer tables referred to by hbits,
// which are arranged in reverse order.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// mapBits is called each time arena_used is extended.
// It maps any additional bitmap memory needed for the new arena memory.
// It must be called with the expected new value of arena_used,
// *before* h.arena_used has been updated.
// Waiting to update arena_used until after the memory has been mapped
// avoids faults when other threads try to access the bitmap immediately
// after observing the change to arena_used.
//
//go:nowritebarrier
func (h *mheap) mapBits(arena_used uintptr) {
	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	const bitmapChunk = 8192

	n := (arena_used - mheap_.arena_start) / heapBitmapScale
	n = round(n, bitmapChunk)
	n = round(n, sys.PhysPageSize)
	if h.bitmap_mapped >= n {
		return
	}

	sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
	h.bitmap_mapped = n
}

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	bitp  *uint8
	shift uint32
}

// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.bytep&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
	bytep *uint8
	mask  uint8
	index uintptr
}

//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
	whichByte := allocBitIndex / 8
	whichBit := allocBitIndex % 8
	bytePtr := addb(s.allocBits, whichByte)
	return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex}
}

// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64-bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uintptr) {
	bytes := (*[8]uint8)(unsafe.Pointer(addb(s.allocBits, whichByte)))
	aCache := uint64(0)
	aCache |= uint64(bytes[0])
	aCache |= uint64(bytes[1]) << (1 * 8)
	aCache |= uint64(bytes[2]) << (2 * 8)
	aCache |= uint64(bytes[3]) << (3 * 8)
	aCache |= uint64(bytes[4]) << (4 * 8)
	aCache |= uint64(bytes[5]) << (5 * 8)
	aCache |= uint64(bytes[6]) << (6 * 8)
	aCache |= uint64(bytes[7]) << (7 * 8)
	s.allocCache = ^aCache
}
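
// To see why the negation helps (an illustrative example using 8 bits
// instead of 64 for brevity): if the loaded alloc bits are 11010111,
// where 1 means allocated, the negated cache is 00101000, and a single
// count-trailing-zeros instruction yields 3, the index of the first
// free object, with no loop over the bits.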

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.Ctz64(aCache)
	for bitIndex == 64 {
		// Nothing was available in the cached bits:
		// move the index to the start of the next cached bits,
		// grab the next 8 bytes, and try again.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.Ctz64(aCache)
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= (bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

func (s *mspan) isFree(index uintptr) bool {
	whichByte := index / 8
	whichBit := index % 8
	byteVal := *addb(s.allocBits, whichByte)
	return byteVal&uint8(1<<whichBit) == 0
}

func (s *mspan) objIndex(p uintptr) uintptr {
	byteOffset := p - s.base()
	if byteOffset == 0 {
		return 0
	}
	if s.baseMask != 0 {
		// s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift
		return byteOffset >> s.divShift
	}
	return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
}
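
// The multiply-shift in objIndex avoids a hardware divide. As an
// illustrative sketch (the constants below are hypothetical examples
// chosen for elemsize 48, not values taken from the runtime's generated
// size-class tables), divMagicSketch verifies by brute force that
// byteOffset/48 == (byteOffset>>4)*2731>>13 for all offsets below 64 KiB.
// It is not called by the runtime.
func divMagicSketch() bool {
	const divShift, divMul, divShift2 = 4, 2731, 13
	for off := uintptr(0); off < 1<<16; off++ {
		if off/48 != (off>>divShift)*divMul>>divShift2 {
			return false
		}
	}
	return true
}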

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	whichByte := objIndex / 8
	bitMask := uint8(1 << (objIndex % 8)) // low 3 bits hold the bit index
	bytePtr := addb(s.gcmarkBits, whichByte)
	return markBits{bytePtr, bitMask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{s.gcmarkBits, uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically. Some compilers
// are not able to inline the atomic.Or8 function, so if it appears as a hot
// spot consider inlining it manually.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// clearMarkedNonAtomic clears the marked bit non-atomically.
func (m markBits) clearMarkedNonAtomic() {
	*m.bytep ^= m.mask
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	if base < mheap_.arena_start || base >= mheap_.arena_used {
		throw("markBitsForSpan: base out of range")
	}
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}

// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) heapBits {
	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
	off := (addr - mheap_.arena_start) / sys.PtrSize
	return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)}
}
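
// For example (illustrative numbers): on a 64-bit system, for
// addr == mheap_.arena_start+100 the word offset is off == 12, so the
// 2-bit entry lives in the byte at mheap_.bitmap - 12/4 - 1 ==
// mheap_.bitmap - 4, at shift 12&3 == 0.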

// heapBitsForSpan returns the heapBits for the span base address base.
func heapBitsForSpan(base uintptr) (hbits heapBits) {
	if base < mheap_.arena_start || base >= mheap_.arena_used {
		throw("heapBitsForSpan: base out of range")
	}
	return heapBitsForAddr(base)
}

// heapBitsForObject returns the base address for the heap object
// containing the address p, the heapBits for base,
// the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0;
// otherwise it returns the base of the object.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) {
	arenaStart := mheap_.arena_start
	if p < arenaStart || p >= mheap_.arena_used {
		return
	}
	off := p - arenaStart
	idx := off >> _PageShift
	// p points into the heap, but possibly to the middle of an object.
	// Consult the span table to find the block beginning.
	s = h_spans[idx]
	if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
		if s == nil || s.state == _MSpanStack {
			// If s is nil, the virtual address has never been part of the heap.
			// This pointer may be to some mmap'd region, so we allow it.
			// Pointers into stacks are also ok, the runtime manages these explicitly.
			return
		}

		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			// Typically this indicates an incorrect use
			// of unsafe or cgo to store a bad pointer in
			// the Go heap. It may also indicate a runtime
			// bug.
			//
			// TODO(austin): We could be more aggressive
			// and detect pointers to unallocated objects
			// in allocated spans.
			printlock()
			print("runtime: pointer ", hex(p))
			if s.state != mSpanInUse {
				print(" to unallocated span")
			} else {
				print(" to unused region of span")
			}
			print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
			if refBase != 0 {
				print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
				gcDumpObject("object", refBase, refOff)
			}
			throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
		}
		return
	}
	// If this span holds objects of a power-of-2 size, just mask off the bits to
	// the interior of the object. Otherwise use the size to get the base.
	if s.baseMask != 0 {
		// optimize for power-of-2 sized objects.
		base = s.base()
		base = base + (p-base)&s.baseMask
		objIndex = (base - s.base()) >> s.divShift
		// base = p & s.baseMask is faster for small spans,
		// but doesn't work for large spans.
		// Overall, it's faster to use the more general computation above.
	} else {
		base = s.base()
		if p-base >= s.elemsize {
			// n := (p - base) / s.elemsize, using division by multiplication
			objIndex = uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
			base += objIndex * s.elemsize
		}
	}
	// Now that we know the actual base, compute heapBits to return to caller.
	hbits = heapBitsForAddr(base)
	return
}

// prefetch the bits.
func (h heapBits) prefetch() {
	prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
}

// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) next() heapBits {
	if h.shift < 3*heapBitsShift {
		return heapBits{h.bitp, h.shift + heapBitsShift}
	}
	return heapBits{subtract1(h.bitp), 0}
}

// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
func (h heapBits) forward(n uintptr) heapBits {
	n += uintptr(h.shift) / heapBitsShift
	return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
}
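
// For example (illustrative): if h has shift 3, the last word described
// by its bitmap byte, then h.forward(2) computes n == 5 and lands one
// byte back (n/4 == 1; bitmap bytes grow toward lower addresses) at
// shift 5%4 == 1.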

// bits returns the heap bits for the current word.
// The caller can test isMarked and isPointer by &-ing with bitMarked and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
func (h heapBits) bits() uint32 {
	// The (shift & 31) eliminates a test and conditional branch
	// from the generated code.
	return uint32(*h.bitp) >> (h.shift & 31)
}

// morePointers reports whether this word and all remaining words in this
// object may contain pointers; if it returns false, this word and all
// remaining words are scalars and the object description is over.
// h must not describe the second word of the object.
func (h heapBits) morePointers() bool {
	return h.bits()&bitMarked != 0
}

// isPointer reports whether the heap bits describe a pointer word.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
	return h.bits()&bitPointer != 0
}

// hasPointers reports whether the given object has any pointers.
// It must be told how large the object at h is for efficiency.
// h must describe the initial word of the object.
func (h heapBits) hasPointers(size uintptr) bool {
	if size == sys.PtrSize { // 1-word objects are always pointers
		return true
	}
	return (*h.bitp>>h.shift)&bitMarked != 0
}

// isCheckmarked reports whether the heap bits have the checkmarked bit set.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) isCheckmarked(size uintptr) bool {
	if size == sys.PtrSize {
		return (*h.bitp>>h.shift)&bitPointer != 0
	}
	// All multiword objects are 2-word aligned,
	// so we know that the initial word's 2-bit pair
	// and the second word's 2-bit pair are in the
	// same heap bitmap byte, *h.bitp.
	return (*h.bitp>>(heapBitsShift+h.shift))&bitMarked != 0
}

// setCheckmarked sets the checkmarked bit.
// It must be told how large the object at h is, because the encoding of the
// checkmark bit varies by size.
// h must describe the initial word of the object.
func (h heapBits) setCheckmarked(size uintptr) {
	if size == sys.PtrSize {
		atomic.Or8(h.bitp, bitPointer<<h.shift)
		return
	}
	atomic.Or8(h.bitp, bitMarked<<(heapBitsShift+h.shift))
}

// heapBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the heap, data, or BSS bitmap to locate those pointer slots.
// This executes the write barriers necessary after a memmove.
// Both p and size must be pointer-aligned.
// The range [p, p+size) must lie within a single object.
//
// Callers should call heapBitsBulkBarrier immediately after
// calling memmove(p, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
//
// The heap bitmap is not maintained for allocations containing
// no pointers at all; any caller of heapBitsBulkBarrier must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.kind&kindNoPointers.
//
//go:nosplit
func heapBitsBulkBarrier(p, size uintptr) {
	if (p|size)&(sys.PtrSize-1) != 0 {
		throw("heapBitsBulkBarrier: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	if !inheap(p) {
		// If p is on the stack and in a higher frame than the
		// caller, we either need to execute write barriers on
		// it (which is what happens for normal stack writes
		// through pointers to higher frames), or we need to
		// force the mark termination stack scan to scan the
		// frame containing p.
		//
		// Executing write barriers on p is complicated in the
		// general case because we either need to unwind the
		// stack to get the stack map, or we need the type's
		// bitmap, which may be a GC program.
		//
		// Hence, we opt for forcing the re-scan to scan the
		// frame containing p, which we can do by simply
		// unwinding the stack barriers between the current SP
		// and p's frame.
		gp := getg().m.curg
		if gp != nil && gp.stack.lo <= p && p < gp.stack.hi {
			// Run on the system stack to give it more
			// stack space.
			systemstack(func() {
				gcUnwindBarriers(gp, p)
			})
			return
		}

		// If p is a global, use the data or BSS bitmaps to
		// execute write barriers.
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			if datap.data <= p && p < datap.edata {
				bulkBarrierBitmap(p, size, p-datap.data, datap.gcdatamask.bytedata)
				return
			}
		}
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			if datap.bss <= p && p < datap.ebss {
				bulkBarrierBitmap(p, size, p-datap.bss, datap.gcbssmask.bytedata)
				return
			}
		}
		return
	}

	h := heapBitsForAddr(p)
	for i := uintptr(0); i < size; i += sys.PtrSize {
		if h.isPointer() {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
		h = h.next()
	}
}

// bulkBarrierBitmap executes write barriers for [p, p+size) using a
// 1-bit pointer bitmap. p is assumed to start maskOffset bytes into
// the data covered by the bitmap in bits.
//
// This is used by heapBitsBulkBarrier for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(p, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / sys.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	for i := uintptr(0); i < size; i += sys.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * sys.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
		mask <<= 1
	}
}
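
// For example (illustrative): with maskOffset == 72 on a 64-bit system,
// word == 9, so the walk starts at the second bitmap byte (9/8 == 1)
// with mask 1<<1. When mask overflows to 0 past bit 7, the loop loads
// the next byte; an all-zero byte skips eight pointer-free words at
// once (i advances by 7*sys.PtrSize plus the loop's own increment).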

// typeBitsBulkBarrier executes writebarrierptr_nostore
// for every pointer slot in the memory range [p, p+size),
// using the type bitmap to locate those pointer slots.
// The type typ must correspond exactly to [p, p+size).
// This executes the write barriers necessary after a copy.
// Both p and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right after memmove,
// and the GC must not complete between those two.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, p, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.size != size {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.kind&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.needed {
		return
	}
	ptrmask := typ.gcdata
	var bits uint32
	for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
		if i&(sys.PtrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			x := (*uintptr)(unsafe.Pointer(p + i))
			writebarrierptr_nostore(x, *x)
		}
	}
}

// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.

// initSpan initializes the heap bitmap for a span.
// It clears all checkmark bits.
// If this is a span of pointer-sized objects, it initializes all
// words to pointer/scan.
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(s *mspan) {
	size, n, total := s.layout()

	// Init the markbit structures
	s.freeindex = 0
	s.allocCache = ^uint64(0) // all 1s indicating all free.
	s.nelems = n
	s.allocBits = nil
	s.gcmarkBits = nil
	s.gcmarkBits = newMarkBits(s.nelems)
	s.allocBits = newAllocBits(s.nelems)

	// Clear bits corresponding to objects.
	if total%heapBitmapScale != 0 {
		throw("initSpan: unaligned length")
	}
	nbyte := total / heapBitmapScale
	if sys.PtrSize == 8 && size == sys.PtrSize {
		end := h.bitp
		bitp := subtractb(end, nbyte-1)
		for {
			*bitp = bitPointerAll | bitMarkedAll
			if bitp == end {
				break
			}
			bitp = add1(bitp)
		}
		return
	}
	memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}

// initCheckmarkSpan initializes a span for being checkmarked.
// It clears the checkmark bits, which are set to 1 in normal operation.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
	// The sys.PtrSize == 8 test is a compile-time constant, false on 32-bit
	// systems, and eliminates this code entirely there.
	if sys.PtrSize == 8 && size == sys.PtrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must clear type bit (checkmark bit) of every word.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			*bitp &^= bitPointerAll
			bitp = subtract1(bitp)
		}
		return
	}
	for i := uintptr(0); i < n; i++ {
		*h.bitp &^= bitMarked << (heapBitsShift + h.shift)
		h = h.forward(size / sys.PtrSize)
	}
}

// clearCheckmarkSpan undoes all the checkmarking in a span.
// The actual checkmark bits are ignored, so the only work to do
// is to fix the pointer bits. (Pointer bits are ignored by scanobject
// but consulted by typedmemmove.)
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
	// The sys.PtrSize == 8 test is a compile-time constant, false on 32-bit
	// systems, and eliminates this code entirely there.
	if sys.PtrSize == 8 && size == sys.PtrSize {
		// Checkmark bit is type bit, bottom bit of every 2-bit entry.
		// Only possible on 64-bit system, since minimum size is 8.
		// Must set type bit (checkmark bit) of every word back to 1.
		// The type bit is the lower of every two-bit pair.
		bitp := h.bitp
		for i := uintptr(0); i < n; i += 4 {
			*bitp |= bitPointerAll
			bitp = subtract1(bitp)
		}
	}
}

// oneBitCount is indexed by byte and produces the
// number of 1 bits in that byte. For example 128 has 1 bit set
// and oneBitCount[128] holds 1.
var oneBitCount = [256]uint8{
	0, 1, 1, 2, 1, 2, 2, 3,
	1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4,
	2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4,
	2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4,
	2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6,
	4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4,
	2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6,
	4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5,
	3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6,
	4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6,
	4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7,
	5, 6, 6, 7, 6, 7, 7, 8}

// countFree runs through the mark bits in a span and counts the number of free objects
// in the span.
// TODO(rlh): Use popcount intrinsic.
func (s *mspan) countFree() int {
	count := 0
	maxIndex := s.nelems / 8
	for i := uintptr(0); i < maxIndex; i++ {
		mrkBits := *addb(s.gcmarkBits, i)
		count += int(oneBitCount[mrkBits])
	}
	if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
		mrkBits := *addb(s.gcmarkBits, maxIndex)
		mask := uint8((1 << bitsInLastByte) - 1)
		bits := mrkBits & mask
		count += int(oneBitCount[bits])
	}
	return int(s.nelems) - count
}
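
// popCountSketch is an illustrative cross-check (not used by the
// runtime): it computes the same population count as the oneBitCount
// table using the classic clear-lowest-set-bit loop, so
// int(oneBitCount[x]) == popCountSketch(x) for every byte value x.
func popCountSketch(x uint8) int {
	n := 0
	for ; x != 0; x &= x - 1 { // x &= x-1 clears the lowest set bit of x
		n++
	}
	return n
}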
840
841// heapBitsSetType records that the new allocation [x, x+size)
842// holds in [x, x+dataSize) one or more values of type typ.
843// (The number of values is given by dataSize / typ.size.)
844// If dataSize < size, the fragment [x+dataSize, x+size) is
845// recorded as non-pointer data.
846// It is known that the type has pointers somewhere;
847// malloc does not call heapBitsSetType when there are no pointers,
848// because all free objects are marked as noscan during
849// heapBitsSweepSpan.
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700850//
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700851// There can only be one allocation from a given span active at a time,
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700852// and the bitmap for a span always falls on byte boundaries,
853// so there are no write-write races for access to the heap bitmap.
854// Hence, heapBitsSetType can access the bitmap without atomics.
855//
856// There can be read-write races between heapBitsSetType and things
857// that read the heap bitmap like scanobject. However, since
858// heapBitsSetType is only used for objects that have not yet been
859// made reachable, readers will ignore bits being modified by this
860// function. This does mean this function cannot transiently modify
861// bits that belong to neighboring objects. Also, on weakly-ordered
862// machines, callers must execute a store/store (publication) barrier
863// between calling this function and making the object reachable.
864//
865// TODO: This still has atomic accesses left over from when it could
866// race with GC accessing mark bits in the bitmap. Remove these.
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700867func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
868 const doubleCheck = false // slow but helpful; enable to test modifications to this code
869
870 // dataSize is always size rounded up to the next malloc size class,
871 // except in the case of allocating a defer block, in which case
872 // size is sizeof(_defer{}) (at least 6 words) and dataSize may be
873 // arbitrarily larger.
874 //
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700875 // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700876 // assume that dataSize == size without checking it explicitly.
877
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700878 if sys.PtrSize == 8 && size == sys.PtrSize {
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700879 // It's one word and it has pointers, it must be a pointer.
880 // In general we'd need an atomic update here if the
881 // concurrent GC were marking objects in this span,
882 // because each bitmap byte describes 3 other objects
883 // in addition to the one being allocated.
884 // However, since all allocated one-word objects are pointers
885 // (non-pointers are aggregated into tinySize allocations),
886 // initSpan sets the pointer bits for us. Nothing to do here.
887 if doubleCheck {
888 h := heapBitsForAddr(x)
889 if !h.isPointer() {
890 throw("heapBitsSetType: pointer bit missing")
891 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700892 if !h.morePointers() {
893 throw("heapBitsSetType: scan bit missing")
894 }
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700895 }
896 return
897 }
898
899 h := heapBitsForAddr(x)
900 ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
901
902 // Heap bitmap bits for 2-word object are only 4 bits,
903 // so also shared with objects next to it; use atomic updates.
904 // This is called out as a special case primarily for 32-bit systems,
905 // so that on 32-bit systems the code below can assume all objects
906 // are 4-word aligned (because they're all 16-byte aligned).
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700907 if size == 2*sys.PtrSize {
908 if typ.size == sys.PtrSize {
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700909 // We're allocating a block big enough to hold two pointers.
910 // On 64-bit, that means the actual object must be two pointers,
911 // or else we'd have used the one-pointer-sized block.
912 // On 32-bit, however, this is the 8-byte block, the smallest one.
913 // So it could be that we're allocating one pointer and this was
914 // just the smallest block available. Distinguish by checking dataSize.
915 // (In general the number of instances of typ being allocated is
916 // dataSize/typ.size.)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700917 if sys.PtrSize == 4 && dataSize == sys.PtrSize {
918 // 1 pointer object. On 32-bit machines clear the bit for the
919 // unused second word.
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700920 if gcphase == _GCoff {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700921 *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
922 *h.bitp |= (bitPointer | bitMarked) << h.shift
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700923 } else {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700924 atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
925 atomic.Or8(h.bitp, (bitPointer|bitMarked)<<h.shift)
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700926 }
927 } else {
928 // 2-element slice of pointer.
929 if gcphase == _GCoff {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700930 *h.bitp |= (bitPointer | bitMarked | bitPointer<<heapBitsShift) << h.shift
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700931 } else {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700932 atomic.Or8(h.bitp, (bitPointer|bitMarked|bitPointer<<heapBitsShift)<<h.shift)
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700933 }
934 }
935 return
936 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700937 // Otherwise typ.size must be 2*sys.PtrSize,
938 // and typ.kind&kindGCProg == 0.
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700939 if doubleCheck {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700940 if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 {
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700941 print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
942 throw("heapBitsSetType")
943 }
944 }
945 b := uint32(*ptrmask)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700946 hb := (b & 3) | bitMarked
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700947 if gcphase == _GCoff {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700948 // bitPointer == 1, bitMarked is 1 << 4, heapBitsShift is 1.
949 // 110011 is shifted h.shift and complemented.
950 // This clears out the bits that are about to be
951 // ored into *h.hbitp in the next instructions.
952 *h.bitp &^= (bitPointer | bitMarked | ((bitPointer | bitMarked) << heapBitsShift)) << h.shift
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700953 *h.bitp |= uint8(hb << h.shift)
954 } else {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700955 // TODO:(rlh) since the GC is not concurrently setting the
956 // mark bits in the heap map anymore and malloc
957 // owns the span we are allocating in why does this have
958 // to be atomic?
959
960 atomic.And8(h.bitp, ^uint8((bitPointer|bitMarked|((bitPointer|bitMarked)<<heapBitsShift))<<h.shift))
961 atomic.Or8(h.bitp, uint8(hb<<h.shift))
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700962 }
963 return
964 }
965
966 // Copy from 1-bit ptrmask into 2-bit bitmap.
967 // The basic approach is to use a single uintptr as a bit buffer,
968 // alternating between reloading the buffer and writing bitmap bytes.
969 // In general, one load can supply two bitmap byte writes.
970 // This is a lot of lines of code, but it compiles into relatively few
971 // machine instructions.
972
973 var (
974 // Ptrmask input.
975 p *byte // last ptrmask byte read
976 b uintptr // ptrmask bits already loaded
977 nb uintptr // number of bits in b at next read
978 endp *byte // final ptrmask byte to read (then repeat)
979 endnb uintptr // number of valid bits in *endp
980 pbits uintptr // alternate source of bits
981
982 // Heap bitmap output.
983 w uintptr // words processed
984 nw uintptr // number of words to process
985 hbitp *byte // next heap bitmap byte to write
986 hb uintptr // bits being prepared for *hbitp
987 )
988
989 hbitp = h.bitp
990
991 // Handle GC program. Delayed until this part of the code
992 // so that we can use the same double-checking mechanism
993 // as the 1-bit case. Nothing above could have encountered
994 // GC programs: the cases were all too small.
995 if typ.kind&kindGCProg != 0 {
996 heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
997 if doubleCheck {
998 // Double-check the heap bits written by GC program
999 // by running the GC program to create a 1-bit pointer mask
1000 // and then jumping to the double-check code below.
1001 // This doesn't catch bugs shared between the 1-bit and 4-bit
1002 // GC program execution, but it does catch mistakes specific
1003 // to just one of those and bugs in heapBitsSetTypeGCProg's
1004 // implementation of arrays.
1005 lock(&debugPtrmask.lock)
1006 if debugPtrmask.data == nil {
1007 debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
1008 }
1009 ptrmask = debugPtrmask.data
1010 runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
1011 goto Phase4
1012 }
1013 return
1014 }
1015
1016 // Note about sizes:
1017 //
1018 // typ.size is the number of words in the object,
1019 // and typ.ptrdata is the number of words in the prefix
1020 // of the object that contains pointers. That is, the final
1021 // typ.size - typ.ptrdata words contain no pointers.
1022 // This allows optimization of a common pattern where
1023 // an object has a small header followed by a large scalar
1024 // buffer. If we know the pointers are over, we don't have
1025 // to scan the buffer's heap bitmap at all.
1026 // The 1-bit ptrmasks are sized to contain only bits for
1027 // the typ.ptrdata prefix, zero padded out to a full byte
1028 // of bitmap. This code sets nw (below) so that heap bitmap
1029 // bits are only written for the typ.ptrdata prefix; if there is
1030 // more room in the allocated object, the next heap bitmap
1031 // entry is a 00, indicating that there are no more pointers
1032 // to scan. So only the ptrmask for the ptrdata bytes is needed.
1033 //
1034 // Replicated copies are not as nice: if there is an array of
1035 // objects with scalar tails, all but the last tail does have to
1036 // be initialized, because there is no way to say "skip forward".
1037 // However, because of the possibility of a repeated type with
1038 // size not a multiple of 4 pointers (one heap bitmap byte),
1039 // the code already must handle the last ptrmask byte specially
1040 // by treating it as containing only the bits for endnb pointers,
1041 // where endnb <= 4. We represent large scalar tails that must
1042 // be expanded in the replication by setting endnb larger than 4.
1043 // This will have the effect of reading many bits out of b,
1044 // but once the real bits are shifted out, b will supply as many
1045 // zero bits as we try to read, which is exactly what we need.
1046
1047 p = ptrmask
1048 if typ.size < dataSize {
1049 // Filling in bits for an array of typ.
1050 // Set up for repetition of ptrmask during main loop.
1051 // Note that ptrmask describes only a prefix of
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001052 const maxBits = sys.PtrSize*8 - 7
1053 if typ.ptrdata/sys.PtrSize <= maxBits {
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001054 // Entire ptrmask fits in uintptr with room for a byte fragment.
1055 // Load into pbits and never read from ptrmask again.
1056 // This is especially important when the ptrmask has
1057 // fewer than 8 bits in it; otherwise the reload in the middle
1058 // of the Phase 2 loop would itself need to loop to gather
1059 // at least 8 bits.
1060
1061 // Accumulate ptrmask into b.
1062 // ptrmask is sized to describe only typ.ptrdata, but we record
1063 // it as describing typ.size bytes, since all the high bits are zero.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001064 nb = typ.ptrdata / sys.PtrSize
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001065 for i := uintptr(0); i < nb; i += 8 {
1066 b |= uintptr(*p) << i
1067 p = add1(p)
1068 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001069 nb = typ.size / sys.PtrSize
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001070
1071 // Replicate ptrmask to fill entire pbits uintptr.
1072 // Doubling and truncating is fewer steps than
1073 // iterating by nb each time. (nb could be 1.)
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001074 // Since we loaded typ.ptrdata/sys.PtrSize bits
1075 // but are pretending to have typ.size/sys.PtrSize,
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001076 // there might be no replication necessary/possible.
1077 pbits = b
1078 endnb = nb
1079 if nb+nb <= maxBits {
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001080 for endnb <= sys.PtrSize*8 {
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001081 pbits |= pbits << endnb
1082 endnb += endnb
1083 }
1084 // Truncate to a multiple of original ptrmask.
1085 endnb = maxBits / nb * nb
1086 pbits &= 1<<endnb - 1
1087 b = pbits
1088 nb = endnb
1089 }
1090
1091 // Clear p and endp as sentinel for using pbits.
1092 // Checked during Phase 2 loop.
1093 p = nil
1094 endp = nil
1095 } else {
1096 // Ptrmask is larger. Read it multiple times.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001097 n := (typ.ptrdata/sys.PtrSize+7)/8 - 1
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001098 endp = addb(ptrmask, n)
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001099 endnb = typ.size/sys.PtrSize - n*8
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001100 }
1101 }
1102 if p != nil {
1103 b = uintptr(*p)
1104 p = add1(p)
1105 nb = 8
1106 }
1107
1108 if typ.size == dataSize {
1109 // Single entry: can stop once we reach the non-pointer data.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001110 nw = typ.ptrdata / sys.PtrSize
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001111 } else {
1112 // Repeated instances of typ in an array.
1113 // Have to process first N-1 entries in full, but can stop
1114 // once we reach the non-pointer data in the final entry.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001115 nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001116 }
1117 if nw == 0 {
1118 // No pointers! Caller was supposed to check.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001119 println("runtime: invalid type ", typ.string())
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001120 throw("heapBitsSetType: called with non-pointer type")
1121 return
1122 }
1123 if nw < 2 {
1124 // Must write at least 2 words, because the "no scan"
1125 // encoding doesn't take effect until the third word.
1126 nw = 2
1127 }
1128
1129 // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001130 // The leading byte is special because it contains the bits for word 1,
1131 // which does not have the marked bits set.
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001132 // The leading half-byte is special because it's a half a byte and must be
1133 // manipulated atomically.
1134 switch {
1135 default:
1136 throw("heapBitsSetType: unexpected shift")
1137
1138 case h.shift == 0:
1139 // Ptrmask and heap bitmap are aligned.
1140 // Handle first byte of bitmap specially.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001141 //
1142 // The first byte we write out covers the first four
1143 // words of the object. The scan/dead bit on the first
1144 // word must be set to scan since there are pointers
1145 // somewhere in the object. The scan/dead bit on the
1146 // second word is the checkmark, so we don't set it.
1147 // In all following words, we set the scan/dead
1148 // appropriately to indicate that the object contains
1149 // to the next 2-bit entry in the bitmap.
1150 //
1151 // TODO: It doesn't matter if we set the checkmark, so
1152 // maybe this case isn't needed any more.
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001153 hb = b & bitPointerAll
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001154 hb |= bitMarked | bitMarked<<(2*heapBitsShift) | bitMarked<<(3*heapBitsShift)
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001155 if w += 4; w >= nw {
1156 goto Phase3
1157 }
1158 *hbitp = uint8(hb)
1159 hbitp = subtract1(hbitp)
1160 b >>= 4
1161 nb -= 4
1162
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001163 case sys.PtrSize == 8 && h.shift == 2:
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001164 // Ptrmask and heap bitmap are misaligned.
1165 // The bits for the first two words are in a byte shared with another object
1166 // and must be updated atomically.
1167 // NOTE(rsc): The atomic here may not be necessary.
1168 // We took care of 1-word and 2-word objects above,
1169 // so this is at least a 6-word object, so our start bits
1170 // are shared only with the type bits of another object,
1171 // not with its mark bit. Since there is only one allocation
1172 // from a given span at a time, we should be able to set
1173 // these bits non-atomically. Not worth the risk right now.
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001174 hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
1175 // This is not noscan, so set the scan bit in the
1176 // first word.
1177 hb |= bitMarked << (2 * heapBitsShift)
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001178 b >>= 2
1179 nb -= 2
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001180 // Note: no bitMarker for second word because that's
1181 // the checkmark.
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001182 if gcphase == _GCoff {
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001183 *hbitp &^= uint8((bitPointer | bitMarked | (bitPointer << heapBitsShift)) << (2 * heapBitsShift))
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001184 *hbitp |= uint8(hb)
1185 } else {
Dan Willemsen38f2dba2016-07-08 14:54:35 -07001186 atomic.And8(hbitp, ^(uint8(bitPointer|bitMarked|bitPointer<<heapBitsShift) << (2 * heapBitsShift)))
1187 atomic.Or8(hbitp, uint8(hb))
Dan Willemsen09eb3b12015-09-16 14:34:17 -07001188 }
1189 hbitp = subtract1(hbitp)
1190 if w += 2; w >= nw {
1191 // We know that there is more data, because we handled 2-word objects above.
1192 // This must be at least a 6-word object. If we're out of pointer words,
1193 // mark no scan in next bitmap byte and finish.
1194 hb = 0
1195 w += 4
1196 goto Phase3
1197 }
1198 }
1199
1200 // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
1201 // The loop computes the bits for that last write but does not execute the write;
1202 // it leaves the bits in hb for processing by phase 3.
1203 // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
1204 // use in the first half of the loop right now, and then we only adjust nb explicitly
1205 // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
	nb -= 4
	for {
		// Emit bitmap byte.
		// b has at least nb+4 bits, with one exception:
		// if w+4 >= nw, then b has only nw-w bits,
		// but we'll stop at the break and then truncate
		// appropriately in Phase 3.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4

		// Load more bits. b has nb right now.
		if p != endp {
			// Fast path: keep reading from ptrmask.
			// nb unmodified: we just loaded 8 bits,
			// and the next iteration will consume 8 bits,
			// leaving us with the same nb the next time we're here.
			if nb < 8 {
				b |= uintptr(*p) << nb
				p = add1(p)
			} else {
				// Reduce the number of bits in b.
				// This is important if we skipped
				// over a scalar tail, since nb could
				// be larger than the bit width of b.
				nb -= 8
			}
		} else if p == nil {
			// Almost as fast path: track bit count and refill from pbits.
			// For short repetitions.
			if nb < 8 {
				b |= pbits << nb
				nb += endnb
			}
			nb -= 8 // for next iteration
		} else {
			// Slow path: reached end of ptrmask.
			// Process final partial byte and rewind to start.
			b |= uintptr(*p) << nb
			nb += endnb
			if nb < 8 {
				b |= uintptr(*ptrmask) << nb
				p = add1(ptrmask)
			} else {
				nb -= 8
				p = ptrmask
			}
		}

		// Emit bitmap byte.
		hb = b & bitPointerAll
		hb |= bitMarkedAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		b >>= 4
	}

Phase3:
	// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
	if w > nw {
		// Counting the 4 entries in hb not yet written to memory,
		// there are more entries than possible pointer slots.
		// Discard the excess entries (can't be more than 3).
		mask := uintptr(1)<<(4-(w-nw)) - 1
		hb &= mask | mask<<4 // apply mask to both pointer bits and mark bits
	}
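	// For example: if w-nw == 3, mask is 1<<1 - 1 == 1, so only the lowest
	// pointer bit and its mark bit survive; the other three entries in hb
	// are cleared.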

	// Change nw from counting possibly-pointer words to total words in allocation.
	nw = size / sys.PtrSize

	// Write whole bitmap bytes.
	// The first is hb, the rest are zero.
	if w <= nw {
		*hbitp = uint8(hb)
		hbitp = subtract1(hbitp)
		hb = 0 // for possible final half-byte below
		for w += 4; w <= nw; w += 4 {
			*hbitp = 0
			hbitp = subtract1(hbitp)
		}
	}

	// Write final partial bitmap byte if any.
	// We know w > nw, or else we'd still be in the loop above.
	// It can be bigger only due to the 4 entries in hb that it counts.
	// If w == nw+4 then there's nothing left to do: we wrote all nw entries
	// and can discard the 4 sitting in hb.
	// But if w == nw+2, we need to write the first two entries in hb.
	// The byte is shared with the next object, so we may need an atomic.
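	// For example, an allocation of 6 words (nw == 6) whose bitmap ends
	// mid-byte reaches here with w == nw+2 == 8: the two live entries in hb
	// occupy the low half of a byte whose high half belongs to the next
	// object's entries.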
	if w == nw+2 {
		if gcphase == _GCoff {
			*hbitp = *hbitp&^(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift) | uint8(hb)
		} else {
			atomic.And8(hbitp, ^uint8(bitPointer|bitMarked|(bitPointer|bitMarked)<<heapBitsShift))
			atomic.Or8(hbitp, uint8(hb))
		}
	}

Phase4:
	// Phase 4: all done, but perhaps double check.
	if doubleCheck {
		end := heapBitsForAddr(x + size)
		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
			println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
			print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
			h0 := heapBitsForAddr(x)
			print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
			print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
			throw("bad heapBitsSetType")
		}

		// Double-check that bits to be written were written correctly.
		// Does not check that other bits were not written, unfortunately.
		h := heapBitsForAddr(x)
		nptr := typ.ptrdata / sys.PtrSize
		ndata := typ.size / sys.PtrSize
		count := dataSize / typ.size
		totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize
		for i := uintptr(0); i < size/sys.PtrSize; i++ {
			j := i % ndata
			var have, want uint8
			have = (*h.bitp >> h.shift) & (bitPointer | bitMarked)
			if i >= totalptr {
				want = 0 // dead marker
				if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
					want = bitMarked
				}
			} else {
				if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
					want |= bitPointer
				}
				if i != 1 {
					want |= bitMarked
				} else {
					have &^= bitMarked
				}
			}
			if have != want {
				println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
				print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
				h0 := heapBitsForAddr(x)
				print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
				print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
				print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
				println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want)
				if typ.kind&kindGCProg != 0 {
					println("GC program:")
					dumpGCProg(addb(typ.gcdata, 4))
				}
				throw("bad heapBitsSetType")
			}
			h = h.next()
		}
		if ptrmask == debugPtrmask.data {
			unlock(&debugPtrmask.lock)
		}
	}
}

// heapBitsSetTypeNoScan marks x as noscan by setting the first word
// of x in the heap bitmap to scalar/dead.
func heapBitsSetTypeNoScan(x uintptr) {
	h := heapBitsForAddr(uintptr(x))
	*h.bitp &^= (bitPointer | bitMarked) << h.shift
}

var debugPtrmask struct {
	lock mutex
	data *byte
}

// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
// progSize is the size of the memory described by the program.
// elemSize is the size of the element that the GC program describes (a prefix of).
// dataSize is the total size of the intended data, a multiple of elemSize.
// allocSize is the total size of the allocated memory.
//
// GC programs are only used for large allocations.
// heapBitsSetType requires that allocSize is a multiple of 4 words,
// so that the relevant bitmap bytes are not shared with surrounding
// objects and need not be accessed with atomic instructions.
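//
// For example (an illustrative sketch): for an allocation holding four
// elements of a type T whose layout is encoded as a GC program, elemSize
// would be T's size, dataSize four times that, progSize the number of bytes
// of T actually described by the program (its ptrdata), and allocSize the
// rounded-up size of the block actually allocated.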
func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
	if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 {
		// Alignment will be wrong.
		throw("heapBitsSetTypeGCProg: small allocation")
	}
	var totalBits uintptr
	if elemSize == dataSize {
		totalBits = runGCProg(prog, nil, h.bitp, 2)
		if totalBits*sys.PtrSize != progSize {
			println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
			throw("heapBitsSetTypeGCProg: unexpected bit count")
		}
	} else {
		count := dataSize / elemSize

		// Piece together program trailer to run after prog that does:
		//	literal(0)
		//	repeat(1, elemSize-progSize-1) // zeros to fill element size
		//	repeat(elemSize, count-1) // repeat that element for count
		// This zero-pads the data remaining in the first element and then
		// repeats that first element to fill the array.
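		// For example (illustrative numbers): with elemSize of 16 words and
		// progSize of 2 words, the trailer built below would be
		//	0x01 0x00             literal(0)
		//	0x81 0x0d             repeat(1, 13)
		//	0x80 0x10 <c-1> 0x00  repeat(16, count-1), then stop
		// where <c-1> is count-1 encoded as a varint.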
		var trailer [40]byte // 3 varints (max 10 each) + some bytes
		i := 0
		if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 {
			// literal(0)
			trailer[i] = 0x01
			i++
			trailer[i] = 0
			i++
			if n > 1 {
				// repeat(1, n-1)
				trailer[i] = 0x81
				i++
				n--
				for ; n >= 0x80; n >>= 7 {
					trailer[i] = byte(n | 0x80)
					i++
				}
				trailer[i] = byte(n)
				i++
			}
		}
		// repeat(elemSize/ptrSize, count-1)
		trailer[i] = 0x80
		i++
		n := elemSize / sys.PtrSize
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		n = count - 1
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		trailer[i] = 0
		i++

		runGCProg(prog, &trailer[0], h.bitp, 2)

		// Even though we filled in the full array just now,
		// record that we only filled in up to the ptrdata of the
		// last element. This will cause the code below to
		// memclr the dead section of the final array element,
		// so that scanobject can stop early in the final element.
		totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
	}
	endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
	endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
	memclr(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
}

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/sys.PtrSize bits.
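//
// For example (illustrative): a prog that emits the three bits 1, 0, 1 for a
// 24-byte region on a 64-bit system yields a bitvector with n == 3 whose
// first byte is 0b101: words 0 and 2 hold pointers, word 1 does not.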
func progToPointerMask(prog *byte, size uintptr) bitvector {
	n := (size/sys.PtrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, nil, &x[0], 1)
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
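//
// For example (illustrative), the program
//	0x03 0x05  emit 3 bits from one byte: 1, 0, 1
//	0x81 0x02  repeat the previous 1 bit twice: 1, 1
//	0x00       stop
// describes the 5-bit pointer mask 1 0 1 1 1.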

// runGCProg executes the GC program prog, and then trailer if non-nil,
// writing to dst with entries of the given size.
// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
// starting at dst (because the heap bitmap does). In this case, the caller guarantees
// that only whole bytes in dst need to be written.
//
// runGCProg returns the number of 1- or 2-bit entries written to memory.
func runGCProg(prog, trailer, dst *byte, size int) uintptr {
	dstStart := dst

	// Bits waiting to be written to memory.
	var bits uintptr
	var nbits uintptr

	p := prog
Run:
	for {
		// Flush accumulated full bytes.
		// The rest of the loop assumes that nbits <= 7.
		for ; nbits >= 8; nbits -= 8 {
			if size == 1 {
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			} else {
				v := bits&bitPointerAll | bitMarkedAll
				*dst = uint8(v)
				dst = subtract1(dst)
				bits >>= 4
				v = bits&bitPointerAll | bitMarkedAll
				*dst = uint8(v)
				dst = subtract1(dst)
				bits >>= 4
			}
		}

		// Process one instruction.
		inst := uintptr(*p)
		p = add1(p)
		n := inst & 0x7F
		if inst&0x80 == 0 {
			// Literal bits; n == 0 means end of program.
			if n == 0 {
				// Program is over; continue in trailer if present.
				if trailer != nil {
					//println("trailer")
					p = trailer
					trailer = nil
					continue
				}
				//println("done")
				break Run
			}
			//println("lit", n, dst)
			nbyte := n / 8
			for i := uintptr(0); i < nbyte; i++ {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				if size == 1 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
				} else {
					v := bits&0xf | bitMarkedAll
					*dst = uint8(v)
					dst = subtract1(dst)
					bits >>= 4
					v = bits&0xf | bitMarkedAll
					*dst = uint8(v)
					dst = subtract1(dst)
					bits >>= 4
				}
			}
			if n %= 8; n > 0 {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				nbits += n
			}
			continue Run
		}

		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
		if n == 0 {
			for off := uint(0); ; off += 7 {
				x := uintptr(*p)
				p = add1(p)
				n |= (x & 0x7F) << off
				if x&0x80 == 0 {
					break
				}
			}
		}
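		// For example (illustrative): the varint bytes 0x85 0x01 decode to
		// n = 0x05 | 0x01<<7 = 133. The high bit of each byte marks
		// continuation; payloads accumulate 7 bits at a time, low bits first.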

		// Count is encoded in a varint in the next bytes.
		c := uintptr(0)
		for off := uint(0); ; off += 7 {
			x := uintptr(*p)
			p = add1(p)
			c |= (x & 0x7F) << off
			if x&0x80 == 0 {
				break
			}
		}
		c *= n // now total number of bits to copy

		// If the number of bits being repeated is small, load them
		// into a register and use that register for the entire loop
		// instead of repeatedly reading from memory.
		// Handling fewer than 8 bits here makes the general loop simpler.
		// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
		// it will not overflow.
		src := dst
		const maxBits = sys.PtrSize*8 - 7
		if n <= maxBits {
			// Start with bits in output buffer.
			pattern := bits
			npattern := nbits

			// If we need more bits, fetch them from memory.
			if size == 1 {
				src = subtract1(src)
				for npattern < n {
					pattern <<= 8
					pattern |= uintptr(*src)
					src = subtract1(src)
					npattern += 8
				}
			} else {
				src = add1(src)
				for npattern < n {
					pattern <<= 4
					pattern |= uintptr(*src) & 0xf
					src = add1(src)
					npattern += 4
				}
			}

			// We started with the whole bit output buffer,
			// and then we loaded bits from whole bytes.
			// Either way, we might now have too many instead of too few.
			// Discard the extra.
			if npattern > n {
				pattern >>= npattern - n
				npattern = n
			}

			// Replicate pattern to at most maxBits.
			if npattern == 1 {
				// One bit being repeated.
				// If the bit is 1, make the pattern all 1s.
				// If the bit is 0, the pattern is already all 0s,
				// but we can claim that the number of bits
				// in the word is equal to the number we need (c),
				// because right shift of bits will zero fill.
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= sys.PtrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}
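			// For example (illustrative, on a 64-bit system where maxBits
			// is 57): a 3-bit pattern 0b101 is doubled until it fills the
			// word, then trimmed to nb = 57/3*3 = 57 bits, i.e. 19 whole
			// copies of the original pattern.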

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				if size == 1 {
					for nbits >= 8 {
						*dst = uint8(bits)
						dst = add1(dst)
						bits >>= 8
						nbits -= 8
					}
				} else {
					for nbits >= 4 {
						*dst = uint8(bits&0xf | bitMarkedAll)
						dst = subtract1(dst)
						bits >>= 4
						nbits -= 4
					}
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		if size == 1 {
			// Leading src fragment.
			src = subtractb(src, (off+7)/8)
			if frag := off & 7; frag != 0 {
				bits |= uintptr(*src) >> (8 - frag) << nbits
				src = add1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 8; i > 0; i-- {
				bits |= uintptr(*src) << nbits
				src = add1(src)
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			}
			// Final src fragment.
			if c %= 8; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		} else {
			// Leading src fragment.
			src = addb(src, (off+3)/4)
			if frag := off & 3; frag != 0 {
				bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
				src = subtract1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 4; i > 0; i-- {
				bits |= (uintptr(*src) & 0xf) << nbits
				src = subtract1(src)
				*dst = uint8(bits&0xf | bitMarkedAll)
				dst = subtract1(dst)
				bits >>= 4
			}
			// Final src fragment.
			if c %= 4; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	var totalBits uintptr
	if size == 1 {
		totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
		nbits += -nbits & 7
		for ; nbits > 0; nbits -= 8 {
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
	} else {
		totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
		nbits += -nbits & 3
		for ; nbits > 0; nbits -= 4 {
			v := bits&0xf | bitMarkedAll
			*dst = uint8(v)
			dst = subtract1(dst)
			bits >>= 4
		}
	}
	return totalBits
}

func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// Testing.

func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
	target := (*stkframe)(ctxt)
	if frame.sp <= target.sp && target.sp < frame.varp {
		*target = *frame
		return false
	}
	return true
}

// gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x interface{}) []byte {
	ret := getgcmask(x)
	typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
	nptr := typ.ptrdata / sys.PtrSize
	for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
		ret = ret[:len(ret)-1]
	}
	return ret
}

// Returns GC type info for object p for testing.
func getgcmask(ep interface{}) (mask []byte) {
	e := *efaceOf(&ep)
	p := e.data
	t := e._type
	// data or bss
	for datap := &firstmoduledata; datap != nil; datap = datap.next {
		// data
		if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
			bitmap := datap.gcdatamask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/sys.PtrSize)
			for i := uintptr(0); i < n; i += sys.PtrSize {
				off := (uintptr(p) + i - datap.data) / sys.PtrSize
				mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}

		// bss
		if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
			bitmap := datap.gcbssmask.bytedata
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/sys.PtrSize)
			for i := uintptr(0); i < n; i += sys.PtrSize {
				off := (uintptr(p) + i - datap.bss) / sys.PtrSize
				mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}
	}

	// heap
	var n uintptr
	var base uintptr
	if mlookup(uintptr(p), &base, &n, nil) != 0 {
		mask = make([]byte, n/sys.PtrSize)
		for i := uintptr(0); i < n; i += sys.PtrSize {
			hbits := heapBitsForAddr(base + i)
			if hbits.isPointer() {
				mask[i/sys.PtrSize] = 1
			}
			if i != 1*sys.PtrSize && !hbits.morePointers() {
				mask = mask[:i/sys.PtrSize]
				break
			}
		}
		return
	}

	// stack
	if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
		var frame stkframe
		frame.sp = uintptr(p)
		_g_ := getg()
		gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
		if frame.fn != nil {
			f := frame.fn
			targetpc := frame.continpc
			if targetpc == 0 {
				return
			}
			if targetpc != f.entry {
				targetpc--
			}
			pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil)
			if pcdata == -1 {
				return
			}
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
			if stkmap == nil || stkmap.n <= 0 {
				return
			}
			bv := stackmapdata(stkmap, pcdata)
			size := uintptr(bv.n) * sys.PtrSize
			n := (*ptrtype)(unsafe.Pointer(t)).elem.size
			mask = make([]byte, n/sys.PtrSize)
			for i := uintptr(0); i < n; i += sys.PtrSize {
				bitmap := bv.bytedata
				off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
				mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
		}
		return
	}

	// Otherwise, not something the GC knows about.
	// Possibly read-only data, like malloc(0).
	// Must not have pointers.
	return
}