Blame - src/runtime/mbitmap.go - platform/prebuilts/go/darwin-x86

blob: 89d8a4cc76e43c1431c5721ad1fac9c64af65fe8 [file] [log] [blame]

Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1	// Copyright 2009 The Go Authors. All rights reserved.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	// Garbage collector: type and heap bitmaps.
				6	//
				7	// Stack, data, and bss bitmaps
				8	//
				9	// Stack frames and global variables in the data and bss sections are described
				10	// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer
				11	// to be visited during GC. The bits in each byte are consumed starting with
				12	// the low bit: 1<<0, 1<<1, and so on.
				13	//
				14	// Heap bitmap
				15	//
				16	// The allocated heap comes from a subset of the memory in the range [start, used),
				17	// where start == mheap_.arena_start and used == mheap_.arena_used.
				18	// The heap bitmap comprises 2 bits for each pointer-sized word in that range,
				19	// stored in bytes indexed backward in memory from start.
				20	// That is, the byte at address start-1 holds the 2-bit entries for the four words
				21	// start through start+3*ptrSize, the byte at start-2 holds the entries for
				22	// start+4ptrSize through start+7ptrSize, and so on.
				23	//
				24	// In each 2-bit entry, the lower bit holds the same information as in the 1-bit
				25	// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC.
				26	// The meaning of the high bit depends on the position of the word being described
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	27	// in its allocated object. In all words except the second word, the
				28	// high bit indicates that the object is still being described. In
				29	// these words, if a bit pair with a high bit 0 is encountered, the
				30	// low bit can also be assumed to be 0, and the object description is
				31	// over. This 00 is called the ``dead'' encoding: it signals that the
				32	// rest of the words in the object are uninteresting to the garbage
				33	// collector.
				34	//
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	35	// In the second word, the high bit is the GC ``checkmarked'' bit (see below).
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	36	//
				37	// The 2-bit entries are split when written into the byte, so that the top half
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	38	// of the byte contains 4 high bits and the bottom half contains 4 low (pointer)
				39	// bits.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	40	// This form allows a copy from the 1-bit to the 4-bit form to keep the
				41	// pointer bits contiguous, instead of having to space them out.
				42	//
				43	// The code makes use of the fact that the zero value for a heap bitmap
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	44	// has no live pointer bit set and is (depending on position), not used,
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	45	// not checkmarked, and is the dead encoding.
				46	// These properties must be preserved when modifying the encoding.
				47	//
				48	// Checkmarks
				49	//
				50	// In a concurrent garbage collector, one worries about failing to mark
				51	// a live object due to mutations without write barriers or bugs in the
				52	// collector implementation. As a sanity check, the GC has a 'checkmark'
				53	// mode that retraverses the object graph with the world stopped, to make
				54	// sure that everything that should be marked is marked.
				55	// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry
				56	// for the second word of the object holds the checkmark bit.
				57	// When not in checkmark mode, this bit is set to 1.
				58	//
				59	// The smallest possible allocation is 8 bytes. On a 32-bit machine, that
				60	// means every allocated object has two words, so there is room for the
				61	// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is
				62	// just one word, so the second bit pair is not available for encoding the
				63	// checkmark. However, because non-pointer allocations are combined
				64	// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation
				65	// must be a pointer, so the type bit in the first word is not actually needed.
				66	// It is still used in general, except in checkmark the type bit is repurposed
				67	// as the checkmark bit and then reinitialized (to 1) as the type bit when
				68	// finished.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	69	//
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	70
				71	package runtime
				72
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	73	import (
				74	"runtime/internal/atomic"
				75	"runtime/internal/sys"
				76	"unsafe"
				77	)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	78
				79	const (
				80	bitPointer = 1 << 0
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	81	bitScan = 1 << 4
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	82
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	83	heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	84	heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	85
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	86	// all scan/pointer bits in a byte
				87	bitScanAll = bitScan \| bitScan<<heapBitsShift \| bitScan<<(2heapBitsShift) \| bitScan<<(3heapBitsShift)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	88	bitPointerAll = bitPointer \| bitPointer<<heapBitsShift \| bitPointer<<(2heapBitsShift) \| bitPointer<<(3heapBitsShift)
				89	)
				90
				91	// addb returns the byte pointer p+n.
				92	//go:nowritebarrier
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	93	//go:nosplit
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	94	func addb(p byte, n uintptr) byte {
				95	// Note: wrote out full expression instead of calling add(p, n)
				96	// to reduce the number of temporaries generated by the
				97	// compiler for this trivial expression during inlining.
				98	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
				99	}
				100
				101	// subtractb returns the byte pointer p-n.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	102	// subtractb is typically used when traversing the pointer tables referred to by hbits
				103	// which are arranged in reverse order.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	104	//go:nowritebarrier
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	105	//go:nosplit
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	106	func subtractb(p byte, n uintptr) byte {
				107	// Note: wrote out full expression instead of calling add(p, -n)
				108	// to reduce the number of temporaries generated by the
				109	// compiler for this trivial expression during inlining.
				110	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
				111	}
				112
				113	// add1 returns the byte pointer p+1.
				114	//go:nowritebarrier
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	115	//go:nosplit
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	116	func add1(p byte) byte {
				117	// Note: wrote out full expression instead of calling addb(p, 1)
				118	// to reduce the number of temporaries generated by the
				119	// compiler for this trivial expression during inlining.
				120	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
				121	}
				122
				123	// subtract1 returns the byte pointer p-1.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	124	// subtract1 is typically used when traversing the pointer tables referred to by hbits
				125	// which are arranged in reverse order.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	126	//go:nowritebarrier
				127	//
				128	// nosplit because it is used during write barriers and must not be preempted.
				129	//go:nosplit
				130	func subtract1(p byte) byte {
				131	// Note: wrote out full expression instead of calling subtractb(p, 1)
				132	// to reduce the number of temporaries generated by the
				133	// compiler for this trivial expression during inlining.
				134	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
				135	}
				136
				137	// mHeap_MapBits is called each time arena_used is extended.
				138	// It maps any additional bitmap memory needed for the new arena memory.
				139	// It must be called with the expected new value of arena_used,
				140	// before h.arena_used has been updated.
				141	// Waiting to update arena_used until after the memory has been mapped
				142	// avoids faults when other threads try access the bitmap immediately
				143	// after observing the change to arena_used.
				144	//
				145	//go:nowritebarrier
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	146	func (h *mheap) mapBits(arena_used uintptr) {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	147	// Caller has added extra mappings to the arena.
				148	// Add extra mappings of bitmap words as needed.
				149	// We allocate extra bitmap pieces in chunks of bitmapChunk.
				150	const bitmapChunk = 8192
				151
				152	n := (arena_used - mheap_.arena_start) / heapBitmapScale
				153	n = round(n, bitmapChunk)
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	154	n = round(n, physPageSize)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	155	if h.bitmap_mapped >= n {
				156	return
				157	}
				158
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	159	sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	160	h.bitmap_mapped = n
				161	}
				162
				163	// heapBits provides access to the bitmap bits for a single heap word.
				164	// The methods on heapBits take value receivers so that the compiler
				165	// can more easily inline calls to those methods and registerize the
				166	// struct fields independently.
				167	type heapBits struct {
				168	bitp *uint8
				169	shift uint32
				170	}
				171
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	172	// markBits provides access to the mark bit for an object in the heap.
				173	// bytep points to the byte holding the mark bit.
				174	// mask is a byte with a single bit set that can be &ed with *bytep
				175	// to see if the bit has been set.
				176	// *m.byte&m.mask != 0 indicates the mark bit is set.
				177	// index can be used along with span information to generate
				178	// the address of the object in the heap.
				179	// We maintain one set of mark bits for allocation and one for
				180	// marking purposes.
				181	type markBits struct {
				182	bytep *uint8
				183	mask uint8
				184	index uintptr
				185	}
				186
				187	//go:nosplit
				188	func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
				189	whichByte := allocBitIndex / 8
				190	whichBit := allocBitIndex % 8
				191	bytePtr := addb(s.allocBits, whichByte)
				192	return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex}
				193	}
				194
				195	// refillaCache takes 8 bytes s.allocBits starting at whichByte
				196	// and negates them so that ctz (count trailing zeros) instructions
				197	// can be used. It then places these 8 bytes into the cached 64 bit
				198	// s.allocCache.
				199	func (s *mspan) refillAllocCache(whichByte uintptr) {
				200	bytes := (*[8]uint8)(unsafe.Pointer(addb(s.allocBits, whichByte)))
				201	aCache := uint64(0)
				202	aCache \|= uint64(bytes[0])
				203	aCache \|= uint64(bytes[1]) << (1 * 8)
				204	aCache \|= uint64(bytes[2]) << (2 * 8)
				205	aCache \|= uint64(bytes[3]) << (3 * 8)
				206	aCache \|= uint64(bytes[4]) << (4 * 8)
				207	aCache \|= uint64(bytes[5]) << (5 * 8)
				208	aCache \|= uint64(bytes[6]) << (6 * 8)
				209	aCache \|= uint64(bytes[7]) << (7 * 8)
				210	s.allocCache = ^aCache
				211	}
				212
				213	// nextFreeIndex returns the index of the next free object in s at
				214	// or after s.freeindex.
				215	// There are hardware instructions that can be used to make this
				216	// faster if profiling warrants it.
				217	func (s *mspan) nextFreeIndex() uintptr {
				218	sfreeindex := s.freeindex
				219	snelems := s.nelems
				220	if sfreeindex == snelems {
				221	return sfreeindex
				222	}
				223	if sfreeindex > snelems {
				224	throw("s.freeindex > s.nelems")
				225	}
				226
				227	aCache := s.allocCache
				228
				229	bitIndex := sys.Ctz64(aCache)
				230	for bitIndex == 64 {
				231	// Move index to start of next cached bits.
				232	sfreeindex = (sfreeindex + 64) &^ (64 - 1)
				233	if sfreeindex >= snelems {
				234	s.freeindex = snelems
				235	return snelems
				236	}
				237	whichByte := sfreeindex / 8
				238	// Refill s.allocCache with the next 64 alloc bits.
				239	s.refillAllocCache(whichByte)
				240	aCache = s.allocCache
				241	bitIndex = sys.Ctz64(aCache)
				242	// nothing available in cached bits
				243	// grab the next 8 bytes and try again.
				244	}
				245	result := sfreeindex + uintptr(bitIndex)
				246	if result >= snelems {
				247	s.freeindex = snelems
				248	return snelems
				249	}
				250
				251	s.allocCache >>= (bitIndex + 1)
				252	sfreeindex = result + 1
				253
				254	if sfreeindex%64 == 0 && sfreeindex != snelems {
				255	// We just incremented s.freeindex so it isn't 0.
				256	// As each 1 in s.allocCache was encountered and used for allocation
				257	// it was shifted away. At this point s.allocCache contains all 0s.
				258	// Refill s.allocCache so that it corresponds
				259	// to the bits at s.allocBits starting at s.freeindex.
				260	whichByte := sfreeindex / 8
				261	s.refillAllocCache(whichByte)
				262	}
				263	s.freeindex = sfreeindex
				264	return result
				265	}
				266
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	267	// isFree returns whether the index'th object in s is unallocated.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	268	func (s *mspan) isFree(index uintptr) bool {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	269	if index < s.freeindex {
				270	return false
				271	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	272	whichByte := index / 8
				273	whichBit := index % 8
				274	byteVal := *addb(s.allocBits, whichByte)
				275	return byteVal&uint8(1<<whichBit) == 0
				276	}
				277
				278	func (s *mspan) objIndex(p uintptr) uintptr {
				279	byteOffset := p - s.base()
				280	if byteOffset == 0 {
				281	return 0
				282	}
				283	if s.baseMask != 0 {
				284	// s.baseMask is 0, elemsize is a power of two, so shift by s.divShift
				285	return byteOffset >> s.divShift
				286	}
				287	return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
				288	}
				289
				290	func markBitsForAddr(p uintptr) markBits {
				291	s := spanOf(p)
				292	objIndex := s.objIndex(p)
				293	return s.markBitsForIndex(objIndex)
				294	}
				295
				296	func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
				297	whichByte := objIndex / 8
				298	bitMask := uint8(1 << (objIndex % 8)) // low 3 bits hold the bit index
				299	bytePtr := addb(s.gcmarkBits, whichByte)
				300	return markBits{bytePtr, bitMask, objIndex}
				301	}
				302
				303	func (s *mspan) markBitsForBase() markBits {
				304	return markBits{s.gcmarkBits, uint8(1), 0}
				305	}
				306
				307	// isMarked reports whether mark bit m is set.
				308	func (m markBits) isMarked() bool {
				309	return *m.bytep&m.mask != 0
				310	}
				311
				312	// setMarked sets the marked bit in the markbits, atomically. Some compilers
				313	// are not able to inline atomic.Or8 function so if it appears as a hot spot consider
				314	// inlining it manually.
				315	func (m markBits) setMarked() {
				316	// Might be racing with other updates, so use atomic update always.
				317	// We used to be clever here and use a non-atomic update in certain
				318	// cases, but it's not worth the risk.
				319	atomic.Or8(m.bytep, m.mask)
				320	}
				321
				322	// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
				323	func (m markBits) setMarkedNonAtomic() {
				324	*m.bytep \|= m.mask
				325	}
				326
				327	// clearMarked clears the marked bit in the markbits, atomically.
				328	func (m markBits) clearMarked() {
				329	// Might be racing with other updates, so use atomic update always.
				330	// We used to be clever here and use a non-atomic update in certain
				331	// cases, but it's not worth the risk.
				332	atomic.And8(m.bytep, ^m.mask)
				333	}
				334
				335	// clearMarkedNonAtomic clears the marked bit non-atomically.
				336	func (m markBits) clearMarkedNonAtomic() {
				337	*m.bytep ^= m.mask
				338	}
				339
				340	// markBitsForSpan returns the markBits for the span base address base.
				341	func markBitsForSpan(base uintptr) (mbits markBits) {
				342	if base < mheap_.arena_start \|\| base >= mheap_.arena_used {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	343	throw("markBitsForSpan: base out of range")
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	344	}
				345	mbits = markBitsForAddr(base)
				346	if mbits.mask != 1 {
				347	throw("markBitsForSpan: unaligned start")
				348	}
				349	return mbits
				350	}
				351
				352	// advance advances the markBits to the next object in the span.
				353	func (m *markBits) advance() {
				354	if m.mask == 1<<7 {
				355	m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
				356	m.mask = 1
				357	} else {
				358	m.mask = m.mask << 1
				359	}
				360	m.index++
				361	}
				362
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	363	// heapBitsForAddr returns the heapBits for the address addr.
				364	// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
				365	//
				366	// nosplit because it is used during write barriers and must not be preempted.
				367	//go:nosplit
				368	func heapBitsForAddr(addr uintptr) heapBits {
				369	// 2 bits per work, 4 pairs per byte, and a mask is hard coded.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	370	off := (addr - mheap_.arena_start) / sys.PtrSize
				371	return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)}
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	372	}
				373
				374	// heapBitsForSpan returns the heapBits for the span base address base.
				375	func heapBitsForSpan(base uintptr) (hbits heapBits) {
				376	if base < mheap_.arena_start \|\| base >= mheap_.arena_used {
				377	throw("heapBitsForSpan: base out of range")
				378	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	379	return heapBitsForAddr(base)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	380	}
				381
				382	// heapBitsForObject returns the base address for the heap object
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	383	// containing the address p, the heapBits for base,
				384	// the object's span, and of the index of the object in s.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	385	// If p does not point into a heap object,
				386	// return base == 0
				387	// otherwise return the base of the object.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	388	//
				389	// refBase and refOff optionally give the base address of the object
				390	// in which the pointer p was found and the byte offset at which it
				391	// was found. These are used for error reporting.
				392	func heapBitsForObject(p, refBase, refOff uintptr) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	393	arenaStart := mheap_.arena_start
				394	if p < arenaStart \|\| p >= mheap_.arena_used {
				395	return
				396	}
				397	off := p - arenaStart
				398	idx := off >> _PageShift
				399	// p points into the heap, but possibly to the middle of an object.
				400	// Consult the span table to find the block beginning.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	401	s = mheap_.spans[idx]
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	402	if s == nil \|\| p < s.base() \|\| p >= s.limit \|\| s.state != mSpanInUse {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	403	if s == nil \|\| s.state == _MSpanStack {
				404	// If s is nil, the virtual address has never been part of the heap.
				405	// This pointer may be to some mmap'd region, so we allow it.
				406	// Pointers into stacks are also ok, the runtime manages these explicitly.
				407	return
				408	}
				409
				410	// The following ensures that we are rigorous about what data
				411	// structures hold valid pointers.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	412	if debug.invalidptr != 0 {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	413	// Typically this indicates an incorrect use
				414	// of unsafe or cgo to store a bad pointer in
				415	// the Go heap. It may also indicate a runtime
				416	// bug.
				417	//
				418	// TODO(austin): We could be more aggressive
				419	// and detect pointers to unallocated objects
				420	// in allocated spans.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	421	printlock()
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	422	print("runtime: pointer ", hex(p))
				423	if s.state != mSpanInUse {
				424	print(" to unallocated span")
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	425	} else {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	426	print(" to unused region of span")
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	427	}
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	428	print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	429	if refBase != 0 {
				430	print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
				431	gcDumpObject("object", refBase, refOff)
				432	}
				433	throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	434	}
				435	return
				436	}
				437	// If this span holds object of a power of 2 size, just mask off the bits to
				438	// the interior of the object. Otherwise use the size to get the base.
				439	if s.baseMask != 0 {
				440	// optimize for power of 2 sized objects.
				441	base = s.base()
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	442	base = base + (p-base)&uintptr(s.baseMask)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	443	objIndex = (base - s.base()) >> s.divShift
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	444	// base = p & s.baseMask is faster for small spans,
				445	// but doesn't work for large spans.
				446	// Overall, it's faster to use the more general computation above.
				447	} else {
				448	base = s.base()
				449	if p-base >= s.elemsize {
				450	// n := (p - base) / s.elemsize, using division by multiplication
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	451	objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	452	base += objIndex * s.elemsize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	453	}
				454	}
				455	// Now that we know the actual base, compute heapBits to return to caller.
				456	hbits = heapBitsForAddr(base)
				457	return
				458	}
				459
				460	// prefetch the bits.
				461	func (h heapBits) prefetch() {
				462	prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
				463	}
				464
				465	// next returns the heapBits describing the next pointer-sized word in memory.
				466	// That is, if h describes address p, h.next() describes p+ptrSize.
				467	// Note that next does not modify h. The caller must record the result.
				468	//
				469	// nosplit because it is used during write barriers and must not be preempted.
				470	//go:nosplit
				471	func (h heapBits) next() heapBits {
				472	if h.shift < 3*heapBitsShift {
				473	return heapBits{h.bitp, h.shift + heapBitsShift}
				474	}
				475	return heapBits{subtract1(h.bitp), 0}
				476	}
				477
				478	// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
				479	// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
				480	// h.forward(1) is equivalent to h.next(), just slower.
				481	// Note that forward does not modify h. The caller must record the result.
				482	// bits returns the heap bits for the current word.
				483	func (h heapBits) forward(n uintptr) heapBits {
				484	n += uintptr(h.shift) / heapBitsShift
				485	return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift}
				486	}
				487
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	488	// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	489	// The result includes in its higher bits the bits for subsequent words
				490	// described by the same bitmap byte.
				491	func (h heapBits) bits() uint32 {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	492	// The (shift & 31) eliminates a test and conditional branch
				493	// from the generated code.
				494	return uint32(*h.bitp) >> (h.shift & 31)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	495	}
				496
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	497	// morePointers returns true if this word and all remaining words in this object
				498	// are scalars.
				499	// h must not describe the second word of the object.
				500	func (h heapBits) morePointers() bool {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	501	return h.bits()&bitScan != 0
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	502	}
				503
				504	// isPointer reports whether the heap bits describe a pointer word.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	505	//
				506	// nosplit because it is used during write barriers and must not be preempted.
				507	//go:nosplit
				508	func (h heapBits) isPointer() bool {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	509	return h.bits()&bitPointer != 0
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	510	}
				511
				512	// hasPointers reports whether the given object has any pointers.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	513	// It must be told how large the object at h is for efficiency.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	514	// h must describe the initial word of the object.
				515	func (h heapBits) hasPointers(size uintptr) bool {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	516	if size == sys.PtrSize { // 1-word objects are always pointers
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	517	return true
				518	}
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	519	return (*h.bitp>>h.shift)&bitScan != 0
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	520	}
				521
				522	// isCheckmarked reports whether the heap bits have the checkmarked bit set.
				523	// It must be told how large the object at h is, because the encoding of the
				524	// checkmark bit varies by size.
				525	// h must describe the initial word of the object.
				526	func (h heapBits) isCheckmarked(size uintptr) bool {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	527	if size == sys.PtrSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	528	return (*h.bitp>>h.shift)&bitPointer != 0
				529	}
				530	// All multiword objects are 2-word aligned,
				531	// so we know that the initial word's 2-bit pair
				532	// and the second word's 2-bit pair are in the
				533	// same heap bitmap byte, *h.bitp.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	534	return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	535	}
				536
				537	// setCheckmarked sets the checkmarked bit.
				538	// It must be told how large the object at h is, because the encoding of the
				539	// checkmark bit varies by size.
				540	// h must describe the initial word of the object.
				541	func (h heapBits) setCheckmarked(size uintptr) {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	542	if size == sys.PtrSize {
				543	atomic.Or8(h.bitp, bitPointer<<h.shift)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	544	return
				545	}
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	546	atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	547	}
				548
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	549	// bulkBarrierPreWrite executes writebarrierptr_prewrite1
				550	// for every pointer slot in the memory range [src, src+size),
				551	// using pointer/scalar information from [dst, dst+size).
				552	// This executes the write barriers necessary before a memmove.
				553	// src, dst, and size must be pointer-aligned.
				554	// The range [dst, dst+size) must lie within a single object.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	555	//
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	556	// As a special case, src == 0 indicates that this is being used for a
				557	// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
				558	// barrier.
				559	//
				560	// Callers should call bulkBarrierPreWrite immediately before
				561	// calling memmove(dst, src, size). This function is marked nosplit
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	562	// to avoid being preempted; the GC must not stop the goroutine
				563	// between the memmove and the execution of the barriers.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	564	// The caller is also responsible for cgo pointer checks if this
				565	// may be writing Go pointers into non-Go memory.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	566	//
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	567	// The pointer bitmap is not maintained for allocations containing
				568	// no pointers at all; any caller of bulkBarrierPreWrite must first
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	569	// make sure the underlying allocation contains pointers, usually
				570	// by checking typ.kind&kindNoPointers.
				571	//
				572	//go:nosplit
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	573	func bulkBarrierPreWrite(dst, src, size uintptr) {
				574	if (dst\|src\|size)&(sys.PtrSize-1) != 0 {
				575	throw("bulkBarrierPreWrite: unaligned arguments")
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	576	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	577	if !writeBarrier.needed {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	578	return
				579	}
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	580	if !inheap(dst) {
				581	// If dst is on the stack and in a higher frame than the
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	582	// caller, we either need to execute write barriers on
				583	// it (which is what happens for normal stack writes
				584	// through pointers to higher frames), or we need to
				585	// force the mark termination stack scan to scan the
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	586	// frame containing dst.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	587	//
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	588	// Executing write barriers on dst is complicated in the
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	589	// general case because we either need to unwind the
				590	// stack to get the stack map, or we need the type's
				591	// bitmap, which may be a GC program.
				592	//
				593	// Hence, we opt for forcing the re-scan to scan the
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	594	// frame containing dst, which we can do by simply
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	595	// unwinding the stack barriers between the current SP
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	596	// and dst's frame.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	597	gp := getg().m.curg
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	598	if gp != nil && gp.stack.lo <= dst && dst < gp.stack.hi {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	599	// Run on the system stack to give it more
				600	// stack space.
				601	systemstack(func() {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	602	gcUnwindBarriers(gp, dst)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	603	})
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	604	return
				605	}
				606
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	607	// If dst is a global, use the data or BSS bitmaps to
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	608	// execute write barriers.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	609	for _, datap := range activeModules() {
				610	if datap.data <= dst && dst < datap.edata {
				611	bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	612	return
				613	}
				614	}
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	615	for _, datap := range activeModules() {
				616	if datap.bss <= dst && dst < datap.ebss {
				617	bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	618	return
				619	}
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	620	}
				621	return
				622	}
				623
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	624	h := heapBitsForAddr(dst)
				625	if src == 0 {
				626	for i := uintptr(0); i < size; i += sys.PtrSize {
				627	if h.isPointer() {
				628	dstx := (*uintptr)(unsafe.Pointer(dst + i))
				629	writebarrierptr_prewrite1(dstx, 0)
				630	}
				631	h = h.next()
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	632	}
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	633	} else {
				634	for i := uintptr(0); i < size; i += sys.PtrSize {
				635	if h.isPointer() {
				636	dstx := (*uintptr)(unsafe.Pointer(dst + i))
				637	srcx := (*uintptr)(unsafe.Pointer(src + i))
				638	writebarrierptr_prewrite1(dstx, *srcx)
				639	}
				640	h = h.next()
				641	}
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	642	}
				643	}
				644
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	645	// bulkBarrierBitmap executes write barriers for copying from [src,
				646	// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
				647	// assumed to start maskOffset bytes into the data covered by the
				648	// bitmap in bits (which may not be a multiple of 8).
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	649	//
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	650	// This is used by bulkBarrierPreWrite for writes to data and BSS.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	651	//
				652	//go:nosplit
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	653	func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	654	word := maskOffset / sys.PtrSize
				655	bits = addb(bits, word/8)
				656	mask := uint8(1) << (word % 8)
				657
				658	for i := uintptr(0); i < size; i += sys.PtrSize {
				659	if mask == 0 {
				660	bits = addb(bits, 1)
				661	if *bits == 0 {
				662	// Skip 8 words.
				663	i += 7 * sys.PtrSize
				664	continue
				665	}
				666	mask = 1
				667	}
				668	if *bits&mask != 0 {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	669	dstx := (*uintptr)(unsafe.Pointer(dst + i))
				670	if src == 0 {
				671	writebarrierptr_prewrite1(dstx, 0)
				672	} else {
				673	srcx := (*uintptr)(unsafe.Pointer(src + i))
				674	writebarrierptr_prewrite1(dstx, *srcx)
				675	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	676	}
				677	mask <<= 1
				678	}
				679	}
				680
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	681	// typeBitsBulkBarrier executes writebarrierptr_prewrite for every
				682	// pointer that would be copied from [src, src+size) to [dst,
				683	// dst+size) by a memmove using the type bitmap to locate those
				684	// pointer slots.
				685	//
				686	// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
				687	// dst, src, and size must be pointer-aligned.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	688	// The type typ must have a plain bitmap, not a GC program.
				689	// The only use of this function is in channel sends, and the
				690	// 64 kB channel element limit takes care of this for us.
				691	//
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	692	// Must not be preempted because it typically runs right before memmove,
				693	// and the GC must observe them as an atomic action.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	694	//
				695	//go:nosplit
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	696	func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	697	if typ == nil {
				698	throw("runtime: typeBitsBulkBarrier without type")
				699	}
				700	if typ.size != size {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	701	println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	702	throw("runtime: invalid typeBitsBulkBarrier")
				703	}
				704	if typ.kind&kindGCProg != 0 {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	705	println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	706	throw("runtime: invalid typeBitsBulkBarrier")
				707	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	708	if !writeBarrier.needed {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	709	return
				710	}
				711	ptrmask := typ.gcdata
				712	var bits uint32
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	713	for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
				714	if i&(sys.PtrSize*8-1) == 0 {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	715	bits = uint32(*ptrmask)
				716	ptrmask = addb(ptrmask, 1)
				717	} else {
				718	bits = bits >> 1
				719	}
				720	if bits&1 != 0 {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	721	dstx := (*uintptr)(unsafe.Pointer(dst + i))
				722	srcx := (*uintptr)(unsafe.Pointer(src + i))
				723	writebarrierptr_prewrite(dstx, *srcx)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	724	}
				725	}
				726	}
				727
				728	// The methods operating on spans all require that h has been returned
				729	// by heapBitsForSpan and that size, n, total are the span layout description
				730	// returned by the mspan's layout method.
				731	// If total > size*n, it means that there is extra leftover memory in the span,
				732	// usually due to rounding.
				733	//
				734	// TODO(rsc): Perhaps introduce a different heapBitsSpan type.
				735
				736	// initSpan initializes the heap bitmap for a span.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	737	// It clears all checkmark bits.
				738	// If this is a span of pointer-sized objects, it initializes all
				739	// words to pointer/scan.
				740	// Otherwise, it initializes all words to scalar/dead.
				741	func (h heapBits) initSpan(s *mspan) {
				742	size, n, total := s.layout()
				743
				744	// Init the markbit structures
				745	s.freeindex = 0
				746	s.allocCache = ^uint64(0) // all 1s indicating all free.
				747	s.nelems = n
				748	s.allocBits = nil
				749	s.gcmarkBits = nil
				750	s.gcmarkBits = newMarkBits(s.nelems)
				751	s.allocBits = newAllocBits(s.nelems)
				752
				753	// Clear bits corresponding to objects.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	754	if total%heapBitmapScale != 0 {
				755	throw("initSpan: unaligned length")
				756	}
				757	nbyte := total / heapBitmapScale
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	758	if sys.PtrSize == 8 && size == sys.PtrSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	759	end := h.bitp
				760	bitp := subtractb(end, nbyte-1)
				761	for {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	762	*bitp = bitPointerAll \| bitScanAll
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	763	if bitp == end {
				764	break
				765	}
				766	bitp = add1(bitp)
				767	}
				768	return
				769	}
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	770	memclrNoHeapPointers(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	771	}
				772
				773	// initCheckmarkSpan initializes a span for being checkmarked.
				774	// It clears the checkmark bits, which are set to 1 in normal operation.
				775	func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
				776	// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	777	if sys.PtrSize == 8 && size == sys.PtrSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	778	// Checkmark bit is type bit, bottom bit of every 2-bit entry.
				779	// Only possible on 64-bit system, since minimum size is 8.
				780	// Must clear type bit (checkmark bit) of every word.
				781	// The type bit is the lower of every two-bit pair.
				782	bitp := h.bitp
				783	for i := uintptr(0); i < n; i += 4 {
				784	*bitp &^= bitPointerAll
				785	bitp = subtract1(bitp)
				786	}
				787	return
				788	}
				789	for i := uintptr(0); i < n; i++ {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	790	*h.bitp &^= bitScan << (heapBitsShift + h.shift)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	791	h = h.forward(size / sys.PtrSize)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	792	}
				793	}
				794
				795	// clearCheckmarkSpan undoes all the checkmarking in a span.
				796	// The actual checkmark bits are ignored, so the only work to do
				797	// is to fix the pointer bits. (Pointer bits are ignored by scanobject
				798	// but consulted by typedmemmove.)
				799	func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
				800	// The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	801	if sys.PtrSize == 8 && size == sys.PtrSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	802	// Checkmark bit is type bit, bottom bit of every 2-bit entry.
				803	// Only possible on 64-bit system, since minimum size is 8.
				804	// Must clear type bit (checkmark bit) of every word.
				805	// The type bit is the lower of every two-bit pair.
				806	bitp := h.bitp
				807	for i := uintptr(0); i < n; i += 4 {
				808	*bitp \|= bitPointerAll
				809	bitp = subtract1(bitp)
				810	}
				811	}
				812	}
				813
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	814	// oneBitCount is indexed by byte and produces the
				815	// number of 1 bits in that byte. For example 128 has 1 bit set
				816	// and oneBitCount[128] will holds 1.
				817	var oneBitCount = [256]uint8{
				818	0, 1, 1, 2, 1, 2, 2, 3,
				819	1, 2, 2, 3, 2, 3, 3, 4,
				820	1, 2, 2, 3, 2, 3, 3, 4,
				821	2, 3, 3, 4, 3, 4, 4, 5,
				822	1, 2, 2, 3, 2, 3, 3, 4,
				823	2, 3, 3, 4, 3, 4, 4, 5,
				824	2, 3, 3, 4, 3, 4, 4, 5,
				825	3, 4, 4, 5, 4, 5, 5, 6,
				826	1, 2, 2, 3, 2, 3, 3, 4,
				827	2, 3, 3, 4, 3, 4, 4, 5,
				828	2, 3, 3, 4, 3, 4, 4, 5,
				829	3, 4, 4, 5, 4, 5, 5, 6,
				830	2, 3, 3, 4, 3, 4, 4, 5,
				831	3, 4, 4, 5, 4, 5, 5, 6,
				832	3, 4, 4, 5, 4, 5, 5, 6,
				833	4, 5, 5, 6, 5, 6, 6, 7,
				834	1, 2, 2, 3, 2, 3, 3, 4,
				835	2, 3, 3, 4, 3, 4, 4, 5,
				836	2, 3, 3, 4, 3, 4, 4, 5,
				837	3, 4, 4, 5, 4, 5, 5, 6,
				838	2, 3, 3, 4, 3, 4, 4, 5,
				839	3, 4, 4, 5, 4, 5, 5, 6,
				840	3, 4, 4, 5, 4, 5, 5, 6,
				841	4, 5, 5, 6, 5, 6, 6, 7,
				842	2, 3, 3, 4, 3, 4, 4, 5,
				843	3, 4, 4, 5, 4, 5, 5, 6,
				844	3, 4, 4, 5, 4, 5, 5, 6,
				845	4, 5, 5, 6, 5, 6, 6, 7,
				846	3, 4, 4, 5, 4, 5, 5, 6,
				847	4, 5, 5, 6, 5, 6, 6, 7,
				848	4, 5, 5, 6, 5, 6, 6, 7,
				849	5, 6, 6, 7, 6, 7, 7, 8}
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	850
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	851	// countFree runs through the mark bits in a span and counts the number of free objects
				852	// in the span.
				853	// TODO:(rlh) Use popcount intrinsic.
				854	func (s *mspan) countFree() int {
				855	count := 0
				856	maxIndex := s.nelems / 8
				857	for i := uintptr(0); i < maxIndex; i++ {
				858	mrkBits := *addb(s.gcmarkBits, i)
				859	count += int(oneBitCount[mrkBits])
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	860	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	861	if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
				862	mrkBits := *addb(s.gcmarkBits, maxIndex)
				863	mask := uint8((1 << bitsInLastByte) - 1)
				864	bits := mrkBits & mask
				865	count += int(oneBitCount[bits])
				866	}
				867	return int(s.nelems) - count
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	868	}
				869
				870	// heapBitsSetType records that the new allocation [x, x+size)
				871	// holds in [x, x+dataSize) one or more values of type typ.
				872	// (The number of values is given by dataSize / typ.size.)
				873	// If dataSize < size, the fragment [x+dataSize, x+size) is
				874	// recorded as non-pointer data.
				875	// It is known that the type has pointers somewhere;
				876	// malloc does not call heapBitsSetType when there are no pointers,
				877	// because all free objects are marked as noscan during
				878	// heapBitsSweepSpan.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	879	//
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	880	// There can only be one allocation from a given span active at a time,
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	881	// and the bitmap for a span always falls on byte boundaries,
				882	// so there are no write-write races for access to the heap bitmap.
				883	// Hence, heapBitsSetType can access the bitmap without atomics.
				884	//
				885	// There can be read-write races between heapBitsSetType and things
				886	// that read the heap bitmap like scanobject. However, since
				887	// heapBitsSetType is only used for objects that have not yet been
				888	// made reachable, readers will ignore bits being modified by this
				889	// function. This does mean this function cannot transiently modify
				890	// bits that belong to neighboring objects. Also, on weakly-ordered
				891	// machines, callers must execute a store/store (publication) barrier
				892	// between calling this function and making the object reachable.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	893	func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
				894	const doubleCheck = false // slow but helpful; enable to test modifications to this code
				895
				896	// dataSize is always size rounded up to the next malloc size class,
				897	// except in the case of allocating a defer block, in which case
				898	// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
				899	// arbitrarily larger.
				900	//
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	901	// The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	902	// assume that dataSize == size without checking it explicitly.
				903
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	904	if sys.PtrSize == 8 && size == sys.PtrSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	905	// It's one word and it has pointers, it must be a pointer.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	906	// Since all allocated one-word objects are pointers
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	907	// (non-pointers are aggregated into tinySize allocations),
				908	// initSpan sets the pointer bits for us. Nothing to do here.
				909	if doubleCheck {
				910	h := heapBitsForAddr(x)
				911	if !h.isPointer() {
				912	throw("heapBitsSetType: pointer bit missing")
				913	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	914	if !h.morePointers() {
				915	throw("heapBitsSetType: scan bit missing")
				916	}
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	917	}
				918	return
				919	}
				920
				921	h := heapBitsForAddr(x)
				922	ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)
				923
				924	// Heap bitmap bits for 2-word object are only 4 bits,
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	925	// so also shared with objects next to it.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	926	// This is called out as a special case primarily for 32-bit systems,
				927	// so that on 32-bit systems the code below can assume all objects
				928	// are 4-word aligned (because they're all 16-byte aligned).
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	929	if size == 2*sys.PtrSize {
				930	if typ.size == sys.PtrSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	931	// We're allocating a block big enough to hold two pointers.
				932	// On 64-bit, that means the actual object must be two pointers,
				933	// or else we'd have used the one-pointer-sized block.
				934	// On 32-bit, however, this is the 8-byte block, the smallest one.
				935	// So it could be that we're allocating one pointer and this was
				936	// just the smallest block available. Distinguish by checking dataSize.
				937	// (In general the number of instances of typ being allocated is
				938	// dataSize/typ.size.)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	939	if sys.PtrSize == 4 && dataSize == sys.PtrSize {
				940	// 1 pointer object. On 32-bit machines clear the bit for the
				941	// unused second word.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	942	*h.bitp &^= (bitPointer \| bitScan \| ((bitPointer \| bitScan) << heapBitsShift)) << h.shift
				943	*h.bitp \|= (bitPointer \| bitScan) << h.shift
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	944	} else {
				945	// 2-element slice of pointer.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	946	*h.bitp \|= (bitPointer \| bitScan \| bitPointer<<heapBitsShift) << h.shift
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	947	}
				948	return
				949	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	950	// Otherwise typ.size must be 2*sys.PtrSize,
				951	// and typ.kind&kindGCProg == 0.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	952	if doubleCheck {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	953	if typ.size != 2*sys.PtrSize \|\| typ.kind&kindGCProg != 0 {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	954	print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
				955	throw("heapBitsSetType")
				956	}
				957	}
				958	b := uint32(*ptrmask)
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	959	hb := (b & 3) \| bitScan
				960	// bitPointer == 1, bitScan is 1 << 4, heapBitsShift is 1.
				961	// 110011 is shifted h.shift and complemented.
				962	// This clears out the bits that are about to be
				963	// ored into *h.hbitp in the next instructions.
				964	*h.bitp &^= (bitPointer \| bitScan \| ((bitPointer \| bitScan) << heapBitsShift)) << h.shift
				965	*h.bitp \|= uint8(hb << h.shift)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	966	return
				967	}
				968
				969	// Copy from 1-bit ptrmask into 2-bit bitmap.
				970	// The basic approach is to use a single uintptr as a bit buffer,
				971	// alternating between reloading the buffer and writing bitmap bytes.
				972	// In general, one load can supply two bitmap byte writes.
				973	// This is a lot of lines of code, but it compiles into relatively few
				974	// machine instructions.
				975
				976	var (
				977	// Ptrmask input.
				978	p *byte // last ptrmask byte read
				979	b uintptr // ptrmask bits already loaded
				980	nb uintptr // number of bits in b at next read
				981	endp *byte // final ptrmask byte to read (then repeat)
				982	endnb uintptr // number of valid bits in *endp
				983	pbits uintptr // alternate source of bits
				984
				985	// Heap bitmap output.
				986	w uintptr // words processed
				987	nw uintptr // number of words to process
				988	hbitp *byte // next heap bitmap byte to write
				989	hb uintptr // bits being prepared for *hbitp
				990	)
				991
				992	hbitp = h.bitp
				993
				994	// Handle GC program. Delayed until this part of the code
				995	// so that we can use the same double-checking mechanism
				996	// as the 1-bit case. Nothing above could have encountered
				997	// GC programs: the cases were all too small.
				998	if typ.kind&kindGCProg != 0 {
				999	heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
				1000	if doubleCheck {
				1001	// Double-check the heap bits written by GC program
				1002	// by running the GC program to create a 1-bit pointer mask
				1003	// and then jumping to the double-check code below.
				1004	// This doesn't catch bugs shared between the 1-bit and 4-bit
				1005	// GC program execution, but it does catch mistakes specific
				1006	// to just one of those and bugs in heapBitsSetTypeGCProg's
				1007	// implementation of arrays.
				1008	lock(&debugPtrmask.lock)
				1009	if debugPtrmask.data == nil {
				1010	debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
				1011	}
				1012	ptrmask = debugPtrmask.data
				1013	runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
				1014	goto Phase4
				1015	}
				1016	return
				1017	}
				1018
				1019	// Note about sizes:
				1020	//
				1021	// typ.size is the number of words in the object,
				1022	// and typ.ptrdata is the number of words in the prefix
				1023	// of the object that contains pointers. That is, the final
				1024	// typ.size - typ.ptrdata words contain no pointers.
				1025	// This allows optimization of a common pattern where
				1026	// an object has a small header followed by a large scalar
				1027	// buffer. If we know the pointers are over, we don't have
				1028	// to scan the buffer's heap bitmap at all.
				1029	// The 1-bit ptrmasks are sized to contain only bits for
				1030	// the typ.ptrdata prefix, zero padded out to a full byte
				1031	// of bitmap. This code sets nw (below) so that heap bitmap
				1032	// bits are only written for the typ.ptrdata prefix; if there is
				1033	// more room in the allocated object, the next heap bitmap
				1034	// entry is a 00, indicating that there are no more pointers
				1035	// to scan. So only the ptrmask for the ptrdata bytes is needed.
				1036	//
				1037	// Replicated copies are not as nice: if there is an array of
				1038	// objects with scalar tails, all but the last tail does have to
				1039	// be initialized, because there is no way to say "skip forward".
				1040	// However, because of the possibility of a repeated type with
				1041	// size not a multiple of 4 pointers (one heap bitmap byte),
				1042	// the code already must handle the last ptrmask byte specially
				1043	// by treating it as containing only the bits for endnb pointers,
				1044	// where endnb <= 4. We represent large scalar tails that must
				1045	// be expanded in the replication by setting endnb larger than 4.
				1046	// This will have the effect of reading many bits out of b,
				1047	// but once the real bits are shifted out, b will supply as many
				1048	// zero bits as we try to read, which is exactly what we need.
				1049
				1050	p = ptrmask
				1051	if typ.size < dataSize {
				1052	// Filling in bits for an array of typ.
				1053	// Set up for repetition of ptrmask during main loop.
				1054	// Note that ptrmask describes only a prefix of
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1055	const maxBits = sys.PtrSize*8 - 7
				1056	if typ.ptrdata/sys.PtrSize <= maxBits {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1057	// Entire ptrmask fits in uintptr with room for a byte fragment.
				1058	// Load into pbits and never read from ptrmask again.
				1059	// This is especially important when the ptrmask has
				1060	// fewer than 8 bits in it; otherwise the reload in the middle
				1061	// of the Phase 2 loop would itself need to loop to gather
				1062	// at least 8 bits.
				1063
				1064	// Accumulate ptrmask into b.
				1065	// ptrmask is sized to describe only typ.ptrdata, but we record
				1066	// it as describing typ.size bytes, since all the high bits are zero.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1067	nb = typ.ptrdata / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1068	for i := uintptr(0); i < nb; i += 8 {
				1069	b \|= uintptr(*p) << i
				1070	p = add1(p)
				1071	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1072	nb = typ.size / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1073
				1074	// Replicate ptrmask to fill entire pbits uintptr.
				1075	// Doubling and truncating is fewer steps than
				1076	// iterating by nb each time. (nb could be 1.)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1077	// Since we loaded typ.ptrdata/sys.PtrSize bits
				1078	// but are pretending to have typ.size/sys.PtrSize,
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1079	// there might be no replication necessary/possible.
				1080	pbits = b
				1081	endnb = nb
				1082	if nb+nb <= maxBits {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1083	for endnb <= sys.PtrSize*8 {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1084	pbits \|= pbits << endnb
				1085	endnb += endnb
				1086	}
				1087	// Truncate to a multiple of original ptrmask.
				1088	endnb = maxBits / nb * nb
				1089	pbits &= 1<<endnb - 1
				1090	b = pbits
				1091	nb = endnb
				1092	}
				1093
				1094	// Clear p and endp as sentinel for using pbits.
				1095	// Checked during Phase 2 loop.
				1096	p = nil
				1097	endp = nil
				1098	} else {
				1099	// Ptrmask is larger. Read it multiple times.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1100	n := (typ.ptrdata/sys.PtrSize+7)/8 - 1
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1101	endp = addb(ptrmask, n)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1102	endnb = typ.size/sys.PtrSize - n*8
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1103	}
				1104	}
				1105	if p != nil {
				1106	b = uintptr(*p)
				1107	p = add1(p)
				1108	nb = 8
				1109	}
				1110
				1111	if typ.size == dataSize {
				1112	// Single entry: can stop once we reach the non-pointer data.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1113	nw = typ.ptrdata / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1114	} else {
				1115	// Repeated instances of typ in an array.
				1116	// Have to process first N-1 entries in full, but can stop
				1117	// once we reach the non-pointer data in the final entry.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1118	nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1119	}
				1120	if nw == 0 {
				1121	// No pointers! Caller was supposed to check.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1122	println("runtime: invalid type ", typ.string())
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1123	throw("heapBitsSetType: called with non-pointer type")
				1124	return
				1125	}
				1126	if nw < 2 {
				1127	// Must write at least 2 words, because the "no scan"
				1128	// encoding doesn't take effect until the third word.
				1129	nw = 2
				1130	}
				1131
				1132	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4).
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1133	// The leading byte is special because it contains the bits for word 1,
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1134	// which does not have the scan bit set.
				1135	// The leading half-byte is special because it's a half a byte,
				1136	// so we have to be careful with the bits already there.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1137	switch {
				1138	default:
				1139	throw("heapBitsSetType: unexpected shift")
				1140
				1141	case h.shift == 0:
				1142	// Ptrmask and heap bitmap are aligned.
				1143	// Handle first byte of bitmap specially.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1144	//
				1145	// The first byte we write out covers the first four
				1146	// words of the object. The scan/dead bit on the first
				1147	// word must be set to scan since there are pointers
				1148	// somewhere in the object. The scan/dead bit on the
				1149	// second word is the checkmark, so we don't set it.
				1150	// In all following words, we set the scan/dead
				1151	// appropriately to indicate that the object contains
				1152	// to the next 2-bit entry in the bitmap.
				1153	//
				1154	// TODO: It doesn't matter if we set the checkmark, so
				1155	// maybe this case isn't needed any more.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1156	hb = b & bitPointerAll
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1157	hb \|= bitScan \| bitScan<<(2heapBitsShift) \| bitScan<<(3heapBitsShift)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1158	if w += 4; w >= nw {
				1159	goto Phase3
				1160	}
				1161	*hbitp = uint8(hb)
				1162	hbitp = subtract1(hbitp)
				1163	b >>= 4
				1164	nb -= 4
				1165
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1166	case sys.PtrSize == 8 && h.shift == 2:
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1167	// Ptrmask and heap bitmap are misaligned.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1168	// The bits for the first two words are in a byte shared
				1169	// with another object, so we must be careful with the bits
				1170	// already there.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1171	// We took care of 1-word and 2-word objects above,
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1172	// so this is at least a 6-word object.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1173	hb = (b & (bitPointer \| bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
				1174	// This is not noscan, so set the scan bit in the
				1175	// first word.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1176	hb \|= bitScan << (2 * heapBitsShift)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1177	b >>= 2
				1178	nb -= 2
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1179	// Note: no bitScan for second word because that's
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1180	// the checkmark.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1181	hbitp &^= uint8((bitPointer \| bitScan \| (bitPointer << heapBitsShift)) << (2 heapBitsShift))
				1182	*hbitp \|= uint8(hb)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1183	hbitp = subtract1(hbitp)
				1184	if w += 2; w >= nw {
				1185	// We know that there is more data, because we handled 2-word objects above.
				1186	// This must be at least a 6-word object. If we're out of pointer words,
				1187	// mark no scan in next bitmap byte and finish.
				1188	hb = 0
				1189	w += 4
				1190	goto Phase3
				1191	}
				1192	}
				1193
				1194	// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
				1195	// The loop computes the bits for that last write but does not execute the write;
				1196	// it leaves the bits in hb for processing by phase 3.
				1197	// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
				1198	// use in the first half of the loop right now, and then we only adjust nb explicitly
				1199	// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
				1200	nb -= 4
				1201	for {
				1202	// Emit bitmap byte.
				1203	// b has at least nb+4 bits, with one exception:
				1204	// if w+4 >= nw, then b has only nw-w bits,
				1205	// but we'll stop at the break and then truncate
				1206	// appropriately in Phase 3.
				1207	hb = b & bitPointerAll
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1208	hb \|= bitScanAll
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1209	if w += 4; w >= nw {
				1210	break
				1211	}
				1212	*hbitp = uint8(hb)
				1213	hbitp = subtract1(hbitp)
				1214	b >>= 4
				1215
				1216	// Load more bits. b has nb right now.
				1217	if p != endp {
				1218	// Fast path: keep reading from ptrmask.
				1219	// nb unmodified: we just loaded 8 bits,
				1220	// and the next iteration will consume 8 bits,
				1221	// leaving us with the same nb the next time we're here.
				1222	if nb < 8 {
				1223	b \|= uintptr(*p) << nb
				1224	p = add1(p)
				1225	} else {
				1226	// Reduce the number of bits in b.
				1227	// This is important if we skipped
				1228	// over a scalar tail, since nb could
				1229	// be larger than the bit width of b.
				1230	nb -= 8
				1231	}
				1232	} else if p == nil {
				1233	// Almost as fast path: track bit count and refill from pbits.
				1234	// For short repetitions.
				1235	if nb < 8 {
				1236	b \|= pbits << nb
				1237	nb += endnb
				1238	}
				1239	nb -= 8 // for next iteration
				1240	} else {
				1241	// Slow path: reached end of ptrmask.
				1242	// Process final partial byte and rewind to start.
				1243	b \|= uintptr(*p) << nb
				1244	nb += endnb
				1245	if nb < 8 {
				1246	b \|= uintptr(*ptrmask) << nb
				1247	p = add1(ptrmask)
				1248	} else {
				1249	nb -= 8
				1250	p = ptrmask
				1251	}
				1252	}
				1253
				1254	// Emit bitmap byte.
				1255	hb = b & bitPointerAll
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1256	hb \|= bitScanAll
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1257	if w += 4; w >= nw {
				1258	break
				1259	}
				1260	*hbitp = uint8(hb)
				1261	hbitp = subtract1(hbitp)
				1262	b >>= 4
				1263	}
				1264
				1265	Phase3:
				1266	// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
				1267	if w > nw {
				1268	// Counting the 4 entries in hb not yet written to memory,
				1269	// there are more entries than possible pointer slots.
				1270	// Discard the excess entries (can't be more than 3).
				1271	mask := uintptr(1)<<(4-(w-nw)) - 1
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1272	hb &= mask \| mask<<4 // apply mask to both pointer bits and scan bits
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1273	}
				1274
				1275	// Change nw from counting possibly-pointer words to total words in allocation.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1276	nw = size / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1277
				1278	// Write whole bitmap bytes.
				1279	// The first is hb, the rest are zero.
				1280	if w <= nw {
				1281	*hbitp = uint8(hb)
				1282	hbitp = subtract1(hbitp)
				1283	hb = 0 // for possible final half-byte below
				1284	for w += 4; w <= nw; w += 4 {
				1285	*hbitp = 0
				1286	hbitp = subtract1(hbitp)
				1287	}
				1288	}
				1289
				1290	// Write final partial bitmap byte if any.
				1291	// We know w > nw, or else we'd still be in the loop above.
				1292	// It can be bigger only due to the 4 entries in hb that it counts.
				1293	// If w == nw+4 then there's nothing left to do: we wrote all nw entries
				1294	// and can discard the 4 sitting in hb.
				1295	// But if w == nw+2, we need to write first two in hb.
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1296	// The byte is shared with the next object, so be careful with
				1297	// existing bits.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1298	if w == nw+2 {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1299	hbitp = hbitp&^(bitPointer\|bitScan\|(bitPointer\|bitScan)<<heapBitsShift) \| uint8(hb)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1300	}
				1301
				1302	Phase4:
				1303	// Phase 4: all done, but perhaps double check.
				1304	if doubleCheck {
				1305	end := heapBitsForAddr(x + size)
				1306	if typ.kind&kindGCProg == 0 && (hbitp != end.bitp \|\| (w == nw+2) != (end.shift == 2)) {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1307	println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1308	print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
				1309	print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
				1310	h0 := heapBitsForAddr(x)
				1311	print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
				1312	print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
				1313	throw("bad heapBitsSetType")
				1314	}
				1315
				1316	// Double-check that bits to be written were written correctly.
				1317	// Does not check that other bits were not written, unfortunately.
				1318	h := heapBitsForAddr(x)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1319	nptr := typ.ptrdata / sys.PtrSize
				1320	ndata := typ.size / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1321	count := dataSize / typ.size
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1322	totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize
				1323	for i := uintptr(0); i < size/sys.PtrSize; i++ {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1324	j := i % ndata
				1325	var have, want uint8
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1326	have = (*h.bitp >> h.shift) & (bitPointer \| bitScan)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1327	if i >= totalptr {
				1328	want = 0 // deadmarker
				1329	if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1330	want = bitScan
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1331	}
				1332	} else {
				1333	if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
				1334	want \|= bitPointer
				1335	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1336	if i != 1 {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1337	want \|= bitScan
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1338	} else {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1339	have &^= bitScan
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1340	}
				1341	}
				1342	if have != want {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1343	println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1344	print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
				1345	print("kindGCProg=", typ.kind&kindGCProg != 0, "\n")
				1346	print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
				1347	h0 := heapBitsForAddr(x)
				1348	print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
				1349	print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " h.bitp=", hex(h.bitp), "\n")
				1350	print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1351	println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1352	if typ.kind&kindGCProg != 0 {
				1353	println("GC program:")
				1354	dumpGCProg(addb(typ.gcdata, 4))
				1355	}
				1356	throw("bad heapBitsSetType")
				1357	}
				1358	h = h.next()
				1359	}
				1360	if ptrmask == debugPtrmask.data {
				1361	unlock(&debugPtrmask.lock)
				1362	}
				1363	}
				1364	}
				1365
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1366	// heapBitsSetTypeNoScan marks x as noscan by setting the first word
				1367	// of x in the heap bitmap to scalar/dead.
				1368	func heapBitsSetTypeNoScan(x uintptr) {
				1369	h := heapBitsForAddr(uintptr(x))
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1370	*h.bitp &^= (bitPointer \| bitScan) << h.shift
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1371	}
				1372
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1373	var debugPtrmask struct {
				1374	lock mutex
				1375	data *byte
				1376	}
				1377
				1378	// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
				1379	// progSize is the size of the memory described by the program.
				1380	// elemSize is the size of the element that the GC program describes (a prefix of).
				1381	// dataSize is the total size of the intended data, a multiple of elemSize.
				1382	// allocSize is the total size of the allocated memory.
				1383	//
				1384	// GC programs are only used for large allocations.
				1385	// heapBitsSetType requires that allocSize is a multiple of 4 words,
				1386	// so that the relevant bitmap bytes are not shared with surrounding
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1387	// objects.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1388	func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1389	if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1390	// Alignment will be wrong.
				1391	throw("heapBitsSetTypeGCProg: small allocation")
				1392	}
				1393	var totalBits uintptr
				1394	if elemSize == dataSize {
				1395	totalBits = runGCProg(prog, nil, h.bitp, 2)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1396	if totalBits*sys.PtrSize != progSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1397	println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
				1398	throw("heapBitsSetTypeGCProg: unexpected bit count")
				1399	}
				1400	} else {
				1401	count := dataSize / elemSize
				1402
				1403	// Piece together program trailer to run after prog that does:
				1404	// literal(0)
				1405	// repeat(1, elemSize-progSize-1) // zeros to fill element size
				1406	// repeat(elemSize, count-1) // repeat that element for count
				1407	// This zero-pads the data remaining in the first element and then
				1408	// repeats that first element to fill the array.
				1409	var trailer [40]byte // 3 varints (max 10 each) + some bytes
				1410	i := 0
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1411	if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1412	// literal(0)
				1413	trailer[i] = 0x01
				1414	i++
				1415	trailer[i] = 0
				1416	i++
				1417	if n > 1 {
				1418	// repeat(1, n-1)
				1419	trailer[i] = 0x81
				1420	i++
				1421	n--
				1422	for ; n >= 0x80; n >>= 7 {
				1423	trailer[i] = byte(n \| 0x80)
				1424	i++
				1425	}
				1426	trailer[i] = byte(n)
				1427	i++
				1428	}
				1429	}
				1430	// repeat(elemSize/ptrSize, count-1)
				1431	trailer[i] = 0x80
				1432	i++
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1433	n := elemSize / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1434	for ; n >= 0x80; n >>= 7 {
				1435	trailer[i] = byte(n \| 0x80)
				1436	i++
				1437	}
				1438	trailer[i] = byte(n)
				1439	i++
				1440	n = count - 1
				1441	for ; n >= 0x80; n >>= 7 {
				1442	trailer[i] = byte(n \| 0x80)
				1443	i++
				1444	}
				1445	trailer[i] = byte(n)
				1446	i++
				1447	trailer[i] = 0
				1448	i++
				1449
				1450	runGCProg(prog, &trailer[0], h.bitp, 2)
				1451
				1452	// Even though we filled in the full array just now,
				1453	// record that we only filled in up to the ptrdata of the
				1454	// last element. This will cause the code below to
				1455	// memclr the dead section of the final array element,
				1456	// so that scanobject can stop early in the final element.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1457	totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1458	}
				1459	endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4))
				1460	endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale))
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1461	memclrNoHeapPointers(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc))
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1462	}
				1463
				1464	// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
				1465	// size the size of the region described by prog, in bytes.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1466	// The resulting bitvector will have no more than size/sys.PtrSize bits.
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1467	func progToPointerMask(prog *byte, size uintptr) bitvector {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1468	n := (size/sys.PtrSize + 7) / 8
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1469	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
				1470	x[len(x)-1] = 0xa1 // overflow check sentinel
				1471	n = runGCProg(prog, nil, &x[0], 1)
				1472	if x[len(x)-1] != 0xa1 {
				1473	throw("progToPointerMask: overflow")
				1474	}
				1475	return bitvector{int32(n), &x[0]}
				1476	}
				1477
				1478	// Packed GC pointer bitmaps, aka GC programs.
				1479	//
				1480	// For large types containing arrays, the type information has a
				1481	// natural repetition that can be encoded to save space in the
				1482	// binary and in the memory representation of the type information.
				1483	//
				1484	// The encoding is a simple Lempel-Ziv style bytecode machine
				1485	// with the following instructions:
				1486	//
				1487	// 00000000: stop
				1488	// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
				1489	// 10000000 n c: repeat the previous n bits c times; n, c are varints
				1490	// 1nnnnnnn c: repeat the previous n bits c times; c is a varint
				1491
				1492	// runGCProg executes the GC program prog, and then trailer if non-nil,
				1493	// writing to dst with entries of the given size.
				1494	// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
				1495	// If size == 2, dst is the 2-bit heap bitmap, and writes move backward
				1496	// starting at dst (because the heap bitmap does). In this case, the caller guarantees
				1497	// that only whole bytes in dst need to be written.
				1498	//
				1499	// runGCProg returns the number of 1- or 2-bit entries written to memory.
				1500	func runGCProg(prog, trailer, dst *byte, size int) uintptr {
				1501	dstStart := dst
				1502
				1503	// Bits waiting to be written to memory.
				1504	var bits uintptr
				1505	var nbits uintptr
				1506
				1507	p := prog
				1508	Run:
				1509	for {
				1510	// Flush accumulated full bytes.
				1511	// The rest of the loop assumes that nbits <= 7.
				1512	for ; nbits >= 8; nbits -= 8 {
				1513	if size == 1 {
				1514	*dst = uint8(bits)
				1515	dst = add1(dst)
				1516	bits >>= 8
				1517	} else {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1518	v := bits&bitPointerAll \| bitScanAll
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1519	*dst = uint8(v)
				1520	dst = subtract1(dst)
				1521	bits >>= 4
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1522	v = bits&bitPointerAll \| bitScanAll
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1523	*dst = uint8(v)
				1524	dst = subtract1(dst)
				1525	bits >>= 4
				1526	}
				1527	}
				1528
				1529	// Process one instruction.
				1530	inst := uintptr(*p)
				1531	p = add1(p)
				1532	n := inst & 0x7F
				1533	if inst&0x80 == 0 {
				1534	// Literal bits; n == 0 means end of program.
				1535	if n == 0 {
				1536	// Program is over; continue in trailer if present.
				1537	if trailer != nil {
				1538	//println("trailer")
				1539	p = trailer
				1540	trailer = nil
				1541	continue
				1542	}
				1543	//println("done")
				1544	break Run
				1545	}
				1546	//println("lit", n, dst)
				1547	nbyte := n / 8
				1548	for i := uintptr(0); i < nbyte; i++ {
				1549	bits \|= uintptr(*p) << nbits
				1550	p = add1(p)
				1551	if size == 1 {
				1552	*dst = uint8(bits)
				1553	dst = add1(dst)
				1554	bits >>= 8
				1555	} else {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1556	v := bits&0xf \| bitScanAll
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1557	*dst = uint8(v)
				1558	dst = subtract1(dst)
				1559	bits >>= 4
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1560	v = bits&0xf \| bitScanAll
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1561	*dst = uint8(v)
				1562	dst = subtract1(dst)
				1563	bits >>= 4
				1564	}
				1565	}
				1566	if n %= 8; n > 0 {
				1567	bits \|= uintptr(*p) << nbits
				1568	p = add1(p)
				1569	nbits += n
				1570	}
				1571	continue Run
				1572	}
				1573
				1574	// Repeat. If n == 0, it is encoded in a varint in the next bytes.
				1575	if n == 0 {
				1576	for off := uint(0); ; off += 7 {
				1577	x := uintptr(*p)
				1578	p = add1(p)
				1579	n \|= (x & 0x7F) << off
				1580	if x&0x80 == 0 {
				1581	break
				1582	}
				1583	}
				1584	}
				1585
				1586	// Count is encoded in a varint in the next bytes.
				1587	c := uintptr(0)
				1588	for off := uint(0); ; off += 7 {
				1589	x := uintptr(*p)
				1590	p = add1(p)
				1591	c \|= (x & 0x7F) << off
				1592	if x&0x80 == 0 {
				1593	break
				1594	}
				1595	}
				1596	c *= n // now total number of bits to copy
				1597
				1598	// If the number of bits being repeated is small, load them
				1599	// into a register and use that register for the entire loop
				1600	// instead of repeatedly reading from memory.
				1601	// Handling fewer than 8 bits here makes the general loop simpler.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1602	// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1603	// the pattern to a bit buffer holding at most 7 bits (a partial byte)
				1604	// it will not overflow.
				1605	src := dst
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1606	const maxBits = sys.PtrSize*8 - 7
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1607	if n <= maxBits {
				1608	// Start with bits in output buffer.
				1609	pattern := bits
				1610	npattern := nbits
				1611
				1612	// If we need more bits, fetch them from memory.
				1613	if size == 1 {
				1614	src = subtract1(src)
				1615	for npattern < n {
				1616	pattern <<= 8
				1617	pattern \|= uintptr(*src)
				1618	src = subtract1(src)
				1619	npattern += 8
				1620	}
				1621	} else {
				1622	src = add1(src)
				1623	for npattern < n {
				1624	pattern <<= 4
				1625	pattern \|= uintptr(*src) & 0xf
				1626	src = add1(src)
				1627	npattern += 4
				1628	}
				1629	}
				1630
				1631	// We started with the whole bit output buffer,
				1632	// and then we loaded bits from whole bytes.
				1633	// Either way, we might now have too many instead of too few.
				1634	// Discard the extra.
				1635	if npattern > n {
				1636	pattern >>= npattern - n
				1637	npattern = n
				1638	}
				1639
				1640	// Replicate pattern to at most maxBits.
				1641	if npattern == 1 {
				1642	// One bit being repeated.
				1643	// If the bit is 1, make the pattern all 1s.
				1644	// If the bit is 0, the pattern is already all 0s,
				1645	// but we can claim that the number of bits
				1646	// in the word is equal to the number we need (c),
				1647	// because right shift of bits will zero fill.
				1648	if pattern == 1 {
				1649	pattern = 1<<maxBits - 1
				1650	npattern = maxBits
				1651	} else {
				1652	npattern = c
				1653	}
				1654	} else {
				1655	b := pattern
				1656	nb := npattern
				1657	if nb+nb <= maxBits {
				1658	// Double pattern until the whole uintptr is filled.
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1659	for nb <= sys.PtrSize*8 {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1660	b \|= b << nb
				1661	nb += nb
				1662	}
				1663	// Trim away incomplete copy of original pattern in high bits.
				1664	// TODO(rsc): Replace with table lookup or loop on systems without divide?
				1665	nb = maxBits / npattern * npattern
				1666	b &= 1<<nb - 1
				1667	pattern = b
				1668	npattern = nb
				1669	}
				1670	}
				1671
				1672	// Add pattern to bit buffer and flush bit buffer, c/npattern times.
				1673	// Since pattern contains >8 bits, there will be full bytes to flush
				1674	// on each iteration.
				1675	for ; c >= npattern; c -= npattern {
				1676	bits \|= pattern << nbits
				1677	nbits += npattern
				1678	if size == 1 {
				1679	for nbits >= 8 {
				1680	*dst = uint8(bits)
				1681	dst = add1(dst)
				1682	bits >>= 8
				1683	nbits -= 8
				1684	}
				1685	} else {
				1686	for nbits >= 4 {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1687	*dst = uint8(bits&0xf \| bitScanAll)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1688	dst = subtract1(dst)
				1689	bits >>= 4
				1690	nbits -= 4
				1691	}
				1692	}
				1693	}
				1694
				1695	// Add final fragment to bit buffer.
				1696	if c > 0 {
				1697	pattern &= 1<<c - 1
				1698	bits \|= pattern << nbits
				1699	nbits += c
				1700	}
				1701	continue Run
				1702	}
				1703
				1704	// Repeat; n too large to fit in a register.
				1705	// Since nbits <= 7, we know the first few bytes of repeated data
				1706	// are already written to memory.
				1707	off := n - nbits // n > nbits because n > maxBits and nbits <= 7
				1708	if size == 1 {
				1709	// Leading src fragment.
				1710	src = subtractb(src, (off+7)/8)
				1711	if frag := off & 7; frag != 0 {
				1712	bits \|= uintptr(*src) >> (8 - frag) << nbits
				1713	src = add1(src)
				1714	nbits += frag
				1715	c -= frag
				1716	}
				1717	// Main loop: load one byte, write another.
				1718	// The bits are rotating through the bit buffer.
				1719	for i := c / 8; i > 0; i-- {
				1720	bits \|= uintptr(*src) << nbits
				1721	src = add1(src)
				1722	*dst = uint8(bits)
				1723	dst = add1(dst)
				1724	bits >>= 8
				1725	}
				1726	// Final src fragment.
				1727	if c %= 8; c > 0 {
				1728	bits \|= (uintptr(*src) & (1<<c - 1)) << nbits
				1729	nbits += c
				1730	}
				1731	} else {
				1732	// Leading src fragment.
				1733	src = addb(src, (off+3)/4)
				1734	if frag := off & 3; frag != 0 {
				1735	bits \|= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
				1736	src = subtract1(src)
				1737	nbits += frag
				1738	c -= frag
				1739	}
				1740	// Main loop: load one byte, write another.
				1741	// The bits are rotating through the bit buffer.
				1742	for i := c / 4; i > 0; i-- {
				1743	bits \|= (uintptr(*src) & 0xf) << nbits
				1744	src = subtract1(src)
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1745	*dst = uint8(bits&0xf \| bitScanAll)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1746	dst = subtract1(dst)
				1747	bits >>= 4
				1748	}
				1749	// Final src fragment.
				1750	if c %= 4; c > 0 {
				1751	bits \|= (uintptr(*src) & (1<<c - 1)) << nbits
				1752	nbits += c
				1753	}
				1754	}
				1755	}
				1756
				1757	// Write any final bits out, using full-byte writes, even for the final byte.
				1758	var totalBits uintptr
				1759	if size == 1 {
				1760	totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
				1761	nbits += -nbits & 7
				1762	for ; nbits > 0; nbits -= 8 {
				1763	*dst = uint8(bits)
				1764	dst = add1(dst)
				1765	bits >>= 8
				1766	}
				1767	} else {
				1768	totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits
				1769	nbits += -nbits & 3
				1770	for ; nbits > 0; nbits -= 4 {
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1771	v := bits&0xf \| bitScanAll
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1772	*dst = uint8(v)
				1773	dst = subtract1(dst)
				1774	bits >>= 4
				1775	}
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1776	}
				1777	return totalBits
				1778	}
				1779
				1780	func dumpGCProg(p *byte) {
				1781	nptr := 0
				1782	for {
				1783	x := *p
				1784	p = add1(p)
				1785	if x == 0 {
				1786	print("\t", nptr, " end\n")
				1787	break
				1788	}
				1789	if x&0x80 == 0 {
				1790	print("\t", nptr, " lit ", x, ":")
				1791	n := int(x+7) / 8
				1792	for i := 0; i < n; i++ {
				1793	print(" ", hex(*p))
				1794	p = add1(p)
				1795	}
				1796	print("\n")
				1797	nptr += int(x)
				1798	} else {
				1799	nbit := int(x &^ 0x80)
				1800	if nbit == 0 {
				1801	for nb := uint(0); ; nb += 7 {
				1802	x := *p
				1803	p = add1(p)
				1804	nbit \|= int(x&0x7f) << nb
				1805	if x&0x80 == 0 {
				1806	break
				1807	}
				1808	}
				1809	}
				1810	count := 0
				1811	for nb := uint(0); ; nb += 7 {
				1812	x := *p
				1813	p = add1(p)
				1814	count \|= int(x&0x7f) << nb
				1815	if x&0x80 == 0 {
				1816	break
				1817	}
				1818	}
				1819	print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
				1820	nptr += nbit * count
				1821	}
				1822	}
				1823	}
				1824
				1825	// Testing.
				1826
				1827	func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
				1828	target := (*stkframe)(ctxt)
				1829	if frame.sp <= target.sp && target.sp < frame.varp {
				1830	target = frame
				1831	return false
				1832	}
				1833	return true
				1834	}
				1835
				1836	// gcbits returns the GC type info for x, for testing.
				1837	// The result is the bitmap entries (0 or 1), one entry per byte.
				1838	//go:linkname reflect_gcbits reflect.gcbits
				1839	func reflect_gcbits(x interface{}) []byte {
				1840	ret := getgcmask(x)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1841	typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
				1842	nptr := typ.ptrdata / sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1843	for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
				1844	ret = ret[:len(ret)-1]
				1845	}
				1846	return ret
				1847	}
				1848
				1849	// Returns GC type info for object p for testing.
				1850	func getgcmask(ep interface{}) (mask []byte) {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1851	e := *efaceOf(&ep)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1852	p := e.data
				1853	t := e._type
				1854	// data or bss
Dan Willemsen	ebae302	2017-01-13 23:01:08 -0800	[diff] [blame]	1855	for _, datap := range activeModules() {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1856	// data
				1857	if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
				1858	bitmap := datap.gcdatamask.bytedata
				1859	n := (*ptrtype)(unsafe.Pointer(t)).elem.size
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1860	mask = make([]byte, n/sys.PtrSize)
				1861	for i := uintptr(0); i < n; i += sys.PtrSize {
				1862	off := (uintptr(p) + i - datap.data) / sys.PtrSize
				1863	mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1864	}
				1865	return
				1866	}
				1867
				1868	// bss
				1869	if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
				1870	bitmap := datap.gcbssmask.bytedata
				1871	n := (*ptrtype)(unsafe.Pointer(t)).elem.size
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1872	mask = make([]byte, n/sys.PtrSize)
				1873	for i := uintptr(0); i < n; i += sys.PtrSize {
				1874	off := (uintptr(p) + i - datap.bss) / sys.PtrSize
				1875	mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1876	}
				1877	return
				1878	}
				1879	}
				1880
				1881	// heap
				1882	var n uintptr
				1883	var base uintptr
				1884	if mlookup(uintptr(p), &base, &n, nil) != 0 {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1885	mask = make([]byte, n/sys.PtrSize)
				1886	for i := uintptr(0); i < n; i += sys.PtrSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1887	hbits := heapBitsForAddr(base + i)
				1888	if hbits.isPointer() {
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1889	mask[i/sys.PtrSize] = 1
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1890	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1891	if i != 1*sys.PtrSize && !hbits.morePointers() {
				1892	mask = mask[:i/sys.PtrSize]
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1893	break
				1894	}
				1895	}
				1896	return
				1897	}
				1898
				1899	// stack
				1900	if _g_ := getg(); _g_.m.curg.stack.lo <= uintptr(p) && uintptr(p) < _g_.m.curg.stack.hi {
				1901	var frame stkframe
				1902	frame.sp = uintptr(p)
				1903	_g_ := getg()
				1904	gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
				1905	if frame.fn != nil {
				1906	f := frame.fn
				1907	targetpc := frame.continpc
				1908	if targetpc == 0 {
				1909	return
				1910	}
				1911	if targetpc != f.entry {
				1912	targetpc--
				1913	}
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1914	pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc, nil)
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1915	if pcdata == -1 {
				1916	return
				1917	}
				1918	stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
				1919	if stkmap == nil \|\| stkmap.n <= 0 {
				1920	return
				1921	}
				1922	bv := stackmapdata(stkmap, pcdata)
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1923	size := uintptr(bv.n) * sys.PtrSize
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1924	n := (*ptrtype)(unsafe.Pointer(t)).elem.size
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1925	mask = make([]byte, n/sys.PtrSize)
				1926	for i := uintptr(0); i < n; i += sys.PtrSize {
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1927	bitmap := bv.bytedata
Dan Willemsen	38f2dba	2016-07-08 14:54:35 -0700	[diff] [blame]	1928	off := (uintptr(p) + i - frame.varp + size) / sys.PtrSize
				1929	mask[i/sys.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
Dan Willemsen	09eb3b1	2015-09-16 14:34:17 -0700	[diff] [blame]	1930	}
				1931	}
				1932	return
				1933	}
				1934
				1935	// otherwise, not something the GC knows about.
				1936	// possibly read-only data, like malloc(0).
				1937	// must not have pointers
				1938	return
				1939	}