Peter Collingbourne | 56109b7 | 2015-01-13 20:45:08 +0000 | [diff] [blame] | 1 | // Copyright 2013 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package pointer |
| 6 | |
| 7 | // This file defines the main datatypes and Analyze function of the pointer analysis. |
| 8 | |
| 9 | import ( |
| 10 | "fmt" |
| 11 | "go/token" |
| 12 | "io" |
| 13 | "os" |
| 14 | "reflect" |
| 15 | "runtime" |
| 16 | "runtime/debug" |
| 17 | "sort" |
| 18 | |
| 19 | "llvm.org/llgo/third_party/gotools/go/callgraph" |
| 20 | "llvm.org/llgo/third_party/gotools/go/ssa" |
| 21 | "llvm.org/llgo/third_party/gotools/go/types" |
| 22 | "llvm.org/llgo/third_party/gotools/go/types/typeutil" |
| 23 | ) |
| 24 | |
const (
	// Optimization options; enable all when committing.
	optRenumber = true // enable renumbering optimization (makes logs hard to read)
	optHVN      = true // enable pointer equivalence via Hash-Value Numbering

	// Debugging options; disable all when committing.
	debugHVN           = false // enable assertions in HVN
	debugHVNVerbose    = false // enable extra HVN logging
	debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
	debugTimers        = false // show running time of each phase
)
| 36 | |
// object.flags bitmask values.
// These flags classify the kind of memory object a node's obj field
// describes; they may be combined (e.g. a tagged object with an
// indirect payload has both otTagged and otIndirect set).
const (
	otTagged   = 1 << iota // type-tagged object
	otIndirect             // type-tagged object with indirect payload
	otFunction             // function object
)
| 43 | |
// An object represents a contiguous block of memory to which some
// (generalized) pointer may point.
//
// (Note: most variables called 'obj' are not *objects but nodeids
// such that a.nodes[obj].obj != nil.)
//
type object struct {
	// flags is a bitset of the node type (ot*) flags defined above.
	flags uint32

	// Number of following nodes belonging to the same "object"
	// allocation. Zero for all other nodes.
	size uint32

	// data describes this object; it has one of these types:
	//
	// ssa.Value	for an object allocated by an SSA operation.
	// types.Type	for an rtype instance object or *rtype-tagged object.
	// string	for an intrinsic object, e.g. the array behind os.Args.
	// nil		for an object allocated by an intrinsic.
	//		(cgn provides the identity of the intrinsic.)
	data interface{}

	// The call-graph node (=context) in which this object was allocated.
	// May be nil for global objects: Global, Const, some Functions.
	cgn *cgnode
}
| 71 | |
// nodeid denotes a node.
// It is an index within analysis.nodes.
// We use small integers, not *node pointers, for many reasons:
// - they are smaller on 64-bit systems.
// - sets of them can be represented compactly in bitvectors or BDDs.
// - order matters; a field offset can be computed by simple addition.
//
// Because an object's fields occupy consecutive node indices,
// arithmetic such as id+offset is meaningful (see enclosingObj).
type nodeid uint32
| 79 | |
// A node is an equivalence class of memory locations.
// Nodes may be pointers, pointed-to locations, neither, or both.
//
// Nodes that are pointed-to locations ("labels") have an enclosing
// object (see analysis.enclosingObject).
//
type node struct {
	// If non-nil, this node is the start of an object
	// (addressable memory location).
	// The following obj.size nodes implicitly belong to the object;
	// they locate their object by scanning back.
	obj *object

	// The type of the field denoted by this node. Non-aggregate,
	// unless this is a tagged.T node (i.e. the thing
	// pointed to by an interface) in which case typ is that type.
	typ types.Type

	// subelement indicates which directly embedded subelement of
	// an object of aggregate type (struct, tuple, array) this is.
	subelement *fieldInfo // e.g. ".a.b[*].c"

	// Solver state for the canonical node of this pointer-
	// equivalence class. Each node is created with its own state
	// but they become shared after HVN.
	solve *solverState
}
| 107 | |
// An analysis instance holds the state of a single pointer analysis problem.
// It is created by Analyze and populated in phases: constraint generation
// (generate), presolver optimizations (renumber, hvn), then solving (solve).
type analysis struct {
	config      *Config                     // the client's control/observer interface
	prog        *ssa.Program                // the program being analyzed
	log         io.Writer                   // log stream; nil to disable
	panicNode   nodeid                      // sink for panic, source for recover
	nodes       []*node                     // indexed by nodeid
	flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
	trackTypes  map[types.Type]bool         // memoization of shouldTrack()
	constraints []constraint                // set of constraints
	cgnodes     []*cgnode                   // all cgnodes
	genq        []*cgnode                   // queue of functions to generate constraints for
	intrinsics  map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
	globalval   map[ssa.Value]nodeid        // node for each global ssa.Value
	globalobj   map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
	localval    map[ssa.Value]nodeid        // node for each local ssa.Value
	localobj    map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
	atFuncs     map[*ssa.Function]bool      // address-taken functions (for presolver)
	mapValues   []nodeid                    // values of makemap objects (indirect in HVN)
	work        nodeset                     // solver's worklist
	result      *Result                     // results of the analysis
	track       track                       // pointerlike types whose aliasing we track
	deltaSpace  []int                       // working space for iterating over PTS deltas

	// Reflection & intrinsics:
	hasher              typeutil.Hasher // cache of type hashes
	reflectValueObj     types.Object    // type symbol for reflect.Value (if present)
	reflectValueCall    *ssa.Function   // (reflect.Value).Call
	reflectRtypeObj     types.Object    // *types.TypeName for reflect.rtype (if present)
	reflectRtypePtr     *types.Pointer  // *reflect.rtype
	reflectType         *types.Named    // reflect.Type
	rtypes              typeutil.Map    // nodeid of canonical *rtype-tagged object for type T
	reflectZeros        typeutil.Map    // nodeid of canonical T-tagged object for zero value
	runtimeSetFinalizer *ssa.Function   // runtime.SetFinalizer
}
| 143 | |
| 144 | // enclosingObj returns the first node of the addressable memory |
| 145 | // object that encloses node id. Panic ensues if that node does not |
| 146 | // belong to any object. |
| 147 | func (a *analysis) enclosingObj(id nodeid) nodeid { |
| 148 | // Find previous node with obj != nil. |
| 149 | for i := id; i >= 0; i-- { |
| 150 | n := a.nodes[i] |
| 151 | if obj := n.obj; obj != nil { |
| 152 | if i+nodeid(obj.size) <= id { |
| 153 | break // out of bounds |
| 154 | } |
| 155 | return i |
| 156 | } |
| 157 | } |
| 158 | panic("node has no enclosing object") |
| 159 | } |
| 160 | |
| 161 | // labelFor returns the Label for node id. |
| 162 | // Panic ensues if that node is not addressable. |
| 163 | func (a *analysis) labelFor(id nodeid) *Label { |
| 164 | return &Label{ |
| 165 | obj: a.nodes[a.enclosingObj(id)].obj, |
| 166 | subelement: a.nodes[id].subelement, |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) { |
| 171 | msg := fmt.Sprintf(format, args...) |
| 172 | if a.log != nil { |
| 173 | fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg) |
| 174 | } |
| 175 | a.result.Warnings = append(a.result.Warnings, Warning{pos, msg}) |
| 176 | } |
| 177 | |
| 178 | // computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries. |
| 179 | func (a *analysis) computeTrackBits() { |
| 180 | var queryTypes []types.Type |
| 181 | for v := range a.config.Queries { |
| 182 | queryTypes = append(queryTypes, v.Type()) |
| 183 | } |
| 184 | for v := range a.config.IndirectQueries { |
| 185 | queryTypes = append(queryTypes, mustDeref(v.Type())) |
| 186 | } |
| 187 | for _, t := range queryTypes { |
| 188 | switch t.Underlying().(type) { |
| 189 | case *types.Chan: |
| 190 | a.track |= trackChan |
| 191 | case *types.Map: |
| 192 | a.track |= trackMap |
| 193 | case *types.Pointer: |
| 194 | a.track |= trackPtr |
| 195 | case *types.Slice: |
| 196 | a.track |= trackSlice |
| 197 | case *types.Interface: |
| 198 | a.track = trackAll |
| 199 | return |
| 200 | } |
| 201 | if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) { |
| 202 | a.track = trackAll |
| 203 | return |
| 204 | } |
| 205 | } |
| 206 | } |
| 207 | |
// Analyze runs the pointer analysis with the scope and options
// specified by config, and returns the (synthetic) root of the callgraph.
//
// Pointer analysis of a transitively closed well-typed program should
// always succeed. An error can occur only due to an internal bug.
//
func Analyze(config *Config) (result *Result, err error) {
	if config.Mains == nil {
		return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
	}
	// Internal invariant violations panic; convert any such panic into
	// an error result so that callers see a clean failure.
	defer func() {
		if p := recover(); p != nil {
			err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
			fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
			debug.PrintStack()
		}
	}()

	a := &analysis{
		config:      config,
		log:         config.Log,
		prog:        config.prog(),
		globalval:   make(map[ssa.Value]nodeid),
		globalobj:   make(map[ssa.Value]nodeid),
		flattenMemo: make(map[types.Type][]*fieldInfo),
		trackTypes:  make(map[types.Type]bool),
		atFuncs:     make(map[*ssa.Function]bool),
		hasher:      typeutil.MakeHasher(),
		intrinsics:  make(map[*ssa.Function]intrinsic),
		result: &Result{
			Queries:         make(map[ssa.Value]Pointer),
			IndirectQueries: make(map[ssa.Value]Pointer),
		},
		deltaSpace: make([]int, 0, 100),
	}

	// Flip to true for desperate debugging of crashes.
	if false {
		a.log = os.Stderr // for debugging crashes; extremely verbose
	}

	if a.log != nil {
		fmt.Fprintln(a.log, "==== Starting analysis")
	}

	// Pointer analysis requires a complete program for soundness.
	// Check to prevent accidental misconfiguration.
	for _, pkg := range a.prog.AllPackages() {
		// (This only checks that the package scope is complete,
		// not that func bodies exist, but it's a good signal.)
		if !pkg.Object.Complete() {
			return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete (don't set loader.Config.ImportFromBinary during loading)`, pkg.Object.Path())
		}
	}

	// Cache symbols of the reflect package, if imported; the solver
	// models reflection intrinsically rather than analyzing its source.
	if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
		rV := reflect.Object.Scope().Lookup("Value")
		a.reflectValueObj = rV
		a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
		a.reflectType = reflect.Object.Scope().Lookup("Type").Type().(*types.Named)
		a.reflectRtypeObj = reflect.Object.Scope().Lookup("rtype")
		a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())

		// Override flattening of reflect.Value, treating it like a basic type.
		tReflectValue := a.reflectValueObj.Type()
		a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}

		// Override shouldTrack of reflect.Value and *reflect.rtype.
		// Always track pointers of these types.
		a.trackTypes[tReflectValue] = true
		a.trackTypes[a.reflectRtypePtr] = true

		a.rtypes.SetHasher(a.hasher)
		a.reflectZeros.SetHasher(a.hasher)
	}
	if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
		a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
	}
	a.computeTrackBits()

	// Phase 1: generate the constraint system from the SSA program.
	a.generate()
	a.showCounts()

	// Phase 2: presolver optimizations.
	if optRenumber {
		a.renumber()
	}

	N := len(a.nodes) // excludes solver-created nodes

	if optHVN {
		if debugHVNCrossCheck {
			// Cross-check: run the solver once without
			// optimization, once with, and compare the
			// solutions.
			savedConstraints := a.constraints

			a.solve()
			a.dumpSolution("A.pts", N)

			// Restore.
			a.constraints = savedConstraints
			for _, n := range a.nodes {
				n.solve = new(solverState)
			}
			a.nodes = a.nodes[:N]

			// rtypes is effectively part of the solver state.
			a.rtypes = typeutil.Map{}
			a.rtypes.SetHasher(a.hasher)
		}

		a.hvn()
	}

	if debugHVNCrossCheck {
		// Free the memory retained by the first (unoptimized) solve
		// before solving again.
		runtime.GC()
		runtime.GC()
	}

	// Phase 3: solve the constraint system.
	a.solve()

	// Compare solutions.
	if optHVN && debugHVNCrossCheck {
		a.dumpSolution("B.pts", N)

		if !diff("A.pts", "B.pts") {
			return nil, fmt.Errorf("internal error: optimization changed solution")
		}
	}

	// Create callgraph.Nodes in deterministic order.
	if cg := a.result.CallGraph; cg != nil {
		for _, caller := range a.cgnodes {
			cg.CreateNode(caller.fn)
		}
	}

	// Add dynamic edges to call graph: for each call site, one edge
	// per function object in the points-to set of its targets node.
	var space [100]int
	for _, caller := range a.cgnodes {
		for _, site := range caller.sites {
			for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
				a.callEdge(caller, site, nodeid(callee))
			}
		}
	}

	return a.result, nil
}
| 356 | |
| 357 | // callEdge is called for each edge in the callgraph. |
| 358 | // calleeid is the callee's object node (has otFunction flag). |
| 359 | // |
| 360 | func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) { |
| 361 | obj := a.nodes[calleeid].obj |
| 362 | if obj.flags&otFunction == 0 { |
| 363 | panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid)) |
| 364 | } |
| 365 | callee := obj.cgn |
| 366 | |
| 367 | if cg := a.result.CallGraph; cg != nil { |
| 368 | // TODO(adonovan): opt: I would expect duplicate edges |
| 369 | // (to wrappers) to arise due to the elimination of |
| 370 | // context information, but I haven't observed any. |
| 371 | // Understand this better. |
| 372 | callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn)) |
| 373 | } |
| 374 | |
| 375 | if a.log != nil { |
| 376 | fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee) |
| 377 | } |
| 378 | |
| 379 | // Warn about calls to non-intrinsic external functions. |
| 380 | // TODO(adonovan): de-dup these messages. |
| 381 | if fn := callee.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil { |
| 382 | a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn) |
| 383 | a.warnf(fn.Pos(), " (declared here)") |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | // dumpSolution writes the PTS solution to the specified file. |
| 388 | // |
| 389 | // It only dumps the nodes that existed before solving. The order in |
| 390 | // which solver-created nodes are created depends on pre-solver |
| 391 | // optimization, so we can't include them in the cross-check. |
| 392 | // |
| 393 | func (a *analysis) dumpSolution(filename string, N int) { |
| 394 | f, err := os.Create(filename) |
| 395 | if err != nil { |
| 396 | panic(err) |
| 397 | } |
| 398 | for id, n := range a.nodes[:N] { |
| 399 | if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil { |
| 400 | panic(err) |
| 401 | } |
| 402 | var sep string |
| 403 | for _, l := range n.solve.pts.AppendTo(a.deltaSpace) { |
| 404 | if l >= N { |
| 405 | break |
| 406 | } |
| 407 | fmt.Fprintf(f, "%s%d", sep, l) |
| 408 | sep = " " |
| 409 | } |
| 410 | fmt.Fprintf(f, "} : %s\n", n.typ) |
| 411 | } |
| 412 | if err := f.Close(); err != nil { |
| 413 | panic(err) |
| 414 | } |
| 415 | } |
| 416 | |
| 417 | // showCounts logs the size of the constraint system. A typical |
| 418 | // optimized distribution is 65% copy, 13% load, 11% addr, 5% |
| 419 | // offsetAddr, 4% store, 2% others. |
| 420 | // |
| 421 | func (a *analysis) showCounts() { |
| 422 | if a.log != nil { |
| 423 | counts := make(map[reflect.Type]int) |
| 424 | for _, c := range a.constraints { |
| 425 | counts[reflect.TypeOf(c)]++ |
| 426 | } |
| 427 | fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints)) |
| 428 | var lines []string |
| 429 | for t, n := range counts { |
| 430 | line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t) |
| 431 | lines = append(lines, line) |
| 432 | } |
| 433 | sort.Sort(sort.Reverse(sort.StringSlice(lines))) |
| 434 | for _, line := range lines { |
| 435 | fmt.Fprintf(a.log, "\t%s\n", line) |
| 436 | } |
| 437 | |
| 438 | fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes)) |
| 439 | |
| 440 | // Show number of pointer equivalence classes. |
| 441 | m := make(map[*solverState]bool) |
| 442 | for _, n := range a.nodes { |
| 443 | m[n.solve] = true |
| 444 | } |
| 445 | fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m)) |
| 446 | } |
| 447 | } |