blob: d02c3f732fa2cb4f859a10371b5cebcb6c14c33a [file] [log] [blame]
Peter Collingbourne56109b72015-01-13 20:45:08 +00001// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package pointer
6
7// This file defines the main datatypes and Analyze function of the pointer analysis.
8
9import (
10 "fmt"
11 "go/token"
12 "io"
13 "os"
14 "reflect"
15 "runtime"
16 "runtime/debug"
17 "sort"
18
19 "llvm.org/llgo/third_party/gotools/go/callgraph"
20 "llvm.org/llgo/third_party/gotools/go/ssa"
21 "llvm.org/llgo/third_party/gotools/go/types"
22 "llvm.org/llgo/third_party/gotools/go/types/typeutil"
23)
24
25const (
26 // optimization options; enable all when committing
27 optRenumber = true // enable renumbering optimization (makes logs hard to read)
28 optHVN = true // enable pointer equivalence via Hash-Value Numbering
29
30 // debugging options; disable all when committing
31 debugHVN = false // enable assertions in HVN
32 debugHVNVerbose = false // enable extra HVN logging
33 debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
34 debugTimers = false // show running time of each phase
35)
36
37// object.flags bitmask values.
38const (
39 otTagged = 1 << iota // type-tagged object
40 otIndirect // type-tagged object with indirect payload
41 otFunction // function object
42)
43
44// An object represents a contiguous block of memory to which some
45// (generalized) pointer may point.
46//
47// (Note: most variables called 'obj' are not *objects but nodeids
48// such that a.nodes[obj].obj != nil.)
49//
50type object struct {
51 // flags is a bitset of the node type (ot*) flags defined above.
52 flags uint32
53
54 // Number of following nodes belonging to the same "object"
55 // allocation. Zero for all other nodes.
56 size uint32
57
58 // data describes this object; it has one of these types:
59 //
60 // ssa.Value for an object allocated by an SSA operation.
61 // types.Type for an rtype instance object or *rtype-tagged object.
62 // string for an instrinsic object, e.g. the array behind os.Args.
63 // nil for an object allocated by an instrinsic.
64 // (cgn provides the identity of the intrinsic.)
65 data interface{}
66
67 // The call-graph node (=context) in which this object was allocated.
68 // May be nil for global objects: Global, Const, some Functions.
69 cgn *cgnode
70}
71
72// nodeid denotes a node.
73// It is an index within analysis.nodes.
74// We use small integers, not *node pointers, for many reasons:
75// - they are smaller on 64-bit systems.
76// - sets of them can be represented compactly in bitvectors or BDDs.
77// - order matters; a field offset can be computed by simple addition.
78type nodeid uint32
79
80// A node is an equivalence class of memory locations.
81// Nodes may be pointers, pointed-to locations, neither, or both.
82//
83// Nodes that are pointed-to locations ("labels") have an enclosing
84// object (see analysis.enclosingObject).
85//
86type node struct {
87 // If non-nil, this node is the start of an object
88 // (addressable memory location).
89 // The following obj.size nodes implicitly belong to the object;
90 // they locate their object by scanning back.
91 obj *object
92
93 // The type of the field denoted by this node. Non-aggregate,
94 // unless this is an tagged.T node (i.e. the thing
95 // pointed to by an interface) in which case typ is that type.
96 typ types.Type
97
98 // subelement indicates which directly embedded subelement of
99 // an object of aggregate type (struct, tuple, array) this is.
100 subelement *fieldInfo // e.g. ".a.b[*].c"
101
102 // Solver state for the canonical node of this pointer-
103 // equivalence class. Each node is created with its own state
104 // but they become shared after HVN.
105 solve *solverState
106}
107
108// An analysis instance holds the state of a single pointer analysis problem.
109type analysis struct {
110 config *Config // the client's control/observer interface
111 prog *ssa.Program // the program being analyzed
112 log io.Writer // log stream; nil to disable
113 panicNode nodeid // sink for panic, source for recover
114 nodes []*node // indexed by nodeid
115 flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
116 trackTypes map[types.Type]bool // memoization of shouldTrack()
117 constraints []constraint // set of constraints
118 cgnodes []*cgnode // all cgnodes
119 genq []*cgnode // queue of functions to generate constraints for
120 intrinsics map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
121 globalval map[ssa.Value]nodeid // node for each global ssa.Value
122 globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
123 localval map[ssa.Value]nodeid // node for each local ssa.Value
124 localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
125 atFuncs map[*ssa.Function]bool // address-taken functions (for presolver)
126 mapValues []nodeid // values of makemap objects (indirect in HVN)
127 work nodeset // solver's worklist
128 result *Result // results of the analysis
129 track track // pointerlike types whose aliasing we track
130 deltaSpace []int // working space for iterating over PTS deltas
131
132 // Reflection & intrinsics:
133 hasher typeutil.Hasher // cache of type hashes
134 reflectValueObj types.Object // type symbol for reflect.Value (if present)
135 reflectValueCall *ssa.Function // (reflect.Value).Call
136 reflectRtypeObj types.Object // *types.TypeName for reflect.rtype (if present)
137 reflectRtypePtr *types.Pointer // *reflect.rtype
138 reflectType *types.Named // reflect.Type
139 rtypes typeutil.Map // nodeid of canonical *rtype-tagged object for type T
140 reflectZeros typeutil.Map // nodeid of canonical T-tagged object for zero value
141 runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer
142}
143
144// enclosingObj returns the first node of the addressable memory
145// object that encloses node id. Panic ensues if that node does not
146// belong to any object.
147func (a *analysis) enclosingObj(id nodeid) nodeid {
148 // Find previous node with obj != nil.
149 for i := id; i >= 0; i-- {
150 n := a.nodes[i]
151 if obj := n.obj; obj != nil {
152 if i+nodeid(obj.size) <= id {
153 break // out of bounds
154 }
155 return i
156 }
157 }
158 panic("node has no enclosing object")
159}
160
161// labelFor returns the Label for node id.
162// Panic ensues if that node is not addressable.
163func (a *analysis) labelFor(id nodeid) *Label {
164 return &Label{
165 obj: a.nodes[a.enclosingObj(id)].obj,
166 subelement: a.nodes[id].subelement,
167 }
168}
169
170func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) {
171 msg := fmt.Sprintf(format, args...)
172 if a.log != nil {
173 fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg)
174 }
175 a.result.Warnings = append(a.result.Warnings, Warning{pos, msg})
176}
177
178// computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries.
179func (a *analysis) computeTrackBits() {
180 var queryTypes []types.Type
181 for v := range a.config.Queries {
182 queryTypes = append(queryTypes, v.Type())
183 }
184 for v := range a.config.IndirectQueries {
185 queryTypes = append(queryTypes, mustDeref(v.Type()))
186 }
187 for _, t := range queryTypes {
188 switch t.Underlying().(type) {
189 case *types.Chan:
190 a.track |= trackChan
191 case *types.Map:
192 a.track |= trackMap
193 case *types.Pointer:
194 a.track |= trackPtr
195 case *types.Slice:
196 a.track |= trackSlice
197 case *types.Interface:
198 a.track = trackAll
199 return
200 }
201 if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) {
202 a.track = trackAll
203 return
204 }
205 }
206}
207
208// Analyze runs the pointer analysis with the scope and options
209// specified by config, and returns the (synthetic) root of the callgraph.
210//
211// Pointer analysis of a transitively closed well-typed program should
212// always succeed. An error can occur only due to an internal bug.
213//
214func Analyze(config *Config) (result *Result, err error) {
215 if config.Mains == nil {
216 return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
217 }
218 defer func() {
219 if p := recover(); p != nil {
220 err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
221 fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
222 debug.PrintStack()
223 }
224 }()
225
226 a := &analysis{
227 config: config,
228 log: config.Log,
229 prog: config.prog(),
230 globalval: make(map[ssa.Value]nodeid),
231 globalobj: make(map[ssa.Value]nodeid),
232 flattenMemo: make(map[types.Type][]*fieldInfo),
233 trackTypes: make(map[types.Type]bool),
234 atFuncs: make(map[*ssa.Function]bool),
235 hasher: typeutil.MakeHasher(),
236 intrinsics: make(map[*ssa.Function]intrinsic),
237 result: &Result{
238 Queries: make(map[ssa.Value]Pointer),
239 IndirectQueries: make(map[ssa.Value]Pointer),
240 },
241 deltaSpace: make([]int, 0, 100),
242 }
243
244 if false {
245 a.log = os.Stderr // for debugging crashes; extremely verbose
246 }
247
248 if a.log != nil {
249 fmt.Fprintln(a.log, "==== Starting analysis")
250 }
251
252 // Pointer analysis requires a complete program for soundness.
253 // Check to prevent accidental misconfiguration.
254 for _, pkg := range a.prog.AllPackages() {
255 // (This only checks that the package scope is complete,
256 // not that func bodies exist, but it's a good signal.)
257 if !pkg.Object.Complete() {
Peter Collingbourne7d396412015-04-05 23:28:18 +0000258 return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete (don't set loader.Config.ImportFromBinary during loading)`, pkg.Object.Path())
Peter Collingbourne56109b72015-01-13 20:45:08 +0000259 }
260 }
261
262 if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
263 rV := reflect.Object.Scope().Lookup("Value")
264 a.reflectValueObj = rV
265 a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
266 a.reflectType = reflect.Object.Scope().Lookup("Type").Type().(*types.Named)
267 a.reflectRtypeObj = reflect.Object.Scope().Lookup("rtype")
268 a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())
269
270 // Override flattening of reflect.Value, treating it like a basic type.
271 tReflectValue := a.reflectValueObj.Type()
272 a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}
273
274 // Override shouldTrack of reflect.Value and *reflect.rtype.
275 // Always track pointers of these types.
276 a.trackTypes[tReflectValue] = true
277 a.trackTypes[a.reflectRtypePtr] = true
278
279 a.rtypes.SetHasher(a.hasher)
280 a.reflectZeros.SetHasher(a.hasher)
281 }
282 if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
283 a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
284 }
285 a.computeTrackBits()
286
287 a.generate()
288 a.showCounts()
289
290 if optRenumber {
291 a.renumber()
292 }
293
294 N := len(a.nodes) // excludes solver-created nodes
295
296 if optHVN {
297 if debugHVNCrossCheck {
298 // Cross-check: run the solver once without
299 // optimization, once with, and compare the
300 // solutions.
301 savedConstraints := a.constraints
302
303 a.solve()
304 a.dumpSolution("A.pts", N)
305
306 // Restore.
307 a.constraints = savedConstraints
308 for _, n := range a.nodes {
309 n.solve = new(solverState)
310 }
311 a.nodes = a.nodes[:N]
312
313 // rtypes is effectively part of the solver state.
314 a.rtypes = typeutil.Map{}
315 a.rtypes.SetHasher(a.hasher)
316 }
317
318 a.hvn()
319 }
320
321 if debugHVNCrossCheck {
322 runtime.GC()
323 runtime.GC()
324 }
325
326 a.solve()
327
328 // Compare solutions.
329 if optHVN && debugHVNCrossCheck {
330 a.dumpSolution("B.pts", N)
331
332 if !diff("A.pts", "B.pts") {
333 return nil, fmt.Errorf("internal error: optimization changed solution")
334 }
335 }
336
337 // Create callgraph.Nodes in deterministic order.
338 if cg := a.result.CallGraph; cg != nil {
339 for _, caller := range a.cgnodes {
340 cg.CreateNode(caller.fn)
341 }
342 }
343
344 // Add dynamic edges to call graph.
345 var space [100]int
346 for _, caller := range a.cgnodes {
347 for _, site := range caller.sites {
348 for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
349 a.callEdge(caller, site, nodeid(callee))
350 }
351 }
352 }
353
354 return a.result, nil
355}
356
357// callEdge is called for each edge in the callgraph.
358// calleeid is the callee's object node (has otFunction flag).
359//
360func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) {
361 obj := a.nodes[calleeid].obj
362 if obj.flags&otFunction == 0 {
363 panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid))
364 }
365 callee := obj.cgn
366
367 if cg := a.result.CallGraph; cg != nil {
368 // TODO(adonovan): opt: I would expect duplicate edges
369 // (to wrappers) to arise due to the elimination of
370 // context information, but I haven't observed any.
371 // Understand this better.
372 callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn))
373 }
374
375 if a.log != nil {
376 fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee)
377 }
378
379 // Warn about calls to non-intrinsic external functions.
380 // TODO(adonovan): de-dup these messages.
381 if fn := callee.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil {
382 a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn)
383 a.warnf(fn.Pos(), " (declared here)")
384 }
385}
386
387// dumpSolution writes the PTS solution to the specified file.
388//
389// It only dumps the nodes that existed before solving. The order in
390// which solver-created nodes are created depends on pre-solver
391// optimization, so we can't include them in the cross-check.
392//
393func (a *analysis) dumpSolution(filename string, N int) {
394 f, err := os.Create(filename)
395 if err != nil {
396 panic(err)
397 }
398 for id, n := range a.nodes[:N] {
399 if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
400 panic(err)
401 }
402 var sep string
403 for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
404 if l >= N {
405 break
406 }
407 fmt.Fprintf(f, "%s%d", sep, l)
408 sep = " "
409 }
410 fmt.Fprintf(f, "} : %s\n", n.typ)
411 }
412 if err := f.Close(); err != nil {
413 panic(err)
414 }
415}
416
417// showCounts logs the size of the constraint system. A typical
418// optimized distribution is 65% copy, 13% load, 11% addr, 5%
419// offsetAddr, 4% store, 2% others.
420//
421func (a *analysis) showCounts() {
422 if a.log != nil {
423 counts := make(map[reflect.Type]int)
424 for _, c := range a.constraints {
425 counts[reflect.TypeOf(c)]++
426 }
427 fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
428 var lines []string
429 for t, n := range counts {
430 line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
431 lines = append(lines, line)
432 }
433 sort.Sort(sort.Reverse(sort.StringSlice(lines)))
434 for _, line := range lines {
435 fmt.Fprintf(a.log, "\t%s\n", line)
436 }
437
438 fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
439
440 // Show number of pointer equivalence classes.
441 m := make(map[*solverState]bool)
442 for _, n := range a.nodes {
443 m[n.solve] = true
444 }
445 fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
446 }
447}