Peter Collingbourne | 56109b7 | 2015-01-13 20:45:08 +0000 | [diff] [blame] | 1 | // Copyright 2013 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package pointer |
| 6 | |
| 7 | // This file defines the main datatypes and Analyze function of the pointer analysis. |
| 8 | |
| 9 | import ( |
| 10 | "fmt" |
| 11 | "go/token" |
| 12 | "io" |
| 13 | "os" |
| 14 | "reflect" |
| 15 | "runtime" |
| 16 | "runtime/debug" |
| 17 | "sort" |
| 18 | |
| 19 | "llvm.org/llgo/third_party/gotools/go/callgraph" |
| 20 | "llvm.org/llgo/third_party/gotools/go/ssa" |
| 21 | "llvm.org/llgo/third_party/gotools/go/types" |
| 22 | "llvm.org/llgo/third_party/gotools/go/types/typeutil" |
| 23 | ) |
| 24 | |
const (
	// Optimization options; enable all when committing.
	optRenumber = true // enable renumbering optimization (makes logs hard to read)
	optHVN      = true // enable pointer equivalence via Hash-Value Numbering

	// Debugging options; disable all when committing.
	debugHVN           = false // enable assertions in HVN
	debugHVNVerbose    = false // enable extra HVN logging
	debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
	debugTimers        = false // show running time of each phase
)
| 36 | |
// object.flags bitmask values.
// These flags classify the kind of memory object a node's obj field
// describes; they may be combined (e.g. a tagged object with an
// indirect payload has both otTagged and otIndirect set).
const (
	otTagged   = 1 << iota // type-tagged object
	otIndirect             // type-tagged object with indirect payload
	otFunction             // function object
)
| 43 | |
// An object represents a contiguous block of memory to which some
// (generalized) pointer may point.
//
// (Note: most variables called 'obj' are not *objects but nodeids
// such that a.nodes[obj].obj != nil.)
//
type object struct {
	// flags is a bitset of the node type (ot*) flags defined above.
	flags uint32

	// Number of following nodes belonging to the same "object"
	// allocation. Zero for all other nodes.
	size uint32

	// data describes this object; it has one of these types:
	//
	// ssa.Value	for an object allocated by an SSA operation.
	// types.Type	for an rtype instance object or *rtype-tagged object.
	// string	for an intrinsic object, e.g. the array behind os.Args.
	// nil		for an object allocated by an intrinsic.
	//		(cgn provides the identity of the intrinsic.)
	data interface{}

	// The call-graph node (=context) in which this object was allocated.
	// May be nil for global objects: Global, Const, some Functions.
	cgn *cgnode
}
| 71 | |
// nodeid denotes a node.
// It is an index within analysis.nodes.
// We use small integers, not *node pointers, for many reasons:
// - they are smaller on 64-bit systems.
// - sets of them can be represented compactly in bitvectors or BDDs.
// - order matters; a field offset can be computed by simple addition.
//
// Because an object's fields occupy consecutive node indices,
// arithmetic such as id+offset is meaningful (see enclosingObj).
type nodeid uint32
| 79 | |
// A node is an equivalence class of memory locations.
// Nodes may be pointers, pointed-to locations, neither, or both.
//
// Nodes that are pointed-to locations ("labels") have an enclosing
// object (see analysis.enclosingObject).
//
type node struct {
	// If non-nil, this node is the start of an object
	// (addressable memory location).
	// The following obj.size nodes implicitly belong to the object;
	// they locate their object by scanning back.
	obj *object

	// The type of the field denoted by this node. Non-aggregate,
	// unless this is a tagged.T node (i.e. the thing
	// pointed to by an interface) in which case typ is that type.
	typ types.Type

	// subelement indicates which directly embedded subelement of
	// an object of aggregate type (struct, tuple, array) this is.
	subelement *fieldInfo // e.g. ".a.b[*].c"

	// Solver state for the canonical node of this pointer-
	// equivalence class. Each node is created with its own state
	// but they become shared after HVN.
	solve *solverState
}
| 107 | |
// An analysis instance holds the state of a single pointer analysis problem.
// It is created by Analyze and populated in phases: constraint generation
// (generate), presolver optimizations (renumber, hvn), then solving (solve).
type analysis struct {
	config      *Config                     // the client's control/observer interface
	prog        *ssa.Program                // the program being analyzed
	log         io.Writer                   // log stream; nil to disable
	panicNode   nodeid                      // sink for panic, source for recover
	nodes       []*node                     // indexed by nodeid
	flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
	trackTypes  map[types.Type]bool         // memoization of shouldTrack()
	constraints []constraint                // set of constraints
	cgnodes     []*cgnode                   // all cgnodes
	genq        []*cgnode                   // queue of functions to generate constraints for
	intrinsics  map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
	globalval   map[ssa.Value]nodeid        // node for each global ssa.Value
	globalobj   map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
	localval    map[ssa.Value]nodeid        // node for each local ssa.Value
	localobj    map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
	atFuncs     map[*ssa.Function]bool      // address-taken functions (for presolver)
	mapValues   []nodeid                    // values of makemap objects (indirect in HVN)
	work        nodeset                     // solver's worklist
	result      *Result                     // results of the analysis
	track       track                       // pointerlike types whose aliasing we track
	deltaSpace  []int                       // working space for iterating over PTS deltas

	// Reflection & intrinsics:
	hasher              typeutil.Hasher // cache of type hashes
	reflectValueObj     types.Object    // type symbol for reflect.Value (if present)
	reflectValueCall    *ssa.Function   // (reflect.Value).Call
	reflectRtypeObj     types.Object    // *types.TypeName for reflect.rtype (if present)
	reflectRtypePtr     *types.Pointer  // *reflect.rtype
	reflectType         *types.Named    // reflect.Type
	rtypes              typeutil.Map    // nodeid of canonical *rtype-tagged object for type T
	reflectZeros        typeutil.Map    // nodeid of canonical T-tagged object for zero value
	runtimeSetFinalizer *ssa.Function   // runtime.SetFinalizer
}
| 143 | |
| 144 | // enclosingObj returns the first node of the addressable memory |
| 145 | // object that encloses node id. Panic ensues if that node does not |
| 146 | // belong to any object. |
| 147 | func (a *analysis) enclosingObj(id nodeid) nodeid { |
| 148 | // Find previous node with obj != nil. |
| 149 | for i := id; i >= 0; i-- { |
| 150 | n := a.nodes[i] |
| 151 | if obj := n.obj; obj != nil { |
| 152 | if i+nodeid(obj.size) <= id { |
| 153 | break // out of bounds |
| 154 | } |
| 155 | return i |
| 156 | } |
| 157 | } |
| 158 | panic("node has no enclosing object") |
| 159 | } |
| 160 | |
| 161 | // labelFor returns the Label for node id. |
| 162 | // Panic ensues if that node is not addressable. |
| 163 | func (a *analysis) labelFor(id nodeid) *Label { |
| 164 | return &Label{ |
| 165 | obj: a.nodes[a.enclosingObj(id)].obj, |
| 166 | subelement: a.nodes[id].subelement, |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) { |
| 171 | msg := fmt.Sprintf(format, args...) |
| 172 | if a.log != nil { |
| 173 | fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg) |
| 174 | } |
| 175 | a.result.Warnings = append(a.result.Warnings, Warning{pos, msg}) |
| 176 | } |
| 177 | |
| 178 | // computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries. |
| 179 | func (a *analysis) computeTrackBits() { |
| 180 | var queryTypes []types.Type |
| 181 | for v := range a.config.Queries { |
| 182 | queryTypes = append(queryTypes, v.Type()) |
| 183 | } |
| 184 | for v := range a.config.IndirectQueries { |
| 185 | queryTypes = append(queryTypes, mustDeref(v.Type())) |
| 186 | } |
| 187 | for _, t := range queryTypes { |
| 188 | switch t.Underlying().(type) { |
| 189 | case *types.Chan: |
| 190 | a.track |= trackChan |
| 191 | case *types.Map: |
| 192 | a.track |= trackMap |
| 193 | case *types.Pointer: |
| 194 | a.track |= trackPtr |
| 195 | case *types.Slice: |
| 196 | a.track |= trackSlice |
| 197 | case *types.Interface: |
| 198 | a.track = trackAll |
| 199 | return |
| 200 | } |
| 201 | if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) { |
| 202 | a.track = trackAll |
| 203 | return |
| 204 | } |
| 205 | } |
| 206 | } |
| 207 | |
// Analyze runs the pointer analysis with the scope and options
// specified by config, and returns the (synthetic) root of the callgraph.
//
// Pointer analysis of a transitively closed well-typed program should
// always succeed. An error can occur only due to an internal bug.
//
func Analyze(config *Config) (result *Result, err error) {
	if config.Mains == nil {
		return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
	}
	// Internal invariant violations panic; convert any such panic into
	// an error result so that callers see a clean failure.
	defer func() {
		if p := recover(); p != nil {
			err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
			fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
			debug.PrintStack()
		}
	}()

	a := &analysis{
		config:      config,
		log:         config.Log,
		prog:        config.prog(),
		globalval:   make(map[ssa.Value]nodeid),
		globalobj:   make(map[ssa.Value]nodeid),
		flattenMemo: make(map[types.Type][]*fieldInfo),
		trackTypes:  make(map[types.Type]bool),
		atFuncs:     make(map[*ssa.Function]bool),
		hasher:      typeutil.MakeHasher(),
		intrinsics:  make(map[*ssa.Function]intrinsic),
		result: &Result{
			Queries:         make(map[ssa.Value]Pointer),
			IndirectQueries: make(map[ssa.Value]Pointer),
		},
		deltaSpace: make([]int, 0, 100),
	}

	// Flip to true for desperate debugging of crashes.
	if false {
		a.log = os.Stderr // for debugging crashes; extremely verbose
	}

	if a.log != nil {
		fmt.Fprintln(a.log, "==== Starting analysis")
	}

	// Pointer analysis requires a complete program for soundness.
	// Check to prevent accidental misconfiguration.
	for _, pkg := range a.prog.AllPackages() {
		// (This only checks that the package scope is complete,
		// not that func bodies exist, but it's a good signal.)
		if !pkg.Object.Complete() {
			return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete (don't set loader.Config.ImportFromBinary during loading)`, pkg.Object.Path())
		}
	}

	// Cache symbols of the reflect package, if imported; the solver
	// models reflection intrinsically rather than analyzing its source.
	if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
		rV := reflect.Object.Scope().Lookup("Value")
		a.reflectValueObj = rV
		a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
		a.reflectType = reflect.Object.Scope().Lookup("Type").Type().(*types.Named)
		a.reflectRtypeObj = reflect.Object.Scope().Lookup("rtype")
		a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())

		// Override flattening of reflect.Value, treating it like a basic type.
		tReflectValue := a.reflectValueObj.Type()
		a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}

		// Override shouldTrack of reflect.Value and *reflect.rtype.
		// Always track pointers of these types.
		a.trackTypes[tReflectValue] = true
		a.trackTypes[a.reflectRtypePtr] = true

		a.rtypes.SetHasher(a.hasher)
		a.reflectZeros.SetHasher(a.hasher)
	}
	if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
		a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
	}
	a.computeTrackBits()

	// Phase 1: generate the constraint system from the SSA program.
	a.generate()
	a.showCounts()

	// Phase 2: presolver optimizations.
	if optRenumber {
		a.renumber()
	}

	N := len(a.nodes) // excludes solver-created nodes

	if optHVN {
		if debugHVNCrossCheck {
			// Cross-check: run the solver once without
			// optimization, once with, and compare the
			// solutions.
			savedConstraints := a.constraints

			a.solve()
			a.dumpSolution("A.pts", N)

			// Restore.
			a.constraints = savedConstraints
			for _, n := range a.nodes {
				n.solve = new(solverState)
			}
			a.nodes = a.nodes[:N]

			// rtypes is effectively part of the solver state.
			a.rtypes = typeutil.Map{}
			a.rtypes.SetHasher(a.hasher)
		}

		a.hvn()
	}

	if debugHVNCrossCheck {
		// Free the memory retained by the first (unoptimized) solve
		// before solving again.
		runtime.GC()
		runtime.GC()
	}

	// Phase 3: solve the constraint system.
	a.solve()

	// Compare solutions.
	if optHVN && debugHVNCrossCheck {
		a.dumpSolution("B.pts", N)

		if !diff("A.pts", "B.pts") {
			return nil, fmt.Errorf("internal error: optimization changed solution")
		}
	}

	// Create callgraph.Nodes in deterministic order.
	if cg := a.result.CallGraph; cg != nil {
		for _, caller := range a.cgnodes {
			cg.CreateNode(caller.fn)
		}
	}

	// Add dynamic edges to call graph: for each call site, one edge
	// per function object in the points-to set of its targets node.
	var space [100]int
	for _, caller := range a.cgnodes {
		for _, site := range caller.sites {
			for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
				a.callEdge(caller, site, nodeid(callee))
			}
		}
	}

	return a.result, nil
}
| 356 | |
| 357 | // callEdge is called for each edge in the callgraph. |
| 358 | // calleeid is the callee's object node (has otFunction flag). |
| 359 | // |
| 360 | func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) { |
| 361 | obj := a.nodes[calleeid].obj |
| 362 | if obj.flags&otFunction == 0 { |
| 363 | panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid)) |
| 364 | } |
| 365 | callee := obj.cgn |
| 366 | |
| 367 | if cg := a.result.CallGraph; cg != nil { |
| 368 | // TODO(adonovan): opt: I would expect duplicate edges |
| 369 | // (to wrappers) to arise due to the elimination of |
| 370 | // context information, but I haven't observed any. |
| 371 | // Understand this better. |
| 372 | callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn)) |
| 373 | } |
| 374 | |
| 375 | if a.log != nil { |
| 376 | fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee) |
| 377 | } |
| 378 | |
| 379 | // Warn about calls to non-intrinsic external functions. |
| 380 | // TODO(adonovan): de-dup these messages. |
| 381 | if fn := callee.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil { |
| 382 | a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn) |
| 383 | a.warnf(fn.Pos(), " (declared here)") |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | // dumpSolution writes the PTS solution to the specified file. |
| 388 | // |
| 389 | // It only dumps the nodes that existed before solving. The order in |
| 390 | // which solver-created nodes are created depends on pre-solver |
| 391 | // optimization, so we can't include them in the cross-check. |
| 392 | // |
| 393 | func (a *analysis) dumpSolution(filename string, N int) { |
| 394 | f, err := os.Create(filename) |
| 395 | if err != nil { |
| 396 | panic(err) |
| 397 | } |
| 398 | for id, n := range a.nodes[:N] { |
| 399 | if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil { |
| 400 | panic(err) |
| 401 | } |
| 402 | var sep string |
| 403 | for _, l := range n.solve.pts.AppendTo(a.deltaSpace) { |
| 404 | if l >= N { |
| 405 | break |
| 406 | } |
| 407 | fmt.Fprintf(f, "%s%d", sep, l) |
| 408 | sep = " " |
| 409 | } |
| 410 | fmt.Fprintf(f, "} : %s\n", n.typ) |
| 411 | } |
| 412 | if err := f.Close(); err != nil { |
| 413 | panic(err) |
| 414 | } |
| 415 | } |
| 416 | |
| 417 | // showCounts logs the size of the constraint system. A typical |
| 418 | // optimized distribution is 65% copy, 13% load, 11% addr, 5% |
| 419 | // offsetAddr, 4% store, 2% others. |
| 420 | // |
| 421 | func (a *analysis) showCounts() { |
| 422 | if a.log != nil { |
| 423 | counts := make(map[reflect.Type]int) |
| 424 | for _, c := range a.constraints { |
| 425 | counts[reflect.TypeOf(c)]++ |
| 426 | } |
| 427 | fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints)) |
| 428 | var lines []string |
| 429 | for t, n := range counts { |
| 430 | line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t) |
| 431 | lines = append(lines, line) |
| 432 | } |
| 433 | sort.Sort(sort.Reverse(sort.StringSlice(lines))) |
| 434 | for _, line := range lines { |
| 435 | fmt.Fprintf(a.log, "\t%s\n", line) |
| 436 | } |
| 437 | |
| 438 | fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes)) |
| 439 | |
| 440 | // Show number of pointer equivalence classes. |
| 441 | m := make(map[*solverState]bool) |
| 442 | for _, n := range a.nodes { |
| 443 | m[n.solve] = true |
| 444 | } |
| 445 | fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m)) |
| 446 | } |
| 447 | } |