David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | * |
| 16 | * Implementation file for control flow graph dumping for the dexdump utility. |
| 17 | */ |
| 18 | |
| 19 | #include "dexdump_cfg.h" |
| 20 | |
| 21 | #include <inttypes.h> |
Andreas Gampe | 8cf9cb3 | 2017-07-19 09:28:38 -0700 | [diff] [blame] | 22 | |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 23 | #include <map> |
Andreas Gampe | 8cf9cb3 | 2017-07-19 09:28:38 -0700 | [diff] [blame] | 24 | #include <ostream> |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 25 | #include <set> |
Andreas Gampe | 5794381 | 2017-12-06 21:39:13 -0800 | [diff] [blame] | 26 | #include <sstream> |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 27 | |
David Sehr | 0225f8e | 2018-01-31 08:52:24 +0000 | [diff] [blame] | 28 | #include "dex/code_item_accessors-inl.h" |
David Sehr | 9e734c7 | 2018-01-04 17:56:19 -0800 | [diff] [blame] | 29 | #include "dex/dex_file-inl.h" |
| 30 | #include "dex/dex_file_exception_helpers.h" |
| 31 | #include "dex/dex_instruction-inl.h" |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 32 | |
| 33 | namespace art { |
| 34 | |
| 35 | static void dumpMethodCFGImpl(const DexFile* dex_file, |
| 36 | uint32_t dex_method_idx, |
| 37 | const DexFile::CodeItem* code_item, |
| 38 | std::ostream& os) { |
| 39 | os << "digraph {\n"; |
| 40 | os << " # /* " << dex_file->PrettyMethod(dex_method_idx, true) << " */\n"; |
| 41 | |
Mathieu Chartier | 698ebbc | 2018-01-05 11:00:42 -0800 | [diff] [blame] | 42 | CodeItemDataAccessor accessor(*dex_file, code_item); |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 43 | |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 44 | std::set<uint32_t> dex_pc_is_branch_target; |
| 45 | { |
| 46 | // Go and populate. |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 47 | for (const DexInstructionPcPair& pair : accessor) { |
| 48 | const Instruction* inst = &pair.Inst(); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 49 | if (inst->IsBranch()) { |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 50 | dex_pc_is_branch_target.insert(pair.DexPc() + inst->GetTargetOffset()); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 51 | } else if (inst->IsSwitch()) { |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 52 | const uint16_t* insns = reinterpret_cast<const uint16_t*>(inst); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 53 | int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16); |
| 54 | const uint16_t* switch_insns = insns + switch_offset; |
| 55 | uint32_t switch_count = switch_insns[1]; |
| 56 | int32_t targets_offset; |
| 57 | if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { |
| 58 | /* 0=sig, 1=count, 2/3=firstKey */ |
| 59 | targets_offset = 4; |
| 60 | } else { |
| 61 | /* 0=sig, 1=count, 2..count*2 = keys */ |
| 62 | targets_offset = 2 + 2 * switch_count; |
| 63 | } |
| 64 | for (uint32_t targ = 0; targ < switch_count; targ++) { |
| 65 | int32_t offset = |
| 66 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) | |
| 67 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16); |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 68 | dex_pc_is_branch_target.insert(pair.DexPc() + offset); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 69 | } |
| 70 | } |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | // Create nodes for "basic blocks." |
| 75 | std::map<uint32_t, uint32_t> dex_pc_to_node_id; // This only has entries for block starts. |
| 76 | std::map<uint32_t, uint32_t> dex_pc_to_incl_id; // This has entries for all dex pcs. |
| 77 | |
| 78 | { |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 79 | bool first_in_block = true; |
| 80 | bool force_new_block = false; |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 81 | for (const DexInstructionPcPair& pair : accessor) { |
| 82 | const uint32_t dex_pc = pair.DexPc(); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 83 | if (dex_pc == 0 || |
| 84 | (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) || |
| 85 | force_new_block) { |
| 86 | uint32_t id = dex_pc_to_node_id.size(); |
| 87 | if (id > 0) { |
| 88 | // End last node. |
| 89 | os << "}\"];\n"; |
| 90 | } |
| 91 | // Start next node. |
| 92 | os << " node" << id << " [shape=record,label=\"{"; |
| 93 | dex_pc_to_node_id.insert(std::make_pair(dex_pc, id)); |
| 94 | first_in_block = true; |
| 95 | force_new_block = false; |
| 96 | } |
| 97 | |
| 98 | // Register instruction. |
| 99 | dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1)); |
| 100 | |
| 101 | // Print instruction. |
| 102 | if (!first_in_block) { |
| 103 | os << " | "; |
| 104 | } else { |
| 105 | first_in_block = false; |
| 106 | } |
| 107 | |
| 108 | // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'. |
| 109 | os << "<" << "p" << dex_pc << ">"; |
| 110 | os << " 0x" << std::hex << dex_pc << std::dec << ": "; |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 111 | std::string inst_str = pair.Inst().DumpString(dex_file); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 112 | size_t cur_start = 0; // It's OK to start at zero, instruction dumps don't start with chars |
| 113 | // we need to escape. |
| 114 | while (cur_start != std::string::npos) { |
| 115 | size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1); |
| 116 | if (next_escape == std::string::npos) { |
| 117 | os << inst_str.substr(cur_start, inst_str.size() - cur_start); |
| 118 | break; |
| 119 | } else { |
| 120 | os << inst_str.substr(cur_start, next_escape - cur_start); |
| 121 | // Escape all necessary characters. |
| 122 | while (next_escape < inst_str.size()) { |
Vladimir Marko | 35d5b8a | 2018-07-03 09:18:32 +0100 | [diff] [blame] | 123 | char c = inst_str[next_escape]; |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 124 | if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') { |
| 125 | os << '\\' << c; |
| 126 | } else { |
| 127 | break; |
| 128 | } |
| 129 | next_escape++; |
| 130 | } |
| 131 | if (next_escape >= inst_str.size()) { |
| 132 | next_escape = std::string::npos; |
| 133 | } |
| 134 | cur_start = next_escape; |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | // Force a new block for some fall-throughs and some instructions that terminate the "local" |
| 139 | // control flow. |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 140 | force_new_block = pair.Inst().IsSwitch() || pair.Inst().IsBasicBlockEnd(); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 141 | } |
| 142 | // Close last node. |
| 143 | if (dex_pc_to_node_id.size() > 0) { |
| 144 | os << "}\"];\n"; |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | // Create edges between them. |
| 149 | { |
| 150 | std::ostringstream regular_edges; |
| 151 | std::ostringstream taken_edges; |
| 152 | std::ostringstream exception_edges; |
| 153 | |
| 154 | // Common set of exception edges. |
| 155 | std::set<uint32_t> exception_targets; |
| 156 | |
| 157 | // These blocks (given by the first dex pc) need exception per dex-pc handling in a second |
| 158 | // pass. In the first pass we try and see whether we can use a common set of edges. |
| 159 | std::set<uint32_t> blocks_with_detailed_exceptions; |
| 160 | |
| 161 | { |
| 162 | uint32_t last_node_id = std::numeric_limits<uint32_t>::max(); |
| 163 | uint32_t old_dex_pc = 0; |
| 164 | uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max(); |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 165 | for (const DexInstructionPcPair& pair : accessor) { |
| 166 | const Instruction* inst = &pair.Inst(); |
| 167 | const uint32_t dex_pc = pair.DexPc(); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 168 | { |
| 169 | auto it = dex_pc_to_node_id.find(dex_pc); |
| 170 | if (it != dex_pc_to_node_id.end()) { |
| 171 | if (!exception_targets.empty()) { |
| 172 | // It seems the last block had common exception handlers. Add the exception edges now. |
| 173 | uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; |
| 174 | for (uint32_t handler_pc : exception_targets) { |
| 175 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 176 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 177 | exception_edges << " node" << node_id |
| 178 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 179 | << ";\n"; |
| 180 | } |
| 181 | } |
| 182 | exception_targets.clear(); |
| 183 | } |
| 184 | |
| 185 | block_start_dex_pc = dex_pc; |
| 186 | |
| 187 | // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things |
| 188 | // like switch data. |
| 189 | uint32_t old_last = last_node_id; |
| 190 | last_node_id = it->second; |
| 191 | if (old_last != std::numeric_limits<uint32_t>::max()) { |
| 192 | regular_edges << " node" << old_last << ":p" << old_dex_pc |
| 193 | << " -> node" << last_node_id << ":p" << dex_pc |
| 194 | << ";\n"; |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | // Look at the exceptions of the first entry. |
Mathieu Chartier | dc578c7 | 2017-12-27 11:51:45 -0800 | [diff] [blame] | 199 | CatchHandlerIterator catch_it(accessor, dex_pc); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 200 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 201 | exception_targets.insert(catch_it.GetHandlerAddress()); |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | // Handle instruction. |
| 206 | |
| 207 | // Branch: something with at most two targets. |
| 208 | if (inst->IsBranch()) { |
| 209 | const int32_t offset = inst->GetTargetOffset(); |
| 210 | const bool conditional = !inst->IsUnconditional(); |
| 211 | |
| 212 | auto target_it = dex_pc_to_node_id.find(dex_pc + offset); |
| 213 | if (target_it != dex_pc_to_node_id.end()) { |
| 214 | taken_edges << " node" << last_node_id << ":p" << dex_pc |
| 215 | << " -> node" << target_it->second << ":p" << (dex_pc + offset) |
| 216 | << ";\n"; |
| 217 | } |
| 218 | if (!conditional) { |
| 219 | // No fall-through. |
| 220 | last_node_id = std::numeric_limits<uint32_t>::max(); |
| 221 | } |
| 222 | } else if (inst->IsSwitch()) { |
| 223 | // TODO: Iterate through all switch targets. |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 224 | const uint16_t* insns = reinterpret_cast<const uint16_t*>(inst); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 225 | /* make sure the start of the switch is in range */ |
| 226 | int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16); |
| 227 | /* offset to switch table is a relative branch-style offset */ |
| 228 | const uint16_t* switch_insns = insns + switch_offset; |
| 229 | uint32_t switch_count = switch_insns[1]; |
| 230 | int32_t targets_offset; |
| 231 | if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { |
| 232 | /* 0=sig, 1=count, 2/3=firstKey */ |
| 233 | targets_offset = 4; |
| 234 | } else { |
| 235 | /* 0=sig, 1=count, 2..count*2 = keys */ |
| 236 | targets_offset = 2 + 2 * switch_count; |
| 237 | } |
| 238 | /* make sure the end of the switch is in range */ |
| 239 | /* verify each switch target */ |
| 240 | for (uint32_t targ = 0; targ < switch_count; targ++) { |
| 241 | int32_t offset = |
| 242 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) | |
| 243 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16); |
| 244 | int32_t abs_offset = dex_pc + offset; |
| 245 | auto target_it = dex_pc_to_node_id.find(abs_offset); |
| 246 | if (target_it != dex_pc_to_node_id.end()) { |
| 247 | // TODO: value label. |
| 248 | taken_edges << " node" << last_node_id << ":p" << dex_pc |
| 249 | << " -> node" << target_it->second << ":p" << (abs_offset) |
| 250 | << ";\n"; |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | // Exception edges. If this is not the first instruction in the block |
| 256 | if (block_start_dex_pc != dex_pc) { |
| 257 | std::set<uint32_t> current_handler_pcs; |
Mathieu Chartier | dc578c7 | 2017-12-27 11:51:45 -0800 | [diff] [blame] | 258 | CatchHandlerIterator catch_it(accessor, dex_pc); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 259 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 260 | current_handler_pcs.insert(catch_it.GetHandlerAddress()); |
| 261 | } |
| 262 | if (current_handler_pcs != exception_targets) { |
| 263 | exception_targets.clear(); // Clear so we don't do something at the end. |
| 264 | blocks_with_detailed_exceptions.insert(block_start_dex_pc); |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | if (inst->IsReturn() || |
| 269 | (inst->Opcode() == Instruction::THROW) || |
| 270 | (inst->IsBranch() && inst->IsUnconditional())) { |
| 271 | // No fall-through. |
| 272 | last_node_id = std::numeric_limits<uint32_t>::max(); |
| 273 | } |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 274 | old_dex_pc = pair.DexPc(); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 275 | } |
| 276 | // Finish up the last block, if it had common exceptions. |
| 277 | if (!exception_targets.empty()) { |
| 278 | // It seems the last block had common exception handlers. Add the exception edges now. |
| 279 | uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; |
| 280 | for (uint32_t handler_pc : exception_targets) { |
| 281 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 282 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 283 | exception_edges << " node" << node_id |
| 284 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 285 | << ";\n"; |
| 286 | } |
| 287 | } |
| 288 | exception_targets.clear(); |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | // Second pass for detailed exception blocks. |
| 293 | // TODO |
| 294 | // Exception edges. If this is not the first instruction in the block |
| 295 | for (uint32_t dex_pc : blocks_with_detailed_exceptions) { |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 296 | const Instruction* inst = &accessor.InstructionAt(dex_pc); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 297 | uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second; |
| 298 | while (true) { |
Mathieu Chartier | dc578c7 | 2017-12-27 11:51:45 -0800 | [diff] [blame] | 299 | CatchHandlerIterator catch_it(accessor, dex_pc); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 300 | if (catch_it.HasNext()) { |
| 301 | std::set<uint32_t> handled_targets; |
| 302 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 303 | uint32_t handler_pc = catch_it.GetHandlerAddress(); |
| 304 | auto it = handled_targets.find(handler_pc); |
| 305 | if (it == handled_targets.end()) { |
| 306 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 307 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 308 | exception_edges << " node" << this_node_id << ":p" << dex_pc |
| 309 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 310 | << ";\n"; |
| 311 | } |
| 312 | |
| 313 | // Mark as done. |
| 314 | handled_targets.insert(handler_pc); |
| 315 | } |
| 316 | } |
| 317 | } |
| 318 | if (inst->IsBasicBlockEnd()) { |
| 319 | break; |
| 320 | } |
| 321 | |
| 322 | // Loop update. Have a break-out if the next instruction is a branch target and thus in |
| 323 | // another block. |
| 324 | dex_pc += inst->SizeInCodeUnits(); |
Mathieu Chartier | 808c7a5 | 2017-12-15 11:19:33 -0800 | [diff] [blame] | 325 | if (dex_pc >= accessor.InsnsSizeInCodeUnits()) { |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 326 | break; |
| 327 | } |
| 328 | if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) { |
| 329 | break; |
| 330 | } |
| 331 | inst = inst->Next(); |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | // Write out the sub-graphs to make edges styled. |
| 336 | os << "\n"; |
| 337 | os << " subgraph regular_edges {\n"; |
| 338 | os << " edge [color=\"#000000\",weight=.3,len=3];\n\n"; |
| 339 | os << " " << regular_edges.str() << "\n"; |
| 340 | os << " }\n\n"; |
| 341 | |
| 342 | os << " subgraph taken_edges {\n"; |
| 343 | os << " edge [color=\"#00FF00\",weight=.3,len=3];\n\n"; |
| 344 | os << " " << taken_edges.str() << "\n"; |
| 345 | os << " }\n\n"; |
| 346 | |
| 347 | os << " subgraph exception_edges {\n"; |
| 348 | os << " edge [color=\"#FF0000\",weight=.3,len=3];\n\n"; |
| 349 | os << " " << exception_edges.str() << "\n"; |
| 350 | os << " }\n\n"; |
| 351 | } |
| 352 | |
| 353 | os << "}\n"; |
| 354 | } |
| 355 | |
| 356 | void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) { |
| 357 | // This is painful, we need to find the code item. That means finding the class, and then |
| 358 | // iterating the table. |
| 359 | if (dex_method_idx >= dex_file->NumMethodIds()) { |
| 360 | os << "Could not find method-idx."; |
| 361 | return; |
| 362 | } |
| 363 | const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx); |
| 364 | |
| 365 | const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_); |
| 366 | if (class_def == nullptr) { |
| 367 | os << "Could not find class-def."; |
| 368 | return; |
| 369 | } |
| 370 | |
| 371 | const uint8_t* class_data = dex_file->GetClassData(*class_def); |
| 372 | if (class_data == nullptr) { |
| 373 | os << "No class data."; |
| 374 | return; |
| 375 | } |
| 376 | |
| 377 | ClassDataItemIterator it(*dex_file, class_data); |
Mathieu Chartier | e17cf24 | 2017-06-19 11:05:51 -0700 | [diff] [blame] | 378 | it.SkipAllFields(); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 379 | |
| 380 | // Find method, and dump it. |
Mathieu Chartier | b7c273c | 2017-11-10 18:07:56 -0800 | [diff] [blame] | 381 | while (it.HasNextMethod()) { |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 382 | uint32_t method_idx = it.GetMemberIndex(); |
| 383 | if (method_idx == dex_method_idx) { |
| 384 | dumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os); |
| 385 | return; |
| 386 | } |
| 387 | it.Next(); |
| 388 | } |
| 389 | |
| 390 | // Otherwise complain. |
| 391 | os << "Something went wrong, didn't find the method in the class data."; |
| 392 | } |
| 393 | |
| 394 | } // namespace art |