David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | * |
| 16 | * Implementation file for control flow graph dumping for the dexdump utility. |
| 17 | */ |
| 18 | |
| 19 | #include "dexdump_cfg.h" |
| 20 | |
| 21 | #include <inttypes.h> |
Andreas Gampe | 8cf9cb3 | 2017-07-19 09:28:38 -0700 | [diff] [blame] | 22 | |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 23 | #include <map> |
Andreas Gampe | 8cf9cb3 | 2017-07-19 09:28:38 -0700 | [diff] [blame] | 24 | #include <ostream> |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 25 | #include <set> |
| 26 | |
| 27 | #include "dex_file-inl.h" |
| 28 | #include "dex_instruction-inl.h" |
| 29 | |
| 30 | namespace art { |
| 31 | |
| 32 | static void dumpMethodCFGImpl(const DexFile* dex_file, |
| 33 | uint32_t dex_method_idx, |
| 34 | const DexFile::CodeItem* code_item, |
| 35 | std::ostream& os) { |
| 36 | os << "digraph {\n"; |
| 37 | os << " # /* " << dex_file->PrettyMethod(dex_method_idx, true) << " */\n"; |
| 38 | |
| 39 | std::set<uint32_t> dex_pc_is_branch_target; |
| 40 | { |
| 41 | // Go and populate. |
| 42 | const Instruction* inst = Instruction::At(code_item->insns_); |
| 43 | for (uint32_t dex_pc = 0; |
| 44 | dex_pc < code_item->insns_size_in_code_units_; |
| 45 | dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { |
| 46 | if (inst->IsBranch()) { |
| 47 | dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset()); |
| 48 | } else if (inst->IsSwitch()) { |
| 49 | const uint16_t* insns = code_item->insns_ + dex_pc; |
| 50 | int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16); |
| 51 | const uint16_t* switch_insns = insns + switch_offset; |
| 52 | uint32_t switch_count = switch_insns[1]; |
| 53 | int32_t targets_offset; |
| 54 | if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { |
| 55 | /* 0=sig, 1=count, 2/3=firstKey */ |
| 56 | targets_offset = 4; |
| 57 | } else { |
| 58 | /* 0=sig, 1=count, 2..count*2 = keys */ |
| 59 | targets_offset = 2 + 2 * switch_count; |
| 60 | } |
| 61 | for (uint32_t targ = 0; targ < switch_count; targ++) { |
| 62 | int32_t offset = |
| 63 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) | |
| 64 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16); |
| 65 | dex_pc_is_branch_target.insert(dex_pc + offset); |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | // Create nodes for "basic blocks." |
| 72 | std::map<uint32_t, uint32_t> dex_pc_to_node_id; // This only has entries for block starts. |
| 73 | std::map<uint32_t, uint32_t> dex_pc_to_incl_id; // This has entries for all dex pcs. |
| 74 | |
| 75 | { |
| 76 | const Instruction* inst = Instruction::At(code_item->insns_); |
| 77 | bool first_in_block = true; |
| 78 | bool force_new_block = false; |
| 79 | for (uint32_t dex_pc = 0; |
| 80 | dex_pc < code_item->insns_size_in_code_units_; |
| 81 | dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { |
| 82 | if (dex_pc == 0 || |
| 83 | (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) || |
| 84 | force_new_block) { |
| 85 | uint32_t id = dex_pc_to_node_id.size(); |
| 86 | if (id > 0) { |
| 87 | // End last node. |
| 88 | os << "}\"];\n"; |
| 89 | } |
| 90 | // Start next node. |
| 91 | os << " node" << id << " [shape=record,label=\"{"; |
| 92 | dex_pc_to_node_id.insert(std::make_pair(dex_pc, id)); |
| 93 | first_in_block = true; |
| 94 | force_new_block = false; |
| 95 | } |
| 96 | |
| 97 | // Register instruction. |
| 98 | dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1)); |
| 99 | |
| 100 | // Print instruction. |
| 101 | if (!first_in_block) { |
| 102 | os << " | "; |
| 103 | } else { |
| 104 | first_in_block = false; |
| 105 | } |
| 106 | |
| 107 | // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'. |
| 108 | os << "<" << "p" << dex_pc << ">"; |
| 109 | os << " 0x" << std::hex << dex_pc << std::dec << ": "; |
| 110 | std::string inst_str = inst->DumpString(dex_file); |
| 111 | size_t cur_start = 0; // It's OK to start at zero, instruction dumps don't start with chars |
| 112 | // we need to escape. |
| 113 | while (cur_start != std::string::npos) { |
| 114 | size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1); |
| 115 | if (next_escape == std::string::npos) { |
| 116 | os << inst_str.substr(cur_start, inst_str.size() - cur_start); |
| 117 | break; |
| 118 | } else { |
| 119 | os << inst_str.substr(cur_start, next_escape - cur_start); |
| 120 | // Escape all necessary characters. |
| 121 | while (next_escape < inst_str.size()) { |
| 122 | char c = inst_str.at(next_escape); |
| 123 | if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') { |
| 124 | os << '\\' << c; |
| 125 | } else { |
| 126 | break; |
| 127 | } |
| 128 | next_escape++; |
| 129 | } |
| 130 | if (next_escape >= inst_str.size()) { |
| 131 | next_escape = std::string::npos; |
| 132 | } |
| 133 | cur_start = next_escape; |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | // Force a new block for some fall-throughs and some instructions that terminate the "local" |
| 138 | // control flow. |
| 139 | force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd(); |
| 140 | } |
| 141 | // Close last node. |
| 142 | if (dex_pc_to_node_id.size() > 0) { |
| 143 | os << "}\"];\n"; |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | // Create edges between them. |
| 148 | { |
| 149 | std::ostringstream regular_edges; |
| 150 | std::ostringstream taken_edges; |
| 151 | std::ostringstream exception_edges; |
| 152 | |
| 153 | // Common set of exception edges. |
| 154 | std::set<uint32_t> exception_targets; |
| 155 | |
| 156 | // These blocks (given by the first dex pc) need exception per dex-pc handling in a second |
| 157 | // pass. In the first pass we try and see whether we can use a common set of edges. |
| 158 | std::set<uint32_t> blocks_with_detailed_exceptions; |
| 159 | |
| 160 | { |
| 161 | uint32_t last_node_id = std::numeric_limits<uint32_t>::max(); |
| 162 | uint32_t old_dex_pc = 0; |
| 163 | uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max(); |
| 164 | const Instruction* inst = Instruction::At(code_item->insns_); |
| 165 | for (uint32_t dex_pc = 0; |
| 166 | dex_pc < code_item->insns_size_in_code_units_; |
| 167 | old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { |
| 168 | { |
| 169 | auto it = dex_pc_to_node_id.find(dex_pc); |
| 170 | if (it != dex_pc_to_node_id.end()) { |
| 171 | if (!exception_targets.empty()) { |
| 172 | // It seems the last block had common exception handlers. Add the exception edges now. |
| 173 | uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; |
| 174 | for (uint32_t handler_pc : exception_targets) { |
| 175 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 176 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 177 | exception_edges << " node" << node_id |
| 178 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 179 | << ";\n"; |
| 180 | } |
| 181 | } |
| 182 | exception_targets.clear(); |
| 183 | } |
| 184 | |
| 185 | block_start_dex_pc = dex_pc; |
| 186 | |
| 187 | // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things |
| 188 | // like switch data. |
| 189 | uint32_t old_last = last_node_id; |
| 190 | last_node_id = it->second; |
| 191 | if (old_last != std::numeric_limits<uint32_t>::max()) { |
| 192 | regular_edges << " node" << old_last << ":p" << old_dex_pc |
| 193 | << " -> node" << last_node_id << ":p" << dex_pc |
| 194 | << ";\n"; |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | // Look at the exceptions of the first entry. |
| 199 | CatchHandlerIterator catch_it(*code_item, dex_pc); |
| 200 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 201 | exception_targets.insert(catch_it.GetHandlerAddress()); |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | // Handle instruction. |
| 206 | |
| 207 | // Branch: something with at most two targets. |
| 208 | if (inst->IsBranch()) { |
| 209 | const int32_t offset = inst->GetTargetOffset(); |
| 210 | const bool conditional = !inst->IsUnconditional(); |
| 211 | |
| 212 | auto target_it = dex_pc_to_node_id.find(dex_pc + offset); |
| 213 | if (target_it != dex_pc_to_node_id.end()) { |
| 214 | taken_edges << " node" << last_node_id << ":p" << dex_pc |
| 215 | << " -> node" << target_it->second << ":p" << (dex_pc + offset) |
| 216 | << ";\n"; |
| 217 | } |
| 218 | if (!conditional) { |
| 219 | // No fall-through. |
| 220 | last_node_id = std::numeric_limits<uint32_t>::max(); |
| 221 | } |
| 222 | } else if (inst->IsSwitch()) { |
| 223 | // TODO: Iterate through all switch targets. |
| 224 | const uint16_t* insns = code_item->insns_ + dex_pc; |
| 225 | /* make sure the start of the switch is in range */ |
| 226 | int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16); |
| 227 | /* offset to switch table is a relative branch-style offset */ |
| 228 | const uint16_t* switch_insns = insns + switch_offset; |
| 229 | uint32_t switch_count = switch_insns[1]; |
| 230 | int32_t targets_offset; |
| 231 | if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { |
| 232 | /* 0=sig, 1=count, 2/3=firstKey */ |
| 233 | targets_offset = 4; |
| 234 | } else { |
| 235 | /* 0=sig, 1=count, 2..count*2 = keys */ |
| 236 | targets_offset = 2 + 2 * switch_count; |
| 237 | } |
| 238 | /* make sure the end of the switch is in range */ |
| 239 | /* verify each switch target */ |
| 240 | for (uint32_t targ = 0; targ < switch_count; targ++) { |
| 241 | int32_t offset = |
| 242 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) | |
| 243 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16); |
| 244 | int32_t abs_offset = dex_pc + offset; |
| 245 | auto target_it = dex_pc_to_node_id.find(abs_offset); |
| 246 | if (target_it != dex_pc_to_node_id.end()) { |
| 247 | // TODO: value label. |
| 248 | taken_edges << " node" << last_node_id << ":p" << dex_pc |
| 249 | << " -> node" << target_it->second << ":p" << (abs_offset) |
| 250 | << ";\n"; |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | // Exception edges. If this is not the first instruction in the block |
| 256 | if (block_start_dex_pc != dex_pc) { |
| 257 | std::set<uint32_t> current_handler_pcs; |
| 258 | CatchHandlerIterator catch_it(*code_item, dex_pc); |
| 259 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 260 | current_handler_pcs.insert(catch_it.GetHandlerAddress()); |
| 261 | } |
| 262 | if (current_handler_pcs != exception_targets) { |
| 263 | exception_targets.clear(); // Clear so we don't do something at the end. |
| 264 | blocks_with_detailed_exceptions.insert(block_start_dex_pc); |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | if (inst->IsReturn() || |
| 269 | (inst->Opcode() == Instruction::THROW) || |
| 270 | (inst->IsBranch() && inst->IsUnconditional())) { |
| 271 | // No fall-through. |
| 272 | last_node_id = std::numeric_limits<uint32_t>::max(); |
| 273 | } |
| 274 | } |
| 275 | // Finish up the last block, if it had common exceptions. |
| 276 | if (!exception_targets.empty()) { |
| 277 | // It seems the last block had common exception handlers. Add the exception edges now. |
| 278 | uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; |
| 279 | for (uint32_t handler_pc : exception_targets) { |
| 280 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 281 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 282 | exception_edges << " node" << node_id |
| 283 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 284 | << ";\n"; |
| 285 | } |
| 286 | } |
| 287 | exception_targets.clear(); |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | // Second pass for detailed exception blocks. |
| 292 | // TODO |
| 293 | // Exception edges. If this is not the first instruction in the block |
| 294 | for (uint32_t dex_pc : blocks_with_detailed_exceptions) { |
| 295 | const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]); |
| 296 | uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second; |
| 297 | while (true) { |
| 298 | CatchHandlerIterator catch_it(*code_item, dex_pc); |
| 299 | if (catch_it.HasNext()) { |
| 300 | std::set<uint32_t> handled_targets; |
| 301 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 302 | uint32_t handler_pc = catch_it.GetHandlerAddress(); |
| 303 | auto it = handled_targets.find(handler_pc); |
| 304 | if (it == handled_targets.end()) { |
| 305 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 306 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 307 | exception_edges << " node" << this_node_id << ":p" << dex_pc |
| 308 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 309 | << ";\n"; |
| 310 | } |
| 311 | |
| 312 | // Mark as done. |
| 313 | handled_targets.insert(handler_pc); |
| 314 | } |
| 315 | } |
| 316 | } |
| 317 | if (inst->IsBasicBlockEnd()) { |
| 318 | break; |
| 319 | } |
| 320 | |
| 321 | // Loop update. Have a break-out if the next instruction is a branch target and thus in |
| 322 | // another block. |
| 323 | dex_pc += inst->SizeInCodeUnits(); |
| 324 | if (dex_pc >= code_item->insns_size_in_code_units_) { |
| 325 | break; |
| 326 | } |
| 327 | if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) { |
| 328 | break; |
| 329 | } |
| 330 | inst = inst->Next(); |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | // Write out the sub-graphs to make edges styled. |
| 335 | os << "\n"; |
| 336 | os << " subgraph regular_edges {\n"; |
| 337 | os << " edge [color=\"#000000\",weight=.3,len=3];\n\n"; |
| 338 | os << " " << regular_edges.str() << "\n"; |
| 339 | os << " }\n\n"; |
| 340 | |
| 341 | os << " subgraph taken_edges {\n"; |
| 342 | os << " edge [color=\"#00FF00\",weight=.3,len=3];\n\n"; |
| 343 | os << " " << taken_edges.str() << "\n"; |
| 344 | os << " }\n\n"; |
| 345 | |
| 346 | os << " subgraph exception_edges {\n"; |
| 347 | os << " edge [color=\"#FF0000\",weight=.3,len=3];\n\n"; |
| 348 | os << " " << exception_edges.str() << "\n"; |
| 349 | os << " }\n\n"; |
| 350 | } |
| 351 | |
| 352 | os << "}\n"; |
| 353 | } |
| 354 | |
| 355 | void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) { |
| 356 | // This is painful, we need to find the code item. That means finding the class, and then |
| 357 | // iterating the table. |
| 358 | if (dex_method_idx >= dex_file->NumMethodIds()) { |
| 359 | os << "Could not find method-idx."; |
| 360 | return; |
| 361 | } |
| 362 | const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx); |
| 363 | |
| 364 | const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_); |
| 365 | if (class_def == nullptr) { |
| 366 | os << "Could not find class-def."; |
| 367 | return; |
| 368 | } |
| 369 | |
| 370 | const uint8_t* class_data = dex_file->GetClassData(*class_def); |
| 371 | if (class_data == nullptr) { |
| 372 | os << "No class data."; |
| 373 | return; |
| 374 | } |
| 375 | |
| 376 | ClassDataItemIterator it(*dex_file, class_data); |
Mathieu Chartier | e17cf24 | 2017-06-19 11:05:51 -0700 | [diff] [blame] | 377 | it.SkipAllFields(); |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 378 | |
| 379 | // Find method, and dump it. |
Mathieu Chartier | b7c273c | 2017-11-10 18:07:56 -0800 | [diff] [blame] | 380 | while (it.HasNextMethod()) { |
David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 381 | uint32_t method_idx = it.GetMemberIndex(); |
| 382 | if (method_idx == dex_method_idx) { |
| 383 | dumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os); |
| 384 | return; |
| 385 | } |
| 386 | it.Next(); |
| 387 | } |
| 388 | |
| 389 | // Otherwise complain. |
| 390 | os << "Something went wrong, didn't find the method in the class data."; |
| 391 | } |
| 392 | |
| 393 | } // namespace art |