David Sehr | caacd11 | 2016-10-20 16:27:02 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | * |
| 16 | * Implementation file for control flow graph dumping for the dexdump utility. |
| 17 | */ |
| 18 | |
| 19 | #include "dexdump_cfg.h" |
| 20 | |
| 21 | #include <inttypes.h> |
| 22 | #include <ostream> |
| 23 | #include <map> |
| 24 | #include <set> |
| 25 | |
| 26 | #include "dex_file-inl.h" |
| 27 | #include "dex_instruction-inl.h" |
| 28 | |
| 29 | namespace art { |
| 30 | |
| 31 | static void dumpMethodCFGImpl(const DexFile* dex_file, |
| 32 | uint32_t dex_method_idx, |
| 33 | const DexFile::CodeItem* code_item, |
| 34 | std::ostream& os) { |
| 35 | os << "digraph {\n"; |
| 36 | os << " # /* " << dex_file->PrettyMethod(dex_method_idx, true) << " */\n"; |
| 37 | |
| 38 | std::set<uint32_t> dex_pc_is_branch_target; |
| 39 | { |
| 40 | // Go and populate. |
| 41 | const Instruction* inst = Instruction::At(code_item->insns_); |
| 42 | for (uint32_t dex_pc = 0; |
| 43 | dex_pc < code_item->insns_size_in_code_units_; |
| 44 | dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { |
| 45 | if (inst->IsBranch()) { |
| 46 | dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset()); |
| 47 | } else if (inst->IsSwitch()) { |
| 48 | const uint16_t* insns = code_item->insns_ + dex_pc; |
| 49 | int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16); |
| 50 | const uint16_t* switch_insns = insns + switch_offset; |
| 51 | uint32_t switch_count = switch_insns[1]; |
| 52 | int32_t targets_offset; |
| 53 | if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { |
| 54 | /* 0=sig, 1=count, 2/3=firstKey */ |
| 55 | targets_offset = 4; |
| 56 | } else { |
| 57 | /* 0=sig, 1=count, 2..count*2 = keys */ |
| 58 | targets_offset = 2 + 2 * switch_count; |
| 59 | } |
| 60 | for (uint32_t targ = 0; targ < switch_count; targ++) { |
| 61 | int32_t offset = |
| 62 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) | |
| 63 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16); |
| 64 | dex_pc_is_branch_target.insert(dex_pc + offset); |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | // Create nodes for "basic blocks." |
| 71 | std::map<uint32_t, uint32_t> dex_pc_to_node_id; // This only has entries for block starts. |
| 72 | std::map<uint32_t, uint32_t> dex_pc_to_incl_id; // This has entries for all dex pcs. |
| 73 | |
| 74 | { |
| 75 | const Instruction* inst = Instruction::At(code_item->insns_); |
| 76 | bool first_in_block = true; |
| 77 | bool force_new_block = false; |
| 78 | for (uint32_t dex_pc = 0; |
| 79 | dex_pc < code_item->insns_size_in_code_units_; |
| 80 | dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { |
| 81 | if (dex_pc == 0 || |
| 82 | (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) || |
| 83 | force_new_block) { |
| 84 | uint32_t id = dex_pc_to_node_id.size(); |
| 85 | if (id > 0) { |
| 86 | // End last node. |
| 87 | os << "}\"];\n"; |
| 88 | } |
| 89 | // Start next node. |
| 90 | os << " node" << id << " [shape=record,label=\"{"; |
| 91 | dex_pc_to_node_id.insert(std::make_pair(dex_pc, id)); |
| 92 | first_in_block = true; |
| 93 | force_new_block = false; |
| 94 | } |
| 95 | |
| 96 | // Register instruction. |
| 97 | dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1)); |
| 98 | |
| 99 | // Print instruction. |
| 100 | if (!first_in_block) { |
| 101 | os << " | "; |
| 102 | } else { |
| 103 | first_in_block = false; |
| 104 | } |
| 105 | |
| 106 | // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'. |
| 107 | os << "<" << "p" << dex_pc << ">"; |
| 108 | os << " 0x" << std::hex << dex_pc << std::dec << ": "; |
| 109 | std::string inst_str = inst->DumpString(dex_file); |
| 110 | size_t cur_start = 0; // It's OK to start at zero, instruction dumps don't start with chars |
| 111 | // we need to escape. |
| 112 | while (cur_start != std::string::npos) { |
| 113 | size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1); |
| 114 | if (next_escape == std::string::npos) { |
| 115 | os << inst_str.substr(cur_start, inst_str.size() - cur_start); |
| 116 | break; |
| 117 | } else { |
| 118 | os << inst_str.substr(cur_start, next_escape - cur_start); |
| 119 | // Escape all necessary characters. |
| 120 | while (next_escape < inst_str.size()) { |
| 121 | char c = inst_str.at(next_escape); |
| 122 | if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') { |
| 123 | os << '\\' << c; |
| 124 | } else { |
| 125 | break; |
| 126 | } |
| 127 | next_escape++; |
| 128 | } |
| 129 | if (next_escape >= inst_str.size()) { |
| 130 | next_escape = std::string::npos; |
| 131 | } |
| 132 | cur_start = next_escape; |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | // Force a new block for some fall-throughs and some instructions that terminate the "local" |
| 137 | // control flow. |
| 138 | force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd(); |
| 139 | } |
| 140 | // Close last node. |
| 141 | if (dex_pc_to_node_id.size() > 0) { |
| 142 | os << "}\"];\n"; |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | // Create edges between them. |
| 147 | { |
| 148 | std::ostringstream regular_edges; |
| 149 | std::ostringstream taken_edges; |
| 150 | std::ostringstream exception_edges; |
| 151 | |
| 152 | // Common set of exception edges. |
| 153 | std::set<uint32_t> exception_targets; |
| 154 | |
| 155 | // These blocks (given by the first dex pc) need exception per dex-pc handling in a second |
| 156 | // pass. In the first pass we try and see whether we can use a common set of edges. |
| 157 | std::set<uint32_t> blocks_with_detailed_exceptions; |
| 158 | |
| 159 | { |
| 160 | uint32_t last_node_id = std::numeric_limits<uint32_t>::max(); |
| 161 | uint32_t old_dex_pc = 0; |
| 162 | uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max(); |
| 163 | const Instruction* inst = Instruction::At(code_item->insns_); |
| 164 | for (uint32_t dex_pc = 0; |
| 165 | dex_pc < code_item->insns_size_in_code_units_; |
| 166 | old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) { |
| 167 | { |
| 168 | auto it = dex_pc_to_node_id.find(dex_pc); |
| 169 | if (it != dex_pc_to_node_id.end()) { |
| 170 | if (!exception_targets.empty()) { |
| 171 | // It seems the last block had common exception handlers. Add the exception edges now. |
| 172 | uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; |
| 173 | for (uint32_t handler_pc : exception_targets) { |
| 174 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 175 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 176 | exception_edges << " node" << node_id |
| 177 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 178 | << ";\n"; |
| 179 | } |
| 180 | } |
| 181 | exception_targets.clear(); |
| 182 | } |
| 183 | |
| 184 | block_start_dex_pc = dex_pc; |
| 185 | |
| 186 | // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things |
| 187 | // like switch data. |
| 188 | uint32_t old_last = last_node_id; |
| 189 | last_node_id = it->second; |
| 190 | if (old_last != std::numeric_limits<uint32_t>::max()) { |
| 191 | regular_edges << " node" << old_last << ":p" << old_dex_pc |
| 192 | << " -> node" << last_node_id << ":p" << dex_pc |
| 193 | << ";\n"; |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | // Look at the exceptions of the first entry. |
| 198 | CatchHandlerIterator catch_it(*code_item, dex_pc); |
| 199 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 200 | exception_targets.insert(catch_it.GetHandlerAddress()); |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | // Handle instruction. |
| 205 | |
| 206 | // Branch: something with at most two targets. |
| 207 | if (inst->IsBranch()) { |
| 208 | const int32_t offset = inst->GetTargetOffset(); |
| 209 | const bool conditional = !inst->IsUnconditional(); |
| 210 | |
| 211 | auto target_it = dex_pc_to_node_id.find(dex_pc + offset); |
| 212 | if (target_it != dex_pc_to_node_id.end()) { |
| 213 | taken_edges << " node" << last_node_id << ":p" << dex_pc |
| 214 | << " -> node" << target_it->second << ":p" << (dex_pc + offset) |
| 215 | << ";\n"; |
| 216 | } |
| 217 | if (!conditional) { |
| 218 | // No fall-through. |
| 219 | last_node_id = std::numeric_limits<uint32_t>::max(); |
| 220 | } |
| 221 | } else if (inst->IsSwitch()) { |
| 222 | // TODO: Iterate through all switch targets. |
| 223 | const uint16_t* insns = code_item->insns_ + dex_pc; |
| 224 | /* make sure the start of the switch is in range */ |
| 225 | int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16); |
| 226 | /* offset to switch table is a relative branch-style offset */ |
| 227 | const uint16_t* switch_insns = insns + switch_offset; |
| 228 | uint32_t switch_count = switch_insns[1]; |
| 229 | int32_t targets_offset; |
| 230 | if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { |
| 231 | /* 0=sig, 1=count, 2/3=firstKey */ |
| 232 | targets_offset = 4; |
| 233 | } else { |
| 234 | /* 0=sig, 1=count, 2..count*2 = keys */ |
| 235 | targets_offset = 2 + 2 * switch_count; |
| 236 | } |
| 237 | /* make sure the end of the switch is in range */ |
| 238 | /* verify each switch target */ |
| 239 | for (uint32_t targ = 0; targ < switch_count; targ++) { |
| 240 | int32_t offset = |
| 241 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) | |
| 242 | static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16); |
| 243 | int32_t abs_offset = dex_pc + offset; |
| 244 | auto target_it = dex_pc_to_node_id.find(abs_offset); |
| 245 | if (target_it != dex_pc_to_node_id.end()) { |
| 246 | // TODO: value label. |
| 247 | taken_edges << " node" << last_node_id << ":p" << dex_pc |
| 248 | << " -> node" << target_it->second << ":p" << (abs_offset) |
| 249 | << ";\n"; |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | // Exception edges. If this is not the first instruction in the block |
| 255 | if (block_start_dex_pc != dex_pc) { |
| 256 | std::set<uint32_t> current_handler_pcs; |
| 257 | CatchHandlerIterator catch_it(*code_item, dex_pc); |
| 258 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 259 | current_handler_pcs.insert(catch_it.GetHandlerAddress()); |
| 260 | } |
| 261 | if (current_handler_pcs != exception_targets) { |
| 262 | exception_targets.clear(); // Clear so we don't do something at the end. |
| 263 | blocks_with_detailed_exceptions.insert(block_start_dex_pc); |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | if (inst->IsReturn() || |
| 268 | (inst->Opcode() == Instruction::THROW) || |
| 269 | (inst->IsBranch() && inst->IsUnconditional())) { |
| 270 | // No fall-through. |
| 271 | last_node_id = std::numeric_limits<uint32_t>::max(); |
| 272 | } |
| 273 | } |
| 274 | // Finish up the last block, if it had common exceptions. |
| 275 | if (!exception_targets.empty()) { |
| 276 | // It seems the last block had common exception handlers. Add the exception edges now. |
| 277 | uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second; |
| 278 | for (uint32_t handler_pc : exception_targets) { |
| 279 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 280 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 281 | exception_edges << " node" << node_id |
| 282 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 283 | << ";\n"; |
| 284 | } |
| 285 | } |
| 286 | exception_targets.clear(); |
| 287 | } |
| 288 | } |
| 289 | |
| 290 | // Second pass for detailed exception blocks. |
| 291 | // TODO |
| 292 | // Exception edges. If this is not the first instruction in the block |
| 293 | for (uint32_t dex_pc : blocks_with_detailed_exceptions) { |
| 294 | const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]); |
| 295 | uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second; |
| 296 | while (true) { |
| 297 | CatchHandlerIterator catch_it(*code_item, dex_pc); |
| 298 | if (catch_it.HasNext()) { |
| 299 | std::set<uint32_t> handled_targets; |
| 300 | for (; catch_it.HasNext(); catch_it.Next()) { |
| 301 | uint32_t handler_pc = catch_it.GetHandlerAddress(); |
| 302 | auto it = handled_targets.find(handler_pc); |
| 303 | if (it == handled_targets.end()) { |
| 304 | auto node_id_it = dex_pc_to_incl_id.find(handler_pc); |
| 305 | if (node_id_it != dex_pc_to_incl_id.end()) { |
| 306 | exception_edges << " node" << this_node_id << ":p" << dex_pc |
| 307 | << " -> node" << node_id_it->second << ":p" << handler_pc |
| 308 | << ";\n"; |
| 309 | } |
| 310 | |
| 311 | // Mark as done. |
| 312 | handled_targets.insert(handler_pc); |
| 313 | } |
| 314 | } |
| 315 | } |
| 316 | if (inst->IsBasicBlockEnd()) { |
| 317 | break; |
| 318 | } |
| 319 | |
| 320 | // Loop update. Have a break-out if the next instruction is a branch target and thus in |
| 321 | // another block. |
| 322 | dex_pc += inst->SizeInCodeUnits(); |
| 323 | if (dex_pc >= code_item->insns_size_in_code_units_) { |
| 324 | break; |
| 325 | } |
| 326 | if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) { |
| 327 | break; |
| 328 | } |
| 329 | inst = inst->Next(); |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | // Write out the sub-graphs to make edges styled. |
| 334 | os << "\n"; |
| 335 | os << " subgraph regular_edges {\n"; |
| 336 | os << " edge [color=\"#000000\",weight=.3,len=3];\n\n"; |
| 337 | os << " " << regular_edges.str() << "\n"; |
| 338 | os << " }\n\n"; |
| 339 | |
| 340 | os << " subgraph taken_edges {\n"; |
| 341 | os << " edge [color=\"#00FF00\",weight=.3,len=3];\n\n"; |
| 342 | os << " " << taken_edges.str() << "\n"; |
| 343 | os << " }\n\n"; |
| 344 | |
| 345 | os << " subgraph exception_edges {\n"; |
| 346 | os << " edge [color=\"#FF0000\",weight=.3,len=3];\n\n"; |
| 347 | os << " " << exception_edges.str() << "\n"; |
| 348 | os << " }\n\n"; |
| 349 | } |
| 350 | |
| 351 | os << "}\n"; |
| 352 | } |
| 353 | |
| 354 | void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) { |
| 355 | // This is painful, we need to find the code item. That means finding the class, and then |
| 356 | // iterating the table. |
| 357 | if (dex_method_idx >= dex_file->NumMethodIds()) { |
| 358 | os << "Could not find method-idx."; |
| 359 | return; |
| 360 | } |
| 361 | const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx); |
| 362 | |
| 363 | const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_); |
| 364 | if (class_def == nullptr) { |
| 365 | os << "Could not find class-def."; |
| 366 | return; |
| 367 | } |
| 368 | |
| 369 | const uint8_t* class_data = dex_file->GetClassData(*class_def); |
| 370 | if (class_data == nullptr) { |
| 371 | os << "No class data."; |
| 372 | return; |
| 373 | } |
| 374 | |
| 375 | ClassDataItemIterator it(*dex_file, class_data); |
| 376 | // Skip fields |
| 377 | while (it.HasNextStaticField() || it.HasNextInstanceField()) { |
| 378 | it.Next(); |
| 379 | } |
| 380 | |
| 381 | // Find method, and dump it. |
| 382 | while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) { |
| 383 | uint32_t method_idx = it.GetMemberIndex(); |
| 384 | if (method_idx == dex_method_idx) { |
| 385 | dumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os); |
| 386 | return; |
| 387 | } |
| 388 | it.Next(); |
| 389 | } |
| 390 | |
| 391 | // Otherwise complain. |
| 392 | os << "Something went wrong, didn't find the method in the class data."; |
| 393 | } |
| 394 | |
| 395 | } // namespace art |