JF Bastien | d4698e1 | 2015-08-15 01:23:28 +0000 | [diff] [blame] | 1 | //===-- Relooper.h - Top-level interface for WebAssembly ----*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===-------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// \file |
| 11 | /// \brief This defines an optimized C++ implemention of the Relooper |
| 12 | /// algorithm, originally developed as part of Emscripten, which |
| 13 | /// generates a structured AST from arbitrary control flow. |
| 14 | /// |
| 15 | //===-------------------------------------------------------------------===// |
| 16 | |
JF Bastien | 53bd975 | 2015-10-16 16:35:49 +0000 | [diff] [blame] | 17 | #include "llvm/ADT/MapVector.h" |
| 18 | #include "llvm/ADT/SetVector.h" |
| 19 | #include "llvm/Support/Casting.h" |
JF Bastien | d4698e1 | 2015-08-15 01:23:28 +0000 | [diff] [blame] | 20 | |
| 21 | #include <cassert> |
JF Bastien | d4698e1 | 2015-08-15 01:23:28 +0000 | [diff] [blame] | 22 | #include <cstdarg> |
JF Bastien | 53bd975 | 2015-10-16 16:35:49 +0000 | [diff] [blame] | 23 | #include <cstdio> |
JF Bastien | d4698e1 | 2015-08-15 01:23:28 +0000 | [diff] [blame] | 24 | #include <deque> |
| 25 | #include <list> |
| 26 | #include <map> |
| 27 | #include <memory> |
| 28 | #include <set> |
| 29 | |
| 30 | namespace llvm { |
| 31 | |
| 32 | namespace Relooper { |
| 33 | |
| 34 | struct Block; |
| 35 | struct Shape; |
| 36 | |
| 37 | /// |
| 38 | /// Info about a branching from one block to another |
| 39 | /// |
| 40 | struct Branch { |
| 41 | enum FlowType { |
| 42 | Direct = 0, // We will directly reach the right location through other |
| 43 | // means, no need for continue or break |
| 44 | Break = 1, |
| 45 | Continue = 2, |
| 46 | Nested = 3 // This code is directly reached, but we must be careful to |
| 47 | // ensure it is nested in an if - it is not reached |
| 48 | // unconditionally, other code paths exist alongside it that we need to make |
| 49 | // sure do not intertwine |
| 50 | }; |
| 51 | Shape |
| 52 | *Ancestor; // If not nullptr, this shape is the relevant one for purposes |
| 53 | // of getting to the target block. We break or continue on it |
| 54 | Branch::FlowType |
| 55 | Type; // If Ancestor is not nullptr, this says whether to break or |
| 56 | // continue |
| 57 | bool Labeled; // If a break or continue, whether we need to use a label |
| 58 | const char *Condition; // The condition for which we branch. For example, |
| 59 | // "my_var == 1". Conditions are checked one by one. |
| 60 | // One of the conditions should have nullptr as the |
| 61 | // condition, in which case it is the default |
| 62 | // FIXME: move from char* to LLVM data structures |
| 63 | const char *Code; // If provided, code that is run right before the branch is |
| 64 | // taken. This is useful for phis |
| 65 | // FIXME: move from char* to LLVM data structures |
| 66 | |
| 67 | Branch(const char *ConditionInit, const char *CodeInit = nullptr); |
| 68 | ~Branch(); |
| 69 | }; |
| 70 | |
| 71 | typedef SetVector<Block *> BlockSet; |
| 72 | typedef MapVector<Block *, Branch *> BlockBranchMap; |
| 73 | typedef MapVector<Block *, std::unique_ptr<Branch>> OwningBlockBranchMap; |
| 74 | |
| 75 | /// |
| 76 | /// Represents a basic block of code - some instructions that end with a |
| 77 | /// control flow modifier (a branch, return or throw). |
| 78 | /// |
| 79 | struct Block { |
| 80 | // Branches become processed after we finish the shape relevant to them. For |
| 81 | // example, when we recreate a loop, branches to the loop start become |
| 82 | // continues and are now processed. When we calculate what shape to generate |
| 83 | // from a set of blocks, we ignore processed branches. Blocks own the Branch |
| 84 | // objects they use, and destroy them when done. |
| 85 | OwningBlockBranchMap BranchesOut; |
| 86 | BlockSet BranchesIn; |
| 87 | OwningBlockBranchMap ProcessedBranchesOut; |
| 88 | BlockSet ProcessedBranchesIn; |
| 89 | Shape *Parent; // The shape we are directly inside |
| 90 | int Id; // A unique identifier, defined when added to relooper. Note that this |
| 91 | // uniquely identifies a *logical* block - if we split it, the two |
| 92 | // instances have the same content *and* the same Id |
| 93 | const char *Code; // The string representation of the code in this block. |
| 94 | // Owning pointer (we copy the input) |
| 95 | // FIXME: move from char* to LLVM data structures |
| 96 | const char *BranchVar; // A variable whose value determines where we go; if |
| 97 | // this is not nullptr, emit a switch on that variable |
| 98 | // FIXME: move from char* to LLVM data structures |
| 99 | bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching |
| 100 | // us requires setting the label variable |
| 101 | |
| 102 | Block(const char *CodeInit, const char *BranchVarInit); |
| 103 | ~Block(); |
| 104 | |
| 105 | void AddBranchTo(Block *Target, const char *Condition, |
| 106 | const char *Code = nullptr); |
| 107 | }; |
| 108 | |
| 109 | /// |
| 110 | /// Represents a structured control flow shape |
| 111 | /// |
| 112 | struct Shape { |
| 113 | int Id; // A unique identifier. Used to identify loops, labels are Lx where x |
| 114 | // is the Id. Defined when added to relooper |
| 115 | Shape *Next; // The shape that will appear in the code right after this one |
| 116 | Shape *Natural; // The shape that control flow gets to naturally (if there is |
| 117 | // Next, then this is Next) |
| 118 | |
| 119 | /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.) |
| 120 | enum ShapeKind { SK_Simple, SK_Multiple, SK_Loop }; |
| 121 | |
| 122 | private: |
| 123 | ShapeKind Kind; |
| 124 | |
| 125 | public: |
| 126 | ShapeKind getKind() const { return Kind; } |
| 127 | |
| 128 | Shape(ShapeKind KindInit) : Id(-1), Next(nullptr), Kind(KindInit) {} |
| 129 | }; |
| 130 | |
| 131 | /// |
| 132 | /// Simple: No control flow at all, just instructions. |
| 133 | /// |
| 134 | struct SimpleShape : public Shape { |
| 135 | Block *Inner; |
| 136 | |
| 137 | SimpleShape() : Shape(SK_Simple), Inner(nullptr) {} |
| 138 | |
| 139 | static bool classof(const Shape *S) { return S->getKind() == SK_Simple; } |
| 140 | }; |
| 141 | |
| 142 | /// |
| 143 | /// A shape that may be implemented with a labeled loop. |
| 144 | /// |
| 145 | struct LabeledShape : public Shape { |
| 146 | bool Labeled; // If we have a loop, whether it needs to be labeled |
| 147 | |
| 148 | LabeledShape(ShapeKind KindInit) : Shape(KindInit), Labeled(false) {} |
| 149 | }; |
| 150 | |
| 151 | // Blocks with the same id were split and are identical, so we just care about |
| 152 | // ids in Multiple entries |
| 153 | typedef std::map<int, Shape *> IdShapeMap; |
| 154 | |
| 155 | /// |
| 156 | /// Multiple: A shape with more than one entry. If the next block to |
| 157 | /// be entered is among them, we run it and continue to |
| 158 | /// the next shape, otherwise we continue immediately to the |
| 159 | /// next shape. |
| 160 | /// |
| 161 | struct MultipleShape : public LabeledShape { |
| 162 | IdShapeMap InnerMap; // entry block ID -> shape |
| 163 | int Breaks; // If we have branches on us, we need a loop (or a switch). This |
| 164 | // is a counter of requirements, |
| 165 | // if we optimize it to 0, the loop is unneeded |
| 166 | bool UseSwitch; // Whether to switch on label as opposed to an if-else chain |
| 167 | |
| 168 | MultipleShape() : LabeledShape(SK_Multiple), Breaks(0), UseSwitch(false) {} |
| 169 | |
| 170 | static bool classof(const Shape *S) { return S->getKind() == SK_Multiple; } |
| 171 | }; |
| 172 | |
| 173 | /// |
| 174 | /// Loop: An infinite loop. |
| 175 | /// |
| 176 | struct LoopShape : public LabeledShape { |
| 177 | Shape *Inner; |
| 178 | |
| 179 | LoopShape() : LabeledShape(SK_Loop), Inner(nullptr) {} |
| 180 | |
| 181 | static bool classof(const Shape *S) { return S->getKind() == SK_Loop; } |
| 182 | }; |
| 183 | |
JF Bastien | d4698e1 | 2015-08-15 01:23:28 +0000 | [diff] [blame] | 184 | } // namespace Relooper |
| 185 | |
| 186 | } // namespace llvm |