Emit & read more compressed bytecode by not emitting a bytecode block for
each basic block in a function. Instead, just emit a stream of instructions,
chopping up basic blocks based on when we find terminator instructions. This
saves a fairly substantial chunk of bytecode space. In stripped sample
cases, for example, we get this reduction in size:
197.parser: 163036 -> 137180: 15.9% reduction
254.gap : 844936 -> 689392: 18.4%
255.vortex: 621724 -> 528444: 15.0%
...
Not bad for something this simple. :) Note that this doesn't require a new
bytecode version number at all, though version 1.1 should not need to support
the old format.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@10280 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp
index ed8f9e8..097d548 100644
--- a/lib/Bytecode/Reader/Reader.cpp
+++ b/lib/Bytecode/Reader/Reader.cpp
@@ -188,7 +188,8 @@
}
}
-
+/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one
+/// basic block at a time. This method reads in one of the basic block packets.
BasicBlock *BytecodeParser::ParseBasicBlock(const unsigned char *&Buf,
const unsigned char *EndBuf,
unsigned BlockNo) {
@@ -207,6 +208,38 @@
return BB;
}
+
+/// ParseInstructionList - Parse all of the BasicBlocks & Instructions in the
+/// body of a function. In post 1.0 bytecode files, we no longer emit basic
+/// blocks individually, in order to avoid per-basic-block overhead.
+unsigned BytecodeParser::ParseInstructionList(Function *F,
+ const unsigned char *&Buf,
+ const unsigned char *EndBuf) {
+ unsigned BlockNo = 0; // index of the block being read; returned as the count
+ std::vector<unsigned> Args; // one vector handed to every ParseInstruction call
+
+ while (Buf < EndBuf) {
+ BasicBlock *BB;
+ if (ParsedBasicBlocks.size() == BlockNo) // no slot for this block yet
+ ParsedBasicBlocks.push_back(BB = new BasicBlock());
+ else if (ParsedBasicBlocks[BlockNo] == 0) // slot exists but is still empty
+ BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
+ else // slot already filled (presumably by a forward reference - confirm)
+ BB = ParsedBasicBlocks[BlockNo];
+ ++BlockNo;
+ F->getBasicBlockList().push_back(BB);
+
+ // Read instructions into this block until it has a terminator or Buf runs out
+ while (Buf < EndBuf && !BB->getTerminator())
+ ParseInstruction(Buf, EndBuf, Args, BB);
+
+ if (!BB->getTerminator()) // buffer ended before a terminator was read
+ throw std::string("Non-terminated basic block found!");
+ }
+
+ return BlockNo; // number of basic blocks appended to F
+}
+
void BytecodeParser::ParseSymbolTable(const unsigned char *&Buf,
const unsigned char *EndBuf,
SymbolTable *ST,
@@ -345,6 +378,13 @@
break;
}
+ case BytecodeFormat::InstructionList: {
+ BCR_TRACE(2, "BLOCK BytecodeFormat::InstructionList: {\n");
+ if (BlockNum) throw std::string("Already parsed basic blocks!");
+ BlockNum = ParseInstructionList(F, Buf, Buf+Size);
+ break;
+ }
+
case BytecodeFormat::SymbolTable:
BCR_TRACE(2, "BLOCK BytecodeFormat::SymbolTable: {\n");
ParseSymbolTable(Buf, Buf+Size, &F->getSymbolTable(), F);
diff --git a/lib/Bytecode/Reader/ReaderInternals.h b/lib/Bytecode/Reader/ReaderInternals.h
index aea45c2..fd0a1ed 100644
--- a/lib/Bytecode/Reader/ReaderInternals.h
+++ b/lib/Bytecode/Reader/ReaderInternals.h
@@ -162,7 +162,9 @@
BasicBlock *ParseBasicBlock(const unsigned char *&Buf,
const unsigned char *End,
unsigned BlockNo);
-
+ unsigned ParseInstructionList(Function *F, const unsigned char *&Buf,
+ const unsigned char *EndBuf);
+
void ParseInstruction(const unsigned char *&Buf, const unsigned char *End,
std::vector<unsigned> &Args, BasicBlock *BB);
diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp
index 9c9e1ab..7fa22b8 100644
--- a/lib/Bytecode/Writer/Writer.cpp
+++ b/lib/Bytecode/Writer/Writer.cpp
@@ -225,9 +225,13 @@
// Output information about the constants in the function...
outputConstants(true);
- // Output basic block nodes...
- for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
- processBasicBlock(*I);
+ { // Output all of the instructions in the body of the function
+ BytecodeBlock ILBlock(BytecodeFormat::InstructionList, Out);
+
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E;++BB)
+ for(BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I)
+ processInstruction(*I);
+ }
// If needed, output the symbol table for the function...
outputSymbolTable(F->getSymbolTable());
@@ -236,14 +240,6 @@
}
}
-
-void BytecodeWriter::processBasicBlock(const BasicBlock &BB) {
- BytecodeBlock FunctionBlock(BytecodeFormat::BasicBlock, Out);
- // Process all the instructions in the bb...
- for(BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I)
- processInstruction(*I);
-}
-
void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
BytecodeBlock FunctionBlock(BytecodeFormat::SymbolTable, Out);
diff --git a/lib/Bytecode/Writer/WriterInternals.h b/lib/Bytecode/Writer/WriterInternals.h
index 8cb4bfd..2946515 100644
--- a/lib/Bytecode/Writer/WriterInternals.h
+++ b/lib/Bytecode/Writer/WriterInternals.h
@@ -36,7 +36,6 @@
protected:
void outputConstants(bool isFunction);
void outputFunction(const Function *F);
- void processBasicBlock(const BasicBlock &BB);
void processInstruction(const Instruction &I);
private :