PTH: Emitted tokens now consist of 12 bytes that are loaded using 3 32-bit loads.  This reduces user time but increases system time because of the slightly larger PTH file.  Although there is no performance win on Cocoa.h and -Eonly, overall this seems like a good step.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62542 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp
index 7aa63b3..1d33105 100644
--- a/Driver/CacheTokens.cpp
+++ b/Driver/CacheTokens.cpp
@@ -133,12 +133,13 @@
 
 void PTHWriter::EmitToken(const Token& T) {
   uint32_t fpos = PP.getSourceManager().getFullFilePos(T.getLocation());
-  Emit8(T.getKind());
-  Emit8(T.getFlags());
-  Emit24(ResolveID(T.getIdentifierInfo()));
+  
+  Emit32(((uint32_t) T.getKind()) |
+         (((uint32_t) T.getFlags()) << 8) |
+         (((uint32_t) T.getLength()) << 16));
+  Emit32(ResolveID(T.getIdentifierInfo()));
   Emit32(fpos);
-  Emit16(T.getLength());
-
+  
   // For specific tokens we cache their spelling.
   if (T.getIdentifierInfo())
     return;
@@ -270,9 +271,10 @@
 }
 
 PCHEntry PTHWriter::LexTokens(Lexer& L) {
-
-  // Record the location within the token file.
-  Offset off = (Offset) Out.tell();
+  // Pad with 0's so that tokens are emitted at a 4-byte alignment.
+  // This speeds up reading them back in.
+  Offset off = (Offset) Out.tell();  
+  for (unsigned Pad = off % 4 ; Pad != 0 ; --Pad, ++off) Emit8(0);
   
   // Keep track of matching '#if' ... '#endif'.
   typedef std::vector<std::pair<Offset, unsigned> > PPCondTable;