On i386 and x86-64, do unaligned loads directly instead of assembling 
values from individual bytes.  This speeds up -Eonly PTH reading 
of Cocoa.h by about 2ms, a 4.2% improvement.
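For context, a rough sketch of the two forms (not part of this patch; the
helper names are hypothetical).  On a little-endian host a memcpy expresses
the same unaligned load without the pointer cast, and GCC/Clang typically
lower it to a single mov:

  #include <cstdint>
  #include <cstring>

  // On a little-endian host this is an unaligned little-endian 32-bit load;
  // memcpy avoids the aliasing cast and compiles to one mov on i386/x86-64.
  static inline uint32_t ReadUnaligned32(const unsigned char *Data) {
    uint32_t V;
    std::memcpy(&V, Data, sizeof(V));
    return V;
  }

  // Portable fallback: assemble the value byte by byte.
  static inline uint32_t ReadPortable32(const unsigned char *Data) {
    return ((uint32_t)Data[0] <<  0) |
           ((uint32_t)Data[1] <<  8) |
           ((uint32_t)Data[2] << 16) |
           ((uint32_t)Data[3] << 24);
  }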


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62447 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index 8d06ac6..92c5e88 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -38,25 +38,45 @@
 }
 
 static inline uint16_t Read16(const unsigned char *&Data) {
+// Targets that directly support unaligned little-endian 16-bit loads can just
+// use them.
+#if defined(__i386__) || defined(__x86_64__)
+  uint16_t V = *((uint16_t*)Data);
+#else
   uint16_t V = ((uint16_t)Data[0] <<  0) |
                ((uint16_t)Data[1] <<  8);
+#endif
   Data += 2;
   return V;
 }
 
 static inline uint32_t Read24(const unsigned char *&Data) {
+// Targets that directly support unaligned little-endian loads can assemble
+// the 24-bit value from an unaligned 16-bit load plus the high byte.
+#if defined(__i386__) || defined(__x86_64__)
+  uint32_t V = ((uint16_t*)Data)[0] |
+               ((uint32_t)Data[2] << 16);
+#else
   uint32_t V = ((uint32_t)Data[0] <<  0) |
                ((uint32_t)Data[1] <<  8) |
                ((uint32_t)Data[2] << 16);
+#endif
+
   Data += 3;
   return V;
 }
 
 static inline uint32_t Read32(const unsigned char *&Data) {
+// Targets that directly support unaligned little-endian 32-bit loads can just
+// use them.
+#if defined(__i386__) || defined(__x86_64__)
+  uint32_t V = *((uint32_t*)Data);
+#else
   uint32_t V = ((uint32_t)Data[0] <<  0) |
                ((uint32_t)Data[1] <<  8) |
                ((uint32_t)Data[2] << 16) |
                ((uint32_t)Data[3] << 24);
+#endif
   Data += 4;
   return V;
 }