On i386 and x86-64, just do unaligned loads instead of assembling
values byte by byte. This speeds up -Eonly PTH reading of cocoa.h
by about 2ms, a 4.2% improvement.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62447 91177308-0d34-0410-b5e6-96231b3b80d8
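
For reference, a minimal sketch (not part of this patch) of an equivalent
reader that gets the same single-instruction unaligned load on i386/x86-64
without the pointer cast, by going through memcpy instead. The name Read32LE
and the headers are illustrative assumptions, and a little-endian host is
assumed, matching the PTH byte layout:

  #include <cstring>
  #include <stdint.h>

  // Sketch only: compilers fold the memcpy into one unaligned 32-bit mov on
  // x86, so this performs like the cast-based fast path in the patch below
  // while staying well-defined under strict aliasing.
  static inline uint32_t Read32LE(const unsigned char *&Data) {
    uint32_t V;
    std::memcpy(&V, Data, sizeof(V));  // single unaligned load on x86
    Data += 4;                         // advance past the 4 bytes consumed
    return V;                          // correct value on a little-endian host
  }
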
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index 8d06ac6..92c5e88 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -38,25 +38,45 @@
}
static inline uint16_t Read16(const unsigned char *&Data) {
+// Targets that directly support unaligned little-endian 16-bit loads can just
+// use them.
+#if defined(__i386__) || defined(__x86_64__)
+ uint16_t V = *((uint16_t*)Data);
+#else
uint16_t V = ((uint16_t)Data[0] << 0) |
((uint16_t)Data[1] << 8);
+#endif
Data += 2;
return V;
}
static inline uint32_t Read24(const unsigned char *&Data) {
+// Targets that directly support unaligned little-endian loads can fetch the
+// low two bytes with a single 16-bit load and OR in the third byte.
+#if defined(__i386__) || defined(__x86_64__)
+ uint32_t V = ((uint16_t*)Data)[0] |
+ ((uint32_t)Data[2] << 16);
+#else
uint32_t V = ((uint32_t)Data[0] << 0) |
((uint32_t)Data[1] << 8) |
((uint32_t)Data[2] << 16);
+#endif
+
Data += 3;
return V;
}
static inline uint32_t Read32(const unsigned char *&Data) {
+// Targets that directly support unaligned little-endian 32-bit loads can just
+// use them.
+#if defined(__i386__) || defined(__x86_64__)
+ uint32_t V = *((uint32_t*)Data);
+#else
uint32_t V = ((uint32_t)Data[0] << 0) |
((uint32_t)Data[1] << 8) |
((uint32_t)Data[2] << 16) |
((uint32_t)Data[3] << 24);
+#endif
Data += 4;
return V;
}
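
A minimal usage sketch of the readers above, as a standalone copy of the
portable byte-assembly path (illustration only; the real functions live in
lib/Lex/PTHLexer.cpp, and main/Buf here are made up for the example):

  #include <cstdio>
  #include <stdint.h>

  // Read little-endian values from a byte stream, advancing the pointer.
  static inline uint16_t Read16(const unsigned char *&Data) {
    uint16_t V = (uint16_t)Data[0] | ((uint16_t)Data[1] << 8);
    Data += 2;
    return V;
  }
  static inline uint32_t Read32(const unsigned char *&Data) {
    uint32_t V = (uint32_t)Data[0]         | ((uint32_t)Data[1] << 8) |
                 ((uint32_t)Data[2] << 16) | ((uint32_t)Data[3] << 24);
    Data += 4;
    return V;
  }

  int main() {
    // 0x1234 then 0xDEADBEEF, encoded little-endian as a PTH file would be.
    const unsigned char Buf[] = { 0x34, 0x12, 0xEF, 0xBE, 0xAD, 0xDE };
    const unsigned char *P = Buf;
    unsigned A = Read16(P);            // P advances by 2
    unsigned B = Read32(P);            // P advances by 4 more
    printf("%x %x\n", A, B);           // prints "1234 deadbeef"
    return 0;
  }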