Profiling shows that creating and destroying vertex declarations is extremely expensive, so keep a 16-entry LRU cache of them and reuse a cached declaration instead of creating and releasing one each time attributes are set up.

BUG=
TEST=JSGameBench

Review URL: http://codereview.appspot.com/4358051

git-svn-id: https://angleproject.googlecode.com/svn/trunk@609 736b8ea6-26fd-11df-bfd4-992fa37f6226
diff --git a/src/libGLESv2/VertexDataManager.cpp b/src/libGLESv2/VertexDataManager.cpp
index 6eebf57..463a2f7 100644
--- a/src/libGLESv2/VertexDataManager.cpp
+++ b/src/libGLESv2/VertexDataManager.cpp
@@ -26,13 +26,18 @@
 namespace gl
 {
 
-VertexDataManager::VertexDataManager(Context *context, IDirect3DDevice9 *device) : mContext(context), mDevice(device)
+VertexDataManager::VertexDataManager(Context *context, IDirect3DDevice9 *device) : mContext(context), mDevice(device), mMaxLru(0)
 {
     for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
     {
         mDirtyCurrentValue[i] = true;
         mCurrentValueBuffer[i] = NULL;
     }
+    for (int i = 0; i < NUM_VERTEX_DECL_CACHE_ENTRIES; i++)
+    {
+        mVertexDeclCache[i].vertexDeclaration = NULL;
+        mVertexDeclCache[i].lruCount = 0;
+    }
 
     const D3DCAPS9 &caps = context->getDeviceCaps();
     checkVertexCaps(caps.DeclTypes);
@@ -48,6 +53,13 @@
     {
         delete mCurrentValueBuffer[i];
     }
+    for (int i = 0; i < NUM_VERTEX_DECL_CACHE_ENTRIES; i++)
+    {
+        if (mVertexDeclCache[i].vertexDeclaration)
+        {
+            mVertexDeclCache[i].vertexDeclaration->Release();
+        }
+    }
 }
 
 UINT VertexDataManager::writeAttributeData(ArrayVertexBuffer *vertexBuffer, GLint start, GLsizei count, const VertexAttribute &attribute)
@@ -509,7 +521,7 @@
 
 void VertexDataManager::setupAttributes(const TranslatedAttribute *attributes)
 {
-    D3DVERTEXELEMENT9 elements[MAX_VERTEX_ATTRIBS];
+    D3DVERTEXELEMENT9 elements[MAX_VERTEX_ATTRIBS + 1];
     D3DVERTEXELEMENT9 *element = &elements[0];
 
     for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
@@ -529,12 +541,39 @@
     }
 
     static const D3DVERTEXELEMENT9 end = D3DDECL_END();
-    *element = end;
+    *(element++) = end;
 
-    IDirect3DVertexDeclaration9 *vertexDeclaration;
-    mDevice->CreateVertexDeclaration(elements, &vertexDeclaration);
-    mDevice->SetVertexDeclaration(vertexDeclaration);
-    vertexDeclaration->Release();
+    for (int i = 0; i < NUM_VERTEX_DECL_CACHE_ENTRIES; i++)
+    {
+        VertexDeclCacheEntry *entry = &mVertexDeclCache[i];
+        if (memcmp(entry->cachedElements, elements, (element - elements) * sizeof(D3DVERTEXELEMENT9)) == 0 && entry->vertexDeclaration)
+        {
+            entry->lruCount = ++mMaxLru;
+            mDevice->SetVertexDeclaration(entry->vertexDeclaration);
+            return;
+        }
+    }
+
+    VertexDeclCacheEntry *lastCache = mVertexDeclCache;
+
+    for (int i = 0; i < NUM_VERTEX_DECL_CACHE_ENTRIES; i++)
+    {
+        if (mVertexDeclCache[i].lruCount < lastCache->lruCount)
+        {
+            lastCache = &mVertexDeclCache[i];
+        }
+    }
+
+    if (lastCache->vertexDeclaration != NULL)
+    {
+        lastCache->vertexDeclaration->Release();
+        lastCache->vertexDeclaration = NULL;
+    }
+
+    memcpy(lastCache->cachedElements, elements, (element - elements) * sizeof(D3DVERTEXELEMENT9));
+    mDevice->CreateVertexDeclaration(elements, &lastCache->vertexDeclaration);
+    mDevice->SetVertexDeclaration(lastCache->vertexDeclaration);
+    lastCache->lruCount = ++mMaxLru;
 }
 
 VertexBuffer::VertexBuffer(IDirect3DDevice9 *device, std::size_t size, DWORD usageFlags) : mDevice(device), mVertexBuffer(NULL)