Allowing the quantiser search to put more than one pulse at once,
giving a (minor) speedup. Also fixed optional memory-based ncwrs64().
diff --git a/libcelt/cwrs.c b/libcelt/cwrs.c
index 3bfd9d9..cc743ad 100644
--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -69,7 +69,7 @@
   if(_n<0||_m<0)return 0;
   if(!c[_n][_m]){
     if(_m<=0)c[_n][_m]=1;
-    else if(_n>0)c[_n][_m]=ncwrs(_n-1,_m)+ncwrs(_n,_m-1)+ncwrs(_n-1,_m-1);
+    else if(_n>0)c[_n][_m]=ncwrs64(_n-1,_m)+ncwrs64(_n,_m-1)+ncwrs64(_n-1,_m-1);
 }
   return c[_n][_m];
 }
diff --git a/libcelt/vq.c b/libcelt/vq.c
index f22e528..4518840 100644
--- a/libcelt/vq.c
+++ b/libcelt/vq.c
@@ -48,6 +48,7 @@
    float ny[L][N];
    int iny[L][N];
    int i, j, m;
+   int pulsesLeft;
    float xy[L], nxy[L];
    float yy[L], nyy[L];
    float yp[L], nyp[L];
@@ -77,14 +78,23 @@
    for (m=0;m<L;m++)
       xy[m] = yy[m] = yp[m] = gain[m] = 0;
    
-   for (i=0;i<K;i++)
+   pulsesLeft = K;
+   while (pulsesLeft > 0)
    {
+      int pulsesAtOnce=1;
       int L2 = L;
       if (L>maxL)
       {
          L2 = maxL;
          maxL *= N;
       }
+      if (pulsesLeft > 5)
+         L2 = 1;
+      
+      pulsesAtOnce = pulsesLeft/N;
+      if (pulsesAtOnce<1)
+         pulsesAtOnce = 1;
+
       for (m=0;m<L;m++)
          best_scores[m] = -1e10;
 
@@ -101,9 +111,9 @@
                float tmp_xy, tmp_yy, tmp_yp;
                float score;
                float g;
-               float s = sign;
+               float s = sign*pulsesAtOnce;
                tmp_xy = xy[m] + s*x[j]               - alpha*s*p[j]*Rxp;
-               tmp_yy = yy[m] + 2*s*y[m][j] + 1      +alpha*alpha*p[j]*p[j]*Rpp - 2*alpha*s*p[j]*yp[m] - 2*alpha*p[j]*p[j];
+               tmp_yy = yy[m] + 2*s*y[m][j] + s*s      +s*s*alpha*alpha*p[j]*p[j]*Rpp - 2*alpha*s*p[j]*yp[m] - 2*s*s*alpha*p[j]*p[j];
                tmp_yp = yp[m] + s*p[j]               *(1-alpha*Rpp);
                g = (sqrt(tmp_yp*tmp_yp + tmp_yy - tmp_yy*Rpp) - tmp_yp)/tmp_yy;
                score = 2*g*tmp_xy - g*g*tmp_yy;
@@ -142,9 +152,9 @@
                   for (n=0;n<N;n++)
                      iny[id][n] = iy[m][n];
                   if (s>0)
-                     iny[id][j] += 1;
+                     iny[id][j] += pulsesAtOnce;
                   else
-                     iny[id][j] -= 1;
+                     iny[id][j] -= pulsesAtOnce;
                   best_scores[id] = score;
                }
             }   
@@ -152,6 +162,7 @@
          
       }
       int k,n;
+      /* FIXME: We could be swapping pointers instead */
       for (k=0;k<L;k++)
       {
          xy[k] = nxy[k];
@@ -162,7 +173,7 @@
          for (n=0;n<N;n++)
             iy[k][n] = iny[k][n];
       }
-
+      pulsesLeft -= pulsesAtOnce;
    }
    
    for (i=0;i<N;i++)