Add array to keep track of frequency within active segment, fix malloc bug, update benchmarking result
diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
index 478d879..07d65b0 100644
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
+++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
@@ -14,42 +14,46 @@
 
 ###Benchmarking Result:
 
+d=8
+f=23
+freq[i] = 0 when dmer added to best segment
+
 github:
 | Algorithm         | Speed(sec)    | Compression Ratio  |
-| ------------------|:-------------:| ------------------:|
-| nodict            | 0.000004      |  2.999642          |
-| random            | 0.148247      |  8.786957          |
-| cover             | 56.331553     |  10.641263         |
-| legacy            | 0.917595      |  8.989482          |
-| fastCover(opt)    | 13.169979     |  10.215174         |
-| fastCover(k=200)  | 2.692406      |  8.657219          |
+| ----------------- | ------------- | ------------------ |
+| nodict            | 0.000007      |  2.999642          |
+| random            | 0.150258      |  8.786957          |
+| cover             | 60.388853     |  10.641263         |
+| legacy            | 0.965050      |  8.989482          |
+| fastCover(opt)    | 84.968131     |  10.614747         |
+| fastCover(k=200)  | 6.465490      |  9.484150          |
 
 hg-commands
 | Algorithm         | Speed(sec)    | Compression Ratio  |
-| ----------------- |:-------------:| ------------------:|
-| nodict            | 0.000007      |  2.425291          |
-| random            | 0.093990      |  3.489515          |
-| cover             | 58.602385     |  4.131136          |
-| legacy            | 0.865683      |  3.911896          |
-| fastCover(opt)    | 9.404134      |  3.977229          |
-| fastCover(k=200)  | 1.037434      |  3.810326          |
+| ----------------- | ------------- | ------------------ |
+| nodict            | 0.000005      |  2.425291          |
+| random            | 0.084348      |  3.489515          |
+| cover             | 60.144894     |  4.131136          |
+| legacy            | 0.831981      |  3.911896          |
+| fastCover(opt)    | 59.030437     |  4.157595          |
+| fastCover(k=200)  | 3.702932      |  4.134222          |
 
 hg-changelog
 | Algorithm         | Speed(sec)    | Compression Ratio  |
-| ----------------- |:-------------:| ------------------:|
-| nodict            | 0.000022      |  1.377613          |
-| random            | 0.551539      |  2.096785          |
-| cover             | 221.370056    |  2.188654          |
-| legacy            | 2.405923      |  2.058273          |
-| fastCover(opt)    | 49.526246     |  2.124185          |
-| fastCover(k=200)  | 9.746872      |  2.114674          |
+| ----------------- | ------------- | ------------------ |
+| nodict            | 0.000004      |  1.377613          |
+| random            | 0.555964      |  2.096785          |
+| cover             | 214.423753    |  2.188654          |
+| legacy            | 2.180249      |  2.058273          |
+| fastCover(opt)    | 102.261452    |  2.180347          |
+| fastCover(k=200)  | 11.81039      |  2.170673          |
 
 hg-manifest
 | Algorithm         | Speed(sec)    | Compression Ratio  |
-| ----------------- |:-------------:| ------------------:|
-| nodict            | 0.000019      |  1.866385          |
-| random            | 1.083536      |  2.309485          |
-| cover             | 928.894887    |  2.582597          |
-| legacy            | 9.110371      |  2.506775          |
-| fastCover(opt)    | 116.508270    |  2.525689          |
-| fastCover(k=200)  | 12.176555     |  2.472221          |
+| ----------------- | ------------- | ------------------ |
+| nodict            | 0.000006      |  1.866385          |
+| random            | 1.063974      |  2.309485          |
+| cover             | 909.101849    |  2.582597          |
+| legacy            | 8.706580      |  2.506775          |
+| fastCover(opt)    | 188.598079    |  2.596761          |
+| fastCover(k=200)  | 13.392734     |  2.592985          |
diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.c b/contrib/experimental_dict_builders/fastCover/fastCover.c
index abd592c..6f990e0 100644
--- a/contrib/experimental_dict_builders/fastCover/fastCover.c
+++ b/contrib/experimental_dict_builders/fastCover/fastCover.c
@@ -48,7 +48,7 @@
 
 
 /*-*************************************
-* Hash Function
+* Hash Functions
 ***************************************/
 static const U64 prime6bytes = 227718039650203ULL;
 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
@@ -58,6 +58,7 @@
 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
 
+
 /**
  * Hash the d-byte value pointed to by p and mod 2^f
  */
@@ -140,29 +141,41 @@
   activeSegment.begin = begin;
   activeSegment.end = begin;
   activeSegment.score = 0;
-  /* Slide the activeSegment through the whole epoch.
-   * Save the best segment in bestSegment.
-   */
-  while (activeSegment.end < end) {
-    /* Get hash value of current dmer  */
-    const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
-    /* Add frequency of this index to score */
-    activeSegment.score += freqs[index];
-    /* Increment end of segment */
-    activeSegment.end += 1;
-    /* If the window is now too large, drop the first position */
-    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
-      /* Get hash value of the dmer to be eliminated from active segment */
-      const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
-      /* Subtract frequency of this index from score */
-      activeSegment.score -= freqs[delIndex];
-      /* Increment start of segment */
-      activeSegment.begin += 1;
+  {
+    /* Keep track of number of times an index has been seen in current segment */
+    U16* currfreqs =(U16 *)malloc((1 << parameters.f) * sizeof(U16));
+    memset(currfreqs, 0, (1 << parameters.f) * sizeof(*currfreqs));
+    /* Slide the activeSegment through the whole epoch.
+     * Save the best segment in bestSegment.
+     */
+    while (activeSegment.end < end) {
+      /* Get hash value of current dmer */
+      const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
+      /* Add frequency of this index to score if this is the first occurence of index in active segment */
+      if (currfreqs[index] == 0) {
+        activeSegment.score += freqs[index];
+      }
+      currfreqs[index] += 1;
+      /* Increment end of segment */
+      activeSegment.end += 1;
+      /* If the window is now too large, drop the first position */
+      if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
+        /* Get hash value of the dmer to be eliminated from active segment */
+        const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
+        currfreqs[delIndex] -= 1;
+        /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
+        if (currfreqs[delIndex] == 0) {
+          activeSegment.score -= freqs[delIndex];
+        }
+        /* Increment start of segment */
+        activeSegment.begin += 1;
+      }
+      /* If this segment is the best so far save it */
+      if (activeSegment.score > bestSegment.score) {
+        bestSegment = activeSegment;
+      }
     }
-    /* If this segment is the best so far save it */
-    if (activeSegment.score > bestSegment.score) {
-      bestSegment = activeSegment;
-    }
+    free(currfreqs);
   }
   {
     /* Trim off the zero frequency head and tail from the segment. */
@@ -185,7 +198,7 @@
     U32 pos;
     for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
       const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
-      freqs[i] = freqs[i]/2;
+      freqs[i] = 0;
     }
   }
   return bestSegment;
@@ -245,12 +258,12 @@
 /**
  * Calculate for frequency of hash value of each dmer in ctx->samples
  */
-static void FASTCOVER_getFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
+static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
   /* inCurrSample keeps track of this hash value has already be seen in previous dmers in the same sample*/
-  size_t* inCurrSample = (size_t *)malloc((1<<f)*sizeof(size_t));
+  BYTE* inCurrSample = (BYTE *)malloc((1 << f) * sizeof(BYTE));
   size_t start; /* start of current dmer */
   for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
-    memset(inCurrSample, 0, (1 << f)); /* Reset inCurrSample for each sample */
+    memset(inCurrSample, 0, (1 << f) * sizeof(*inCurrSample)); /* Reset inCurrSample for each sample */
     size_t currSampleStart = ctx->offsets[i];
     size_t currSampleEnd = ctx->offsets[i+1];
     start = currSampleStart;
@@ -338,7 +351,7 @@
   memset(ctx->freqs, 0, (1 << f) * sizeof(U32));
 
   DISPLAYLEVEL(2, "Computing frequencies\n");
-  FASTCOVER_getFrequency(ctx->freqs, f, ctx);
+  FASTCOVER_computeFrequency(ctx->freqs, f, ctx);
 
   return 1;
 }
diff --git a/contrib/experimental_dict_builders/fastCover/main.c b/contrib/experimental_dict_builders/fastCover/main.c
index 260eeb2..f286b05 100644
--- a/contrib/experimental_dict_builders/fastCover/main.c
+++ b/contrib/experimental_dict_builders/fastCover/main.c
@@ -165,7 +165,7 @@
   params.splitPoint = (double)split/100;
 
   /* Build dictionary */
-  sampleInfo* info= getSampleInfo(filenameTable,
+  sampleInfo* info = getSampleInfo(filenameTable,
                     filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
   operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, &params);
 
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/main.c b/contrib/experimental_dict_builders/randomDictBuilder/main.c
index 3f3a6ca..3ad8857 100644
--- a/contrib/experimental_dict_builders/randomDictBuilder/main.c
+++ b/contrib/experimental_dict_builders/randomDictBuilder/main.c
@@ -149,7 +149,7 @@
   params.zParams = zParams;
   params.k = k;
 
-  sampleInfo* info= getSampleInfo(filenameTable,
+  sampleInfo* info = getSampleInfo(filenameTable,
                     filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
   operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, &params);