handles litLength >= 65535
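
The sequence store records literal lengths in a U16 table, so lengths of
65535 or more cannot be stored directly. This patch writes the sentinel
value 65535 into the table for such sequences, keeps the real length in a
new litLengthLong field, and resolves the sentinel when building the
literal-length codes. nbSeq is now derived from the offset table instead
of the litLength pointer, and LLFSELog drops from 10 to 9. The remaining
hunks are small cleanups (const qualifiers in bitstream.h and bench.c,
a local rename in the CCtx size estimator).

Below is a minimal, self-contained sketch of the sentinel scheme; the
struct and helper names are made up for illustration and are not the
actual zstd code:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical, simplified stand-in for zstd's seqStore_t. */
    typedef struct {
        uint16_t litLength[16];   /* per-sequence literal lengths (U16) */
        uint32_t litLengthLong;   /* real value when a length was saturated */
        int      nbSeq;
    } seqStoreSketch;

    /* Store a literal length, saturating to the 65535 sentinel when needed. */
    static void storeLitLength(seqStoreSketch* s, uint32_t litLength)
    {
        if (litLength >= 65535) {
            s->litLength[s->nbSeq] = 65535;    /* sentinel */
            s->litLengthLong = litLength;      /* keep the real length aside */
        } else {
            s->litLength[s->nbSeq] = (uint16_t)litLength;
        }
        s->nbSeq++;
    }

    /* Resolve the sentinel back to the long value, as the code-building
     * loop in the patch now does before computing the length code. */
    static uint32_t loadLitLength(const seqStoreSketch* s, int i)
    {
        return (s->litLength[i] == 65535) ? s->litLengthLong : s->litLength[i];
    }

    int main(void)
    {
        seqStoreSketch s = { {0}, 0, 0 };
        storeLitLength(&s, 42);
        storeLitLength(&s, 70000);
        printf("%u %u\n", (unsigned)loadLitLength(&s, 0),
                          (unsigned)loadLitLength(&s, 1));  /* prints: 42 70000 */
        return 0;
    }
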
diff --git a/lib/bitstream.h b/lib/bitstream.h
index e123793..0fe36ea 100644
--- a/lib/bitstream.h
+++ b/lib/bitstream.h
@@ -206,7 +206,7 @@
  *  unsafe version; does not check buffer overflow */
 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
-    size_t nbBytes = bitC->bitPos >> 3;
+    size_t const nbBytes = bitC->bitPos >> 3;
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     bitC->bitPos &= 7;
@@ -218,7 +218,7 @@
  *  note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
-    size_t nbBytes = bitC->bitPos >> 3;
+    size_t const nbBytes = bitC->bitPos >> 3;
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c
index ba84fde..937cce8 100644
--- a/lib/zstd_compress.c
+++ b/lib/zstd_compress.c
@@ -172,9 +172,9 @@
 {
     ZSTD_CCtx* zc = ZSTD_createCCtx();
     ZSTD_compressBegin_advanced(zc, NULL, 0, params);
-    { size_t const size = sizeof(*zc) + zc->workSpaceSize;
+    { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize;
       ZSTD_freeCCtx(zc);
-      return size; }
+      return ccsize; }
 }
 
 
@@ -291,7 +291,7 @@
 
 
 /*! ZSTD_reduceTable() :
-*   rescale indexes from a table (indexes are U32) */
+*   reduce table indexes by `reducerValue` */
 static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
 {
     U32 u;
@@ -586,15 +586,15 @@
     FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
     U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
     const U16*  const llTable = seqStorePtr->litLengthStart;
-    const U16*  const llPtr = seqStorePtr->litLength;
     const BYTE* const mlTable = seqStorePtr->matchLengthStart;
     const U32*  const offsetTable = seqStorePtr->offsetStart;
+    const U32*  const offsetTableEnd = seqStorePtr->offset;
     BYTE* const offCodeTable = seqStorePtr->offCodeStart;
     BYTE* const llCodeTable = seqStorePtr->llCodeStart;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
-    size_t const nbSeq = llPtr - llTable;
+    size_t const nbSeq = offsetTableEnd - offsetTable;
     BYTE* seqHead;
 
     /* Compress literals */
@@ -648,7 +648,8 @@
 
     {   size_t i;
         for (i=0; i<nbSeq; i++) {
-            U32 const ll = llTable[i];
+            U32 ll = llTable[i];
+            if (llTable[i] == 65535) ll = seqStorePtr->litLengthLong;
             llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : llCode[ll];
     }   }
 
@@ -788,10 +789,11 @@
             const BYTE LLCode = llCodeTable[n];                             /* (7)*/  /* (7)*/
             FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 17 */  /* 17 */
             if (MEM_32bits()) BIT_flushBits(&blockStream);                  /*  7 */
-            FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode);        /* 17 */  /* 27 */
-            FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode);      /* 26 */  /* 36 */
+            FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode);        /* 16 */  /* 26 */
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode);      /* 25 */  /* 35 */
             if (MEM_32bits()) BIT_flushBits(&blockStream);                  /*  7 */
-            BIT_addBits(&blockStream, offset, nbBits);                      /* 31 */  /* 62 */   /* 24 bits max in 32-bits mode */
+            //BIT_flushBits(&blockStream);                                    /*  7 */  /*  7 */
+            BIT_addBits(&blockStream, offset, nbBits);                      /* 31 */  /* 61 */   /* 24 bits max in 32-bits mode */
             BIT_addBits(&blockStream, llTable[n], llBits[LLCode]);
             BIT_flushBits(&blockStream);                                    /*  7 */  /*  7 */
         } }
@@ -866,7 +868,8 @@
 
     /* literal Length */
 #if 1
-    *seqStorePtr->litLength++ = (U16)litLength;   /* take care of litLength >= 65535 ! */
+    if (litLength>=65535) { *(seqStorePtr->litLength++) = 65535; seqStorePtr->litLengthLong = (U32)litLength; }
+    else *seqStorePtr->litLength++ = (U16)litLength;
 #else
     if (litLength >= MaxLL) {
         *(seqStorePtr->litLength++) = MaxLL;
diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h
index 7a3f213..2b830e3 100644
--- a/lib/zstd_internal.h
+++ b/lib/zstd_internal.h
@@ -110,7 +110,7 @@
 #define MaxLL  ((1<<LLbits) - 1)
 #define MaxOff ((1<<Offbits)- 1)
 #define MLFSELog   10
-#define LLFSELog   10
+#define LLFSELog    9
 #define OffFSELog   9
 #define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
 
@@ -188,6 +188,7 @@
     BYTE* lit;
     U16*  litLengthStart;
     U16*  litLength;
+    U32   litLengthLong;
     BYTE* llCodeStart;
     BYTE* matchLengthStart;
     BYTE* matchLength;
diff --git a/programs/bench.c b/programs/bench.c
index e5b231d..7acfcca 100644
--- a/programs/bench.c
+++ b/programs/bench.c
@@ -188,8 +188,8 @@
                         const size_t* fileSizes, U32 nbFiles,
                         const void* dictBuffer, size_t dictBufferSize)
 {
-    const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize);   /* avoid div by 0 */
-    const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
+    size_t const blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize);   /* avoid div by 0 */
+    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
     blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
     const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
     void* const compressedBuffer = malloc(maxCompressedSize);