/*-*****************************
  *     Decompression functions
  *******************************/
-/* LZ4_decompress_generic() :
- * This generic decompression function cover all use cases.
- * It shall be instantiated several times, using different sets of directives
- * Note that it is important this generic function is really inlined,
+
+#define DEBUGLOG(l, ...) {}    /* disabled */
+
+#ifndef assert
+#define assert(condition) ((void)0)
+#endif
+
+/*
+ * LZ4_decompress_generic() :
+ * This generic decompression function covers all use cases.
+ * It shall be instantiated several times, using different sets of directives.
+ * Note that it is important for performance that this function really get inlined,
  * in order to remove useless branches during compilation optimization.
  */
 static FORCE_INLINE int LZ4_decompress_generic(
-        const char * const source,
-        char * const dest,
-        int inputSize,
+        const char * const src,
+        char * const dst,
+        int srcSize,
                /*
                 * If endOnInput == endOnInputSize,
-                * this value is the max size of Output Buffer.
+                * this value is `dstCapacity`
                 */
         int outputSize,
         /* endOnOutputSize, endOnInputSize */
-        int endOnInput,
+        endCondition_directive endOnInput,
         /* full, partial */
-        int partialDecoding,
-        /* only used if partialDecoding == partial */
-        int targetOutputSize,
+        earlyEnd_directive partialDecoding,
         /* noDict, withPrefix64k, usingExtDict */
-        int dict,
-        /* == dest when no prefix */
+        dict_directive dict,
+        /* always <= dst, == dst when no prefix */
         const BYTE * const lowPrefix,
         /* only if dict == usingExtDict */
         const BYTE * const dictStart,
         const size_t dictSize
         )
 {
-       /* Local Variables */
-       const BYTE *ip = (const BYTE *) source;
-       const BYTE * const iend = ip + inputSize;
+       const BYTE *ip = (const BYTE *) src;
+       const BYTE * const iend = ip + srcSize;
 
-       BYTE *op = (BYTE *) dest;
+       BYTE *op = (BYTE *) dst;
        BYTE * const oend = op + outputSize;
        BYTE *cpy;
-       BYTE *oexit = op + targetOutputSize;
-       const BYTE * const lowLimit = lowPrefix - dictSize;
 
        const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
-       static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };
-       static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
+       static const unsigned int inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
+       static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
 
        const int safeDecode = (endOnInput == endOnInputSize);
        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
 
+       /* Set up the "end" pointers for the shortcut. */
+       const BYTE *const shortiend = iend -
+               (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+       const BYTE *const shortoend = oend -
+               (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+
+       DEBUGLOG(5, "%s (srcSize:%i, dstSize:%i)", __func__,
+                srcSize, outputSize);
+
        /* Special cases */
-       /* targetOutputSize too high => decode everything */
-       if ((partialDecoding) && (oexit > oend - MFLIMIT))
-               oexit = oend - MFLIMIT;
+       assert(lowPrefix <= op);
+       assert(src != NULL);
 
        /* Empty output buffer */
        if ((endOnInput) && (unlikely(outputSize == 0)))
-               return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+               return ((srcSize == 1) && (*ip == 0)) ? 0 : -1;
 
        if ((!endOnInput) && (unlikely(outputSize == 0)))
                return (*ip == 0 ? 1 : -1);
 
+       if ((endOnInput) && unlikely(srcSize == 0))
+               return -1;
+
        /* Main Loop : decode sequences */
        while (1) {
                size_t length;
 
                /* get literal length */
                unsigned int const token = *ip++;
-
                length = token>>ML_BITS;
 
+               /* ip < iend before the increment */
+               assert(!endOnInput || ip <= iend);
+
+               /*
+                * A two-stage shortcut for the most common case:
+                * 1) If the literal length is 0..14, and there is enough
+                * space, enter the shortcut and copy 16 bytes on behalf
+                * of the literals (in the fast mode, only 8 bytes can be
+                * safely copied this way).
+                * 2) Further if the match length is 4..18, copy 18 bytes
+                * in a similar manner; but we ensure that there's enough
+                * space in the output for those 18 bytes earlier, upon
+                * entering the shortcut (in other words, there is a
+                * combined check for both stages).
+                */
+               if ((endOnInput ? length != RUN_MASK : length <= 8)
+                  /*
+                   * strictly "less than" on input, to re-enter
+                   * the loop with at least one byte
+                   */
+                  && likely((endOnInput ? ip < shortiend : 1) &
+                            (op <= shortoend))) {
+                       /* Copy the literals */
+                       memcpy(op, ip, endOnInput ? 16 : 8);
+                       op += length; ip += length;
+
+                       /*
+                        * The second stage:
+                        * prepare for match copying, decode full info.
+                        * If it doesn't work out, the info won't be wasted.
+                        */
+                       length = token & ML_MASK; /* match length */
+                       offset = LZ4_readLE16(ip);
+                       ip += 2;
+                       match = op - offset;
+                       assert(match <= op); /* check overflow */
+
+                       /* Do not deal with overlapping matches. */
+                       if ((length != ML_MASK) &&
+                           (offset >= 8) &&
+                           (dict == withPrefix64k || match >= lowPrefix)) {
+                               /* Copy the match. */
+                               memcpy(op + 0, match + 0, 8);
+                               memcpy(op + 8, match + 8, 8);
+                               memcpy(op + 16, match + 16, 2);
+                               op += length + MINMATCH;
+                               /* Both stages worked, load the next token. */
+                               continue;
+                       }
+
+                       /*
+                        * The second stage didn't work out, but the info
+                        * is ready. Propel it right to the point of match
+                        * copying.
+                        */
+                       goto _copy_match;
+               }
+
+               /* decode literal length */
                if (length == RUN_MASK) {
                        unsigned int s;
 
+                       if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) {
+                               /* overflow detection */
+                               goto _output_error;
+                       }
                        do {
                                s = *ip++;
                                length += s;
                                : 1) & (s == 255));
 
                        if ((safeDecode)
-                               && unlikely(
-                                       (size_t)(op + length) < (size_t)(op))) {
+                           && unlikely((uptrval)(op) +
+                                       length < (uptrval)(op))) {
                                /* overflow detection */
                                goto _output_error;
                        }
                        if ((safeDecode)
-                               && unlikely(
-                                       (size_t)(ip + length) < (size_t)(ip))) {
+                           && unlikely((uptrval)(ip) +
+                                       length < (uptrval)(ip))) {
                                /* overflow detection */
                                goto _output_error;
                        }
 
                /* copy literals */
                cpy = op + length;
-               if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+               LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+
+               if (((endOnInput) && ((cpy > oend - MFLIMIT)
                        || (ip + length > iend - (2 + 1 + LASTLITERALS))))
                        || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
                        if (partialDecoding) {
                                if (cpy > oend) {
                                        /*
-                                        * Error :
-                                        * write attempt beyond end of output buffer
+                                        * Partial decoding :
+                                        * stop in the middle of literal segment
                                         */
-                                       goto _output_error;
+                                       cpy = oend;
+                                       length = oend - op;
                                }
                                if ((endOnInput)
                                        && (ip + length > iend)) {
                        memcpy(op, ip, length);
                        ip += length;
                        op += length;
+
                        /* Necessarily EOF, due to parsing restrictions */
-                       break;
+                       if (!partialDecoding || (cpy == oend))
+                               break;
+               } else {
+                       /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+                       LZ4_wildCopy(op, ip, cpy);
+                       ip += length;
+                       op = cpy;
                }
 
-               LZ4_wildCopy(op, ip, cpy);
-               ip += length;
-               op = cpy;
-
                /* get offset */
                offset = LZ4_readLE16(ip);
                ip += 2;
                match = op - offset;
 
-               if ((checkOffset) && (unlikely(match < lowLimit))) {
+               /* get matchlength */
+               length = token & ML_MASK;
+
+_copy_match:
+               if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) {
                        /* Error : offset outside buffers */
                        goto _output_error;
                }
 
                /* costs ~1%; silence an msan warning when offset == 0 */
-               LZ4_write32(op, (U32)offset);
+               /*
+                * note : when partialDecoding, there is no guarantee that
+                * at least 4 bytes remain available in output buffer
+                */
+               if (!partialDecoding) {
+                       assert(oend > op);
+                       assert(oend - op >= 4);
+
+                       LZ4_write32(op, (U32)offset);
+               }
 
-               /* get matchlength */
-               length = token & ML_MASK;
                if (length == ML_MASK) {
                        unsigned int s;
 
 
                        if ((safeDecode)
                                && unlikely(
-                                       (size_t)(op + length) < (size_t)op)) {
+                                       (uptrval)(op) + length < (uptrval)op)) {
                                /* overflow detection */
                                goto _output_error;
                        }
 
                length += MINMATCH;
 
-               /* check external dictionary */
+               /* match starting within external dictionary */
                if ((dict == usingExtDict) && (match < lowPrefix)) {
                        if (unlikely(op + length > oend - LASTLITERALS)) {
                                /* doesn't respect parsing restriction */
-                               goto _output_error;
+                               if (!partialDecoding)
+                                       goto _output_error;
+                               length = min(length, (size_t)(oend - op));
                        }
 
                        if (length <= (size_t)(lowPrefix - match)) {
                                /*
-                                * match can be copied as a single segment
-                                * from external dictionary
+                                * match fits entirely within external
+                                * dictionary : just copy
                                 */
                                memmove(op, dictEnd - (lowPrefix - match),
                                        length);
                                op += length;
                        } else {
                                /*
-                                * match encompass external
+                                * match stretches into both external
                                 * dictionary and current block
                                 */
                                size_t const copySize = (size_t)(lowPrefix - match);
 
                                memcpy(op, dictEnd - copySize, copySize);
                                op += copySize;
-
                                if (restSize > (size_t)(op - lowPrefix)) {
                                        /* overlap copy */
                                        BYTE * const endOfMatch = op + restSize;
                                        op += restSize;
                                }
                        }
-
                        continue;
                }
 
                /* copy match within block */
                cpy = op + length;
 
-               if (unlikely(offset < 8)) {
-                       const int dec64 = dec64table[offset];
+               /*
+                * partialDecoding :
+                * may not respect endBlock parsing restrictions
+                */
+               assert(op <= oend);
+               if (partialDecoding &&
+                   (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
+                       size_t const mlen = min(length, (size_t)(oend - op));
+                       const BYTE * const matchEnd = match + mlen;
+                       BYTE * const copyEnd = op + mlen;
+
+                       if (matchEnd > op) {
+                               /* overlap copy */
+                               while (op < copyEnd)
+                                       *op++ = *match++;
+                       } else {
+                               memcpy(op, match, mlen);
+                       }
+                       op = copyEnd;
+                       if (op == oend)
+                               break;
+                       continue;
+               }
 
+               if (unlikely(offset < 8)) {
                        op[0] = match[0];
                        op[1] = match[1];
                        op[2] = match[2];
                        op[3] = match[3];
-                       match += dec32table[offset];
+                       match += inc32table[offset];
                        memcpy(op + 4, match, 4);
-                       match -= dec64;
+                       match -= dec64table[offset];
                } else {
                        LZ4_copy8(op, match);
                        match += 8;
 
                op += 8;
 
-               if (unlikely(cpy > oend - 12)) {
+               if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
                        BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
 
                        if (cpy > oend - LASTLITERALS) {
                                match += oCopyLimit - op;
                                op = oCopyLimit;
                        }
-
                        while (op < cpy)
                                *op++ = *match++;
                } else {
                        LZ4_copy8(op, match);
-
                        if (length > 16)
                                LZ4_wildCopy(op + 8, match + 8, cpy);
                }
-
-               op = cpy; /* correction */
+               op = cpy; /* wildcopy correction */
        }
 
        /* end of decoding */
        if (endOnInput) {
                /* Nb of output bytes decoded */
-               return (int) (((char *)op) - dest);
+               return (int) (((char *)op) - dst);
        } else {
                /* Nb of input bytes read */
-               return (int) (((const char *)ip) - source);
+               return (int) (((const char *)ip) - src);
        }
 
        /* Overflow error detected */
 _output_error:
-       return -1;
+       return (int) (-(((const char *)ip) - src)) - 1;
 }
 
 int LZ4_decompress_safe(const char *source, char *dest,
        int compressedSize, int maxDecompressedSize)
 {
-       return LZ4_decompress_generic(source, dest, compressedSize,
-               maxDecompressedSize, endOnInputSize, full, 0,
-               noDict, (BYTE *)dest, NULL, 0);
+       return LZ4_decompress_generic(source, dest,
+                                     compressedSize, maxDecompressedSize,
+                                     endOnInputSize, decode_full_block,
+                                     noDict, (BYTE *)dest, NULL, 0);
 }
 
-int LZ4_decompress_safe_partial(const char *source, char *dest,
-       int compressedSize, int targetOutputSize, int maxDecompressedSize)
+int LZ4_decompress_safe_partial(const char *src, char *dst,
+       int compressedSize, int targetOutputSize, int dstCapacity)
 {
-       return LZ4_decompress_generic(source, dest, compressedSize,
-               maxDecompressedSize, endOnInputSize, partial,
-               targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+       dstCapacity = min(targetOutputSize, dstCapacity);
+       return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
+                                     endOnInputSize, partial_decode,
+                                     noDict, (BYTE *)dst, NULL, 0);
 }
 
 int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
 {
        return LZ4_decompress_generic(source, dest, 0, originalSize,
-               endOnOutputSize, full, 0, withPrefix64k,
-               (BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+                                     endOnOutputSize, decode_full_block,
+                                     withPrefix64k,
+                                     (BYTE *)dest - 64 * KB, NULL, 0);
+}
+
+/* ===== Instantiate a few more decoding cases, used more than once. ===== */
+
+int LZ4_decompress_safe_withPrefix64k(const char *source, char *dest,
+                                     int compressedSize, int maxOutputSize)
+{
+       return LZ4_decompress_generic(source, dest,
+                                     compressedSize, maxOutputSize,
+                                     endOnInputSize, decode_full_block,
+                                     withPrefix64k,
+                                     (BYTE *)dest - 64 * KB, NULL, 0);
+}
+
+static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest,
+                                              int compressedSize,
+                                              int maxOutputSize,
+                                              size_t prefixSize)
+{
+       return LZ4_decompress_generic(source, dest,
+                                     compressedSize, maxOutputSize,
+                                     endOnInputSize, decode_full_block,
+                                     noDict,
+                                     (BYTE *)dest - prefixSize, NULL, 0);
+}
+
+int LZ4_decompress_safe_forceExtDict(const char *source, char *dest,
+                                    int compressedSize, int maxOutputSize,
+                                    const void *dictStart, size_t dictSize)
+{
+       return LZ4_decompress_generic(source, dest,
+                                     compressedSize, maxOutputSize,
+                                     endOnInputSize, decode_full_block,
+                                     usingExtDict, (BYTE *)dest,
+                                     (const BYTE *)dictStart, dictSize);
 }
 
+static int LZ4_decompress_fast_extDict(const char *source, char *dest,
+                                      int originalSize,
+                                      const void *dictStart, size_t dictSize)
+{
+       return LZ4_decompress_generic(source, dest,
+                                     0, originalSize,
+                                     endOnOutputSize, decode_full_block,
+                                     usingExtDict, (BYTE *)dest,
+                                     (const BYTE *)dictStart, dictSize);
+}
+
+/*
+ * The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+static FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char *source, char *dest,
+                                  int compressedSize, int maxOutputSize,
+                                  size_t prefixSize,
+                                  const void *dictStart, size_t dictSize)
+{
+       return LZ4_decompress_generic(source, dest,
+                                     compressedSize, maxOutputSize,
+                                     endOnInputSize, decode_full_block,
+                                     usingExtDict, (BYTE *)dest - prefixSize,
+                                     (const BYTE *)dictStart, dictSize);
+}
+
+static FORCE_INLINE
+int LZ4_decompress_fast_doubleDict(const char *source, char *dest,
+                                  int originalSize, size_t prefixSize,
+                                  const void *dictStart, size_t dictSize)
+{
+       return LZ4_decompress_generic(source, dest,
+                                     0, originalSize,
+                                     endOnOutputSize, decode_full_block,
+                                     usingExtDict, (BYTE *)dest - prefixSize,
+                                     (const BYTE *)dictStart, dictSize);
+}
+
+/* ===== streaming decompression functions ===== */
+
 int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
        const char *dictionary, int dictSize)
 {
-       LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
+       LZ4_streamDecode_t_internal *lz4sd =
+               &LZ4_streamDecode->internal_donotuse;
 
        lz4sd->prefixSize = (size_t) dictSize;
        lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
 int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
        const char *source, char *dest, int compressedSize, int maxOutputSize)
 {
-       LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+       LZ4_streamDecode_t_internal *lz4sd =
+               &LZ4_streamDecode->internal_donotuse;
        int result;
 
-       if (lz4sd->prefixEnd == (BYTE *)dest) {
-               result = LZ4_decompress_generic(source, dest,
-                       compressedSize,
-                       maxOutputSize,
-                       endOnInputSize, full, 0,
-                       usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
-                       lz4sd->externalDict,
-                       lz4sd->extDictSize);
-
+       if (lz4sd->prefixSize == 0) {
+               /* The first call, no dictionary yet. */
+               assert(lz4sd->extDictSize == 0);
+               result = LZ4_decompress_safe(source, dest,
+                       compressedSize, maxOutputSize);
+               if (result <= 0)
+                       return result;
+               lz4sd->prefixSize = result;
+               lz4sd->prefixEnd = (BYTE *)dest + result;
+       } else if (lz4sd->prefixEnd == (BYTE *)dest) {
+               /* They're rolling the current segment. */
+               if (lz4sd->prefixSize >= 64 * KB - 1)
+                       result = LZ4_decompress_safe_withPrefix64k(source, dest,
+                               compressedSize, maxOutputSize);
+               else if (lz4sd->extDictSize == 0)
+                       result = LZ4_decompress_safe_withSmallPrefix(source,
+                               dest, compressedSize, maxOutputSize,
+                               lz4sd->prefixSize);
+               else
+                       result = LZ4_decompress_safe_doubleDict(source, dest,
+                               compressedSize, maxOutputSize,
+                               lz4sd->prefixSize,
+                               lz4sd->externalDict, lz4sd->extDictSize);
                if (result <= 0)
                        return result;
-
                lz4sd->prefixSize += result;
-               lz4sd->prefixEnd        += result;
+               lz4sd->prefixEnd  += result;
        } else {
+               /*
+                * The buffer wraps around, or they're
+                * switching to another buffer.
+                */
                lz4sd->extDictSize = lz4sd->prefixSize;
                lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-               result = LZ4_decompress_generic(source, dest,
+               result = LZ4_decompress_safe_forceExtDict(source, dest,
                        compressedSize, maxOutputSize,
-                       endOnInputSize, full, 0,
-                       usingExtDict, (BYTE *)dest,
                        lz4sd->externalDict, lz4sd->extDictSize);
                if (result <= 0)
                        return result;
                lz4sd->prefixSize = result;
-               lz4sd->prefixEnd        = (BYTE *)dest + result;
+               lz4sd->prefixEnd  = (BYTE *)dest + result;
        }
 
        return result;
        LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
        int result;
 
-       if (lz4sd->prefixEnd == (BYTE *)dest) {
-               result = LZ4_decompress_generic(source, dest, 0, originalSize,
-                       endOnOutputSize, full, 0,
-                       usingExtDict,
-                       lz4sd->prefixEnd - lz4sd->prefixSize,
-                       lz4sd->externalDict, lz4sd->extDictSize);
-
+       if (lz4sd->prefixSize == 0) {
+               assert(lz4sd->extDictSize == 0);
+               result = LZ4_decompress_fast(source, dest, originalSize);
+               if (result <= 0)
+                       return result;
+               lz4sd->prefixSize = originalSize;
+               lz4sd->prefixEnd = (BYTE *)dest + originalSize;
+       } else if (lz4sd->prefixEnd == (BYTE *)dest) {
+               if (lz4sd->prefixSize >= 64 * KB - 1 ||
+                   lz4sd->extDictSize == 0)
+                       result = LZ4_decompress_fast(source, dest,
+                                                    originalSize);
+               else
+                       result = LZ4_decompress_fast_doubleDict(source, dest,
+                               originalSize, lz4sd->prefixSize,
+                               lz4sd->externalDict, lz4sd->extDictSize);
                if (result <= 0)
                        return result;
-
                lz4sd->prefixSize += originalSize;
-               lz4sd->prefixEnd        += originalSize;
+               lz4sd->prefixEnd  += originalSize;
        } else {
                lz4sd->extDictSize = lz4sd->prefixSize;
                lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-               result = LZ4_decompress_generic(source, dest, 0, originalSize,
-                       endOnOutputSize, full, 0,
-                       usingExtDict, (BYTE *)dest,
-                       lz4sd->externalDict, lz4sd->extDictSize);
+               result = LZ4_decompress_fast_extDict(source, dest,
+                       originalSize, lz4sd->externalDict, lz4sd->extDictSize);
                if (result <= 0)
                        return result;
                lz4sd->prefixSize = originalSize;
-               lz4sd->prefixEnd        = (BYTE *)dest + originalSize;
+               lz4sd->prefixEnd = (BYTE *)dest + originalSize;
        }
-
        return result;
 }
 
-/*
- * Advanced decoding functions :
- * *_usingDict() :
- * These decoding functions work the same as "_continue" ones,
- * the dictionary must be explicitly provided within parameters
- */
-static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
-       char *dest, int compressedSize, int maxOutputSize, int safe,
-       const char *dictStart, int dictSize)
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+                                 int compressedSize, int maxOutputSize,
+                                 const char *dictStart, int dictSize)
 {
        if (dictSize == 0)
-               return LZ4_decompress_generic(source, dest,
-                       compressedSize, maxOutputSize, safe, full, 0,
-                       noDict, (BYTE *)dest, NULL, 0);
-       if (dictStart + dictSize == dest) {
-               if (dictSize >= (int)(64 * KB - 1))
-                       return LZ4_decompress_generic(source, dest,
-                               compressedSize, maxOutputSize, safe, full, 0,
-                               withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
-               return LZ4_decompress_generic(source, dest, compressedSize,
-                       maxOutputSize, safe, full, 0, noDict,
-                       (BYTE *)dest - dictSize, NULL, 0);
+               return LZ4_decompress_safe(source, dest,
+                                          compressedSize, maxOutputSize);
+       if (dictStart+dictSize == dest) {
+               if (dictSize >= 64 * KB - 1)
+                       return LZ4_decompress_safe_withPrefix64k(source, dest,
+                               compressedSize, maxOutputSize);
+               return LZ4_decompress_safe_withSmallPrefix(source, dest,
+                       compressedSize, maxOutputSize, dictSize);
        }
-       return LZ4_decompress_generic(source, dest, compressedSize,
-               maxOutputSize, safe, full, 0, usingExtDict,
-               (BYTE *)dest, (const BYTE *)dictStart, dictSize);
-}
-
-int LZ4_decompress_safe_usingDict(const char *source, char *dest,
-       int compressedSize, int maxOutputSize,
-       const char *dictStart, int dictSize)
-{
-       return LZ4_decompress_usingDict_generic(source, dest,
-               compressedSize, maxOutputSize, 1, dictStart, dictSize);
+       return LZ4_decompress_safe_forceExtDict(source, dest,
+               compressedSize, maxOutputSize, dictStart, dictSize);
 }
 
 int LZ4_decompress_fast_usingDict(const char *source, char *dest,
-       int originalSize, const char *dictStart, int dictSize)
+                                 int originalSize,
+                                 const char *dictStart, int dictSize)
 {
-       return LZ4_decompress_usingDict_generic(source, dest, 0,
-               originalSize, 0, dictStart, dictSize);
+       if (dictSize == 0 || dictStart + dictSize == dest)
+               return LZ4_decompress_fast(source, dest, originalSize);
+
+       return LZ4_decompress_fast_extDict(source, dest, originalSize,
+               dictStart, dictSize);
 }
 
 #ifndef STATIC