/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
  10. #include "zstd_compress_internal.h"
  11. #include "zstd_lazy.h"
  /*-*************************************
  *  Binary Tree search
  ***************************************/
  15. static void
  16. ZSTD_updateDUBT(ZSTD_matchState_t* ms,
  17. const BYTE* ip, const BYTE* iend,
  18. U32 mls)
  19. {
  20. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  21. U32* const hashTable = ms->hashTable;
  22. U32 const hashLog = cParams->hashLog;
  23. U32* const bt = ms->chainTable;
  24. U32 const btLog = cParams->chainLog - 1;
  25. U32 const btMask = (1 << btLog) - 1;
  26. const BYTE* const base = ms->window.base;
  27. U32 const target = (U32)(ip - base);
  28. U32 idx = ms->nextToUpdate;
  29. if (idx != target)
  30. DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
  31. idx, target, ms->window.dictLimit);
  32. assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
  33. (void)iend;
  34. assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
  35. for ( ; idx < target ; idx++) {
  36. size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
  37. U32 const matchIndex = hashTable[h];
  38. U32* const nextCandidatePtr = bt + 2*(idx&btMask);
  39. U32* const sortMarkPtr = nextCandidatePtr + 1;
  40. DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
  41. hashTable[h] = idx; /* Update Hash Table */
  42. *nextCandidatePtr = matchIndex; /* update BT like a chain */
  43. *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
  44. }
  45. ms->nextToUpdate = target;
  46. }
  47. /** ZSTD_insertDUBT1() :
  48. * sort one already inserted but unsorted position
  49. * assumption : current >= btlow == (current - btmask)
  50. * doesn't fail */
  51. static void
  52. ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
  53. U32 current, const BYTE* inputEnd,
  54. U32 nbCompares, U32 btLow,
  55. const ZSTD_dictMode_e dictMode)
  56. {
  57. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  58. U32* const bt = ms->chainTable;
  59. U32 const btLog = cParams->chainLog - 1;
  60. U32 const btMask = (1 << btLog) - 1;
  61. size_t commonLengthSmaller=0, commonLengthLarger=0;
  62. const BYTE* const base = ms->window.base;
  63. const BYTE* const dictBase = ms->window.dictBase;
  64. const U32 dictLimit = ms->window.dictLimit;
  65. const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
  66. const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
  67. const BYTE* const dictEnd = dictBase + dictLimit;
  68. const BYTE* const prefixStart = base + dictLimit;
  69. const BYTE* match;
  70. U32* smallerPtr = bt + 2*(current&btMask);
  71. U32* largerPtr = smallerPtr + 1;
  72. U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
  73. U32 dummy32; /* to be nullified at the end */
  74. U32 const windowValid = ms->window.lowLimit;
  75. U32 const maxDistance = 1U << cParams->windowLog;
  76. U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
  77. DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
  78. current, dictLimit, windowLow);
  79. assert(current >= btLow);
  80. assert(ip < iend); /* condition for ZSTD_count */
  81. while (nbCompares-- && (matchIndex > windowLow)) {
  82. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  83. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  84. assert(matchIndex < current);
  85. /* note : all candidates are now supposed sorted,
  86. * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
  87. * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
  88. if ( (dictMode != ZSTD_extDict)
  89. || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
  90. || (current < dictLimit) /* both in extDict */) {
  91. const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
  92. || (matchIndex+matchLength >= dictLimit)) ?
  93. base : dictBase;
  94. assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
  95. || (current < dictLimit) );
  96. match = mBase + matchIndex;
  97. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  98. } else {
  99. match = dictBase + matchIndex;
  100. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  101. if (matchIndex+matchLength >= dictLimit)
  102. match = base + matchIndex; /* preparation for next read of match[matchLength] */
  103. }
  104. DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
  105. current, matchIndex, (U32)matchLength);
  106. if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
  107. break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
  108. }
  109. if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
  110. /* match is smaller than current */
  111. *smallerPtr = matchIndex; /* update smaller idx */
  112. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  113. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  114. DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
  115. matchIndex, btLow, nextPtr[1]);
  116. smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
  117. matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
  118. } else {
  119. /* match is larger than current */
  120. *largerPtr = matchIndex;
  121. commonLengthLarger = matchLength;
  122. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  123. DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
  124. matchIndex, btLow, nextPtr[0]);
  125. largerPtr = nextPtr;
  126. matchIndex = nextPtr[0];
  127. } }
  128. *smallerPtr = *largerPtr = 0;
  129. }
  130. static size_t
  131. ZSTD_DUBT_findBetterDictMatch (
  132. ZSTD_matchState_t* ms,
  133. const BYTE* const ip, const BYTE* const iend,
  134. size_t* offsetPtr,
  135. size_t bestLength,
  136. U32 nbCompares,
  137. U32 const mls,
  138. const ZSTD_dictMode_e dictMode)
  139. {
  140. const ZSTD_matchState_t * const dms = ms->dictMatchState;
  141. const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
  142. const U32 * const dictHashTable = dms->hashTable;
  143. U32 const hashLog = dmsCParams->hashLog;
  144. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  145. U32 dictMatchIndex = dictHashTable[h];
  146. const BYTE* const base = ms->window.base;
  147. const BYTE* const prefixStart = base + ms->window.dictLimit;
  148. U32 const current = (U32)(ip-base);
  149. const BYTE* const dictBase = dms->window.base;
  150. const BYTE* const dictEnd = dms->window.nextSrc;
  151. U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
  152. U32 const dictLowLimit = dms->window.lowLimit;
  153. U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
  154. U32* const dictBt = dms->chainTable;
  155. U32 const btLog = dmsCParams->chainLog - 1;
  156. U32 const btMask = (1 << btLog) - 1;
  157. U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
  158. size_t commonLengthSmaller=0, commonLengthLarger=0;
  159. (void)dictMode;
  160. assert(dictMode == ZSTD_dictMatchState);
  161. while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
  162. U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
  163. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  164. const BYTE* match = dictBase + dictMatchIndex;
  165. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  166. if (dictMatchIndex+matchLength >= dictHighLimit)
  167. match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
  168. if (matchLength > bestLength) {
  169. U32 matchIndex = dictMatchIndex + dictIndexDelta;
  170. if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
  171. DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
  172. current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
  173. bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
  174. }
  175. if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
  176. break; /* drop, to guarantee consistency (miss a little bit of compression) */
  177. }
  178. }
  179. if (match[matchLength] < ip[matchLength]) {
  180. if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
  181. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  182. dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  183. } else {
  184. /* match is larger than current */
  185. if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
  186. commonLengthLarger = matchLength;
  187. dictMatchIndex = nextPtr[0];
  188. }
  189. }
  190. if (bestLength >= MINMATCH) {
  191. U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
  192. DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
  193. current, (U32)bestLength, (U32)*offsetPtr, mIndex);
  194. }
  195. return bestLength;
  196. }
  197. static size_t
  198. ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
  199. const BYTE* const ip, const BYTE* const iend,
  200. size_t* offsetPtr,
  201. U32 const mls,
  202. const ZSTD_dictMode_e dictMode)
  203. {
  204. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  205. U32* const hashTable = ms->hashTable;
  206. U32 const hashLog = cParams->hashLog;
  207. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  208. U32 matchIndex = hashTable[h];
  209. const BYTE* const base = ms->window.base;
  210. U32 const current = (U32)(ip-base);
  211. U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
  212. U32* const bt = ms->chainTable;
  213. U32 const btLog = cParams->chainLog - 1;
  214. U32 const btMask = (1 << btLog) - 1;
  215. U32 const btLow = (btMask >= current) ? 0 : current - btMask;
  216. U32 const unsortLimit = MAX(btLow, windowLow);
  217. U32* nextCandidate = bt + 2*(matchIndex&btMask);
  218. U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
  219. U32 nbCompares = 1U << cParams->searchLog;
  220. U32 nbCandidates = nbCompares;
  221. U32 previousCandidate = 0;
  222. DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
  223. assert(ip <= iend-8); /* required for h calculation */
  224. /* reach end of unsorted candidates list */
  225. while ( (matchIndex > unsortLimit)
  226. && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
  227. && (nbCandidates > 1) ) {
  228. DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
  229. matchIndex);
  230. *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
  231. previousCandidate = matchIndex;
  232. matchIndex = *nextCandidate;
  233. nextCandidate = bt + 2*(matchIndex&btMask);
  234. unsortedMark = bt + 2*(matchIndex&btMask) + 1;
  235. nbCandidates --;
  236. }
  237. /* nullify last candidate if it's still unsorted
  238. * simplification, detrimental to compression ratio, beneficial for speed */
  239. if ( (matchIndex > unsortLimit)
  240. && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
  241. DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
  242. matchIndex);
  243. *nextCandidate = *unsortedMark = 0;
  244. }
  245. /* batch sort stacked candidates */
  246. matchIndex = previousCandidate;
  247. while (matchIndex) { /* will end on matchIndex == 0 */
  248. U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
  249. U32 const nextCandidateIdx = *nextCandidateIdxPtr;
  250. ZSTD_insertDUBT1(ms, matchIndex, iend,
  251. nbCandidates, unsortLimit, dictMode);
  252. matchIndex = nextCandidateIdx;
  253. nbCandidates++;
  254. }
  255. /* find longest match */
  256. { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
  257. const BYTE* const dictBase = ms->window.dictBase;
  258. const U32 dictLimit = ms->window.dictLimit;
  259. const BYTE* const dictEnd = dictBase + dictLimit;
  260. const BYTE* const prefixStart = base + dictLimit;
  261. U32* smallerPtr = bt + 2*(current&btMask);
  262. U32* largerPtr = bt + 2*(current&btMask) + 1;
  263. U32 matchEndIdx = current + 8 + 1;
  264. U32 dummy32; /* to be nullified at the end */
  265. size_t bestLength = 0;
  266. matchIndex = hashTable[h];
  267. hashTable[h] = current; /* Update Hash Table */
  268. while (nbCompares-- && (matchIndex > windowLow)) {
  269. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  270. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  271. const BYTE* match;
  272. if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
  273. match = base + matchIndex;
  274. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  275. } else {
  276. match = dictBase + matchIndex;
  277. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  278. if (matchIndex+matchLength >= dictLimit)
  279. match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
  280. }
  281. if (matchLength > bestLength) {
  282. if (matchLength > matchEndIdx - matchIndex)
  283. matchEndIdx = matchIndex + (U32)matchLength;
  284. if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
  285. bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
  286. if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
  287. if (dictMode == ZSTD_dictMatchState) {
  288. nbCompares = 0; /* in addition to avoiding checking any
  289. * further in this loop, make sure we
  290. * skip checking in the dictionary. */
  291. }
  292. break; /* drop, to guarantee consistency (miss a little bit of compression) */
  293. }
  294. }
  295. if (match[matchLength] < ip[matchLength]) {
  296. /* match is smaller than current */
  297. *smallerPtr = matchIndex; /* update smaller idx */
  298. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  299. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  300. smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
  301. matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  302. } else {
  303. /* match is larger than current */
  304. *largerPtr = matchIndex;
  305. commonLengthLarger = matchLength;
  306. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  307. largerPtr = nextPtr;
  308. matchIndex = nextPtr[0];
  309. } }
  310. *smallerPtr = *largerPtr = 0;
  311. if (dictMode == ZSTD_dictMatchState && nbCompares) {
  312. bestLength = ZSTD_DUBT_findBetterDictMatch(
  313. ms, ip, iend,
  314. offsetPtr, bestLength, nbCompares,
  315. mls, dictMode);
  316. }
  317. assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
  318. ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
  319. if (bestLength >= MINMATCH) {
  320. U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
  321. DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
  322. current, (U32)bestLength, (U32)*offsetPtr, mIndex);
  323. }
  324. return bestLength;
  325. }
  326. }
  327. /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
  328. FORCE_INLINE_TEMPLATE size_t
  329. ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
  330. const BYTE* const ip, const BYTE* const iLimit,
  331. size_t* offsetPtr,
  332. const U32 mls /* template */,
  333. const ZSTD_dictMode_e dictMode)
  334. {
  335. DEBUGLOG(7, "ZSTD_BtFindBestMatch");
  336. if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
  337. ZSTD_updateDUBT(ms, ip, iLimit, mls);
  338. return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
  339. }
  340. static size_t
  341. ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
  342. const BYTE* ip, const BYTE* const iLimit,
  343. size_t* offsetPtr)
  344. {
  345. switch(ms->cParams.minMatch)
  346. {
  347. default : /* includes case 3 */
  348. case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
  349. case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
  350. case 7 :
  351. case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
  352. }
  353. }
  354. static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
  355. ZSTD_matchState_t* ms,
  356. const BYTE* ip, const BYTE* const iLimit,
  357. size_t* offsetPtr)
  358. {
  359. switch(ms->cParams.minMatch)
  360. {
  361. default : /* includes case 3 */
  362. case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
  363. case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
  364. case 7 :
  365. case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
  366. }
  367. }
  368. static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
  369. ZSTD_matchState_t* ms,
  370. const BYTE* ip, const BYTE* const iLimit,
  371. size_t* offsetPtr)
  372. {
  373. switch(ms->cParams.minMatch)
  374. {
  375. default : /* includes case 3 */
  376. case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
  377. case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
  378. case 7 :
  379. case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
  380. }
  381. }
  /* *********************************
  *  Hash Chain
  ***********************************/
  /* NEXT_IN_CHAIN(d, mask) : lvalue of the chain slot for index `d`.
   * Expects a variable named `chainTable` to be in scope at the point of use. */
  #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
  386. /* Update chains up to ip (excluded)
  387. Assumption : always within prefix (i.e. not within extDict) */
  388. static U32 ZSTD_insertAndFindFirstIndex_internal(
  389. ZSTD_matchState_t* ms,
  390. const ZSTD_compressionParameters* const cParams,
  391. const BYTE* ip, U32 const mls)
  392. {
  393. U32* const hashTable = ms->hashTable;
  394. const U32 hashLog = cParams->hashLog;
  395. U32* const chainTable = ms->chainTable;
  396. const U32 chainMask = (1 << cParams->chainLog) - 1;
  397. const BYTE* const base = ms->window.base;
  398. const U32 target = (U32)(ip - base);
  399. U32 idx = ms->nextToUpdate;
  400. while(idx < target) { /* catch up */
  401. size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
  402. NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
  403. hashTable[h] = idx;
  404. idx++;
  405. }
  406. ms->nextToUpdate = target;
  407. return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
  408. }
  409. U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
  410. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  411. return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
  412. }
  413. /* inlining is important to hardwire a hot branch (template emulation) */
  414. FORCE_INLINE_TEMPLATE
  415. size_t ZSTD_HcFindBestMatch_generic (
  416. ZSTD_matchState_t* ms,
  417. const BYTE* const ip, const BYTE* const iLimit,
  418. size_t* offsetPtr,
  419. const U32 mls, const ZSTD_dictMode_e dictMode)
  420. {
  421. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  422. U32* const chainTable = ms->chainTable;
  423. const U32 chainSize = (1 << cParams->chainLog);
  424. const U32 chainMask = chainSize-1;
  425. const BYTE* const base = ms->window.base;
  426. const BYTE* const dictBase = ms->window.dictBase;
  427. const U32 dictLimit = ms->window.dictLimit;
  428. const BYTE* const prefixStart = base + dictLimit;
  429. const BYTE* const dictEnd = dictBase + dictLimit;
  430. const U32 current = (U32)(ip-base);
  431. const U32 maxDistance = 1U << cParams->windowLog;
  432. const U32 lowestValid = ms->window.lowLimit;
  433. const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
  434. const U32 isDictionary = (ms->loadedDictEnd != 0);
  435. const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
  436. const U32 minChain = current > chainSize ? current - chainSize : 0;
  437. U32 nbAttempts = 1U << cParams->searchLog;
  438. size_t ml=4-1;
  439. /* HC4 match finder */
  440. U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
  441. for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
  442. size_t currentMl=0;
  443. if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
  444. const BYTE* const match = base + matchIndex;
  445. assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
  446. if (match[ml] == ip[ml]) /* potentially better */
  447. currentMl = ZSTD_count(ip, match, iLimit);
  448. } else {
  449. const BYTE* const match = dictBase + matchIndex;
  450. assert(match+4 <= dictEnd);
  451. if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  452. currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
  453. }
  454. /* save best solution */
  455. if (currentMl > ml) {
  456. ml = currentMl;
  457. *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
  458. if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  459. }
  460. if (matchIndex <= minChain) break;
  461. matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
  462. }
  463. if (dictMode == ZSTD_dictMatchState) {
  464. const ZSTD_matchState_t* const dms = ms->dictMatchState;
  465. const U32* const dmsChainTable = dms->chainTable;
  466. const U32 dmsChainSize = (1 << dms->cParams.chainLog);
  467. const U32 dmsChainMask = dmsChainSize - 1;
  468. const U32 dmsLowestIndex = dms->window.dictLimit;
  469. const BYTE* const dmsBase = dms->window.base;
  470. const BYTE* const dmsEnd = dms->window.nextSrc;
  471. const U32 dmsSize = (U32)(dmsEnd - dmsBase);
  472. const U32 dmsIndexDelta = dictLimit - dmsSize;
  473. const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
  474. matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
  475. for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
  476. size_t currentMl=0;
  477. const BYTE* const match = dmsBase + matchIndex;
  478. assert(match+4 <= dmsEnd);
  479. if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  480. currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
  481. /* save best solution */
  482. if (currentMl > ml) {
  483. ml = currentMl;
  484. *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
  485. if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  486. }
  487. if (matchIndex <= dmsMinChain) break;
  488. matchIndex = dmsChainTable[matchIndex & dmsChainMask];
  489. }
  490. }
  491. return ml;
  492. }
  493. FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
  494. ZSTD_matchState_t* ms,
  495. const BYTE* ip, const BYTE* const iLimit,
  496. size_t* offsetPtr)
  497. {
  498. switch(ms->cParams.minMatch)
  499. {
  500. default : /* includes case 3 */
  501. case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
  502. case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
  503. case 7 :
  504. case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
  505. }
  506. }
  507. static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
  508. ZSTD_matchState_t* ms,
  509. const BYTE* ip, const BYTE* const iLimit,
  510. size_t* offsetPtr)
  511. {
  512. switch(ms->cParams.minMatch)
  513. {
  514. default : /* includes case 3 */
  515. case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
  516. case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
  517. case 7 :
  518. case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
  519. }
  520. }
  521. FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
  522. ZSTD_matchState_t* ms,
  523. const BYTE* ip, const BYTE* const iLimit,
  524. size_t* offsetPtr)
  525. {
  526. switch(ms->cParams.minMatch)
  527. {
  528. default : /* includes case 3 */
  529. case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
  530. case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
  531. case 7 :
  532. case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
  533. }
  534. }
  /* *******************************
  *  Common parser - lazy strategy
  *********************************/
  /* Match-finder selector for the lazy parser : hash chain or binary tree. */
  typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
  539. FORCE_INLINE_TEMPLATE size_t
  540. ZSTD_compressBlock_lazy_generic(
  541. ZSTD_matchState_t* ms, seqStore_t* seqStore,
  542. U32 rep[ZSTD_REP_NUM],
  543. const void* src, size_t srcSize,
  544. const searchMethod_e searchMethod, const U32 depth,
  545. ZSTD_dictMode_e const dictMode)
  546. {
  547. const BYTE* const istart = (const BYTE*)src;
  548. const BYTE* ip = istart;
  549. const BYTE* anchor = istart;
  550. const BYTE* const iend = istart + srcSize;
  551. const BYTE* const ilimit = iend - 8;
  552. const BYTE* const base = ms->window.base;
  553. const U32 prefixLowestIndex = ms->window.dictLimit;
  554. const BYTE* const prefixLowest = base + prefixLowestIndex;
  555. typedef size_t (*searchMax_f)(
  556. ZSTD_matchState_t* ms,
  557. const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  558. searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
  559. (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
  560. : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
  561. (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
  562. : ZSTD_HcFindBestMatch_selectMLS);
  563. U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
  564. const ZSTD_matchState_t* const dms = ms->dictMatchState;
  565. const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
  566. dms->window.dictLimit : 0;
  567. const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
  568. dms->window.base : NULL;
  569. const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
  570. dictBase + dictLowestIndex : NULL;
  571. const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
  572. dms->window.nextSrc : NULL;
  573. const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
  574. prefixLowestIndex - (U32)(dictEnd - dictBase) :
  575. 0;
  576. const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
  577. /* init */
  578. ip += (dictAndPrefixLength == 0);
  579. if (dictMode == ZSTD_noDict) {
  580. U32 const maxRep = (U32)(ip - prefixLowest);
  581. if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
  582. if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
  583. }
  584. if (dictMode == ZSTD_dictMatchState) {
  585. /* dictMatchState repCode checks don't currently handle repCode == 0
  586. * disabling. */
  587. assert(offset_1 <= dictAndPrefixLength);
  588. assert(offset_2 <= dictAndPrefixLength);
  589. }
  590. /* Match Loop */
  591. while (ip < ilimit) {
  592. size_t matchLength=0;
  593. size_t offset=0;
  594. const BYTE* start=ip+1;
  595. /* check repCode */
  596. if (dictMode == ZSTD_dictMatchState) {
  597. const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
  598. const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
  599. && repIndex < prefixLowestIndex) ?
  600. dictBase + (repIndex - dictIndexDelta) :
  601. base + repIndex;
  602. if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  603. && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
  604. const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  605. matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  606. if (depth==0) goto _storeSequence;
  607. }
  608. }
  609. if ( dictMode == ZSTD_noDict
  610. && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
  611. matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
  612. if (depth==0) goto _storeSequence;
  613. }
  614. /* first search (depth 0) */
  615. { size_t offsetFound = 999999999;
  616. size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
  617. if (ml2 > matchLength)
  618. matchLength = ml2, start = ip, offset=offsetFound;
  619. }
  620. if (matchLength < 4) {
  621. ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
  622. continue;
  623. }
  624. /* let's try to find a better solution */
  625. if (depth>=1)
  626. while (ip<ilimit) {
  627. ip ++;
  628. if ( (dictMode == ZSTD_noDict)
  629. && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  630. size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  631. int const gain2 = (int)(mlRep * 3);
  632. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  633. if ((mlRep >= 4) && (gain2 > gain1))
  634. matchLength = mlRep, offset = 0, start = ip;
  635. }
  636. if (dictMode == ZSTD_dictMatchState) {
  637. const U32 repIndex = (U32)(ip - base) - offset_1;
  638. const BYTE* repMatch = repIndex < prefixLowestIndex ?
  639. dictBase + (repIndex - dictIndexDelta) :
  640. base + repIndex;
  641. if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  642. && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  643. const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  644. size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  645. int const gain2 = (int)(mlRep * 3);
  646. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  647. if ((mlRep >= 4) && (gain2 > gain1))
  648. matchLength = mlRep, offset = 0, start = ip;
  649. }
  650. }
  651. { size_t offset2=999999999;
  652. size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  653. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  654. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  655. if ((ml2 >= 4) && (gain2 > gain1)) {
  656. matchLength = ml2, offset = offset2, start = ip;
  657. continue; /* search a better one */
  658. } }
  659. /* let's find an even better one */
  660. if ((depth==2) && (ip<ilimit)) {
  661. ip ++;
  662. if ( (dictMode == ZSTD_noDict)
  663. && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  664. size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  665. int const gain2 = (int)(mlRep * 4);
  666. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  667. if ((mlRep >= 4) && (gain2 > gain1))
  668. matchLength = mlRep, offset = 0, start = ip;
  669. }
  670. if (dictMode == ZSTD_dictMatchState) {
  671. const U32 repIndex = (U32)(ip - base) - offset_1;
  672. const BYTE* repMatch = repIndex < prefixLowestIndex ?
  673. dictBase + (repIndex - dictIndexDelta) :
  674. base + repIndex;
  675. if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  676. && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  677. const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  678. size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  679. int const gain2 = (int)(mlRep * 4);
  680. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  681. if ((mlRep >= 4) && (gain2 > gain1))
  682. matchLength = mlRep, offset = 0, start = ip;
  683. }
  684. }
  685. { size_t offset2=999999999;
  686. size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  687. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  688. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  689. if ((ml2 >= 4) && (gain2 > gain1)) {
  690. matchLength = ml2, offset = offset2, start = ip;
  691. continue;
  692. } } }
  693. break; /* nothing found : store previous solution */
  694. }
  695. /* NOTE:
  696. * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
  697. * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
  698. * overflows the pointer, which is undefined behavior.
  699. */
  700. /* catch up */
  701. if (offset) {
  702. if (dictMode == ZSTD_noDict) {
  703. while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
  704. && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
  705. { start--; matchLength++; }
  706. }
  707. if (dictMode == ZSTD_dictMatchState) {
  708. U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
  709. const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
  710. const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
  711. while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
  712. }
  713. offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  714. }
  715. /* store sequence */
  716. _storeSequence:
  717. { size_t const litLength = start - anchor;
  718. ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
  719. anchor = ip = start + matchLength;
  720. }
  721. /* check immediate repcode */
  722. if (dictMode == ZSTD_dictMatchState) {
  723. while (ip <= ilimit) {
  724. U32 const current2 = (U32)(ip-base);
  725. U32 const repIndex = current2 - offset_2;
  726. const BYTE* repMatch = dictMode == ZSTD_dictMatchState
  727. && repIndex < prefixLowestIndex ?
  728. dictBase - dictIndexDelta + repIndex :
  729. base + repIndex;
  730. if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
  731. && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  732. const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
  733. matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
  734. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
  735. ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  736. ip += matchLength;
  737. anchor = ip;
  738. continue;
  739. }
  740. break;
  741. }
  742. }
  743. if (dictMode == ZSTD_noDict) {
  744. while ( ((ip <= ilimit) & (offset_2>0))
  745. && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
  746. /* store sequence */
  747. matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
  748. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
  749. ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  750. ip += matchLength;
  751. anchor = ip;
  752. continue; /* faster when present ... (?) */
  753. } } }
  754. /* Save reps for next block */
  755. rep[0] = offset_1 ? offset_1 : savedOffset;
  756. rep[1] = offset_2 ? offset_2 : savedOffset;
  757. /* Return the last literals size */
  758. return (size_t)(iend - anchor);
  759. }
  760. size_t ZSTD_compressBlock_btlazy2(
  761. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  762. void const* src, size_t srcSize)
  763. {
  764. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
  765. }
  766. size_t ZSTD_compressBlock_lazy2(
  767. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  768. void const* src, size_t srcSize)
  769. {
  770. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
  771. }
  772. size_t ZSTD_compressBlock_lazy(
  773. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  774. void const* src, size_t srcSize)
  775. {
  776. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
  777. }
  778. size_t ZSTD_compressBlock_greedy(
  779. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  780. void const* src, size_t srcSize)
  781. {
  782. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
  783. }
  784. size_t ZSTD_compressBlock_btlazy2_dictMatchState(
  785. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  786. void const* src, size_t srcSize)
  787. {
  788. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
  789. }
  790. size_t ZSTD_compressBlock_lazy2_dictMatchState(
  791. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  792. void const* src, size_t srcSize)
  793. {
  794. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
  795. }
  796. size_t ZSTD_compressBlock_lazy_dictMatchState(
  797. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  798. void const* src, size_t srcSize)
  799. {
  800. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
  801. }
  802. size_t ZSTD_compressBlock_greedy_dictMatchState(
  803. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  804. void const* src, size_t srcSize)
  805. {
  806. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
  807. }
  808. FORCE_INLINE_TEMPLATE
  809. size_t ZSTD_compressBlock_lazy_extDict_generic(
  810. ZSTD_matchState_t* ms, seqStore_t* seqStore,
  811. U32 rep[ZSTD_REP_NUM],
  812. const void* src, size_t srcSize,
  813. const searchMethod_e searchMethod, const U32 depth)
  814. {
  815. const BYTE* const istart = (const BYTE*)src;
  816. const BYTE* ip = istart;
  817. const BYTE* anchor = istart;
  818. const BYTE* const iend = istart + srcSize;
  819. const BYTE* const ilimit = iend - 8;
  820. const BYTE* const base = ms->window.base;
  821. const U32 dictLimit = ms->window.dictLimit;
  822. const U32 lowestIndex = ms->window.lowLimit;
  823. const BYTE* const prefixStart = base + dictLimit;
  824. const BYTE* const dictBase = ms->window.dictBase;
  825. const BYTE* const dictEnd = dictBase + dictLimit;
  826. const BYTE* const dictStart = dictBase + lowestIndex;
  827. typedef size_t (*searchMax_f)(
  828. ZSTD_matchState_t* ms,
  829. const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  830. searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
  831. U32 offset_1 = rep[0], offset_2 = rep[1];
  832. /* init */
  833. ip += (ip == prefixStart);
  834. /* Match Loop */
  835. while (ip < ilimit) {
  836. size_t matchLength=0;
  837. size_t offset=0;
  838. const BYTE* start=ip+1;
  839. U32 current = (U32)(ip-base);
  840. /* check repCode */
  841. { const U32 repIndex = (U32)(current+1 - offset_1);
  842. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  843. const BYTE* const repMatch = repBase + repIndex;
  844. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  845. if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
  846. /* repcode detected we should take it */
  847. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  848. matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  849. if (depth==0) goto _storeSequence;
  850. } }
  851. /* first search (depth 0) */
  852. { size_t offsetFound = 999999999;
  853. size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
  854. if (ml2 > matchLength)
  855. matchLength = ml2, start = ip, offset=offsetFound;
  856. }
  857. if (matchLength < 4) {
  858. ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
  859. continue;
  860. }
  861. /* let's try to find a better solution */
  862. if (depth>=1)
  863. while (ip<ilimit) {
  864. ip ++;
  865. current++;
  866. /* check repCode */
  867. if (offset) {
  868. const U32 repIndex = (U32)(current - offset_1);
  869. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  870. const BYTE* const repMatch = repBase + repIndex;
  871. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  872. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  873. /* repcode detected */
  874. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  875. size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  876. int const gain2 = (int)(repLength * 3);
  877. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  878. if ((repLength >= 4) && (gain2 > gain1))
  879. matchLength = repLength, offset = 0, start = ip;
  880. } }
  881. /* search match, depth 1 */
  882. { size_t offset2=999999999;
  883. size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  884. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  885. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  886. if ((ml2 >= 4) && (gain2 > gain1)) {
  887. matchLength = ml2, offset = offset2, start = ip;
  888. continue; /* search a better one */
  889. } }
  890. /* let's find an even better one */
  891. if ((depth==2) && (ip<ilimit)) {
  892. ip ++;
  893. current++;
  894. /* check repCode */
  895. if (offset) {
  896. const U32 repIndex = (U32)(current - offset_1);
  897. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  898. const BYTE* const repMatch = repBase + repIndex;
  899. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  900. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  901. /* repcode detected */
  902. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  903. size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  904. int const gain2 = (int)(repLength * 4);
  905. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  906. if ((repLength >= 4) && (gain2 > gain1))
  907. matchLength = repLength, offset = 0, start = ip;
  908. } }
  909. /* search match, depth 2 */
  910. { size_t offset2=999999999;
  911. size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  912. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  913. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  914. if ((ml2 >= 4) && (gain2 > gain1)) {
  915. matchLength = ml2, offset = offset2, start = ip;
  916. continue;
  917. } } }
  918. break; /* nothing found : store previous solution */
  919. }
  920. /* catch up */
  921. if (offset) {
  922. U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
  923. const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
  924. const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
  925. while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
  926. offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  927. }
  928. /* store sequence */
  929. _storeSequence:
  930. { size_t const litLength = start - anchor;
  931. ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
  932. anchor = ip = start + matchLength;
  933. }
  934. /* check immediate repcode */
  935. while (ip <= ilimit) {
  936. const U32 repIndex = (U32)((ip-base) - offset_2);
  937. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  938. const BYTE* const repMatch = repBase + repIndex;
  939. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  940. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  941. /* repcode detected we should take it */
  942. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  943. matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  944. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
  945. ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  946. ip += matchLength;
  947. anchor = ip;
  948. continue; /* faster when present ... (?) */
  949. }
  950. break;
  951. } }
  952. /* Save reps for next block */
  953. rep[0] = offset_1;
  954. rep[1] = offset_2;
  955. /* Return the last literals size */
  956. return (size_t)(iend - anchor);
  957. }
  958. size_t ZSTD_compressBlock_greedy_extDict(
  959. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  960. void const* src, size_t srcSize)
  961. {
  962. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
  963. }
  964. size_t ZSTD_compressBlock_lazy_extDict(
  965. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  966. void const* src, size_t srcSize)
  967. {
  968. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
  969. }
  970. size_t ZSTD_compressBlock_lazy2_extDict(
  971. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  972. void const* src, size_t srcSize)
  973. {
  974. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
  975. }
  976. size_t ZSTD_compressBlock_btlazy2_extDict(
  977. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  978. void const* src, size_t srcSize)
  979. {
  980. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
  981. }