/* zstd_lazy.c */
  1. /*
  2. * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. #include "zstd_compress_internal.h"
  11. #include "zstd_lazy.h"
  12. /*-*************************************
  13. * Binary Tree search
  14. ***************************************/
  15. static void
  16. ZSTD_updateDUBT(ZSTD_matchState_t* ms,
  17. const BYTE* ip, const BYTE* iend,
  18. U32 mls)
  19. {
  20. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  21. U32* const hashTable = ms->hashTable;
  22. U32 const hashLog = cParams->hashLog;
  23. U32* const bt = ms->chainTable;
  24. U32 const btLog = cParams->chainLog - 1;
  25. U32 const btMask = (1 << btLog) - 1;
  26. const BYTE* const base = ms->window.base;
  27. U32 const target = (U32)(ip - base);
  28. U32 idx = ms->nextToUpdate;
  29. if (idx != target)
  30. DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
  31. idx, target, ms->window.dictLimit);
  32. assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
  33. (void)iend;
  34. assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
  35. for ( ; idx < target ; idx++) {
  36. size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
  37. U32 const matchIndex = hashTable[h];
  38. U32* const nextCandidatePtr = bt + 2*(idx&btMask);
  39. U32* const sortMarkPtr = nextCandidatePtr + 1;
  40. DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
  41. hashTable[h] = idx; /* Update Hash Table */
  42. *nextCandidatePtr = matchIndex; /* update BT like a chain */
  43. *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
  44. }
  45. ms->nextToUpdate = target;
  46. }
  47. /** ZSTD_insertDUBT1() :
  48. * sort one already inserted but unsorted position
  49. * assumption : curr >= btlow == (curr - btmask)
  50. * doesn't fail */
  51. static void
  52. ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
  53. U32 curr, const BYTE* inputEnd,
  54. U32 nbCompares, U32 btLow,
  55. const ZSTD_dictMode_e dictMode)
  56. {
  57. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  58. U32* const bt = ms->chainTable;
  59. U32 const btLog = cParams->chainLog - 1;
  60. U32 const btMask = (1 << btLog) - 1;
  61. size_t commonLengthSmaller=0, commonLengthLarger=0;
  62. const BYTE* const base = ms->window.base;
  63. const BYTE* const dictBase = ms->window.dictBase;
  64. const U32 dictLimit = ms->window.dictLimit;
  65. const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
  66. const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
  67. const BYTE* const dictEnd = dictBase + dictLimit;
  68. const BYTE* const prefixStart = base + dictLimit;
  69. const BYTE* match;
  70. U32* smallerPtr = bt + 2*(curr&btMask);
  71. U32* largerPtr = smallerPtr + 1;
  72. U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
  73. U32 dummy32; /* to be nullified at the end */
  74. U32 const windowValid = ms->window.lowLimit;
  75. U32 const maxDistance = 1U << cParams->windowLog;
  76. U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
  77. DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
  78. curr, dictLimit, windowLow);
  79. assert(curr >= btLow);
  80. assert(ip < iend); /* condition for ZSTD_count */
  81. while (nbCompares-- && (matchIndex > windowLow)) {
  82. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  83. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  84. assert(matchIndex < curr);
  85. /* note : all candidates are now supposed sorted,
  86. * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
  87. * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
  88. if ( (dictMode != ZSTD_extDict)
  89. || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
  90. || (curr < dictLimit) /* both in extDict */) {
  91. const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
  92. || (matchIndex+matchLength >= dictLimit)) ?
  93. base : dictBase;
  94. assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
  95. || (curr < dictLimit) );
  96. match = mBase + matchIndex;
  97. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  98. } else {
  99. match = dictBase + matchIndex;
  100. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  101. if (matchIndex+matchLength >= dictLimit)
  102. match = base + matchIndex; /* preparation for next read of match[matchLength] */
  103. }
  104. DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
  105. curr, matchIndex, (U32)matchLength);
  106. if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
  107. break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
  108. }
  109. if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
  110. /* match is smaller than current */
  111. *smallerPtr = matchIndex; /* update smaller idx */
  112. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  113. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  114. DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
  115. matchIndex, btLow, nextPtr[1]);
  116. smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
  117. matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
  118. } else {
  119. /* match is larger than current */
  120. *largerPtr = matchIndex;
  121. commonLengthLarger = matchLength;
  122. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  123. DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
  124. matchIndex, btLow, nextPtr[0]);
  125. largerPtr = nextPtr;
  126. matchIndex = nextPtr[0];
  127. } }
  128. *smallerPtr = *largerPtr = 0;
  129. }
  130. static size_t
  131. ZSTD_DUBT_findBetterDictMatch (
  132. ZSTD_matchState_t* ms,
  133. const BYTE* const ip, const BYTE* const iend,
  134. size_t* offsetPtr,
  135. size_t bestLength,
  136. U32 nbCompares,
  137. U32 const mls,
  138. const ZSTD_dictMode_e dictMode)
  139. {
  140. const ZSTD_matchState_t * const dms = ms->dictMatchState;
  141. const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
  142. const U32 * const dictHashTable = dms->hashTable;
  143. U32 const hashLog = dmsCParams->hashLog;
  144. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  145. U32 dictMatchIndex = dictHashTable[h];
  146. const BYTE* const base = ms->window.base;
  147. const BYTE* const prefixStart = base + ms->window.dictLimit;
  148. U32 const curr = (U32)(ip-base);
  149. const BYTE* const dictBase = dms->window.base;
  150. const BYTE* const dictEnd = dms->window.nextSrc;
  151. U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
  152. U32 const dictLowLimit = dms->window.lowLimit;
  153. U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
  154. U32* const dictBt = dms->chainTable;
  155. U32 const btLog = dmsCParams->chainLog - 1;
  156. U32 const btMask = (1 << btLog) - 1;
  157. U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
  158. size_t commonLengthSmaller=0, commonLengthLarger=0;
  159. (void)dictMode;
  160. assert(dictMode == ZSTD_dictMatchState);
  161. while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
  162. U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
  163. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  164. const BYTE* match = dictBase + dictMatchIndex;
  165. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  166. if (dictMatchIndex+matchLength >= dictHighLimit)
  167. match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
  168. if (matchLength > bestLength) {
  169. U32 matchIndex = dictMatchIndex + dictIndexDelta;
  170. if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
  171. DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
  172. curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
  173. bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
  174. }
  175. if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
  176. break; /* drop, to guarantee consistency (miss a little bit of compression) */
  177. }
  178. }
  179. if (match[matchLength] < ip[matchLength]) {
  180. if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
  181. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  182. dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  183. } else {
  184. /* match is larger than current */
  185. if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
  186. commonLengthLarger = matchLength;
  187. dictMatchIndex = nextPtr[0];
  188. }
  189. }
  190. if (bestLength >= MINMATCH) {
  191. U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
  192. DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
  193. curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
  194. }
  195. return bestLength;
  196. }
  197. static size_t
  198. ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
  199. const BYTE* const ip, const BYTE* const iend,
  200. size_t* offsetPtr,
  201. U32 const mls,
  202. const ZSTD_dictMode_e dictMode)
  203. {
  204. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  205. U32* const hashTable = ms->hashTable;
  206. U32 const hashLog = cParams->hashLog;
  207. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  208. U32 matchIndex = hashTable[h];
  209. const BYTE* const base = ms->window.base;
  210. U32 const curr = (U32)(ip-base);
  211. U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
  212. U32* const bt = ms->chainTable;
  213. U32 const btLog = cParams->chainLog - 1;
  214. U32 const btMask = (1 << btLog) - 1;
  215. U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
  216. U32 const unsortLimit = MAX(btLow, windowLow);
  217. U32* nextCandidate = bt + 2*(matchIndex&btMask);
  218. U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
  219. U32 nbCompares = 1U << cParams->searchLog;
  220. U32 nbCandidates = nbCompares;
  221. U32 previousCandidate = 0;
  222. DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
  223. assert(ip <= iend-8); /* required for h calculation */
  224. assert(dictMode != ZSTD_dedicatedDictSearch);
  225. /* reach end of unsorted candidates list */
  226. while ( (matchIndex > unsortLimit)
  227. && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
  228. && (nbCandidates > 1) ) {
  229. DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
  230. matchIndex);
  231. *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
  232. previousCandidate = matchIndex;
  233. matchIndex = *nextCandidate;
  234. nextCandidate = bt + 2*(matchIndex&btMask);
  235. unsortedMark = bt + 2*(matchIndex&btMask) + 1;
  236. nbCandidates --;
  237. }
  238. /* nullify last candidate if it's still unsorted
  239. * simplification, detrimental to compression ratio, beneficial for speed */
  240. if ( (matchIndex > unsortLimit)
  241. && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
  242. DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
  243. matchIndex);
  244. *nextCandidate = *unsortedMark = 0;
  245. }
  246. /* batch sort stacked candidates */
  247. matchIndex = previousCandidate;
  248. while (matchIndex) { /* will end on matchIndex == 0 */
  249. U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
  250. U32 const nextCandidateIdx = *nextCandidateIdxPtr;
  251. ZSTD_insertDUBT1(ms, matchIndex, iend,
  252. nbCandidates, unsortLimit, dictMode);
  253. matchIndex = nextCandidateIdx;
  254. nbCandidates++;
  255. }
  256. /* find longest match */
  257. { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
  258. const BYTE* const dictBase = ms->window.dictBase;
  259. const U32 dictLimit = ms->window.dictLimit;
  260. const BYTE* const dictEnd = dictBase + dictLimit;
  261. const BYTE* const prefixStart = base + dictLimit;
  262. U32* smallerPtr = bt + 2*(curr&btMask);
  263. U32* largerPtr = bt + 2*(curr&btMask) + 1;
  264. U32 matchEndIdx = curr + 8 + 1;
  265. U32 dummy32; /* to be nullified at the end */
  266. size_t bestLength = 0;
  267. matchIndex = hashTable[h];
  268. hashTable[h] = curr; /* Update Hash Table */
  269. while (nbCompares-- && (matchIndex > windowLow)) {
  270. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  271. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  272. const BYTE* match;
  273. if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
  274. match = base + matchIndex;
  275. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  276. } else {
  277. match = dictBase + matchIndex;
  278. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  279. if (matchIndex+matchLength >= dictLimit)
  280. match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
  281. }
  282. if (matchLength > bestLength) {
  283. if (matchLength > matchEndIdx - matchIndex)
  284. matchEndIdx = matchIndex + (U32)matchLength;
  285. if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
  286. bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
  287. if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
  288. if (dictMode == ZSTD_dictMatchState) {
  289. nbCompares = 0; /* in addition to avoiding checking any
  290. * further in this loop, make sure we
  291. * skip checking in the dictionary. */
  292. }
  293. break; /* drop, to guarantee consistency (miss a little bit of compression) */
  294. }
  295. }
  296. if (match[matchLength] < ip[matchLength]) {
  297. /* match is smaller than current */
  298. *smallerPtr = matchIndex; /* update smaller idx */
  299. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  300. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  301. smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
  302. matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  303. } else {
  304. /* match is larger than current */
  305. *largerPtr = matchIndex;
  306. commonLengthLarger = matchLength;
  307. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  308. largerPtr = nextPtr;
  309. matchIndex = nextPtr[0];
  310. } }
  311. *smallerPtr = *largerPtr = 0;
  312. if (dictMode == ZSTD_dictMatchState && nbCompares) {
  313. bestLength = ZSTD_DUBT_findBetterDictMatch(
  314. ms, ip, iend,
  315. offsetPtr, bestLength, nbCompares,
  316. mls, dictMode);
  317. }
  318. assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
  319. ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
  320. if (bestLength >= MINMATCH) {
  321. U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
  322. DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
  323. curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
  324. }
  325. return bestLength;
  326. }
  327. }
  328. /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
  329. FORCE_INLINE_TEMPLATE size_t
  330. ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
  331. const BYTE* const ip, const BYTE* const iLimit,
  332. size_t* offsetPtr,
  333. const U32 mls /* template */,
  334. const ZSTD_dictMode_e dictMode)
  335. {
  336. DEBUGLOG(7, "ZSTD_BtFindBestMatch");
  337. if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
  338. ZSTD_updateDUBT(ms, ip, iLimit, mls);
  339. return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
  340. }
  341. static size_t
  342. ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
  343. const BYTE* ip, const BYTE* const iLimit,
  344. size_t* offsetPtr)
  345. {
  346. switch(ms->cParams.minMatch)
  347. {
  348. default : /* includes case 3 */
  349. case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
  350. case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
  351. case 7 :
  352. case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
  353. }
  354. }
  355. static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
  356. ZSTD_matchState_t* ms,
  357. const BYTE* ip, const BYTE* const iLimit,
  358. size_t* offsetPtr)
  359. {
  360. switch(ms->cParams.minMatch)
  361. {
  362. default : /* includes case 3 */
  363. case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
  364. case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
  365. case 7 :
  366. case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
  367. }
  368. }
  369. static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
  370. ZSTD_matchState_t* ms,
  371. const BYTE* ip, const BYTE* const iLimit,
  372. size_t* offsetPtr)
  373. {
  374. switch(ms->cParams.minMatch)
  375. {
  376. default : /* includes case 3 */
  377. case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
  378. case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
  379. case 7 :
  380. case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
  381. }
  382. }
  383. /* *********************************
  384. * Hash Chain
  385. ***********************************/
  386. #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
  387. /* Update chains up to ip (excluded)
  388. Assumption : always within prefix (i.e. not within extDict) */
  389. FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
  390. ZSTD_matchState_t* ms,
  391. const ZSTD_compressionParameters* const cParams,
  392. const BYTE* ip, U32 const mls)
  393. {
  394. U32* const hashTable = ms->hashTable;
  395. const U32 hashLog = cParams->hashLog;
  396. U32* const chainTable = ms->chainTable;
  397. const U32 chainMask = (1 << cParams->chainLog) - 1;
  398. const BYTE* const base = ms->window.base;
  399. const U32 target = (U32)(ip - base);
  400. U32 idx = ms->nextToUpdate;
  401. while(idx < target) { /* catch up */
  402. size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
  403. NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
  404. hashTable[h] = idx;
  405. idx++;
  406. }
  407. ms->nextToUpdate = target;
  408. return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
  409. }
  410. U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
  411. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  412. return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
  413. }
  414. void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
  415. {
  416. const BYTE* const base = ms->window.base;
  417. U32 const target = (U32)(ip - base);
  418. U32* const hashTable = ms->hashTable;
  419. U32* const chainTable = ms->chainTable;
  420. U32 const chainSize = 1 << ms->cParams.chainLog;
  421. U32 idx = ms->nextToUpdate;
  422. U32 const minChain = chainSize < target ? target - chainSize : idx;
  423. U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
  424. U32 const cacheSize = bucketSize - 1;
  425. U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
  426. U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
  427. /* We know the hashtable is oversized by a factor of `bucketSize`.
  428. * We are going to temporarily pretend `bucketSize == 1`, keeping only a
  429. * single entry. We will use the rest of the space to construct a temporary
  430. * chaintable.
  431. */
  432. U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
  433. U32* const tmpHashTable = hashTable;
  434. U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
  435. U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
  436. U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
  437. U32 hashIdx;
  438. assert(ms->cParams.chainLog <= 24);
  439. assert(ms->cParams.hashLog >= ms->cParams.chainLog);
  440. assert(idx != 0);
  441. assert(tmpMinChain <= minChain);
  442. /* fill conventional hash table and conventional chain table */
  443. for ( ; idx < target; idx++) {
  444. U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
  445. if (idx >= tmpMinChain) {
  446. tmpChainTable[idx - tmpMinChain] = hashTable[h];
  447. }
  448. tmpHashTable[h] = idx;
  449. }
  450. /* sort chains into ddss chain table */
  451. {
  452. U32 chainPos = 0;
  453. for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
  454. U32 count;
  455. U32 countBeyondMinChain = 0;
  456. U32 i = tmpHashTable[hashIdx];
  457. for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
  458. /* skip through the chain to the first position that won't be
  459. * in the hash cache bucket */
  460. if (i < minChain) {
  461. countBeyondMinChain++;
  462. }
  463. i = tmpChainTable[i - tmpMinChain];
  464. }
  465. if (count == cacheSize) {
  466. for (count = 0; count < chainLimit;) {
  467. if (i < minChain) {
  468. if (!i || countBeyondMinChain++ > cacheSize) {
  469. /* only allow pulling `cacheSize` number of entries
  470. * into the cache or chainTable beyond `minChain`,
  471. * to replace the entries pulled out of the
  472. * chainTable into the cache. This lets us reach
  473. * back further without increasing the total number
  474. * of entries in the chainTable, guaranteeing the
  475. * DDSS chain table will fit into the space
  476. * allocated for the regular one. */
  477. break;
  478. }
  479. }
  480. chainTable[chainPos++] = i;
  481. count++;
  482. if (i < tmpMinChain) {
  483. break;
  484. }
  485. i = tmpChainTable[i - tmpMinChain];
  486. }
  487. } else {
  488. count = 0;
  489. }
  490. if (count) {
  491. tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
  492. } else {
  493. tmpHashTable[hashIdx] = 0;
  494. }
  495. }
  496. assert(chainPos <= chainSize); /* I believe this is guaranteed... */
  497. }
  498. /* move chain pointers into the last entry of each hash bucket */
  499. for (hashIdx = (1 << hashLog); hashIdx; ) {
  500. U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
  501. U32 const chainPackedPointer = tmpHashTable[hashIdx];
  502. U32 i;
  503. for (i = 0; i < cacheSize; i++) {
  504. hashTable[bucketIdx + i] = 0;
  505. }
  506. hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
  507. }
  508. /* fill the buckets of the hash table */
  509. for (idx = ms->nextToUpdate; idx < target; idx++) {
  510. U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
  511. << ZSTD_LAZY_DDSS_BUCKET_LOG;
  512. U32 i;
  513. /* Shift hash cache down 1. */
  514. for (i = cacheSize - 1; i; i--)
  515. hashTable[h + i] = hashTable[h + i - 1];
  516. hashTable[h] = idx;
  517. }
  518. ms->nextToUpdate = target;
  519. }
  520. /* inlining is important to hardwire a hot branch (template emulation) */
  521. FORCE_INLINE_TEMPLATE
  522. size_t ZSTD_HcFindBestMatch_generic (
  523. ZSTD_matchState_t* ms,
  524. const BYTE* const ip, const BYTE* const iLimit,
  525. size_t* offsetPtr,
  526. const U32 mls, const ZSTD_dictMode_e dictMode)
  527. {
  528. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  529. U32* const chainTable = ms->chainTable;
  530. const U32 chainSize = (1 << cParams->chainLog);
  531. const U32 chainMask = chainSize-1;
  532. const BYTE* const base = ms->window.base;
  533. const BYTE* const dictBase = ms->window.dictBase;
  534. const U32 dictLimit = ms->window.dictLimit;
  535. const BYTE* const prefixStart = base + dictLimit;
  536. const BYTE* const dictEnd = dictBase + dictLimit;
  537. const U32 curr = (U32)(ip-base);
  538. const U32 maxDistance = 1U << cParams->windowLog;
  539. const U32 lowestValid = ms->window.lowLimit;
  540. const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
  541. const U32 isDictionary = (ms->loadedDictEnd != 0);
  542. const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
  543. const U32 minChain = curr > chainSize ? curr - chainSize : 0;
  544. U32 nbAttempts = 1U << cParams->searchLog;
  545. size_t ml=4-1;
  546. const ZSTD_matchState_t* const dms = ms->dictMatchState;
  547. const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
  548. ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
  549. const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
  550. ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
  551. U32 matchIndex;
  552. if (dictMode == ZSTD_dedicatedDictSearch) {
  553. const U32* entry = &dms->hashTable[ddsIdx];
  554. PREFETCH_L1(entry);
  555. }
  556. /* HC4 match finder */
  557. matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
  558. for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
  559. size_t currentMl=0;
  560. if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
  561. const BYTE* const match = base + matchIndex;
  562. assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
  563. if (match[ml] == ip[ml]) /* potentially better */
  564. currentMl = ZSTD_count(ip, match, iLimit);
  565. } else {
  566. const BYTE* const match = dictBase + matchIndex;
  567. assert(match+4 <= dictEnd);
  568. if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  569. currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
  570. }
  571. /* save best solution */
  572. if (currentMl > ml) {
  573. ml = currentMl;
  574. *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
  575. if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  576. }
  577. if (matchIndex <= minChain) break;
  578. matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
  579. }
  580. if (dictMode == ZSTD_dedicatedDictSearch) {
  581. const U32 ddsLowestIndex = dms->window.dictLimit;
  582. const BYTE* const ddsBase = dms->window.base;
  583. const BYTE* const ddsEnd = dms->window.nextSrc;
  584. const U32 ddsSize = (U32)(ddsEnd - ddsBase);
  585. const U32 ddsIndexDelta = dictLimit - ddsSize;
  586. const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
  587. const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
  588. U32 ddsAttempt;
  589. for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
  590. PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
  591. }
  592. {
  593. U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
  594. U32 const chainIndex = chainPackedPointer >> 8;
  595. PREFETCH_L1(&dms->chainTable[chainIndex]);
  596. }
  597. for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
  598. size_t currentMl=0;
  599. const BYTE* match;
  600. matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
  601. match = ddsBase + matchIndex;
  602. if (!matchIndex) {
  603. return ml;
  604. }
  605. /* guaranteed by table construction */
  606. (void)ddsLowestIndex;
  607. assert(matchIndex >= ddsLowestIndex);
  608. assert(match+4 <= ddsEnd);
  609. if (MEM_read32(match) == MEM_read32(ip)) {
  610. /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  611. currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
  612. }
  613. /* save best solution */
  614. if (currentMl > ml) {
  615. ml = currentMl;
  616. *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
  617. if (ip+currentMl == iLimit) {
  618. /* best possible, avoids read overflow on next attempt */
  619. return ml;
  620. }
  621. }
  622. }
  623. {
  624. U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
  625. U32 chainIndex = chainPackedPointer >> 8;
  626. U32 const chainLength = chainPackedPointer & 0xFF;
  627. U32 const chainAttempts = nbAttempts - ddsAttempt;
  628. U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
  629. U32 chainAttempt;
  630. for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
  631. PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
  632. }
  633. for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
  634. size_t currentMl=0;
  635. const BYTE* match;
  636. matchIndex = dms->chainTable[chainIndex];
  637. match = ddsBase + matchIndex;
  638. /* guaranteed by table construction */
  639. assert(matchIndex >= ddsLowestIndex);
  640. assert(match+4 <= ddsEnd);
  641. if (MEM_read32(match) == MEM_read32(ip)) {
  642. /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  643. currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
  644. }
  645. /* save best solution */
  646. if (currentMl > ml) {
  647. ml = currentMl;
  648. *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
  649. if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  650. }
  651. }
  652. }
  653. } else if (dictMode == ZSTD_dictMatchState) {
  654. const U32* const dmsChainTable = dms->chainTable;
  655. const U32 dmsChainSize = (1 << dms->cParams.chainLog);
  656. const U32 dmsChainMask = dmsChainSize - 1;
  657. const U32 dmsLowestIndex = dms->window.dictLimit;
  658. const BYTE* const dmsBase = dms->window.base;
  659. const BYTE* const dmsEnd = dms->window.nextSrc;
  660. const U32 dmsSize = (U32)(dmsEnd - dmsBase);
  661. const U32 dmsIndexDelta = dictLimit - dmsSize;
  662. const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
  663. matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
  664. for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
  665. size_t currentMl=0;
  666. const BYTE* const match = dmsBase + matchIndex;
  667. assert(match+4 <= dmsEnd);
  668. if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  669. currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
  670. /* save best solution */
  671. if (currentMl > ml) {
  672. ml = currentMl;
  673. *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
  674. if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  675. }
  676. if (matchIndex <= dmsMinChain) break;
  677. matchIndex = dmsChainTable[matchIndex & dmsChainMask];
  678. }
  679. }
  680. return ml;
  681. }
  682. FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
  683. ZSTD_matchState_t* ms,
  684. const BYTE* ip, const BYTE* const iLimit,
  685. size_t* offsetPtr)
  686. {
  687. switch(ms->cParams.minMatch)
  688. {
  689. default : /* includes case 3 */
  690. case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
  691. case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
  692. case 7 :
  693. case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
  694. }
  695. }
  696. static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
  697. ZSTD_matchState_t* ms,
  698. const BYTE* ip, const BYTE* const iLimit,
  699. size_t* offsetPtr)
  700. {
  701. switch(ms->cParams.minMatch)
  702. {
  703. default : /* includes case 3 */
  704. case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
  705. case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
  706. case 7 :
  707. case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
  708. }
  709. }
  710. static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
  711. ZSTD_matchState_t* ms,
  712. const BYTE* ip, const BYTE* const iLimit,
  713. size_t* offsetPtr)
  714. {
  715. switch(ms->cParams.minMatch)
  716. {
  717. default : /* includes case 3 */
  718. case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
  719. case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
  720. case 7 :
  721. case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
  722. }
  723. }
  724. FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
  725. ZSTD_matchState_t* ms,
  726. const BYTE* ip, const BYTE* const iLimit,
  727. size_t* offsetPtr)
  728. {
  729. switch(ms->cParams.minMatch)
  730. {
  731. default : /* includes case 3 */
  732. case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
  733. case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
  734. case 7 :
  735. case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
  736. }
  737. }
  738. /* *******************************
  739. * Common parser - lazy strategy
  740. *********************************/
  741. typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
  742. FORCE_INLINE_TEMPLATE size_t
  743. ZSTD_compressBlock_lazy_generic(
  744. ZSTD_matchState_t* ms, seqStore_t* seqStore,
  745. U32 rep[ZSTD_REP_NUM],
  746. const void* src, size_t srcSize,
  747. const searchMethod_e searchMethod, const U32 depth,
  748. ZSTD_dictMode_e const dictMode)
  749. {
  750. const BYTE* const istart = (const BYTE*)src;
  751. const BYTE* ip = istart;
  752. const BYTE* anchor = istart;
  753. const BYTE* const iend = istart + srcSize;
  754. const BYTE* const ilimit = iend - 8;
  755. const BYTE* const base = ms->window.base;
  756. const U32 prefixLowestIndex = ms->window.dictLimit;
  757. const BYTE* const prefixLowest = base + prefixLowestIndex;
  758. typedef size_t (*searchMax_f)(
  759. ZSTD_matchState_t* ms,
  760. const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  761. /**
  762. * This table is indexed first by the four ZSTD_dictMode_e values, and then
  763. * by the two searchMethod_e values. NULLs are placed for configurations
  764. * that should never occur (extDict modes go to the other implementation
  765. * below and there is no DDSS for binary tree search yet).
  766. */
  767. const searchMax_f searchFuncs[4][2] = {
  768. {
  769. ZSTD_HcFindBestMatch_selectMLS,
  770. ZSTD_BtFindBestMatch_selectMLS
  771. },
  772. {
  773. NULL,
  774. NULL
  775. },
  776. {
  777. ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
  778. ZSTD_BtFindBestMatch_dictMatchState_selectMLS
  779. },
  780. {
  781. ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
  782. NULL
  783. }
  784. };
  785. searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
  786. U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
  787. const int isDMS = dictMode == ZSTD_dictMatchState;
  788. const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
  789. const int isDxS = isDMS || isDDS;
  790. const ZSTD_matchState_t* const dms = ms->dictMatchState;
  791. const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
  792. const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
  793. const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
  794. const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
  795. const U32 dictIndexDelta = isDxS ?
  796. prefixLowestIndex - (U32)(dictEnd - dictBase) :
  797. 0;
  798. const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
  799. assert(searchMax != NULL);
  800. DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
  801. /* init */
  802. ip += (dictAndPrefixLength == 0);
  803. if (dictMode == ZSTD_noDict) {
  804. U32 const curr = (U32)(ip - base);
  805. U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
  806. U32 const maxRep = curr - windowLow;
  807. if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
  808. if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
  809. }
  810. if (isDxS) {
  811. /* dictMatchState repCode checks don't currently handle repCode == 0
  812. * disabling. */
  813. assert(offset_1 <= dictAndPrefixLength);
  814. assert(offset_2 <= dictAndPrefixLength);
  815. }
  816. /* Match Loop */
  817. #if defined(__GNUC__) && defined(__x86_64__)
  818. /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
  819. * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
  820. */
  821. __asm__(".p2align 5");
  822. #endif
  823. while (ip < ilimit) {
  824. size_t matchLength=0;
  825. size_t offset=0;
  826. const BYTE* start=ip+1;
  827. /* check repCode */
  828. if (isDxS) {
  829. const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
  830. const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
  831. && repIndex < prefixLowestIndex) ?
  832. dictBase + (repIndex - dictIndexDelta) :
  833. base + repIndex;
  834. if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  835. && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
  836. const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  837. matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  838. if (depth==0) goto _storeSequence;
  839. }
  840. }
  841. if ( dictMode == ZSTD_noDict
  842. && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
  843. matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
  844. if (depth==0) goto _storeSequence;
  845. }
  846. /* first search (depth 0) */
  847. { size_t offsetFound = 999999999;
  848. size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
  849. if (ml2 > matchLength)
  850. matchLength = ml2, start = ip, offset=offsetFound;
  851. }
  852. if (matchLength < 4) {
  853. ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
  854. continue;
  855. }
  856. /* let's try to find a better solution */
  857. if (depth>=1)
  858. while (ip<ilimit) {
  859. ip ++;
  860. if ( (dictMode == ZSTD_noDict)
  861. && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  862. size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  863. int const gain2 = (int)(mlRep * 3);
  864. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  865. if ((mlRep >= 4) && (gain2 > gain1))
  866. matchLength = mlRep, offset = 0, start = ip;
  867. }
  868. if (isDxS) {
  869. const U32 repIndex = (U32)(ip - base) - offset_1;
  870. const BYTE* repMatch = repIndex < prefixLowestIndex ?
  871. dictBase + (repIndex - dictIndexDelta) :
  872. base + repIndex;
  873. if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  874. && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  875. const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  876. size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  877. int const gain2 = (int)(mlRep * 3);
  878. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  879. if ((mlRep >= 4) && (gain2 > gain1))
  880. matchLength = mlRep, offset = 0, start = ip;
  881. }
  882. }
  883. { size_t offset2=999999999;
  884. size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  885. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  886. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  887. if ((ml2 >= 4) && (gain2 > gain1)) {
  888. matchLength = ml2, offset = offset2, start = ip;
  889. continue; /* search a better one */
  890. } }
  891. /* let's find an even better one */
  892. if ((depth==2) && (ip<ilimit)) {
  893. ip ++;
  894. if ( (dictMode == ZSTD_noDict)
  895. && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  896. size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  897. int const gain2 = (int)(mlRep * 4);
  898. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  899. if ((mlRep >= 4) && (gain2 > gain1))
  900. matchLength = mlRep, offset = 0, start = ip;
  901. }
  902. if (isDxS) {
  903. const U32 repIndex = (U32)(ip - base) - offset_1;
  904. const BYTE* repMatch = repIndex < prefixLowestIndex ?
  905. dictBase + (repIndex - dictIndexDelta) :
  906. base + repIndex;
  907. if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
  908. && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  909. const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
  910. size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
  911. int const gain2 = (int)(mlRep * 4);
  912. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  913. if ((mlRep >= 4) && (gain2 > gain1))
  914. matchLength = mlRep, offset = 0, start = ip;
  915. }
  916. }
  917. { size_t offset2=999999999;
  918. size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  919. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  920. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  921. if ((ml2 >= 4) && (gain2 > gain1)) {
  922. matchLength = ml2, offset = offset2, start = ip;
  923. continue;
  924. } } }
  925. break; /* nothing found : store previous solution */
  926. }
  927. /* NOTE:
  928. * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
  929. * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
  930. * overflows the pointer, which is undefined behavior.
  931. */
  932. /* catch up */
  933. if (offset) {
  934. if (dictMode == ZSTD_noDict) {
  935. while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
  936. && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
  937. { start--; matchLength++; }
  938. }
  939. if (isDxS) {
  940. U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
  941. const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
  942. const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
  943. while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
  944. }
  945. offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  946. }
  947. /* store sequence */
  948. _storeSequence:
  949. { size_t const litLength = start - anchor;
  950. ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
  951. anchor = ip = start + matchLength;
  952. }
  953. /* check immediate repcode */
  954. if (isDxS) {
  955. while (ip <= ilimit) {
  956. U32 const current2 = (U32)(ip-base);
  957. U32 const repIndex = current2 - offset_2;
  958. const BYTE* repMatch = repIndex < prefixLowestIndex ?
  959. dictBase - dictIndexDelta + repIndex :
  960. base + repIndex;
  961. if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
  962. && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
  963. const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
  964. matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
  965. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
  966. ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  967. ip += matchLength;
  968. anchor = ip;
  969. continue;
  970. }
  971. break;
  972. }
  973. }
  974. if (dictMode == ZSTD_noDict) {
  975. while ( ((ip <= ilimit) & (offset_2>0))
  976. && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
  977. /* store sequence */
  978. matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
  979. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
  980. ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  981. ip += matchLength;
  982. anchor = ip;
  983. continue; /* faster when present ... (?) */
  984. } } }
  985. /* Save reps for next block */
  986. rep[0] = offset_1 ? offset_1 : savedOffset;
  987. rep[1] = offset_2 ? offset_2 : savedOffset;
  988. /* Return the last literals size */
  989. return (size_t)(iend - anchor);
  990. }
  991. size_t ZSTD_compressBlock_btlazy2(
  992. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  993. void const* src, size_t srcSize)
  994. {
  995. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
  996. }
  997. size_t ZSTD_compressBlock_lazy2(
  998. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  999. void const* src, size_t srcSize)
  1000. {
  1001. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
  1002. }
  1003. size_t ZSTD_compressBlock_lazy(
  1004. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1005. void const* src, size_t srcSize)
  1006. {
  1007. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
  1008. }
  1009. size_t ZSTD_compressBlock_greedy(
  1010. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1011. void const* src, size_t srcSize)
  1012. {
  1013. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
  1014. }
  1015. size_t ZSTD_compressBlock_btlazy2_dictMatchState(
  1016. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1017. void const* src, size_t srcSize)
  1018. {
  1019. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
  1020. }
  1021. size_t ZSTD_compressBlock_lazy2_dictMatchState(
  1022. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1023. void const* src, size_t srcSize)
  1024. {
  1025. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
  1026. }
  1027. size_t ZSTD_compressBlock_lazy_dictMatchState(
  1028. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1029. void const* src, size_t srcSize)
  1030. {
  1031. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
  1032. }
  1033. size_t ZSTD_compressBlock_greedy_dictMatchState(
  1034. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1035. void const* src, size_t srcSize)
  1036. {
  1037. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
  1038. }
  1039. size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
  1040. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1041. void const* src, size_t srcSize)
  1042. {
  1043. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
  1044. }
  1045. size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
  1046. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1047. void const* src, size_t srcSize)
  1048. {
  1049. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
  1050. }
  1051. size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
  1052. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1053. void const* src, size_t srcSize)
  1054. {
  1055. return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
  1056. }
  1057. FORCE_INLINE_TEMPLATE
  1058. size_t ZSTD_compressBlock_lazy_extDict_generic(
  1059. ZSTD_matchState_t* ms, seqStore_t* seqStore,
  1060. U32 rep[ZSTD_REP_NUM],
  1061. const void* src, size_t srcSize,
  1062. const searchMethod_e searchMethod, const U32 depth)
  1063. {
  1064. const BYTE* const istart = (const BYTE*)src;
  1065. const BYTE* ip = istart;
  1066. const BYTE* anchor = istart;
  1067. const BYTE* const iend = istart + srcSize;
  1068. const BYTE* const ilimit = iend - 8;
  1069. const BYTE* const base = ms->window.base;
  1070. const U32 dictLimit = ms->window.dictLimit;
  1071. const BYTE* const prefixStart = base + dictLimit;
  1072. const BYTE* const dictBase = ms->window.dictBase;
  1073. const BYTE* const dictEnd = dictBase + dictLimit;
  1074. const BYTE* const dictStart = dictBase + ms->window.lowLimit;
  1075. const U32 windowLog = ms->cParams.windowLog;
  1076. typedef size_t (*searchMax_f)(
  1077. ZSTD_matchState_t* ms,
  1078. const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  1079. searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
  1080. U32 offset_1 = rep[0], offset_2 = rep[1];
  1081. DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
  1082. /* init */
  1083. ip += (ip == prefixStart);
  1084. /* Match Loop */
  1085. #if defined(__GNUC__) && defined(__x86_64__)
  1086. /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
  1087. * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
  1088. */
  1089. __asm__(".p2align 5");
  1090. #endif
  1091. while (ip < ilimit) {
  1092. size_t matchLength=0;
  1093. size_t offset=0;
  1094. const BYTE* start=ip+1;
  1095. U32 curr = (U32)(ip-base);
  1096. /* check repCode */
  1097. { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
  1098. const U32 repIndex = (U32)(curr+1 - offset_1);
  1099. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  1100. const BYTE* const repMatch = repBase + repIndex;
  1101. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
  1102. if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
  1103. /* repcode detected we should take it */
  1104. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  1105. matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  1106. if (depth==0) goto _storeSequence;
  1107. } }
  1108. /* first search (depth 0) */
  1109. { size_t offsetFound = 999999999;
  1110. size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
  1111. if (ml2 > matchLength)
  1112. matchLength = ml2, start = ip, offset=offsetFound;
  1113. }
  1114. if (matchLength < 4) {
  1115. ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
  1116. continue;
  1117. }
  1118. /* let's try to find a better solution */
  1119. if (depth>=1)
  1120. while (ip<ilimit) {
  1121. ip ++;
  1122. curr++;
  1123. /* check repCode */
  1124. if (offset) {
  1125. const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
  1126. const U32 repIndex = (U32)(curr - offset_1);
  1127. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  1128. const BYTE* const repMatch = repBase + repIndex;
  1129. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
  1130. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  1131. /* repcode detected */
  1132. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  1133. size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  1134. int const gain2 = (int)(repLength * 3);
  1135. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  1136. if ((repLength >= 4) && (gain2 > gain1))
  1137. matchLength = repLength, offset = 0, start = ip;
  1138. } }
  1139. /* search match, depth 1 */
  1140. { size_t offset2=999999999;
  1141. size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  1142. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  1143. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  1144. if ((ml2 >= 4) && (gain2 > gain1)) {
  1145. matchLength = ml2, offset = offset2, start = ip;
  1146. continue; /* search a better one */
  1147. } }
  1148. /* let's find an even better one */
  1149. if ((depth==2) && (ip<ilimit)) {
  1150. ip ++;
  1151. curr++;
  1152. /* check repCode */
  1153. if (offset) {
  1154. const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
  1155. const U32 repIndex = (U32)(curr - offset_1);
  1156. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  1157. const BYTE* const repMatch = repBase + repIndex;
  1158. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
  1159. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  1160. /* repcode detected */
  1161. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  1162. size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  1163. int const gain2 = (int)(repLength * 4);
  1164. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  1165. if ((repLength >= 4) && (gain2 > gain1))
  1166. matchLength = repLength, offset = 0, start = ip;
  1167. } }
  1168. /* search match, depth 2 */
  1169. { size_t offset2=999999999;
  1170. size_t const ml2 = searchMax(ms, ip, iend, &offset2);
  1171. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  1172. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  1173. if ((ml2 >= 4) && (gain2 > gain1)) {
  1174. matchLength = ml2, offset = offset2, start = ip;
  1175. continue;
  1176. } } }
  1177. break; /* nothing found : store previous solution */
  1178. }
  1179. /* catch up */
  1180. if (offset) {
  1181. U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
  1182. const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
  1183. const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
  1184. while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
  1185. offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  1186. }
  1187. /* store sequence */
  1188. _storeSequence:
  1189. { size_t const litLength = start - anchor;
  1190. ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
  1191. anchor = ip = start + matchLength;
  1192. }
  1193. /* check immediate repcode */
  1194. while (ip <= ilimit) {
  1195. const U32 repCurrent = (U32)(ip-base);
  1196. const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
  1197. const U32 repIndex = repCurrent - offset_2;
  1198. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  1199. const BYTE* const repMatch = repBase + repIndex;
  1200. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
  1201. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  1202. /* repcode detected we should take it */
  1203. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  1204. matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  1205. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
  1206. ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  1207. ip += matchLength;
  1208. anchor = ip;
  1209. continue; /* faster when present ... (?) */
  1210. }
  1211. break;
  1212. } }
  1213. /* Save reps for next block */
  1214. rep[0] = offset_1;
  1215. rep[1] = offset_2;
  1216. /* Return the last literals size */
  1217. return (size_t)(iend - anchor);
  1218. }
  1219. size_t ZSTD_compressBlock_greedy_extDict(
  1220. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1221. void const* src, size_t srcSize)
  1222. {
  1223. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
  1224. }
  1225. size_t ZSTD_compressBlock_lazy_extDict(
  1226. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1227. void const* src, size_t srcSize)
  1228. {
  1229. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
  1230. }
  1231. size_t ZSTD_compressBlock_lazy2_extDict(
  1232. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1233. void const* src, size_t srcSize)
  1234. {
  1235. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
  1236. }
  1237. size_t ZSTD_compressBlock_btlazy2_extDict(
  1238. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1239. void const* src, size_t srcSize)
  1240. {
  1241. return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
  1242. }