OLZW.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. /*
  2. * Seven Kingdoms: Ancient Adversaries
  3. *
  4. * Copyright 1997,1998 Enlight Software Ltd.
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. *
  19. */
  20. // Filename : OLZW.CPP
  21. // Description : LZW compression and decompression
  22. #include <ALL.h>
  23. #include <OFILE.h>
  24. #include <OLZW.h>
  25. // --------- define constant ------------//
  26. #define BITS 15
  27. #define MAX_CODE ( ( 1 << BITS ) - 1 )
  28. #define TABLE_SIZE 35023L
  29. #define END_OF_STREAM 256
  30. #define BUMP_CODE 257
  31. #define FLUSH_CODE 258
  32. #define FIRST_CODE 259
  33. #define UNUSED 0xffff
  34. static unsigned short BITS_MASK[] =
  35. {
  36. 0x0000,
  37. 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
  38. 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff,
  39. };
  40. // --------- define macro ----------//
  41. #define DICT( i ) dict[i]
  42. // --------- define struct Dictionary ---------//
  43. struct Dictionary
  44. {
  45. unsigned short code_value;
  46. unsigned short parent_code;
  47. unsigned char character;
  48. };
  49. BitStream::BitStream() : bit_offset(0)
  50. {
  51. }
  52. BitStream::~BitStream()
  53. {
  54. }
  55. unsigned short BitStream::input_bits(unsigned stringLen)
  56. {
  57. bit_offset += stringLen;
  58. return 0;
  59. }
  60. void BitStream::output_bits(unsigned short stringCode, unsigned stringLen)
  61. {
  62. bit_offset += stringLen;
  63. }
  64. BitMemStream::BitMemStream(unsigned char *p) : BitStream(), bytePtr(p)
  65. {
  66. }
  67. unsigned short BitMemStream::input_bits(unsigned stringLen)
  68. {
  69. unsigned char *p = bytePtr + bit_offset / 8;
  70. int s = bit_offset % 8;
  71. // get longer bits
  72. unsigned long ul = *(unsigned long *)p >> s;
  73. (void) BitStream::input_bits(stringLen);
  74. // mask off leading bits
  75. return (unsigned short)ul & BITS_MASK[stringLen];
  76. }
  77. void BitMemStream::output_bits(unsigned short stringCode, unsigned stringLen)
  78. {
  79. // fill low bit first
  80. unsigned char *p = bytePtr + bit_offset / 8;
  81. int s = bit_offset % 8;
  82. // mask off unused bit
  83. *(unsigned long *)p &= BITS_MASK[s];
  84. // stringCode &= bitMask[stringCode];
  85. // shift stringCode and put them to outPtr
  86. *(unsigned long *)p |= (unsigned long)stringCode << s;
  87. BitStream::output_bits(stringCode, stringLen);
  88. }
  89. BitFileRead::BitFileRead(File *f) : filePtr(f), last_offset(0)
  90. {
  91. f->file_read(&residue, sizeof(residue));
  92. }
  93. unsigned short BitFileRead::input_bits(unsigned stringLen)
  94. {
  95. if( bit_offset + stringLen > (last_offset+sizeof(residue))*8 )
  96. {
  97. // find byte to read
  98. long byteFetch = bit_offset/8 - last_offset;
  99. if( byteFetch >= sizeof(residue) ) // residue >>= 32 does not change to 0
  100. residue = 0;
  101. else
  102. residue >>= 8*byteFetch;
  103. filePtr->file_read( sizeof(residue)-byteFetch+(unsigned char *)&residue, byteFetch );
  104. last_offset += byteFetch;
  105. }
  106. err_when( bit_offset + stringLen > (last_offset+sizeof(residue))*8 );
  107. int s = bit_offset - last_offset*8;
  108. unsigned long ul = residue >> s;
  109. (void) BitStream::input_bits(stringLen);
  110. // mask off leading bits
  111. return (unsigned short)ul & BITS_MASK[stringLen];
  112. }
  113. void BitFileRead::output_bits(unsigned short stringCode, unsigned stringLen)
  114. {
  115. err_here();
  116. BitStream::output_bits(stringCode, stringLen);
  117. }
  118. BitFileWrite::BitFileWrite(File *f) : filePtr(f), residue(0), residue_len(0)
  119. {
  120. }
  121. BitFileWrite::~BitFileWrite()
  122. {
  123. // flush output
  124. filePtr->file_write(&residue, sizeof(residue));
  125. }
  126. unsigned short BitFileWrite::input_bits(unsigned stringLen)
  127. {
  128. err_here();
  129. return BitStream::input_bits(stringLen);
  130. }
  131. void BitFileWrite::output_bits(unsigned short stringCode, unsigned stringLen)
  132. {
  133. if( residue_len + stringLen > sizeof(residue)*8 )
  134. {
  135. // number of byte to write, is residue_len / 8
  136. int byteFlush = residue_len / 8;
  137. filePtr->file_write( &residue, byteFlush );
  138. if( byteFlush >= sizeof(residue)) // if byteFlush == 4, residue >>= 32 does not set residue to 0
  139. residue = 0;
  140. else
  141. residue >>= byteFlush * 8;
  142. residue_len -= byteFlush * 8;
  143. }
  144. err_when( residue_len + stringLen > sizeof(residue)*8 );
  145. residue |= (unsigned long) stringCode << residue_len;
  146. residue_len += stringLen;
  147. BitStream::output_bits(stringCode, stringLen);
  148. }
  149. // --------- begin of function Lzw::Lzw -------------//
  150. Lzw::Lzw()
  151. {
  152. dict = NULL;
  153. decode_stack = NULL;
  154. }
  155. // --------- end of function Lzw::Lzw -------------//
  156. // --------- begin of function Lzw::~Lzw -------------//
  157. Lzw::~Lzw()
  158. {
  159. free_storage();
  160. }
  161. // --------- end of function Lzw::~Lzw -------------//
  162. // --------- begin of function Lzw::initialize_storage -------------//
  163. void Lzw::initialize_storage()
  164. {
  165. if( !dict )
  166. dict = (Dictionary *) mem_add(sizeof(Dictionary) * TABLE_SIZE);
  167. if( !decode_stack )
  168. decode_stack = (unsigned char *)mem_add(sizeof(unsigned char) * TABLE_SIZE);
  169. }
  170. // --------- end of function Lzw::initialize_storage -------------//
  171. // --------- begin of function Lzw::free_storage -------------//
  172. void Lzw::free_storage()
  173. {
  174. if( decode_stack )
  175. {
  176. mem_del(decode_stack);
  177. decode_stack = NULL;
  178. }
  179. if( dict )
  180. {
  181. mem_del(dict);
  182. dict = NULL;
  183. }
  184. }
  185. // --------- end of function Lzw::free_storage -------------//
  186. // --------- begin of function Lzw::initialize_dictionary -------------//
  187. void Lzw::initialize_dictionary()
  188. {
  189. unsigned short i;
  190. for ( i = 0 ; i < TABLE_SIZE ; i++ )
  191. DICT( i ).code_value = UNUSED;
  192. next_code = FIRST_CODE;
  193. // putc( 'F', stdout );
  194. current_code_bits = 9;
  195. next_bump_code = 511;
  196. }
  197. // --------- end of function Lzw::initialize_dictionary -------------//
  198. // nul output, to find output size in bits
  199. long Lzw::compress( unsigned char *inPtr, long inByteLen)
  200. {
  201. BitStream nulStream;
  202. return basic_compress( inPtr, inByteLen, &nulStream);
  203. }
  204. // compressed data in memory
  205. long Lzw::compress( unsigned char *inPtr, long inByteLen, unsigned char *outPtr)
  206. {
  207. BitMemStream memStream(outPtr);
  208. return basic_compress( inPtr, inByteLen, &memStream);
  209. }
  210. // set outPtr to NULL to find the decompressed size
  211. long Lzw::expand( unsigned char *inPtr, long inBitLen, unsigned char *outPtr)
  212. {
  213. BitMemStream memStream(inPtr);
  214. return basic_expand( &memStream, outPtr );
  215. }
  216. // compressed data in file
  217. long Lzw::compress( unsigned char *inPtr, long inByteLen, File *outFile)
  218. {
  219. BitFileWrite fileStream(outFile);
  220. return basic_compress( inPtr, inByteLen, &fileStream);
  221. }
  222. // set outPtr to NULL to find the decompressed size
  223. long Lzw::expand( File *inFile, unsigned char *outPtr)
  224. {
  225. BitFileRead fileStream(inFile);
  226. return basic_expand( &fileStream, outPtr);
  227. }
  228. // --------- begin of function Lzw::basic_compress -------------//
  229. // compress a memory block to another memory block
  230. // <unsigned char *> inPtr address of decompressed input data
  231. // <long> inByteLen length of input data (in byte)
  232. // <BitStream *> outStream output stream, BitMemStream or BitFileWrite
  233. //
  234. // return in no. of bits, the size of the compressed data
  235. // call free_storage after compress to free allocated space, if it will
  236. // not going to compress/decompress soon
  237. long Lzw::basic_compress( unsigned char *inPtr, long inByteLen, BitStream *outStream)
  238. {
  239. unsigned char character;
  240. unsigned short stringCode;
  241. unsigned short index;
  242. initialize_storage();
  243. initialize_dictionary();
  244. if ( inByteLen == 0 )
  245. stringCode = END_OF_STREAM;
  246. else
  247. {
  248. stringCode = *inPtr++;
  249. inByteLen--;
  250. }
  251. while ( inByteLen-- > 0)
  252. {
  253. character = *inPtr++;
  254. index = find_child_node( stringCode, character );
  255. if ( DICT( index ).code_value != UNUSED )
  256. stringCode = DICT( index ).code_value;
  257. else
  258. {
  259. DICT( index ).code_value = next_code++;
  260. DICT( index ).parent_code = stringCode;
  261. DICT( index ).character = character;
  262. outStream->output_bits( stringCode, current_code_bits );
  263. stringCode = character;
  264. if ( next_code > MAX_CODE )
  265. {
  266. outStream->output_bits( FLUSH_CODE, current_code_bits );
  267. initialize_dictionary();
  268. }
  269. else if ( next_code > next_bump_code )
  270. {
  271. outStream->output_bits( BUMP_CODE, current_code_bits );
  272. current_code_bits++;
  273. next_bump_code <<= 1;
  274. next_bump_code |= 1;
  275. }
  276. }
  277. }
  278. outStream->output_bits( stringCode, current_code_bits );
  279. outStream->output_bits( END_OF_STREAM, current_code_bits);
  280. // free_storage();
  281. return outStream->bit_offset;
  282. }
  283. // --------- end of function Lzw::basic_compress -------------//
  284. // --------- begin of function Lzw::basic_expand -------------//
  285. // decompress a memory block to another memory block
  286. // <BitStream *> inStream bit stream, can be BitMemStream or BitFileRead
  287. // <unsigned char *> outPtr address of decompressed output data
  288. // (NULL to find the size of decompressed data)
  289. //
  290. // return the no. of byte of the decompressed data
  291. // call free_storage after decompress to free allocated space, if it will
  292. // not going to compress/decompress soon
  293. long Lzw::basic_expand( BitStream *inStream, unsigned char *outPtr)
  294. {
  295. unsigned short newCode;
  296. unsigned short oldCode;
  297. unsigned char character;
  298. unsigned int count;
  299. long outByteLen = 0;
  300. initialize_storage();
  301. for ( ; ; )
  302. {
  303. initialize_dictionary();
  304. oldCode = inStream->input_bits( current_code_bits );
  305. if ( oldCode == END_OF_STREAM )
  306. {
  307. // free_storage();
  308. return outByteLen;
  309. }
  310. character = (unsigned char) oldCode;
  311. if( outPtr )
  312. outPtr[outByteLen] = character;
  313. outByteLen++;
  314. for ( ; ; )
  315. {
  316. newCode = inStream->input_bits( current_code_bits );
  317. if ( newCode == END_OF_STREAM )
  318. {
  319. // free_storage();
  320. return outByteLen;
  321. }
  322. if ( newCode == FLUSH_CODE )
  323. break;
  324. if ( newCode == BUMP_CODE )
  325. {
  326. current_code_bits++;
  327. continue;
  328. }
  329. if ( newCode >= next_code )
  330. {
  331. decode_stack[ 0 ] = character;
  332. count = decode_string( 1, oldCode );
  333. }
  334. else
  335. count = decode_string( 0, newCode );
  336. character = decode_stack[ count - 1 ];
  337. if( outPtr )
  338. {
  339. while ( count > 0 )
  340. outPtr[outByteLen++] = decode_stack[ --count ];
  341. }
  342. else
  343. {
  344. outByteLen += count;
  345. }
  346. err_when( next_code >= TABLE_SIZE);
  347. DICT( next_code ).parent_code = oldCode;
  348. DICT( next_code ).character = character;
  349. next_code++;
  350. oldCode = newCode;
  351. }
  352. }
  353. // free_storage();
  354. return outByteLen;
  355. }
  356. // --------- end of function Lzw::basic_expand -------------//
  357. // --------- begin of function Lzw::find_child_node -------------//
  358. //
  359. // This hashing routine is responsible for finding the table location
  360. // for a string/character combination. The table index is created
  361. // by using an exclusive OR combination of the prefix and character.
  362. // This code also has to check for collisions, and handles them by
  363. // jumping around in the table.
  364. //
  365. unsigned short Lzw::find_child_node( unsigned short parentCode, unsigned char childChar )
  366. {
  367. unsigned short index;
  368. unsigned short offset;
  369. index = ( (unsigned short) childChar << ( BITS - 8 ) ) ^ parentCode;
  370. if ( index == 0 )
  371. offset = 1;
  372. else
  373. offset = TABLE_SIZE - index;
  374. for ( ; ; )
  375. {
  376. if ( DICT( index ).code_value == UNUSED )
  377. return( index );
  378. if ( DICT( index ).parent_code == parentCode &&
  379. DICT( index ).character == childChar )
  380. return( index );
  381. if ( index >= offset )
  382. index -= offset;
  383. else
  384. index += TABLE_SIZE - offset;
  385. }
  386. }
  387. // --------- end of function Lzw::find_child_node -------------//
  388. // --------- begin of function Lzw::decode_string -------------//
  389. //
  390. // This routine decodes a string from the dictionary, and stores it
  391. // in the decode_stack data structure. It returns a count to the
  392. // calling program of how many characters were placed in the stack.
  393. //
  394. unsigned int Lzw::decode_string( unsigned int count, unsigned short code )
  395. {
  396. unsigned short initCode = code;
  397. while ( code > 255 )
  398. {
  399. err_when(count >= TABLE_SIZE);
  400. decode_stack[ count++ ] = DICT( code ).character;
  401. code = DICT( code ).parent_code;
  402. }
  403. decode_stack[ count++ ] = (char) code;
  404. return( count );
  405. }
  406. // --------- end of function Lzw::decode_string -------------//
  407. /*
  408. // --------- begin of function Lzw::output_bits -------------//
  409. //
  410. // if outPtr is null to find the compressed length
  411. // make sure outPtr buffer is long enough
  412. //
  413. void Lzw::output_bits( unsigned char *outPtr, long &outLen, unsigned short stringCode, unsigned int stringLen )
  414. {
  415. // fill low bit first
  416. if( outPtr )
  417. {
  418. outPtr += outLen / 8;
  419. int s = outLen % 8;
  420. int r = 8 - s;
  421. // mask off unused bit
  422. *(unsigned long *)outPtr &= bitMask[s];
  423. // stringCode &= bitMask[stringCode];
  424. // shift stringCode and put them to outPtr
  425. *(unsigned long *)outPtr |= (unsigned long)stringCode << s;
  426. }
  427. outLen += stringLen;
  428. }
  429. // --------- end of function Lzw::output_bits -------------//
  430. // --------- begin of function Lzw::input_bits -------------//
  431. // inPtr is allocated at least 32 bits longer than inCnt
  432. unsigned short Lzw::input_bits( unsigned char *inPtr, long &inCnt, unsigned int stringLen )
  433. {
  434. inPtr += inCnt / 8;
  435. int s = inCnt % 8;
  436. // get longer bits
  437. unsigned long ul = *(unsigned long *)inPtr;
  438. ul >>= s;
  439. inCnt += stringLen;
  440. // mask off leading bits
  441. return (unsigned short)ul & BITS_MASK[stringLen];
  442. }
  443. // --------- end of function Lzw::input_bits -------------//
  444. */