rsync-algo.cc 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. // -*- mode: cpp; mode: fold -*-
  2. // Description /*{{{*/
  3. // $Id: rsync-algo.cc,v 1.3 1999/12/26 06:59:01 jgg Exp $
  4. /* ######################################################################
  5. RSync Algorithrim
  6. The RSync algorithim is attributed to Andrew Tridgell and is a means
  7. for matching blocks between two streams. The algorithrim implemented
  8. here differs slightly in its structure and is carefully optimized to be
  9. able to operate on very large files effectively.
  10. We rely on the RSync rolling weak checksum routine and the MD4 strong
  11. checksum routine. This implementation requires a uniform block size
  12. for each run.
  13. ##################################################################### */
  14. /*}}}*/
  15. // Include files /*{{{*/
  16. #ifdef __GNUG__
  17. #pragma implementation "dsync/rsync-algo.h"
  18. #endif
  19. #include <dsync/rsync-algo.h>
  20. #include <dsync/error.h>
  21. #include <dsync/slidingwindow.h>
  22. #include <dsync/md5.h>
  23. #include <dsync/md4.h>
  24. #include <stdio.h>
  25. #include <inttypes.h>
  26. #include <netinet/in.h>
  27. /*}}}*/
  28. // RollingChecksum - Compute the checksum perscribed by rsync /*{{{*/
  29. // ---------------------------------------------------------------------
  30. /* */
  31. static inline unsigned long RollingChecksum(unsigned char *Start,
  32. unsigned char *End)
  33. {
  34. unsigned long A = 0;
  35. unsigned long B = 0;
  36. /* A = sum(X[i],j,k) B = sum((k-j+1)*X[i],j,k);
  37. Which reduces to the recurrence, B = sum(A[I],j,k); */
  38. for (; Start != End; Start++)
  39. {
  40. A += *Start;
  41. B += A;
  42. }
  43. return (A & 0xFFFF) | (B << 16);
  44. }
  45. /*}}}*/
  46. // GenerateRSync - Compute the rsync blocks for a file /*{{{*/
  47. // ---------------------------------------------------------------------
  48. /* This function generates the RSync checksums for each uniform block in
  49. the file. */
  50. bool GenerateRSync(FileFd &Fd,dsFList::RSyncChecksum &Ck,
  51. unsigned char OutMD5[16],
  52. unsigned long BlockSize)
  53. {
  54. SlidingWindow Wind(Fd);
  55. MD5Summation MD5;
  56. Ck.Tag = dsFList::tRSyncChecksum;
  57. Ck.BlockSize = BlockSize;
  58. Ck.FileSize = Fd.Size();
  59. // Allocate sum storage space
  60. delete [] Ck.Sums;
  61. Ck.Sums = new unsigned char[(Ck.FileSize + BlockSize-1)/BlockSize*20];
  62. // Slide over the file
  63. unsigned char *Start = 0;
  64. unsigned char *End = 0;
  65. unsigned char *Sum = Ck.Sums;
  66. unsigned char *SumEnd = Sum + (Ck.FileSize + BlockSize-1)/BlockSize*20;
  67. while (Sum < SumEnd)
  68. {
  69. // Tail little bit of the file
  70. if ((unsigned)(End - Start) < BlockSize)
  71. {
  72. unsigned char *OldEnd = End;
  73. if (Wind.Extend(Start,End) == false)
  74. return false;
  75. // The file is very small, pretend this is the last block
  76. if ((unsigned)(End - Start) < BlockSize && End != Start)
  77. {
  78. OldEnd = End;
  79. End = Start;
  80. }
  81. // All Done
  82. if (Start == End)
  83. {
  84. /* The last block is rather artifical but can be of use in some
  85. cases. Just remember not to insert it into the hash
  86. search table!! */
  87. *(uint32_t *)Sum = htonl(0xDEADBEEF);
  88. InitMD4(Sum+4);
  89. ComputeMD4Final(Sum+4,Start,OldEnd,OldEnd-Start);
  90. MD5.Add(Start,OldEnd);
  91. Sum += 20;
  92. break;
  93. }
  94. }
  95. // Compute the checksums
  96. MD5.Add(Start,Start+BlockSize);
  97. *(uint32_t *)Sum = htonl(RollingChecksum(Start,Start+BlockSize));
  98. InitMD4(Sum+4);
  99. ComputeMD4Final(Sum+4,Start,Start+BlockSize,BlockSize);
  100. Sum += 20;
  101. Start += BlockSize;
  102. }
  103. if (Sum != SumEnd)
  104. return _error->Error("Size Mismatch generating checksums");
  105. MD5.Result().Value(OutMD5);
  106. return true;
  107. }
  108. /*}}}*/
  109. // RSyncMatch::RSyncMatch - Constructor /*{{{*/
  110. // ---------------------------------------------------------------------
  111. /* This generates the btree and hash table for looking up checksums */
  112. RSyncMatch::RSyncMatch(dsFList::RSyncChecksum const &Ck) : Fast(1 << 16),
  113. Ck(Ck)
  114. {
  115. Indexes = 0;
  116. unsigned int Blocks = (Ck.FileSize + Ck.BlockSize-1)/Ck.BlockSize;
  117. // Drop the last partial block from the hashing
  118. if (Blocks < 3)
  119. return;
  120. Blocks--;
  121. // Setup the index table
  122. Indexes = new uint32_t *[Blocks];
  123. IndexesEnd = Indexes + Blocks;
  124. // Ready the checksum pointers
  125. unsigned char *Sum = Ck.Sums;
  126. unsigned char *SumEnd = Sum + Blocks*20;
  127. for (uint32_t **I = Indexes; Sum < SumEnd; Sum += 20)
  128. {
  129. *I++ = (uint32_t *)Sum;
  130. }
  131. // Sort them
  132. qsort(Indexes,Blocks,sizeof(*Indexes),Sort);
  133. // Generate the hash table
  134. unsigned int Cur = 0;
  135. Hashes[Cur] = Indexes;
  136. for (uint32_t **I = Indexes; I != IndexesEnd; I++)
  137. {
  138. printf("%x\n",**I);
  139. Fast.Set((**I) >> 16);
  140. while (((**I) >> 24) > Cur)
  141. Hashes[Cur++] = I;
  142. }
  143. while (Cur <= 256)
  144. Hashes[Cur++] = IndexesEnd;
  145. for (unsigned int Cur = 1; Cur != 255; Cur++)
  146. {
  147. printf("%u %p %x\n",Hashes[Cur] - Hashes[Cur-1],Hashes[Cur],**Hashes[Cur] >> 24);
  148. }
  149. }
  150. /*}}}*/
  151. // RSyncMatch::~RSyncMatch - Destructor /*{{{*/
  152. // ---------------------------------------------------------------------
  153. /* */
  154. RSyncMatch::~RSyncMatch()
  155. {
  156. delete [] Indexes;
  157. }
  158. /*}}}*/
  159. // RSyncMatch::Sort - QSort function /*{{{*/
  160. // ---------------------------------------------------------------------
  161. /* */
  162. int RSyncMatch::Sort(const void *L,const void *R)
  163. {
  164. if (**(uint32_t **)L == **(uint32_t **)R)
  165. return 0;
  166. if (**(uint32_t **)L > **(uint32_t **)R)
  167. return 1;
  168. return -1;
  169. }
  170. /*}}}*/
  171. bool RSyncMatch::Scan(FileFd &Fd)
  172. {
  173. for (unsigned int Cur = 1; Cur != 256; Cur++)
  174. {
  175. printf("%u %p\n",Hashes[Cur] - Hashes[Cur-1],Hashes[Cur]);
  176. }
  177. return true;
  178. }