dsync-flist.cc 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098
  1. // -*- mode: cpp; mode: fold -*-
  2. // Description /*{{{*/
  3. // $Id: dsync-flist.cc,v 1.27 1999/12/26 06:59:00 jgg Exp $
  4. /* ######################################################################
  5. Dsync FileList is a tool to manipulate and generate the dsync file
  6. listing
  7. Several useful functions are provided, the most notable is to generate
  8. the file list and to dump it. There is also a function to compare the
  9. file list against a local directory tree.
  10. ##################################################################### */
  11. /*}}}*/
  12. // Include files /*{{{*/
  13. #ifdef __GNUG__
  14. #pragma implementation "dsync-flist.h"
  15. #endif
  16. #include "dsync-flist.h"
  17. #include <dsync/cmndline.h>
  18. #include <dsync/error.h>
  19. #include <dsync/md5.h>
  20. #include <dsync/strutl.h>
  21. #include <config.h>
  22. #include <stdio.h>
  23. #include <sys/types.h>
  24. #include <sys/stat.h>
  25. #include <sys/ioctl.h>
  26. #include <utime.h>
  27. #include <unistd.h>
  28. #include <termios.h>
  29. #include <signal.h>
  30. #include <iostream>
  31. using namespace std;
  32. /*}}}*/
  // Externs /*{{{*/
  /* Three output streams that start out sharing cout's stream buffer, a
     stream opened on /dev/null, and the column count used when laying out
     the progress meter. */
  std::ostream c0out(std::cout.rdbuf());
  std::ostream c1out(std::cout.rdbuf());
  std::ostream c2out(std::cout.rdbuf());
  std::ofstream devnull("/dev/null");

  // Default width; SigWinch() replaces it with the terminal's real width
  unsigned int ScreenWidth = 80;
  39. /*}}}*/
  40. // Progress::Progress - Constructor /*{{{*/
  41. // ---------------------------------------------------------------------
  42. /* */
  43. Progress::Progress()
  44. {
  45. Quiet = false;
  46. if (_config->FindI("quiet",0) > 0)
  47. Quiet = true;
  48. DirCount = 0;
  49. FileCount = 0;
  50. LinkCount = 0;
  51. Bytes = 0;
  52. CkSumBytes = 0;
  53. gettimeofday(&StartTime,0);
  54. }
  55. /*}}}*/
  56. // Progress::Done - Clear the progress meter /*{{{*/
  57. // ---------------------------------------------------------------------
  58. /* */
  59. void Progress::Done()
  60. {
  61. if (Quiet == false)
  62. c0out << '\r' << BlankLine << '\r' << flush;
  63. BlankLine[0] = 0;
  64. }
  65. /*}}}*/
  66. // Progress::ElaspedTime - Return the time that has elapsed /*{{{*/
  67. // ---------------------------------------------------------------------
  68. /* Computes the time difference with maximum accuracy */
  69. double Progress::ElapsedTime()
  70. {
  71. // Compute the CPS and elapsed time
  72. struct timeval Now;
  73. gettimeofday(&Now,0);
  74. return Now.tv_sec - StartTime.tv_sec + (Now.tv_usec -
  75. StartTime.tv_usec)/1000000.0;
  76. }
  77. /*}}}*/
  78. // Progress::Update - Update the meter /*{{{*/
  79. // ---------------------------------------------------------------------
  80. /* */
  81. void Progress::Update(const char *Directory)
  82. {
  83. LastCount = DirCount+LinkCount+FileCount;
  84. if (Quiet == true)
  85. return;
  86. // Put the number of files and bytes at the end of the meter
  87. char S[1024];
  88. if (ScreenWidth > sizeof(S)-1)
  89. ScreenWidth = sizeof(S)-1;
  90. unsigned int Len = snprintf(S,sizeof(S),"|%lu %sb",
  91. DirCount+LinkCount+FileCount,
  92. SizeToStr(Bytes).c_str());
  93. memmove(S + (ScreenWidth - Len),S,Len+1);
  94. memset(S,' ',ScreenWidth - Len);
  95. // Put the directory name at the front, possibly shortened
  96. if (Directory == 0 || Directory[0] == 0)
  97. S[snprintf(S,sizeof(S),"<root>")] = ' ';
  98. else
  99. {
  100. // If the path is too long fix it and prefix it with '...'
  101. if (strlen(Directory) >= ScreenWidth - Len - 1)
  102. {
  103. S[snprintf(S,sizeof(S),"%s",Directory +
  104. strlen(Directory) - ScreenWidth + Len + 1)] = ' ';
  105. S[0] = '.'; S[1] = '.'; S[2] = '.';
  106. }
  107. else
  108. S[snprintf(S,sizeof(S),"%s",Directory)] = ' ';
  109. }
  110. strcpy(LastLine,S);
  111. c0out << S << '\r' << flush;
  112. memset(BlankLine,' ',strlen(S));
  113. BlankLine[strlen(S)] = 0;
  114. }
  115. /*}}}*/
  116. // Progress::Stats - Show a statistics report /*{{{*/
  117. // ---------------------------------------------------------------------
  118. /* */
  119. void Progress::Stats(bool CkSum)
  120. {
  121. // Display some interesting statistics
  122. double Elapsed = ElapsedTime();
  123. c1out << DirCount << " directories, " << FileCount <<
  124. " files and " << LinkCount << " links (" <<
  125. (DirCount+FileCount+LinkCount) << "). ";
  126. if (CkSum == true)
  127. {
  128. if (CkSumBytes == Bytes)
  129. c1out << "Total Size is " << SizeToStr(Bytes) << "b. ";
  130. else
  131. c1out << SizeToStr(CkSumBytes) << '/' <<
  132. SizeToStr(Bytes) << "b hashed.";
  133. }
  134. else
  135. c1out << "Total Size is " << SizeToStr(Bytes) << "b. ";
  136. c1out << endl;
  137. c1out << "Elapsed time " << TimeToStr((long)Elapsed) <<
  138. " (" << SizeToStr((DirCount+FileCount+LinkCount)/Elapsed) <<
  139. " files/sec) ";
  140. if (CkSumBytes != 0)
  141. c1out << " (" << SizeToStr(CkSumBytes/Elapsed) << "b/s hash)";
  142. c1out << endl;
  143. }
  144. /*}}}*/
  145. // ListGenerator::ListGenerator - Constructor /*{{{*/
  146. // ---------------------------------------------------------------------
  147. /* */
  148. ListGenerator::ListGenerator()
  149. {
  150. Act = !_config->FindB("noact",false);
  151. StripDepth = _config->FindI("FileList::CkSum-PathStrip",0);
  152. Verbose = false;
  153. if (_config->FindI("verbose",0) > 0)
  154. Verbose = true;
  155. DB = 0;
  156. DBIO = 0;
  157. // Set RSync checksum limits
  158. MinRSyncSize = _config->FindI("FileList::MinRSyncSize",0);
  159. if (MinRSyncSize == 0)
  160. MinRSyncSize = 1;
  161. if (_config->FindB("FileList::RSync-Hashes",false) == false)
  162. MinRSyncSize = 0;
  163. // Load the rsync filter
  164. if (RSyncFilter.LoadFilter(_config->Tree("FList::RSync-Filter")) == false)
  165. return;
  166. // Load the clean filter
  167. if (RemoveFilter.LoadFilter(_config->Tree("FList::Clean-Filter")) == false)
  168. return;
  169. }
  170. /*}}}*/
  171. // ListGenerator::~ListGenerator - Destructor /*{{{*/
  172. // ---------------------------------------------------------------------
  173. /* */
  174. ListGenerator::~ListGenerator()
  175. {
  176. delete DB;
  177. delete DBIO;
  178. }
  179. /*}}}*/
  180. // ListGenerator::Visit - Collect statistics about the tree /*{{{*/
  181. // ---------------------------------------------------------------------
  182. /* */
  183. int ListGenerator::Visit(const char *Directory,const char *File,
  184. struct stat const &Stat)
  185. {
  186. if (Prog.DirCount+Prog.LinkCount+Prog.FileCount - Prog.LastCount > 100 ||
  187. File == 0)
  188. Prog.Update(Directory);
  189. // Ignore directory enters
  190. if (File == 0)
  191. return 0;
  192. // Increment our counters
  193. if (S_ISDIR(Stat.st_mode) != 0)
  194. Prog.DirCount++;
  195. else
  196. {
  197. if (S_ISLNK(Stat.st_mode) != 0)
  198. Prog.LinkCount++;
  199. else
  200. Prog.FileCount++;
  201. }
  202. // Normal file
  203. if (S_ISREG(Stat.st_mode) != 0)
  204. Prog.Bytes += Stat.st_size;
  205. // Look for files to erase
  206. if (S_ISDIR(Stat.st_mode) == 0 &&
  207. RemoveFilter.Test(Directory,File) == false)
  208. {
  209. Prog.Hide();
  210. c1out << "Unlinking " << Directory << File << endl;
  211. Prog.Show();
  212. if (Act == true && unlink(File) != 0)
  213. {
  214. _error->Errno("unlink","Failed to remove %s%s",Directory,File);
  215. return -1;
  216. }
  217. return 1;
  218. }
  219. return 0;
  220. }
  221. /*}}}*/
  222. // ListGenerator::EmitMD5 - Perform md5 lookup caching /*{{{*/
  223. // ---------------------------------------------------------------------
  224. /* This looks up the file in the cache to see if it is one we already
  225. know the hash too */
  226. bool ListGenerator::EmitMD5(const char *Dir,const char *File,
  227. struct stat const &St,unsigned char MD5[16],
  228. unsigned int Tag,unsigned int Flag)
  229. {
  230. if ((IO->Header.Flags[Tag] & Flag) != Flag)
  231. return true;
  232. // Lookup the md5 in the old file list
  233. if (DB != 0 && (DBIO->Header.Flags[Tag] & Flag) == Flag)
  234. {
  235. // Do a lookup and make sure the timestamps match
  236. dsFList List;
  237. bool Hit = false;
  238. const char *iDir = Dir;
  239. unsigned int Strip = StripDepth;
  240. while (true)
  241. {
  242. if (DB->Lookup(*DBIO,iDir,File,List) == true && List.Entity != 0)
  243. {
  244. if ((signed)(List.Entity->ModTime + List.Head.Epoch) == St.st_mtime)
  245. Hit = true;
  246. break;
  247. }
  248. if (Strip == 0)
  249. break;
  250. Strip--;
  251. for (; *iDir != 0 && *iDir != '/'; iDir++);
  252. if (*iDir == 0 || iDir[1] == 0)
  253. break;
  254. iDir++;
  255. }
  256. if (Hit == true)
  257. {
  258. /* Both hardlinks and normal files have md5s, also check that the
  259. sizes match */
  260. if (List.File != 0 && List.File->Size == (unsigned)St.st_size)
  261. {
  262. memcpy(MD5,List.File->MD5,sizeof(List.File->MD5));
  263. return true;
  264. }
  265. }
  266. }
  267. Prog.CkSumBytes += St.st_size;
  268. if (Verbose == true)
  269. {
  270. Prog.Hide();
  271. c1out << "MD5 " << Dir << File << endl;
  272. Prog.Show();
  273. }
  274. return dsGenFileList::EmitMD5(Dir,File,St,MD5,Tag,Flag);
  275. }
  276. /*}}}*/
  277. // ListGenerator::NeedsRSync - Check if a file is rsyncable /*{{{*/
  278. // ---------------------------------------------------------------------
  279. /* This checks the rsync filter list and the rsync size limit*/
  280. bool ListGenerator::NeedsRSync(const char *Dir,const char *File,
  281. dsFList::NormalFile &F)
  282. {
  283. if (MinRSyncSize == 0)
  284. return false;
  285. if (F.Size <= MinRSyncSize)
  286. return false;
  287. if (RSyncFilter.Test(Dir,File) == false)
  288. return false;
  289. /* Add it to the counters, EmitMD5 will not be called if rsync checksums
  290. are being built. */
  291. Prog.CkSumBytes += F.Size;
  292. if (Verbose == true)
  293. {
  294. Prog.Hide();
  295. c1out << "RSYNC " << Dir << File << endl;
  296. Prog.Show();
  297. }
  298. return true;
  299. }
  300. /*}}}*/
  301. // Compare::Compare - Constructor /*{{{*/
  302. // ---------------------------------------------------------------------
  303. /* */
  304. Compare::Compare()
  305. {
  306. Verbose = false;
  307. if (_config->FindI("verbose",0) > 0)
  308. Verbose = true;
  309. Act = !_config->FindB("noact",false);
  310. DoDelete = _config->FindB("delete",false);
  311. }
  312. /*}}}*/
  313. // Compare::Visit - Collect statistics about the tree /*{{{*/
  314. // ---------------------------------------------------------------------
  315. /* */
  316. bool Compare::Visit(dsFList &List,string Dir)
  317. {
  318. if (Prog.DirCount+Prog.LinkCount+Prog.FileCount - Prog.LastCount > 100 ||
  319. List.Tag == dsFList::tDirStart)
  320. Prog.Update(Dir.c_str());
  321. // Increment our counters
  322. if (List.Tag == dsFList::tDirectory)
  323. Prog.DirCount++;
  324. else
  325. {
  326. if (List.Tag == dsFList::tSymlink)
  327. Prog.LinkCount++;
  328. if (List.Tag == dsFList::tNormalFile ||
  329. List.Tag == dsFList::tHardLink ||
  330. List.Tag == dsFList::tDeviceSpecial)
  331. Prog.FileCount++;
  332. }
  333. // Normal file
  334. if (List.File != 0)
  335. Prog.Bytes += List.File->Size;
  336. return true;
  337. }
  338. /*}}}*/
  339. // Compare::PrintPath - Print out a path string /*{{{*/
  340. // ---------------------------------------------------------------------
  341. /* This handles the absolute paths that can occure while processing */
  342. void Compare::PrintPath(ostream &out,string Dir,string Name)
  343. {
  344. if (Name[0] != '/')
  345. out << Dir << Name << endl;
  346. else
  347. out << string(Name,Base.length()) << endl;
  348. }
  349. /*}}}*/
  350. // LookupPath - Find a full path within the database /*{{{*/
  351. // ---------------------------------------------------------------------
  352. /* This does the necessary path simplification and symlink resolution
  353. to locate the path safely. The file must exist locally inorder to
  354. resolve the local symlinks. */
  355. bool LookupPath(const char *Path,dsFList &List,dsFileListDB &DB,
  356. dsFList::IO &IO)
  357. {
  358. char Buffer[2024];
  359. strcpy(Buffer,Path);
  360. if (SimplifyPath(Buffer) == false ||
  361. ResolveLink(Buffer,sizeof(Buffer)) == false)
  362. return false;
  363. // Strip off the final component name
  364. char *I = Buffer + strlen(Buffer);
  365. for (; I != Buffer && (*I == '/' || *I == 0); I--);
  366. for (; I != Buffer && *I != '/'; I--);
  367. if (I != Buffer)
  368. {
  369. memmove(I+1,I,strlen(I) + 1);
  370. I++;
  371. *I = 0;
  372. I++;
  373. if (DB.Lookup(IO,Buffer,I,List) == false)
  374. return false;
  375. }
  376. else
  377. {
  378. if (DB.Lookup(IO,"",I,List) == false)
  379. return false;
  380. }
  381. return true;
  382. }
  383. /*}}}*/
  384. // PrintMD5 - Prints the MD5 of a file in the form similar to md5sum /*{{{*/
  385. // ---------------------------------------------------------------------
  386. /* */
  387. void PrintMD5(dsFList &List,const char *Dir,const char *File = 0)
  388. {
  389. if (List.File == 0 ||
  390. List.Head.Flags[List.Tag] & dsFList::NormalFile::FlMD5 == 0)
  391. return;
  392. char S[16*2+1];
  393. for (unsigned int I = 0; I != 16; I++)
  394. sprintf(S+2*I,"%02x",List.File->MD5[I]);
  395. S[16*2] = 0;
  396. if (File == 0)
  397. cout << S << " " << Dir << List.File->Name << endl;
  398. else
  399. cout << S << " " << File << endl;
  400. }
  401. /*}}}*/
  402. // DoGenerate - The Generate Command /*{{{*/
  403. // ---------------------------------------------------------------------
  404. /* */
  405. bool DoGenerate(CommandLine &CmdL)
  406. {
  407. ListGenerator Gen;
  408. if (_error->PendingError() == true)
  409. return false;
  410. // Load the filter list
  411. if (Gen.Filter.LoadFilter(_config->Tree("FileList::Filter")) == false)
  412. return false;
  413. // Load the delay filter list
  414. if (Gen.PreferFilter.LoadFilter(_config->Tree("FileList::Prefer-Filter")) == false)
  415. return false;
  416. // Determine the ordering to use
  417. string Ord = _config->Find("FileList::Order","tree");
  418. if (stringcasecmp(Ord,"tree") == 0)
  419. Gen.Type = dsGenFileList::Tree;
  420. else
  421. {
  422. if (stringcasecmp(Ord,"breadth") == 0)
  423. Gen.Type = dsGenFileList::Breadth;
  424. else
  425. {
  426. if (stringcasecmp(Ord,"depth") == 0)
  427. Gen.Type = dsGenFileList::Depth;
  428. else
  429. return _error->Error("Invalid ordering %s, must be tree, breadth or detph",Ord.c_str());
  430. }
  431. }
  432. if (CmdL.FileList[1] == 0)
  433. return _error->Error("You must specify a file name");
  434. string List = CmdL.FileList[1];
  435. // Open the original file to pull cached Check Sums out of
  436. if (FileExists(List) == true &&
  437. _config->FindB("FileList::MD5-Hashes",false) == true)
  438. {
  439. Gen.DBIO = new dsMMapIO(List);
  440. if (_error->PendingError() == true)
  441. return false;
  442. Gen.DB = new dsFileListDB;
  443. if (Gen.DB->Generate(*Gen.DBIO) == false)
  444. return false;
  445. }
  446. // Sub scope to close the file
  447. {
  448. FdIO IO(List + ".new",FileFd::WriteEmpty);
  449. // Set the flags for the list
  450. if (_config->FindB("FileList::MD5-Hashes",false) == true)
  451. {
  452. IO.Header.Flags[dsFList::tNormalFile] |= dsFList::NormalFile::FlMD5;
  453. IO.Header.Flags[dsFList::tHardLink] |= dsFList::HardLink::FlMD5;
  454. }
  455. if (_config->FindB("FileList::Permissions",false) == true)
  456. {
  457. IO.Header.Flags[dsFList::tDirectory] |= dsFList::Directory::FlPerm;
  458. IO.Header.Flags[dsFList::tNormalFile] |= dsFList::NormalFile::FlPerm;
  459. IO.Header.Flags[dsFList::tHardLink] |= dsFList::HardLink::FlPerm;
  460. }
  461. if (_config->FindB("FileList::Ownership",false) == true)
  462. {
  463. IO.Header.Flags[dsFList::tDirectory] |= dsFList::Directory::FlOwner;
  464. IO.Header.Flags[dsFList::tNormalFile] |= dsFList::NormalFile::FlOwner;
  465. IO.Header.Flags[dsFList::tSymlink] |= dsFList::Symlink::FlOwner;
  466. IO.Header.Flags[dsFList::tDeviceSpecial] |= dsFList::DeviceSpecial::FlOwner;
  467. IO.Header.Flags[dsFList::tHardLink] |= dsFList::HardLink::FlOwner;
  468. }
  469. if (Gen.Go("./",IO) == false)
  470. return false;
  471. Gen.Prog.Done();
  472. Gen.Prog.Stats(_config->FindB("FileList::MD5-Hashes",false));
  473. delete Gen.DB;
  474. Gen.DB = 0;
  475. delete Gen.DBIO;
  476. Gen.DBIO = 0;
  477. }
  478. // Just in case :>
  479. if (_error->PendingError() == true)
  480. return false;
  481. // Swap files
  482. bool OldExists = FileExists(List);
  483. if (OldExists == true && rename(List.c_str(),(List + "~").c_str()) != 0)
  484. return _error->Errno("rename","Unable to rename %s to %s~",List.c_str(),List.c_str());
  485. if (rename((List + ".new").c_str(),List.c_str()) != 0)
  486. return _error->Errno("rename","Unable to rename %s.new to %s",List.c_str(),List.c_str());
  487. if (OldExists == true && unlink((List + "~").c_str()) != 0)
  488. return _error->Errno("unlink","Unable to unlink %s~",List.c_str());
  489. return true;
  490. }
  491. /*}}}*/
  492. // DoDump - Dump the contents of a file list /*{{{*/
  493. // ---------------------------------------------------------------------
  494. /* This displays a short one line dump of each record in the file */
  495. bool DoDump(CommandLine &CmdL)
  496. {
  497. if (CmdL.FileList[1] == 0)
  498. return _error->Error("You must specify a file name");
  499. // Open the file
  500. dsMMapIO IO(CmdL.FileList[1]);
  501. if (_error->PendingError() == true)
  502. return false;
  503. dsFList List;
  504. unsigned long CountDir = 0;
  505. unsigned long CountFile = 0;
  506. unsigned long CountLink = 0;
  507. unsigned long CountLinkReal = 0;
  508. unsigned long NumFiles = 0;
  509. unsigned long NumDirs = 0;
  510. unsigned long NumLinks = 0;
  511. double Bytes = 0;
  512. while (List.Step(IO) == true)
  513. {
  514. if (List.Print(cout) == false)
  515. return false;
  516. switch (List.Tag)
  517. {
  518. case dsFList::tDirMarker:
  519. case dsFList::tDirStart:
  520. case dsFList::tDirectory:
  521. {
  522. CountDir += List.Dir.Name.length();
  523. if (List.Tag == dsFList::tDirectory)
  524. NumDirs++;
  525. break;
  526. }
  527. case dsFList::tHardLink:
  528. case dsFList::tNormalFile:
  529. {
  530. CountFile += List.File->Name.length();
  531. NumFiles++;
  532. Bytes += List.File->Size;
  533. break;
  534. }
  535. case dsFList::tSymlink:
  536. {
  537. CountFile += List.SLink.Name.length();
  538. CountLink += List.SLink.To.length();
  539. unsigned int Tmp = List.SLink.To.length();
  540. if ((List.SLink.Compress & (1<<7)) == (1<<7))
  541. Tmp -= List.SLink.Name.length();
  542. Tmp -= List.SLink.Compress & 0x7F;
  543. CountLinkReal += Tmp;
  544. NumLinks++;
  545. break;
  546. }
  547. }
  548. if (List.Tag == dsFList::tTrailer)
  549. break;
  550. }
  551. cout << "String Sizes: Dirs=" << CountDir << " Files=" << CountFile <<
  552. " Links=" << CountLink << " (" << CountLinkReal << ")";
  553. cout << " Total=" << CountDir+CountFile+CountLink << endl;
  554. cout << "Entries: Dirs=" << NumDirs << " Files=" << NumFiles <<
  555. " Links=" << NumLinks << " Total=" << NumDirs+NumFiles+NumLinks << endl;
  556. cout << "Totals " << SizeToStr(Bytes) << "b." << endl;
  557. return true;
  558. }
  559. /*}}}*/
  560. // DoMkHardLinks - Generate hardlinks for duplicated files /*{{{*/
  561. // ---------------------------------------------------------------------
  562. /* This scans the archive for any duplicated files, it uses the MD5 of each
  563. file and searches a map for another match then links the two */
  /* A 16 byte MD5 digest wrapped so it can be ordered and compared, which
     lets it serve as a map key. Every operator is a bytewise memcmp of the
     two digests. */
  struct Md5Cmp
  {
     unsigned char MD5[16];

     // <0, 0 or >0 as this digest sorts before, equal to or after rhs
     int Order(const Md5Cmp &rhs) const {return memcmp(MD5,rhs.MD5,sizeof(MD5));}

     int operator <(const Md5Cmp &rhs) const {return Order(rhs) < 0;}
     int operator <=(const Md5Cmp &rhs) const {return Order(rhs) <= 0;}
     int operator >=(const Md5Cmp &rhs) const {return Order(rhs) >= 0;}
     int operator >(const Md5Cmp &rhs) const {return Order(rhs) > 0;}
     int operator ==(const Md5Cmp &rhs) const {return Order(rhs) == 0;}

     // Copies the 16 bytes Md points at
     Md5Cmp(unsigned char Md[16]) {memcpy(MD5,Md,sizeof(MD5));}
  };
  /* Where a file lives: the directory string and the file name, kept
     separately. */
  struct Location
  {
     string Dir;
     string File;

     // Both parts empty
     Location() {}

     // The parameters shadow the members, hence the explicit this->
     Location(string Dir,string File)
     {
        this->Dir = Dir;
        this->File = File;
     }
  };
  581. bool DoMkHardLinks(CommandLine &CmdL)
  582. {
  583. if (CmdL.FileList[1] == 0)
  584. return _error->Error("You must specify a file name");
  585. // Open the file
  586. dsMMapIO IO(CmdL.FileList[1]);
  587. if (_error->PendingError() == true)
  588. return false;
  589. dsFList List;
  590. if (List.Step(IO) == false || List.Tag != dsFList::tHeader)
  591. return _error->Error("Unable to read header");
  592. // Make sure we have hashes
  593. if ((IO.Header.Flags[dsFList::tNormalFile] &
  594. dsFList::NormalFile::FlMD5) == 0 ||
  595. (IO.Header.Flags[dsFList::tHardLink] &
  596. dsFList::HardLink::FlMD5) == 0)
  597. return _error->Error("The file list must contain MD5 hashes");
  598. string LastDir;
  599. double Savings = 0;
  600. unsigned long Hits = 0;
  601. bool Act = !_config->FindB("noact",false);
  602. map<Md5Cmp,Location> Map;
  603. while (List.Step(IO) == true)
  604. {
  605. // Entering a new directory, just store it..
  606. if (List.Tag == dsFList::tDirStart)
  607. {
  608. LastDir = List.Dir.Name;
  609. continue;
  610. }
  611. /* Handle normal file entities. Pre-existing hard links we treat
  612. exactly like a normal file, if two hard link chains are identical
  613. one will be destroyed and its items placed on the other
  614. automatcially */
  615. if (List.File != 0)
  616. {
  617. map<Md5Cmp,Location>::const_iterator I = Map.find(Md5Cmp(List.File->MD5));
  618. if (I == Map.end())
  619. {
  620. Map[Md5Cmp(List.File->MD5)] = Location(LastDir,List.File->Name);
  621. continue;
  622. }
  623. // Compute full file names for both
  624. string FileA = (*I).second.Dir + (*I).second.File;
  625. struct stat StA;
  626. string FileB = LastDir + List.File->Name;
  627. struct stat StB;
  628. // Stat them
  629. if (lstat(FileA.c_str(),&StA) != 0)
  630. {
  631. _error->Warning("Unable to stat %s",FileA.c_str());
  632. continue;
  633. }
  634. if (lstat(FileB.c_str(),&StB) != 0)
  635. {
  636. _error->Warning("Unable to stat %s",FileB.c_str());
  637. continue;
  638. }
  639. // Verify they are on the same filesystem
  640. if (StA.st_dev != StB.st_dev || StA.st_size != StB.st_size)
  641. continue;
  642. // And not merged..
  643. if (StA.st_ino == StB.st_ino)
  644. continue;
  645. c1out << "Dup " << FileA << endl;
  646. c1out << " " << FileB << endl;
  647. // Relink the file and copy the mod time from the oldest one.
  648. if (Act == true)
  649. {
  650. if (unlink(FileB.c_str()) != 0)
  651. return _error->Errno("unlink","Failed to unlink %s",FileB.c_str());
  652. if (link(FileA.c_str(),FileB.c_str()) != 0)
  653. return _error->Errno("link","Failed to link %s to %s",FileA.c_str(),FileB.c_str());
  654. if (StB.st_mtime > StA.st_mtime)
  655. {
  656. struct utimbuf Time;
  657. Time.actime = Time.modtime = StB.st_mtime;
  658. if (utime(FileB.c_str(),&Time) != 0)
  659. _error->Warning("Unable to set mod time for %s",FileB.c_str());
  660. }
  661. }
  662. // Counters
  663. Savings += List.File->Size;
  664. Hits++;
  665. continue;
  666. }
  667. if (List.Tag == dsFList::tTrailer)
  668. break;
  669. }
  670. cout << "Total space saved by merging " <<
  671. SizeToStr(Savings) << "b. " << Hits << " files affected." << endl;
  672. return true;
  673. }
  674. /*}}}*/
  675. // DoLookup - Lookup a single file in the listing /*{{{*/
  676. // ---------------------------------------------------------------------
  677. /* */
  678. bool DoLookup(CommandLine &CmdL)
  679. {
  680. if (CmdL.FileSize() < 4)
  681. return _error->Error("You must specify a file name, directory name and a entry");
  682. // Open the file
  683. dsMMapIO IO(CmdL.FileList[1]);
  684. if (_error->PendingError() == true)
  685. return false;
  686. // Index it
  687. dsFileListDB DB;
  688. if (DB.Generate(IO) == false)
  689. return false;
  690. dsFList List;
  691. if (DB.Lookup(IO,CmdL.FileList[2],CmdL.FileList[3],List) == false)
  692. return _error->Error("Unable to locate item");
  693. List.Print(cout);
  694. return true;
  695. }
  696. /*}}}*/
  697. // DoMD5Cache - Lookup a stream of files in the listing /*{{{*/
  698. // ---------------------------------------------------------------------
  699. /* This takes a list of files names and prints out their MD5s, if possible
  700. data is used from the cache to save IO */
  701. bool DoMD5Cache(CommandLine &CmdL)
  702. {
  703. struct timeval Start;
  704. gettimeofday(&Start,0);
  705. if (CmdL.FileList[1] == 0)
  706. return _error->Error("You must specify a file name");
  707. // Open the file
  708. dsMMapIO IO(CmdL.FileList[1]);
  709. if (_error->PendingError() == true)
  710. return false;
  711. dsFList List;
  712. if (List.Step(IO) == false || List.Tag != dsFList::tHeader)
  713. return _error->Error("Unable to read header");
  714. // Make sure we have hashes
  715. if ((IO.Header.Flags[dsFList::tNormalFile] &
  716. dsFList::NormalFile::FlMD5) == 0 ||
  717. (IO.Header.Flags[dsFList::tHardLink] &
  718. dsFList::HardLink::FlMD5) == 0)
  719. return _error->Error("The file list must contain MD5 hashes");
  720. // Index it
  721. dsFileListDB DB;
  722. if (DB.Generate(IO) == false)
  723. return false;
  724. // Counters
  725. double Bytes = 0;
  726. double MD5Bytes = 0;
  727. unsigned long Files = 0;
  728. unsigned long Errors = 0;
  729. while (!cin == false)
  730. {
  731. char Buf2[200];
  732. cin.getline(Buf2,sizeof(Buf2));
  733. if (Buf2[0] == 0)
  734. continue;
  735. Files++;
  736. // Stat the file
  737. struct stat St;
  738. if (stat(Buf2,&St) != 0)
  739. {
  740. cout << "<ERROR> " << Buf2 << "(stat)" << endl;
  741. Errors++;
  742. continue;
  743. }
  744. // Lookup in the cache and make sure the file has not changed
  745. if (LookupPath(Buf2,List,DB,IO) == false ||
  746. (signed)(List.Entity->ModTime + List.Head.Epoch) != St.st_mtime ||
  747. (List.File != 0 && List.File->Size != (unsigned)St.st_size))
  748. {
  749. _error->DumpErrors();
  750. // Open the file and hash it
  751. MD5Summation Sum;
  752. FileFd Fd(Buf2,FileFd::ReadOnly);
  753. if (_error->PendingError() == true)
  754. {
  755. cout << "<ERROR> " << Buf2 << "(open)" << endl;
  756. continue;
  757. }
  758. if (Sum.AddFD(Fd.Fd(),Fd.Size()) == false)
  759. {
  760. cout << "<ERROR> " << Buf2 << "(md5)" << endl;
  761. continue;
  762. }
  763. // Store the new hash
  764. List.Tag = dsFList::tNormalFile;
  765. Sum.Result().Value(List.File->MD5);
  766. List.File->Size = (unsigned)St.st_size;
  767. MD5Bytes += List.File->Size;
  768. }
  769. PrintMD5(List,0,Buf2);
  770. Bytes += List.File->Size;
  771. }
  772. // Print out a summary
  773. struct timeval Now;
  774. gettimeofday(&Now,0);
  775. double Delta = Now.tv_sec - Start.tv_sec + (Now.tv_usec - Start.tv_usec)/1000000.0;
  776. cerr << Files << " files, " << SizeToStr(MD5Bytes) << "/" <<
  777. SizeToStr(Bytes) << " MD5'd, " << TimeToStr((unsigned)Delta) << endl;;
  778. return true;
  779. }
  780. /*}}}*/
  781. // DoMD5Dump - Dump the md5 list /*{{{*/
  782. // ---------------------------------------------------------------------
  783. /* This displays a short one line dump of each record in the file */
  784. bool DoMD5Dump(CommandLine &CmdL)
  785. {
  786. if (CmdL.FileList[1] == 0)
  787. return _error->Error("You must specify a file name");
  788. // Open the file
  789. dsMMapIO IO(CmdL.FileList[1]);
  790. if (_error->PendingError() == true)
  791. return false;
  792. dsFList List;
  793. if (List.Step(IO) == false || List.Tag != dsFList::tHeader)
  794. return _error->Error("Unable to read header");
  795. // Make sure we have hashes
  796. if ((IO.Header.Flags[dsFList::tNormalFile] &
  797. dsFList::NormalFile::FlMD5) == 0 ||
  798. (IO.Header.Flags[dsFList::tHardLink] &
  799. dsFList::HardLink::FlMD5) == 0)
  800. return _error->Error("The file list must contain MD5 hashes");
  801. string Dir;
  802. while (List.Step(IO) == true)
  803. {
  804. if (List.Tag == dsFList::tDirStart)
  805. {
  806. Dir = List.Dir.Name;
  807. continue;
  808. }
  809. PrintMD5(List,Dir.c_str());
  810. if (List.Tag == dsFList::tTrailer)
  811. break;
  812. }
  813. return true;
  814. }
  815. /*}}}*/
  816. // DoVerify - Verify the local tree against a file list /*{{{*/
  817. // ---------------------------------------------------------------------
  818. /* */
  819. bool DoVerify(CommandLine &CmdL)
  820. {
  821. if (CmdL.FileList[1] == 0)
  822. return _error->Error("You must specify a file name");
  823. // Open the file
  824. dsMMapIO IO(CmdL.FileList[1]);
  825. if (_error->PendingError() == true)
  826. return false;
  827. /* Set the hashing type, we can either do a full verify or only a date
  828. check verify */
  829. Compare Comp;
  830. if (_config->FindB("FileList::MD5-Hashes",false) == true)
  831. Comp.HashLevel = dsDirCompare::Md5Always;
  832. else
  833. Comp.HashLevel = dsDirCompare::Md5Date;
  834. // Scan the file list
  835. if (Comp.Process(".",IO) == false)
  836. return false;
  837. Comp.Prog.Done();
  838. // Report stats
  839. Comp.Prog.Stats((IO.Header.Flags[dsFList::tNormalFile] & dsFList::NormalFile::FlMD5) != 0 ||
  840. (IO.Header.Flags[dsFList::tHardLink] & dsFList::HardLink::FlMD5) != 0);
  841. return true;
  842. }
  843. /*}}}*/
  844. // SigWinch - Window size change signal handler /*{{{*/
  845. // ---------------------------------------------------------------------
  846. /* */
  847. void SigWinch(int)
  848. {
  849. // Riped from GNU ls
  850. #ifdef TIOCGWINSZ
  851. struct winsize ws;
  852. if (ioctl(1, TIOCGWINSZ, &ws) != -1 && ws.ws_col >= 5)
  853. ScreenWidth = ws.ws_col - 1;
  854. if (ScreenWidth > 250)
  855. ScreenWidth = 250;
  856. #endif
  857. }
  858. /*}}}*/
  859. // ShowHelp - Show the help screen /*{{{*/
  860. // ---------------------------------------------------------------------
  861. /* */
  862. bool ShowHelp(CommandLine &CmdL)
  863. {
  864. cout << PACKAGE << ' ' << VERSION << " for " << ARCHITECTURE <<
  865. " compiled on " << __DATE__ << " " << __TIME__ << endl;
  866. cout <<
  867. "Usage: dsync-flist [options] command [file]\n"
  868. "\n"
  869. "dsync-flist is a tool for manipulating dsync binary file lists.\n"
  870. "It can generate the lists and check them against a tree.\n"
  871. "\n"
  872. "Commands:\n"
  873. " generate - Build a file list\n"
  874. " help - This help text\n"
  875. " dump - Display the contents of the list\n"
  876. " md5sums - Print out 'indices' file, suitable for use with md5sum\n"
  877. " md5cache - Print out md5sums of the files given on stdin\n"
  878. " link-dups - Look for duplicate files\n"
  879. " lookup - Display a single file record\n"
  880. " verify - Compare the file list against the local directory\n"
  881. "\n"
  882. "Options:\n"
  883. " -h This help text.\n"
  884. " -q Loggable output - no progress indicator\n"
  885. " -qq No output except for errors\n"
  886. " -i=? Include pattern\n"
  887. " -e=? Exclude pattern\n"
  888. " -c=? Read this configuration file\n"
  889. " -o=? Set an arbitary configuration option, ie -o dir::cache=/tmp\n"
  890. "See the dsync-flist(1) and dsync.conf(5) manual\n"
  891. "pages for more information." << endl;
  892. return 100;
  893. }
  894. /*}}}*/
  895. int main(int argc, const char *argv[])
  896. {
  897. CommandLine::Args Args[] = {
  898. {'h',"help","help",0},
  899. {'q',"quiet","quiet",CommandLine::IntLevel},
  900. {'q',"silent","quiet",CommandLine::IntLevel},
  901. {'i',"include","FileList::Filter:: + ",CommandLine::HasArg},
  902. {'e',"exclude","FileList::Filter:: - ",CommandLine::HasArg},
  903. {'n',"no-act","noact",0},
  904. {'v',"verbose","verbose",CommandLine::IntLevel},
  905. {0,"delete","delete",0},
  906. {0,"prefer-include","FileList::Prefer-Filter:: + ",CommandLine::HasArg},
  907. {0,"prefer-exclude","FileList::Prefer-Filter:: - ",CommandLine::HasArg},
  908. {0,"pi","FileList::Prefer-Filter:: + ",CommandLine::HasArg},
  909. {0,"pe","FileList::Prefer-Filter:: - ",CommandLine::HasArg},
  910. {0,"clean-include","FList::Clean-Filter:: + ",CommandLine::HasArg},
  911. {0,"clean-exclude","FList::Clean-Filter:: - ",CommandLine::HasArg},
  912. {0,"ci","FList::Clean-Filter:: + ",CommandLine::HasArg},
  913. {0,"ce","FList::Clean-Filter:: - ",CommandLine::HasArg},
  914. {0,"rsync-include","FList::RSync-Filter:: + ",CommandLine::HasArg},
  915. {0,"rsync-exclude","FList::RSync-Filter:: - ",CommandLine::HasArg},
  916. {0,"ri","FList::RSync-Filter:: + ",CommandLine::HasArg},
  917. {0,"re","FList::RSync-Filter:: - ",CommandLine::HasArg},
  918. {0,"md5","FileList::MD5-Hashes",0},
  919. {0,"rsync","FileList::RSync-Hashes",0},
  920. {0,"rsync-min","FileList::MinRSyncSize",CommandLine::HasArg},
  921. {0,"perm","FileList::Permissions",0},
  922. {0,"owner","FileList::Ownership",0},
  923. {0,"order","FileList::Order",CommandLine::HasArg},
  924. {'c',"config-file",0,CommandLine::ConfigFile},
  925. {'o',"option",0,CommandLine::ArbItem},
  926. {0,0,0,0}};
  927. CommandLine::Dispatch Cmds[] = {{"generate",&DoGenerate},
  928. {"help",&ShowHelp},
  929. {"dump",&DoDump},
  930. {"link-dups",&DoMkHardLinks},
  931. {"md5sums",&DoMD5Dump},
  932. {"md5cache",&DoMD5Cache},
  933. {"lookup",&DoLookup},
  934. {"verify",&DoVerify},
  935. {0,0}};
  936. CommandLine CmdL(Args,_config);
  937. if (CmdL.Parse(argc,argv) == false)
  938. {
  939. _error->DumpErrors();
  940. return 100;
  941. }
  942. // See if the help should be shown
  943. if (_config->FindB("help") == true ||
  944. CmdL.FileSize() == 0)
  945. return ShowHelp(CmdL);
  946. // Setup the output streams
  947. /* c0out.rdbuf(cout.rdbuf());
  948. c1out.rdbuf(cout.rdbuf());
  949. c2out.rdbuf(cout.rdbuf()); */
  950. if (_config->FindI("quiet",0) > 0)
  951. c0out.rdbuf(devnull.rdbuf());
  952. if (_config->FindI("quiet",0) > 1)
  953. c1out.rdbuf(devnull.rdbuf());
  954. // Setup the signals
  955. signal(SIGWINCH,SigWinch);
  956. SigWinch(0);
  957. // Match the operation
  958. CmdL.DispatchArg(Cmds);
  959. // Print any errors or warnings found during parsing
  960. if (_error->empty() == false)
  961. {
  962. bool Errors = _error->PendingError();
  963. _error->DumpErrors();
  964. return Errors == true?100:0;
  965. }
  966. return 0;
  967. }