diff.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. <?php
  2. /***************************************************************************\
  3. * SPIP, Systeme de publication pour l'internet *
  4. * *
  5. * Copyright (c) 2001-2014 *
  6. * Arnaud Martin, Antoine Pitrou, Philippe Riviere, Emmanuel Saint-James *
  7. * *
  8. * Ce programme est un logiciel libre distribue sous licence GNU/GPL. *
  9. * Pour plus de details voir le fichier COPYING.txt ou l'aide en ligne. *
  10. \***************************************************************************/
  11. if (!defined("_ECRIRE_INC_VERSION")) return;
  12. //
  13. // LCS (Longest Common Subsequence) en deux versions
  14. // (ref: http://www2.toki.or.id/book/AlgDesignManual/BOOK/BOOK5/NODE208.HTM)
  15. // Version ultra-simplifiee : chaque chaine est une permutation de l'autre
  16. // et on passe en parametre un des deux tableaux de correspondances
  17. // http://code.spip.net/@lcs_opt
  18. function lcs_opt($s) {
  19. $n = count($s);
  20. if (!$n) return array();
  21. $paths = array();
  22. $paths_ymin = array();
  23. $max_len = 0;
  24. // Insertion des points
  25. asort($s);
  26. $max = 400;
  27. foreach ($s as $y => $c) {
  28. if ($max-- < 0) break; # eviter l'explosion memoire des tres gros diff
  29. for ($len = $max_len; $len > 0; $len--) {
  30. if ($paths_ymin[$len] < $y) {
  31. $paths_ymin[$len + 1] = $y;
  32. $paths[$len + 1] = $paths[$len];
  33. $paths[$len + 1][$y] = $c;
  34. break;
  35. }
  36. }
  37. if ($len == 0) {
  38. $paths_ymin[1] = $y;
  39. $paths[1] = array($y => $c);
  40. }
  41. if ($len + 1 > $max_len) $max_len = $len + 1;
  42. }
  43. return $paths[$max_len];
  44. }
  45. // Version normale : les deux chaines n'ont pas ete traitees au prealable
  46. // par la fonction d'appariement
  47. // http://code.spip.net/@lcs
  48. function lcs($s, $t) {
  49. $n = count($s);
  50. $p = count($t);
  51. if (!$n || !$p) return array(0 => array(), 1 => array());
  52. $paths = array();
  53. $paths_ymin = array();
  54. $max_len = 0;
  55. $s_pos = $t_pos = array();
  56. // Insertion des points
  57. foreach ($t as $y => $c) $t_pos[trim($c)][] = $y;
  58. foreach ($s as $x => $c) {
  59. $c = trim($c);
  60. if (!isset($t_pos[$c])) continue;
  61. krsort($t_pos[$c]);
  62. foreach ($t_pos[$c] as $y) {
  63. for ($len = $max_len; $len > 0; $len--) {
  64. if ($paths_ymin[$len] < $y) {
  65. $paths_ymin[$len + 1] = $y;
  66. // On construit le resultat sous forme de chaine d'abord,
  67. // car les tableaux de PHP sont dispendieux en taille memoire
  68. $paths[$len + 1] = $paths[$len]." $x,$y";
  69. break;
  70. }
  71. }
  72. if ($len + 1 > $max_len) $max_len = $len + 1;
  73. if ($len == 0) {
  74. $paths_ymin[1] = $y;
  75. $paths[1] = "$x,$y";
  76. }
  77. }
  78. }
  79. if (isset($paths[$max_len]) AND $paths[$max_len]) {
  80. $path = explode(" ", $paths[$max_len]);
  81. $u = $v = array();
  82. foreach ($path as $p) {
  83. list($x, $y) = explode(",", $p);
  84. $u[$x] = $y;
  85. $v[$y] = $x;
  86. }
  87. return array($u, $v);
  88. }
  89. return array(0 => array(), 1 => array());
  90. }
  91. //
  92. // Generation de diff a plusieurs etages
  93. //
  94. // http://code.spip.net/@Diff
  95. class Diff {
  96. var $diff;
  97. var $fuzzy;
  98. // http://code.spip.net/@Diff
  99. function Diff($diff) {
  100. $this->diff = $diff;
  101. $this->fuzzy = true;
  102. }
  103. // http://code.spip.net/@comparer
  104. function comparer($new, $old) {
  105. $paras = $this->diff->segmenter($new);
  106. $paras_old = $this->diff->segmenter($old);
  107. if ($this->diff->fuzzy()) {
  108. list($trans_rev, $trans) = apparier_paras($paras_old, $paras);
  109. $lcs = lcs_opt($trans);
  110. $lcs_rev = array_flip($lcs);
  111. }
  112. else {
  113. list($trans_rev, $trans) = lcs($paras_old, $paras);
  114. $lcs = $trans;
  115. $lcs_rev = $trans_rev;
  116. }
  117. reset($paras_old);
  118. reset($paras);
  119. reset($lcs);
  120. unset($i_old);
  121. $fin_old = false;
  122. foreach ($paras as $i => $p) {
  123. if (!isset($trans[$i])) {
  124. // Paragraphe ajoute
  125. $this->diff->ajouter($p);
  126. continue;
  127. }
  128. $j = $trans[$i];
  129. if (!isset($lcs[$i])) {
  130. // Paragraphe deplace
  131. $this->diff->deplacer($p, $paras_old[$j]);
  132. continue;
  133. }
  134. if (!$fin_old) {
  135. // Paragraphes supprimes jusqu'au paragraphe courant
  136. if (!isset($i_old)) {
  137. list($i_old, $p_old) = each($paras_old);
  138. if (!$p_old) $fin_old = true;
  139. }
  140. while (!$fin_old && $i_old < $j) {
  141. if (!isset($trans_rev[$i_old])) {
  142. $this->diff->supprimer($p_old);
  143. }
  144. unset($i_old);
  145. list($i_old, $p_old) = each($paras_old);
  146. if (!$p_old) $fin_old = true;
  147. }
  148. }
  149. // Paragraphe n'ayant pas change de place
  150. $this->diff->comparer($p, $paras_old[$j]);
  151. }
  152. // Paragraphes supprimes a la fin du texte
  153. if (!$fin_old) {
  154. if (!isset($i_old)) {
  155. list($i_old, $p_old) = each($paras_old);
  156. if (!strlen($p_old)) $fin_old = true;
  157. }
  158. while (!$fin_old) {
  159. if (!isset($trans_rev[$i_old])) {
  160. $this->diff->supprimer($p_old);
  161. }
  162. list($i_old, $p_old) = each($paras_old);
  163. if (!$p_old) $fin_old = true;
  164. }
  165. }
  166. if (isset($i_old)) {
  167. if (!isset($trans_rev[$i_old])) {
  168. $this->diff->supprimer($p_old);
  169. }
  170. }
  171. return $this->diff->resultat();
  172. }
  173. }
  174. // http://code.spip.net/@DiffTexte
  175. class DiffTexte {
  176. var $r;
  177. // http://code.spip.net/@DiffTexte
  178. function DiffTexte() {
  179. $this->r = "";
  180. }
  181. // http://code.spip.net/@_diff
  182. function _diff($p, $p_old) {
  183. $diff = new Diff(new DiffPara);
  184. return $diff->comparer($p, $p_old);
  185. }
  186. // http://code.spip.net/@fuzzy
  187. function fuzzy() {
  188. return true;
  189. }
  190. // http://code.spip.net/@segmenter
  191. function segmenter($texte) {
  192. return separer_paras($texte);
  193. }
  194. // NB : rem=\"diff-\" est un signal pour la fonction "afficher_para_modifies"
  195. // http://code.spip.net/@ajouter
  196. function ajouter($p) {
  197. $p = trim($p);
  198. $this->r .= "\n\n\n<span class=\"diff-para-ajoute\" title=\""._T('revisions:diff_para_ajoute')."\">".$p."</span rem=\"diff-\">";
  199. }
  200. // http://code.spip.net/@supprimer
  201. function supprimer($p_old) {
  202. $p_old = trim($p_old);
  203. $this->r .= "\n\n\n<span class=\"diff-para-supprime\" title=\""._T('revisions:diff_para_supprime')."\">".$p_old."</span rem=\"diff-\">";
  204. }
  205. // http://code.spip.net/@deplacer
  206. function deplacer($p, $p_old) {
  207. $this->r .= "\n\n\n<span class=\"diff-para-deplace\" title=\""._T('revisions:diff_para_deplace')."\">";
  208. $this->r .= trim($this->_diff($p, $p_old));
  209. $this->r .= "</span rem=\"diff-\">";
  210. }
  211. // http://code.spip.net/@comparer
  212. function comparer($p, $p_old) {
  213. $this->r .= "\n\n\n".$this->_diff($p, $p_old);
  214. }
  215. // http://code.spip.net/@resultat
  216. function resultat() {
  217. return $this->r;
  218. }
  219. }
  220. // http://code.spip.net/@DiffPara
  221. class DiffPara {
  222. var $r;
  223. // http://code.spip.net/@DiffPara
  224. function DiffPara() {
  225. $this->r = "";
  226. }
  227. // http://code.spip.net/@_diff
  228. function _diff($p, $p_old) {
  229. $diff = new Diff(new DiffPhrase);
  230. return $diff->comparer($p, $p_old);
  231. }
  232. // http://code.spip.net/@fuzzy
  233. function fuzzy() {
  234. return true;
  235. }
  236. // http://code.spip.net/@segmenter
  237. function segmenter($texte) {
  238. $paras = array();
  239. $texte = trim($texte);
  240. while (preg_match('/[\.!\?\]]+\s*/u', $texte, $regs)) {
  241. $p = strpos($texte, $regs[0]) + strlen($regs[0]);
  242. $paras[] = substr($texte, 0, $p);
  243. $texte = substr($texte, $p);
  244. }
  245. if ($texte) $paras[] = $texte;
  246. return $paras;
  247. }
  248. // http://code.spip.net/@ajouter
  249. function ajouter($p) {
  250. $this->r .= "<span class=\"diff-ajoute\" title=\""._T('revisions:diff_texte_ajoute')."\">".$p."</span rem=\"diff-\">";
  251. }
  252. // http://code.spip.net/@supprimer
  253. function supprimer($p_old) {
  254. $this->r .= "<span class=\"diff-supprime\" title=\""._T('revisions:diff_texte_supprime')."\">".$p_old."</span rem=\"diff-\">";
  255. }
  256. // http://code.spip.net/@deplacer
  257. function deplacer($p, $p_old) {
  258. $this->r .= "<span class=\"diff-deplace\" title=\""._T('revisions:diff_texte_deplace')."\">".$this->_diff($p, $p_old)."</span rem=\"diff-\">";
  259. }
  260. // http://code.spip.net/@comparer
  261. function comparer($p, $p_old) {
  262. $this->r .= $this->_diff($p, $p_old);
  263. }
  264. // http://code.spip.net/@resultat
  265. function resultat() {
  266. return $this->r;
  267. }
  268. }
  269. // http://code.spip.net/@DiffPhrase
  270. class DiffPhrase {
  271. var $r;
  272. // http://code.spip.net/@DiffPhrase
  273. function DiffPhrase() {
  274. $this->r = "";
  275. }
  276. // http://code.spip.net/@fuzzy
  277. function fuzzy() {
  278. return false;
  279. }
  280. // http://code.spip.net/@segmenter
  281. function segmenter($texte) {
  282. $paras = array();
  283. if (test_pcre_unicode()) {
  284. $punct = '([[:punct:]]|'.plage_punct_unicode().')';
  285. $mode = 'u';
  286. }
  287. else {
  288. // Plages de poncutation pour preg_match bugge (ha ha)
  289. $punct = '([^\w\s\x80-\xFF]|'.plage_punct_unicode().')';
  290. $mode = '';
  291. }
  292. $preg = '/('.$punct.'+)(\s+|$)|(\s+)('.$punct.'*)/'.$mode;
  293. while (preg_match($preg, $texte, $regs)) {
  294. $p = strpos($texte, $regs[0]);
  295. $l = strlen($regs[0]);
  296. $punct = $regs[1] ? $regs[1] : $regs[6];
  297. $milieu = "";
  298. if ($punct) {
  299. // notes
  300. if ($punct == '[[') {
  301. $avant = substr($texte, 0, $p) . $regs[5] . $punct;
  302. $texte = $regs[4] . substr($texte, $p + $l);
  303. }
  304. else
  305. if ($punct == ']]') {
  306. $avant = substr($texte, 0, $p) . $regs[5] . $punct;
  307. $texte = substr($texte, $p + $l);
  308. }
  309. // Attacher les raccourcis fermants au mot precedent
  310. else
  311. if (preg_match(',^[\]}]+$,', $punct)) {
  312. $avant = substr($texte, 0, $p) . (isset($regs[5])?$regs[5]:'') . $punct;
  313. $texte = $regs[4] . substr($texte, $p + $l);
  314. }
  315. // Attacher les raccourcis ouvrants au mot suivant
  316. else if (isset($regs[5]) && $regs[5] && preg_match(',^[\[{]+$,', $punct)) {
  317. $avant = substr($texte, 0, $p) . $regs[5];
  318. $texte = $punct . substr($texte, $p + $l);
  319. }
  320. // Les autres signes de ponctuation sont des mots a part entiere
  321. else {
  322. $avant = substr($texte, 0, $p);
  323. $milieu = $regs[0];
  324. $texte = substr($texte, $p + $l);
  325. }
  326. }
  327. else {
  328. $avant = substr($texte, 0, $p + $l);
  329. $texte = substr($texte, $p + $l);
  330. }
  331. if ($avant) $paras[] = $avant;
  332. if ($milieu) $paras[] = $milieu;
  333. }
  334. if ($texte) $paras[] = $texte;
  335. return $paras;
  336. }
  337. // http://code.spip.net/@ajouter
  338. function ajouter($p) {
  339. $this->r .= "<span class=\"diff-ajoute\" title=\""._T('revisions:diff_texte_ajoute')."\">".$p."</span rem=\"diff-\"> ";
  340. }
  341. // http://code.spip.net/@supprimer
  342. function supprimer($p_old) {
  343. $this->r .= "<span class=\"diff-supprime\" title=\""._T('revisions:diff_texte_supprime')."\">".$p_old."</span rem=\"diff-\"> ";
  344. }
  345. // http://code.spip.net/@comparer
  346. function comparer($p, $p_old) {
  347. $this->r .= $p;
  348. }
  349. // http://code.spip.net/@resultat
  350. function resultat() {
  351. return $this->r;
  352. }
  353. }
  354. // http://code.spip.net/@preparer_diff
  355. function preparer_diff($texte) {
  356. include_spip('inc/charsets');
  357. $charset = $GLOBALS['meta']['charset'];
  358. if ($charset == 'utf-8')
  359. return unicode_to_utf_8(html2unicode($texte));
  360. return unicode_to_utf_8(html2unicode(charset2unicode($texte, $charset, true)));
  361. }
  362. // http://code.spip.net/@afficher_diff
  363. function afficher_diff($texte) {
  364. $charset = $GLOBALS['meta']['charset'];
  365. if ($charset == 'utf-8') return $texte;
  366. return charset2unicode($texte, 'utf-8');
  367. }
  368. ?>