textwheel.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. <?php
  2. /*
  3. * TextWheel 0.1
  4. *
  5. * let's reinvent the wheel one last time
  6. *
  7. * This library of code is meant to be a fast and universal replacement
  8. * for any and all text-processing systems written in PHP
  9. *
  10. * It is dual-licensed for any use under the GNU/GPL2 and MIT licenses,
  11. * as suits you best
  12. *
  13. * (c) 2009 Fil - fil@rezo.net
  14. * Documentation & http://zzz.rezo.net/-TextWheel-
  15. *
  16. * Usage: $wheel = new TextWheel(); echo $wheel->text($text);
  17. *
  18. */
  19. if (!defined('_ECRIRE_INC_VERSION')) return;
  20. require_once dirname(__FILE__)."/textwheelruleset.php";
  21. class TextWheel {
  22. protected $ruleset;
  23. protected static $subwheel = array();
  24. protected $compiled = array();
  25. /**
  26. * Constructor
  27. * @param TextWheelRuleSet $ruleset
  28. */
  29. public function TextWheel($ruleset = null) {
  30. $this->setRuleSet($ruleset);
  31. }
  32. /**
  33. * Set RuleSet
  34. * @param TextWheelRuleSet $ruleset
  35. */
  36. public function setRuleSet($ruleset){
  37. if (!is_object($ruleset))
  38. $ruleset = new TextWheelRuleSet ($ruleset);
  39. $this->ruleset = $ruleset;
  40. }
  41. /**
  42. * Apply all rules of RuleSet to a text
  43. *
  44. * @param string $t
  45. * @return string
  46. */
  47. public function text($t) {
  48. $rules = & $this->ruleset->getRules();
  49. ## apply each in order
  50. foreach ($rules as $name => $rule) #php4+php5
  51. {
  52. $this->apply($rules[$name], $t);
  53. }
  54. #foreach ($this->rules as &$rule) #smarter &reference, but php5 only
  55. # $this->apply($rule, $t);
  56. return $t;
  57. }
  58. private function export($x) {
  59. return addcslashes(var_export($x, true), "\n\r\t");
  60. }
  61. public function compile($b = null) {
  62. $rules = & $this->ruleset->getRules();
  63. ## apply each in order
  64. $pre = array();
  65. $comp = array();
  66. foreach ($rules as $name => $rule)
  67. {
  68. $rule->name = $name;
  69. $this->initRule($rule);
  70. if (is_string($rule->replace)
  71. AND isset($this->compiled[$rule->replace])
  72. AND $fun = $this->compiled[$rule->replace]) {
  73. $pre[] = "\n###\n## $name\n###\n" . $fun;
  74. preg_match(',function (\w+),', $fun, $r);
  75. $rule->compilereplace = $r[1]; # ne pas modifier ->replace sinon on casse l'execution...
  76. }
  77. $r = "\t/* $name */\n";
  78. if ($rule->require)
  79. $r .= "\t".'require_once '.TextWheel::export($rule->require).';'."\n";
  80. if ($rule->if_str)
  81. $r .= "\t".'if (strpos($t, '.TextWheel::export($rule->if_str).') === false)'."\n";
  82. if ($rule->if_stri)
  83. $r .= "\t".'if (stripos($t, '.TextWheel::export($rule->if_stri).') === false)'."\n";
  84. if ($rule->if_match)
  85. $r .= "\t".'if (preg_match('.TextWheel::export($rule->if_match).', $t))'."\n";
  86. if ($rule->func_replace !== 'replace_identity') {
  87. $fun = 'TextWheel::'.$rule->func_replace;
  88. switch($fun) {
  89. case 'TextWheel::replace_all_cb':
  90. $fun = $rule->replace; # trim()...
  91. break;
  92. case 'TextWheel::replace_preg':
  93. $fun = 'preg_replace';
  94. break;
  95. case 'TextWheel::replace_str':
  96. $fun = 'str_replace';
  97. break;
  98. case 'TextWheel::replace_preg_cb':
  99. $fun = 'preg_replace_callback';
  100. break;
  101. default:
  102. break;
  103. }
  104. $r .= "\t".'$t = '.$fun.'('.TextWheel::export($rule->match).', '.TextWheel::export($rule->replace).', $t);'."\n";
  105. }
  106. $comp[] = $r;
  107. }
  108. $code = join ("\n", $comp);
  109. $code = 'function '.$b.'($t) {' . "\n". $code . "\n\treturn \$t;\n}\n\n";
  110. $code = join ("\n", $pre) . $code;
  111. return $code;
  112. }
  113. /**
  114. * Get an internal global subwheel
  115. * read acces for annymous function only
  116. *
  117. * @param int $n
  118. * @return TextWheel
  119. */
  120. public static function &getSubWheel($n){
  121. return TextWheel::$subwheel[$n];
  122. }
  123. /**
  124. * Create SubWheel (can be overriden in debug class)
  125. * @param TextWheelRuleset $rules
  126. * @return TextWheel
  127. */
  128. protected function &createSubWheel(&$rules){
  129. $tw = new TextWheel($rules);
  130. return $tw;
  131. }
  132. /**
  133. * Initializing a rule a first call
  134. * including file, creating function or wheel
  135. * optimizing tests
  136. *
  137. * @param TextWheelRule $rule
  138. */
  139. protected function initRule(&$rule){
  140. # language specific
  141. if ($rule->require){
  142. require_once $rule->require;
  143. }
  144. # optimization: strpos or stripos?
  145. if (isset($rule->if_str)) {
  146. if (strtolower($rule->if_str) !== strtoupper($rule->if_str)) {
  147. $rule->if_stri = $rule->if_str;
  148. unset($rule->if_str);
  149. }
  150. }
  151. if ($rule->create_replace){
  152. $compile = $rule->replace.'($t)';
  153. $rule->replace = create_function('$m', $rule->replace);
  154. $this->compiled[$rule->replace] = $compile;
  155. $rule->create_replace = false;
  156. $rule->is_callback = true;
  157. }
  158. elseif ($rule->is_wheel){
  159. $n = count(TextWheel::$subwheel);
  160. TextWheel::$subwheel[] = $this->createSubWheel($rule->replace);
  161. $var = '$m['.intval($rule->pick_match).']';
  162. if ($rule->type=='all' OR $rule->type=='str' OR $rule->type=='split' OR !isset($rule->match))
  163. $var = '$m';
  164. $code = 'return TextWheel::getSubWheel('.$n.')->text('.$var.');';
  165. $rule->replace = create_function('$m', $code);
  166. $cname = 'compiled_'.str_replace('-','_', $rule->name);
  167. $compile = TextWheel::getSubWheel($n)->compile($cname);
  168. $this->compiled[$rule->replace] = $compile;
  169. $rule->is_wheel = false;
  170. $rule->is_callback = true;
  171. }
  172. # optimization
  173. $rule->func_replace = '';
  174. if (isset($rule->replace)) {
  175. switch($rule->type) {
  176. case 'all':
  177. $rule->func_replace = 'replace_all';
  178. break;
  179. case 'str':
  180. $rule->func_replace = 'replace_str';
  181. // test if quicker strtr usable
  182. if (!$rule->is_callback
  183. AND is_array($rule->match) AND is_array($rule->replace)
  184. AND $c = array_map('strlen',$rule->match)
  185. AND $c = array_unique($c)
  186. AND count($c)==1
  187. AND reset($c)==1
  188. AND $c = array_map('strlen',$rule->replace)
  189. AND $c = array_unique($c)
  190. AND count($c)==1
  191. AND reset($c)==1
  192. ){
  193. $rule->match = implode('',$rule->match);
  194. $rule->replace = implode('',$rule->replace);
  195. $rule->func_replace = 'replace_strtr';
  196. }
  197. break;
  198. case 'split':
  199. $rule->func_replace = 'replace_split';
  200. $rule->match = array($rule->match, is_null($rule->glue)?$rule->match:$rule->glue);
  201. break;
  202. case 'preg':
  203. default:
  204. $rule->func_replace = 'replace_preg';
  205. break;
  206. }
  207. if ($rule->is_callback)
  208. $rule->func_replace .= '_cb';
  209. }
  210. if (!method_exists("TextWheel", $rule->func_replace)){
  211. $rule->disabled = true;
  212. $rule->func_replace = 'replace_identity';
  213. }
  214. # /end
  215. }
  216. /**
  217. * Apply a rule to a text
  218. *
  219. * @param TextWheelRule $rule
  220. * @param string $t
  221. * @param int $count
  222. */
  223. protected function apply(&$rule, &$t, &$count=null) {
  224. if ($rule->disabled)
  225. return;
  226. if (isset($rule->if_chars) AND (strpbrk($t, $rule->if_chars) === false))
  227. return;
  228. if (isset($rule->if_match) AND !preg_match($rule->if_match, $t))
  229. return;
  230. // init rule before testing if_str / if_stri as they are optimized by initRule
  231. if (!isset($rule->func_replace))
  232. $this->initRule($rule);
  233. if (isset($rule->if_str) AND strpos($t, $rule->if_str) === false)
  234. return;
  235. if (isset($rule->if_stri) AND stripos($t, $rule->if_stri) === false)
  236. return;
  237. $func = $rule->func_replace;
  238. TextWheel::$func($rule->match,$rule->replace,$t,$count);
  239. }
  240. /**
  241. * No Replacement function
  242. * fall back in case of unknown method for replacing
  243. * should be called max once per rule
  244. *
  245. * @param mixed $match
  246. * @param mixed $replace
  247. * @param string $t
  248. * @param int $count
  249. */
  250. protected static function replace_identity(&$match,&$replace,&$t,&$count){
  251. }
  252. /**
  253. * Static replacement of All text
  254. * @param mixed $match
  255. * @param mixed $replace
  256. * @param string $t
  257. * @param int $count
  258. */
  259. protected static function replace_all(&$match,&$replace,&$t,&$count){
  260. # special case: replace $0 with $t
  261. # replace: "A$0B" will surround the string with A..B
  262. # replace: "$0$0" will repeat the string
  263. if (strpos($replace, '$0')!==FALSE)
  264. $t = str_replace('$0', $t, $replace);
  265. else
  266. $t = $replace;
  267. }
  268. /**
  269. * Call back replacement of All text
  270. * @param mixed $match
  271. * @param mixed $replace
  272. * @param string $t
  273. * @param int $count
  274. */
  275. protected static function replace_all_cb(&$match,&$replace,&$t,&$count){
  276. $t = $replace($t);
  277. }
  278. /**
  279. * Static string replacement
  280. *
  281. * @param mixed $match
  282. * @param mixed $replace
  283. * @param string $t
  284. * @param int $count
  285. */
  286. protected static function replace_str(&$match,&$replace,&$t,&$count){
  287. if (!is_string($match) OR strpos($t,$match)!==FALSE)
  288. $t = str_replace($match, $replace, $t, $count);
  289. }
  290. /**
  291. * Fast Static string replacement one char to one char
  292. *
  293. * @param mixed $match
  294. * @param mixed $replace
  295. * @param string $t
  296. * @param int $count
  297. */
  298. protected static function replace_strtr(&$match,&$replace,&$t,&$count){
  299. $t = strtr( $t, $match, $replace);
  300. }
  301. /**
  302. * Callback string replacement
  303. *
  304. * @param mixed $match
  305. * @param mixed $replace
  306. * @param string $t
  307. * @param int $count
  308. */
  309. protected static function replace_str_cb(&$match,&$replace,&$t,&$count){
  310. if (strpos($t,$match)!==FALSE)
  311. if (count($b = explode($match, $t)) > 1)
  312. $t = join($replace($match), $b);
  313. }
  314. /**
  315. * Static Preg replacement
  316. *
  317. * @param mixed $match
  318. * @param mixed $replace
  319. * @param string $t
  320. * @param int $count
  321. */
  322. protected static function replace_preg(&$match,&$replace,&$t,&$count){
  323. $t = preg_replace($match, $replace, $t, -1, $count);
  324. }
  325. /**
  326. * Callback Preg replacement
  327. * @param mixed $match
  328. * @param mixed $replace
  329. * @param string $t
  330. * @param int $count
  331. */
  332. protected static function replace_preg_cb(&$match,&$replace,&$t,&$count){
  333. $t = preg_replace_callback($match, $replace, $t, -1, $count);
  334. }
  335. /**
  336. * Static split replacement : invalid
  337. * @param mixed $match
  338. * @param mixed $replace
  339. * @param string $t
  340. * @param int $count
  341. */
  342. protected static function replace_split(&$match,&$replace,&$t,&$count){
  343. throw new InvalidArgumentException('split rule always needs a callback function as replace');
  344. }
  345. /**
  346. * Callback split replacement
  347. * @param array $match
  348. * @param mixed $replace
  349. * @param string $t
  350. * @param int $count
  351. */
  352. protected static function replace_split_cb(&$match,&$replace,&$t,&$count){
  353. $a = explode($match[0], $t);
  354. $t = join($match[1], array_map($replace,$a));
  355. }
  356. }
  357. class TextWheelDebug extends TextWheel {
  358. static protected $t; #tableaux des temps
  359. static protected $tu; #tableaux des temps (rules utilises)
  360. static protected $tnu; #tableaux des temps (rules non utilises)
  361. static protected $u; #compteur des rules utiles
  362. static protected $w; #compteur des rules appliques
  363. static $total;
  364. /**
  365. * Timer for profiling
  366. *
  367. * @staticvar int $time
  368. * @param string $t
  369. * @param bool $raw
  370. * @return int/strinf
  371. */
  372. protected function timer($t='rien', $raw = false) {
  373. static $time;
  374. $a=time(); $b=microtime();
  375. // microtime peut contenir les microsecondes et le temps
  376. $b=explode(' ',$b);
  377. if (count($b)==2) $a = end($b); // plus precis !
  378. $b = reset($b);
  379. if (!isset($time[$t])) {
  380. $time[$t] = $a + $b;
  381. } else {
  382. $p = ($a + $b - $time[$t]) * 1000;
  383. unset($time[$t]);
  384. if ($raw) return $p;
  385. if ($p < 1000)
  386. $s = '';
  387. else {
  388. $s = sprintf("%d ", $x = floor($p/1000));
  389. $p -= ($x*1000);
  390. }
  391. return $s . sprintf("%.3f ms", $p);
  392. }
  393. }
  394. /**
  395. * Apply all rules of RuleSet to a text
  396. *
  397. * @param string $t
  398. * @return string
  399. */
  400. public function text($t) {
  401. $rules = & $this->ruleset->getRules();
  402. ## apply each in order
  403. foreach ($rules as $name => $rule) #php4+php5
  404. {
  405. if (is_int($name))
  406. $name .= ' '.$rule->match;
  407. $this->timer($name);
  408. $b = $t;
  409. $this->apply($rule, $t);
  410. TextWheelDebug::$w[$name] ++; # nombre de fois appliquee
  411. $v = $this->timer($name, true); # timer
  412. TextWheelDebug::$t[$name] += $v;
  413. if ($t !== $b) {
  414. TextWheelDebug::$u[$name] ++; # nombre de fois utile
  415. TextWheelDebug::$tu[$name] += $v;
  416. } else {
  417. TextWheelDebug::$tnu[$name] += $v;
  418. }
  419. }
  420. #foreach ($this->rules as &$rule) #smarter &reference, but php5 only
  421. # $this->apply($rule, $t);
  422. return $t;
  423. }
  424. /**
  425. * Ouputs data stored for profiling/debuging purposes
  426. */
  427. public static function outputDebug(){
  428. if (isset(TextWheelDebug::$t)) {
  429. $time = array_flip(array_map('strval', TextWheelDebug::$t));
  430. krsort($time);
  431. echo "
  432. <div class='textwheeldebug'>
  433. <style type='text/css'>
  434. .textwheeldebug table { margin:1em 0; }
  435. .textwheeldebug th,.textwheeldebug td { padding-left: 15px }
  436. .textwheeldebug .prof-0 .number { padding-right: 60px }
  437. .textwheeldebug .prof-1 .number { padding-right: 30px }
  438. .textwheeldebug .prof-1 .name { padding-left: 30px }
  439. .textwheeldebug .prof-2 .name { padding-left: 60px }
  440. .textwheeldebug .zero { color:orange; }
  441. .textwheeldebug .number { text-align:right; }
  442. .textwheeldebug .strong { font-weight:bold; }
  443. </style>
  444. <table class='sortable'>
  445. <caption>Temps par rule</caption>
  446. <thead><tr><th>temps&nbsp;(ms)</th><th>rule</th><th>application</th><th>t/u&nbsp;(ms)</th><th>t/n-u&nbsp;(ms)</th></tr></thead>\n";
  447. $total = 0;
  448. foreach($time as $t => $r) {
  449. $applications = intval(TextWheelDebug::$u[$r]);
  450. $total += $t;
  451. if(intval($t*10))
  452. echo "<tr>
  453. <td class='number strong'>".number_format(round($t*10)/10,1)."</td><td> ".spip_htmlspecialchars($r)."</td>
  454. <td"
  455. . (!$applications ? " class='zero'" : "")
  456. .">".$applications."/".intval(TextWheelDebug::$w[$r])."</td>
  457. <td class='number'>".($applications?number_format(round(TextWheelDebug::$tu[$r]/$applications*100)/100,2):"") ."</td>
  458. <td class='number'>".(($nu = intval(TextWheelDebug::$w[$r])-$applications)?number_format(round(TextWheelDebug::$tnu[$r]/$nu*100)/100,2):"") ."</td>
  459. </tr>";
  460. }
  461. echo "</table>\n";
  462. echo "
  463. <table>
  464. <caption>Temps total par rule</caption>
  465. <thead><tr><th>temps</th><th>rule</th></tr></thead>\n";
  466. ksort($GLOBALS['totaux']);
  467. TextWheelDebug::outputTotal($GLOBALS['totaux']);
  468. echo "</table>";
  469. # somme des temps des rules, ne tient pas compte des subwheels
  470. echo "<p>temps total rules: ".round($total)."&nbsp;ms</p>\n";
  471. echo "</div>\n";
  472. }
  473. }
  474. public static function outputTotal($liste, $profondeur=0) {
  475. ksort($liste);
  476. foreach ($liste as $cause => $duree) {
  477. if (is_array($duree)) {
  478. TextWheelDebug::outputTotal($duree, $profondeur+1);
  479. } else {
  480. echo "<tr class='prof-$profondeur'>
  481. <td class='number'><b>".intval($duree)."</b>&nbsp;ms</td>
  482. <td class='name'>".spip_htmlspecialchars($cause)."</td>
  483. </tr>\n";
  484. }
  485. }
  486. }
  487. /**
  488. * Create SubWheel (can be overriden in debug class)
  489. * @param TextWheelRuleset $rules
  490. * @return TextWheel
  491. */
  492. protected function &createSubWheel(&$rules){
  493. return new TextWheelDebug($rules);
  494. }
  495. }
  496. /**
  497. * stripos for php4
  498. */
  499. if (!function_exists('stripos')) {
  500. function stripos($haystack, $needle) {
  501. return strpos($haystack, stristr( $haystack, $needle ));
  502. }
  503. }
  504. /**
  505. * approximation of strpbrk for php4
  506. * return false if no char of $char_list is in $haystack
  507. */
  508. if (!function_exists('strpbrk')) {
  509. function strpbrk($haystack, $char_list) {
  510. $result = strcspn($haystack, $char_list);
  511. if ($result != strlen($haystack)) {
  512. return $result;
  513. }
  514. return false;
  515. }
  516. }