RemexDriver.php 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. <?php
  2. namespace MediaWiki\Tidy;
  3. use RemexHtml\Serializer\Serializer;
  4. use RemexHtml\Serializer\SerializerWithTracer;
  5. use RemexHtml\Tokenizer\Tokenizer;
  6. use RemexHtml\TreeBuilder\Dispatcher;
  7. use RemexHtml\TreeBuilder\TreeBuilder;
  8. use RemexHtml\TreeBuilder\TreeMutationTracer;
  9. class RemexDriver extends TidyDriverBase {
  10. private $treeMutationTrace;
  11. private $serializerTrace;
  12. private $mungerTrace;
  13. private $pwrap;
  14. public function __construct( array $config ) {
  15. $config += [
  16. 'treeMutationTrace' => false,
  17. 'serializerTrace' => false,
  18. 'mungerTrace' => false,
  19. 'pwrap' => true
  20. ];
  21. $this->treeMutationTrace = $config['treeMutationTrace'];
  22. $this->serializerTrace = $config['serializerTrace'];
  23. $this->mungerTrace = $config['mungerTrace'];
  24. $this->pwrap = $config['pwrap'];
  25. parent::__construct( $config );
  26. }
  27. public function tidy( $text ) {
  28. $traceCallback = function ( $msg ) {
  29. wfDebug( "RemexHtml: $msg" );
  30. };
  31. $formatter = new RemexCompatFormatter;
  32. if ( $this->serializerTrace ) {
  33. $serializer = new SerializerWithTracer( $formatter, null, $traceCallback );
  34. } else {
  35. $serializer = new Serializer( $formatter );
  36. }
  37. if ( $this->pwrap ) {
  38. $munger = new RemexCompatMunger( $serializer, $this->mungerTrace );
  39. } else {
  40. $munger = $serializer;
  41. }
  42. if ( $this->treeMutationTrace ) {
  43. $tracer = new TreeMutationTracer( $munger, $traceCallback );
  44. } else {
  45. $tracer = $munger;
  46. }
  47. $treeBuilder = new TreeBuilder( $tracer, [
  48. 'ignoreErrors' => true,
  49. 'ignoreNulls' => true,
  50. ] );
  51. $dispatcher = new Dispatcher( $treeBuilder );
  52. $tokenizer = new Tokenizer( $dispatcher, $text, [
  53. 'ignoreErrors' => true,
  54. 'ignoreCharRefs' => true,
  55. 'ignoreNulls' => true,
  56. 'skipPreprocess' => true,
  57. ] );
  58. $tokenizer->execute( [
  59. 'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML,
  60. 'fragmentName' => 'body'
  61. ] );
  62. return $serializer->getResult();
  63. }
  64. }