NamespaceAwareForeignTitleFactory.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. <?php
  2. /**
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. * http://www.gnu.org/copyleft/gpl.html
  17. *
  18. * @file
  19. */
  20. /**
  21. * A parser that translates page titles on a foreign wiki into ForeignTitle
  22. * objects, using information about the namespace setup on the foreign site.
  23. */
  class NamespaceAwareForeignTitleFactory implements ForeignTitleFactory {
      /**
       * Namespace setup of the foreign wiki exactly as given to the
       * constructor: namespace ID => namespace name, or null if none was given.
       *
       * @var array|null
       */
      protected $foreignNamespaces;

      /**
       * Reverse lookup table built by the constructor: normalized namespace
       * name => namespace ID. Remains empty when no namespace data was given.
       *
       * @var array
       */
      private $foreignNamespacesFlipped = [];

      /**
       * Normalizes a namespace name for use as a key of
       * $foreignNamespacesFlipped: spaces become underscores and the result
       * is lower-cased, so lookups ignore letter case and the space/underscore
       * distinction.
       *
       * @param string $name
       * @return string
       */
      private function normalizeNamespaceName( $name ) {
          $name = str_replace( ' ', '_', $name );

          // strtolower() works on single bytes (ASCII-only as of PHP 8.2,
          // locale-dependent before that), so it cannot fold non-ASCII
          // namespace names. Prefer the multibyte-aware variant and keep the
          // old behaviour only as a fallback when mbstring is not loaded.
          if ( function_exists( 'mb_strtolower' ) ) {
              return mb_strtolower( $name, 'UTF-8' );
          }

          return strtolower( $name );
      }

      /**
       * @param array|null $foreignNamespaces An array 'id' => 'name' which contains
       * the complete namespace setup of the foreign wiki. Such data could be
       * obtained from siteinfo/namespaces in an XML dump file, or by an action API
       * query such as api.php?action=query&meta=siteinfo&siprop=namespaces. If
       * this data is unavailable, use NaiveForeignTitleFactory instead.
       */
      public function __construct( $foreignNamespaces ) {
          $this->foreignNamespaces = $foreignNamespaces;
          $this->foreignNamespacesFlipped = [];

          if ( $foreignNamespaces === null ) {
              return;
          }

          // Build the name => ID table once so that titles without an explicit
          // namespace ID can be resolved by their prefix.
          foreach ( $foreignNamespaces as $id => $name ) {
              $this->foreignNamespacesFlipped[$this->normalizeNamespaceName( $name )] = $id;
          }
      }

      /**
       * Creates a ForeignTitle object based on the page title, and optionally the
       * namespace ID, of a page on a foreign wiki. These values could be, for
       * example, the <title> and <ns> attributes found in an XML dump.
       *
       * @param string $title The page title
       * @param int|null $ns The namespace ID, or null if this data is not available
       * @return ForeignTitle
       */
      public function createForeignTitle( $title, $ns = null ) {
          // Export schema version 0.5 and earlier (MW 1.18 and earlier) does not
          // contain a <ns> tag, so we need to be able to handle that case.
          if ( $ns === null ) {
              return $this->parseTitleNoNs( $title );
          }

          return $this->parseTitleWithNs( $title, $ns );
      }

      /**
       * Helper function to parse the title when the namespace ID is not specified.
       *
       * The text before the first colon is treated as a namespace only if it
       * matches (after normalization) one of the known foreign namespace names;
       * otherwise the whole title is placed in namespace 0.
       *
       * @param string $title
       * @return ForeignTitle
       */
      protected function parseTitleNoNs( $title ) {
          $pieces = explode( ':', $title, 2 );
          $key = $this->normalizeNamespaceName( $pieces[0] );

          // Does the part before the colon match a known namespace? Check the
          // foreign namespaces
          $isNamespacePartValid = isset( $this->foreignNamespacesFlipped[$key] );

          if ( count( $pieces ) === 2 && $isNamespacePartValid ) {
              // The namespace name is reported as it was written in the title.
              $namespaceName = $pieces[0];
              $pageName = $pieces[1];
              $ns = $this->foreignNamespacesFlipped[$key];
          } else {
              $namespaceName = '';
              $pageName = $title;
              $ns = 0;
          }

          return new ForeignTitle( $ns, $namespaceName, $pageName );
      }

      /**
       * Helper function to parse the title when the namespace value is known.
       *
       * @param string $title
       * @param int $ns
       * @return ForeignTitle
       */
      protected function parseTitleWithNs( $title, $ns ) {
          $pieces = explode( ':', $title, 2 );

          // Is $title of the form Namespace:Title (true), or just Title (false)?
          // The comparison is deliberately loose so that the ID is accepted both
          // as an integer and as a numeric string.
          $titleIncludesNamespace = ( $ns != '0' && count( $pieces ) === 2 );

          if ( isset( $this->foreignNamespaces[$ns] ) ) {
              $namespaceName = $this->foreignNamespaces[$ns];
          } else {
              // If the foreign wiki is misconfigured, XML dumps can contain a page with
              // a non-zero namespace ID, but whose title doesn't contain a colon
              // (T114115). In those cases, output a made-up namespace name to avoid
              // collisions. The ImportTitleFactory might replace this with something
              // more appropriate.
              $namespaceName = $titleIncludesNamespace ? $pieces[0] : "Ns$ns";
          }

          // We assume that the portion of the page title before the colon is the
          // namespace name, except in the case of namespace 0.
          $pageName = $titleIncludesNamespace ? $pieces[1] : $title;

          return new ForeignTitle( $ns, $namespaceName, $pageName );
      }
  }