ImportStreamSource.php 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. <?php
  2. /**
  3. * MediaWiki page data importer.
  4. *
  5. * Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
  6. * https://www.mediawiki.org/
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License along
  19. * with this program; if not, write to the Free Software Foundation, Inc.,
  20. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21. * http://www.gnu.org/copyleft/gpl.html
  22. *
  23. * @file
  24. * @ingroup SpecialPage
  25. */
  26. use MediaWiki\MediaWikiServices;
  /**
   * Imports an XML dump from a file (either from file upload, files on disk, or HTTP).
   *
   * Wraps an open stream handle and exposes it through the ImportSource
   * interface. The static factories each return a Status: good with an
   * ImportStreamSource as its value, or fatal with a message key.
   *
   * @ingroup SpecialPage
   */
  class ImportStreamSource implements ImportSource {
      /** @var resource Open stream handle the import data is read from */
      private $mHandle;

      /**
       * @param resource $handle Open, readable stream handle
       */
      public function __construct( $handle ) {
          $this->mHandle = $handle;
      }

      /**
       * Report whether the underlying stream has reached end-of-file.
       *
       * @return bool
       */
      public function atEnd() {
          return feof( $this->mHandle );
      }

      /**
       * Read the next chunk (up to 32768 bytes) from the underlying stream.
       *
       * @return string|false Data read, or false if fread() fails
       */
      public function readChunk() {
          return fread( $this->mHandle, 32768 );
      }

      /**
       * Open a file and wrap it in an import source.
       *
       * @param string $filename
       * @return Status Good with an ImportStreamSource, or fatal 'importcantopen'
       */
      public static function newFromFile( $filename ) {
          // fopen() emits a warning on failure; the failure is reported via Status instead
          Wikimedia\suppressWarnings();
          $file = fopen( $filename, 'rt' );
          Wikimedia\restoreWarnings();
          if ( !$file ) {
              return Status::newFatal( "importcantopen" );
          }
          return Status::newGood( new ImportStreamSource( $file ) );
      }

      /**
       * Build an import source from an HTTP file upload.
       *
       * @param string $fieldname Name of the upload form field to read from $_FILES
       * @return Status Good with an ImportStreamSource, or fatal with an upload error key
       */
      public static function newFromUpload( $fieldname = "xmlimport" ) {
          // Read by value: taking a reference to a missing key would silently
          // create a null entry in the $_FILES superglobal.
          $upload = isset( $_FILES[$fieldname] ) ? $_FILES[$fieldname] : null;

          if ( $upload === null || empty( $upload['name'] ) ) {
              return Status::newFatal( 'importnofile' );
          }

          if ( !empty( $upload['error'] ) ) {
              switch ( $upload['error'] ) {
                  case UPLOAD_ERR_INI_SIZE:
                      // The uploaded file exceeds the upload_max_filesize directive in php.ini.
                      return Status::newFatal( 'importuploaderrorsize' );
                  case UPLOAD_ERR_FORM_SIZE:
                      // The uploaded file exceeds the MAX_FILE_SIZE directive that
                      // was specified in the HTML form.
                      // FIXME This is probably never used since that directive was removed in 8e91c520?
                      return Status::newFatal( 'importuploaderrorsize' );
                  case UPLOAD_ERR_PARTIAL:
                      // The uploaded file was only partially uploaded
                      return Status::newFatal( 'importuploaderrorpartial' );
                  case UPLOAD_ERR_NO_TMP_DIR:
                      // Missing a temporary folder.
                      return Status::newFatal( 'importuploaderrortemp' );
                  // Other error codes get the generic 'importnofile' error message below
              }
          }

          // Only accept paths PHP itself recorded as uploaded files
          if ( isset( $upload['tmp_name'] ) && is_uploaded_file( $upload['tmp_name'] ) ) {
              return self::newFromFile( $upload['tmp_name'] );
          }

          return Status::newFatal( 'importnofile' );
      }

      /**
       * Fetch a URL over HTTP and wrap the response body in an import source.
       *
       * The body is spooled to a temporary file so it can be read back as a stream.
       *
       * @param string $url
       * @param string $method HTTP method to use
       * @return Status Good with an ImportStreamSource, or fatal 'importcantopen'
       */
      public static function newFromURL( $url, $method = 'GET' ) {
          global $wgHTTPImportTimeout;
          wfDebug( __METHOD__ . ": opening $url\n" );
          # Use the standard HTTP fetch function; it times out
          # quicker and sorts out user-agent problems which might
          # otherwise prevent importing from large sites, such
          # as the Wikimedia cluster, etc.
          $data = MediaWikiServices::getInstance()->getHttpRequestFactory()->request(
              $method,
              $url,
              [
                  'followRedirects' => true,
                  'timeout' => $wgHTTPImportTimeout
              ],
              __METHOD__
          );

          // A failed request is signalled with a non-string value (null from the
          // request factory, false from the legacy helper); checking only for
          // false would turn a failed fetch into an empty, "successful" import.
          if ( !is_string( $data ) ) {
              return Status::newFatal( 'importcantopen' );
          }

          $file = tmpfile();
          if ( $file === false ) {
              // No temporary file could be created
              return Status::newFatal( 'importcantopen' );
          }

          if ( fwrite( $file, $data ) === false ) {
              fclose( $file );
              return Status::newFatal( 'importcantopen' );
          }
          fflush( $file );
          // Rewind so the importer reads from the start of the spooled data
          fseek( $file, 0 );

          return Status::newGood( new ImportStreamSource( $file ) );
      }

      /**
       * Build an import source by requesting Special:Export from another wiki.
       *
       * @param string $interwiki Interwiki prefix, optionally followed by further
       *   colon-separated prefixes that are passed on to the foreign site
       * @param string $page Title of the page to export
       * @param bool $history Whether to add the 'history' parameter to the request
       * @param bool $templates Whether to add the 'templates' parameter to the request
       * @param int $pageLinkDepth Value for 'pagelink-depth'; omitted when 0
       * @return Status
       */
      public static function newFromInterwiki( $interwiki, $page, $history = false,
          $templates = false, $pageLinkDepth = 0
      ) {
          if ( $page == '' ) {
              return Status::newFatal( 'import-noarticle' );
          }

          # Look up the first interwiki prefix, and let the foreign site handle
          # subsequent interwiki prefixes
          $firstIwPrefix = strtok( $interwiki, ':' );
          $interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();
          $firstIw = $interwikiLookup->fetch( $firstIwPrefix );
          if ( !$firstIw ) {
              return Status::newFatal( 'importbadinterwiki' );
          }

          // Remainder of $interwiki after the first prefix (false if none)
          $additionalIwPrefixes = strtok( '' );
          if ( $additionalIwPrefixes ) {
              $additionalIwPrefixes .= ':';
          }

          # Have to do a DB-key replacement ourselves; otherwise spaces get
          # URL-encoded to +, which is wrong in this case. Similar to logic in
          # Title::getLocalURL
          $link = $firstIw->getURL( strtr( "{$additionalIwPrefixes}Special:Export/$page",
              ' ', '_' ) );

          $params = [];
          if ( $history ) {
              $params['history'] = 1;
          }
          if ( $templates ) {
              $params['templates'] = 1;
          }
          if ( $pageLinkDepth ) {
              $params['pagelink-depth'] = $pageLinkDepth;
          }

          $url = wfAppendQuery( $link, $params );
          # For interwikis, use POST to avoid redirects.
          return self::newFromURL( $url, "POST" );
      }
  }