ApiQueryDuplicateFiles.php 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. <?php
  2. /**
  3. * Copyright © 2008 Roan Kattouw "<Firstname>.<Lastname>@gmail.com"
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  /**
   * A query module to list duplicates of the given file(s).
   *
   * Two files are treated as duplicates when they share the same SHA-1 hash.
   * The module can run as a plain query module (each duplicate is attached as
   * a sub-item to the page it duplicates) or as a generator (the duplicates'
   * titles are fed into the result page set).
   *
   * @ingroup API
   */
  class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {

      /**
       * @param ApiQuery $query
       * @param string $moduleName
       */
      public function __construct( ApiQuery $query, $moduleName ) {
          // 'df' is the prefix of this module's request parameters.
          parent::__construct( $query, $moduleName, 'df' );
      }

      /**
       * Run as a regular query module: results are added to the API result.
       */
      public function execute() {
          $this->run();
      }

      /**
       * @param array $params
       * @return string Always 'public'
       */
      public function getCacheMode( $params ) {
          return 'public';
      }

      /**
       * Run as a generator: duplicate titles are added to $resultPageSet.
       *
       * @param ApiPageSet $resultPageSet
       */
      public function executeGenerator( $resultPageSet ) {
          $this->run( $resultPageSet );
      }

      /**
       * Shared implementation of execute() and executeGenerator().
       *
       * The continue token has the form "<image>|<duplicate>": the image whose
       * duplicates were being listed and the first duplicate that was not
       * returned. On resume, processing restarts at that image and skips the
       * duplicates that sort before the token's duplicate in the requested
       * direction.
       *
       * @param ApiPageSet|null $resultPageSet Page set to populate in generator
       *  mode, or null to add sub-items to the API result instead
       */
      private function run( $resultPageSet = null ) {
          $params = $this->extractRequestParams();
          $namespaces = $this->getPageSet()->getGoodAndMissingTitlesByNamespace();
          if ( empty( $namespaces[NS_FILE] ) ) {
              return;
          }

          $descending = ( $params['dir'] === 'descending' );
          $images = $namespaces[NS_FILE];
          if ( $descending ) {
              // Preserve keys so that an integer-like file name is not renumbered.
              $images = array_reverse( $images, true );
          }

          $fromImage = null;
          $skipUntilThisDup = null;
          if ( isset( $params['continue'] ) ) {
              $cont = explode( '|', $params['continue'] );
              $this->dieContinueUsageIf( count( $cont ) != 2 );
              list( $fromImage, $skipUntilThisDup ) = $cont;

              // Resume at the image named in the token. A token naming an image
              // that is not part of the current page set is rejected.
              $remaining = $this->sliceFromImage( $images, $fromImage );
              $this->dieContinueUsageIf( $remaining === false );
              $images = $remaining;
          }

          // Both lookups go either to the local repo only or to the whole group.
          $repoGroup = RepoGroup::singleton();
          $repo = $params['localonly'] ? $repoGroup->getLocalRepo() : $repoGroup;

          $sha1s = [];
          foreach ( $repo->findFiles( array_keys( $images ) ) as $file ) {
              /** @var File $file */
              $sha1s[$file->getName()] = $file->getSha1();
          }

          // find all files with the hashes, result format is:
          // [ hash => [ dup1, dup2 ], hash1 => ... ]
          $filesBySha1s = $repo->findBySha1s( array_unique( array_values( $sha1s ) ) );

          $count = 0;
          $titles = [];

          // Iterate over $images (not over the found files) so that the
          // continue position is handled in request order.
          foreach ( $images as $image => $pageId ) {
              $image = (string)$image;

              // The duplicate part of the token only applies to the image it
              // was issued for; it must never filter another image's duplicates.
              $resumeDup = ( $image === $fromImage ) ? $skipUntilThisDup : null;

              if ( !isset( $sha1s[$image] ) ) {
                  continue; // file does not exist
              }

              $sha1 = $sha1s[$image];
              $dupFiles = isset( $filesBySha1s[$sha1] ) ? $filesBySha1s[$sha1] : [];
              if ( $descending ) {
                  $dupFiles = array_reverse( $dupFiles );
              }

              /** @var File $dupFile */
              foreach ( $dupFiles as $dupFile ) {
                  $dupName = $dupFile->getName();
                  if ( $image === (string)$dupName && $dupFile->isLocal() ) {
                      continue; // ignore the local file itself
                  }

                  if ( $resumeDup !== null ) {
                      // Skip everything that comes before the resume point in
                      // the requested direction.
                      $order = strcmp( $dupName, $resumeDup );
                      if ( $descending ? $order > 0 : $order < 0 ) {
                          continue;
                      }
                      $resumeDup = null;
                  }

                  if ( ++$count > $params['limit'] ) {
                      // We're one over limit which shows that there are
                      // additional images to be had. Stop here...
                      $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
                      break 2;
                  }

                  if ( $resultPageSet !== null ) {
                      $titles[] = $dupFile->getTitle();
                      continue;
                  }

                  $fit = $this->addPageSubItem( $pageId, [
                      'name' => $dupName,
                      'user' => $dupFile->getUser( 'text' ),
                      'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() ),
                      'shared' => !$dupFile->isLocal(),
                  ] );
                  if ( !$fit ) {
                      // The item was not added; resume from this duplicate.
                      $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
                      break 2;
                  }
              }
          }

          if ( $resultPageSet !== null ) {
              $resultPageSet->populateFromTitles( $titles );
          }
      }

      /**
       * Drop every entry that precedes $fromImage, keeping the given order.
       *
       * The lookup is positional rather than a "less than" comparison, so it
       * works for both sort directions and does not assume that the titles
       * are sorted by name.
       *
       * @param array $images Map of file name => page ID
       * @param string $fromImage File name to resume at
       * @return array|bool The remaining map starting at $fromImage, or false
       *  if $fromImage is not one of the keys
       */
      private function sliceFromImage( array $images, $fromImage ) {
          // Array keys that look like integers are stored as ints; normalise
          // them so that the strict search compares strings with strings.
          $names = array_map( 'strval', array_keys( $images ) );
          $offset = array_search( $fromImage, $names, true );
          if ( $offset === false ) {
              return false;
          }
          return array_slice( $images, $offset, null, true );
      }

      /**
       * @return array Parameter definitions for limit, continue, dir and localonly
       */
      public function getAllowedParams() {
          return [
              'limit' => [
                  ApiBase::PARAM_DFLT => 10,
                  ApiBase::PARAM_TYPE => 'limit',
                  ApiBase::PARAM_MIN => 1,
                  ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
                  ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
              ],
              'continue' => [
                  ApiBase::PARAM_HELP_MSG => 'api-help-param-continue',
              ],
              'dir' => [
                  ApiBase::PARAM_DFLT => 'ascending',
                  ApiBase::PARAM_TYPE => [
                      'ascending',
                      'descending'
                  ]
              ],
              'localonly' => false,
          ];
      }

      /**
       * @return array Map of example query string => help message key
       */
      protected function getExamplesMessages() {
          return [
              'action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles'
                  => 'apihelp-query+duplicatefiles-example-simple',
              'action=query&generator=allimages&prop=duplicatefiles'
                  => 'apihelp-query+duplicatefiles-example-generated',
          ];
      }

      /**
       * @return string URL of the module's documentation page
       */
      public function getHelpUrls() {
          return 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Duplicatefiles';
      }
  }