CategoryFinder.php 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. <?php
  2. /**
  3. * Recent changes filtering by category.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  22. use Wikimedia\Rdbms\IDatabase;
  23. /**
  24. * The "CategoryFinder" class takes a list of articles, creates an internal
  25. * representation of all their parent categories (as well as parents of
  26. * parents etc.). From this representation, it determines which of these
  27. * articles are in one or all of a given subset of categories.
  28. *
  29. * Example use :
  30. * @code
  31. * # Determines whether the article with the page_id 12345 is in both
  32. * # "Category 1" and "Category 2" or their subcategories, respectively
  33. *
  34. * $cf = new CategoryFinder;
  35. * $cf->seed(
  36. * [ 12345 ],
  37. * [ 'Category 1', 'Category 2' ],
  38. * 'AND'
  39. * );
  40. * $a = $cf->run();
  41. * print implode( ',' , $a );
  42. * @endcode
  43. *
  44. * @deprecated since 1.31
  45. */
  class CategoryFinder {
      /** @var int[] The original article IDs passed to the seed function */
      protected $articles = [];

      /**
       * @var array DBKEY names of categories that don't have a page, stored as
       *  [ dbkey => dbkey ] so that membership can be tested by key
       */
      protected $deadend = [];

      /** @var array Parent links found so far: [ page ID => [ category DBKEY => categorylinks row ] ] */
      protected $parents = [];

      /** @var array Article/category page IDs whose parents the next scan will fetch */
      protected $next = [];

      /** @var int Max layer depth; -1 means no limit */
      protected $maxdepth = -1;

      /** @var array Target categories, stored as [ dbkey => dbkey ] */
      protected $targets = [];

      /** @var array Map of category DBKEY => page ID, for categories that have a page */
      protected $name2id = [];

      /** @var string "AND" or "OR" */
      protected $mode;

      /** @var IDatabase Read-DB replica DB */
      protected $dbr;

      /**
       * Initializes the instance. Do this prior to calling run().
       *
       * Note that only the article list, scan queue, mode, depth and targets are
       * (re)set here; the parent tree gathered by a previous run() is kept.
       *
       * @param array $articleIds Array of article IDs
       * @param array $categories Category names (without namespace prefix). Each one
       *  is converted to DBKEY form; names that do not form a valid title are skipped.
       * @param string $mode 'OR' to match articles that are in at least one target
       *  category; any other value is treated as 'AND' (all target categories
       *  must be found). Default 'AND'.
       * @param int $maxdepth Maximum layer depth. Where:
       *  -1 means deep recursion (default);
       *  0 means no-parents;
       *  1 means one parent layer, etc.
       */
      public function seed( $articleIds, $categories, $mode = 'AND', $maxdepth = -1 ) {
          $this->articles = $articleIds;
          $this->next = $articleIds;
          $this->mode = $mode;
          $this->maxdepth = $maxdepth;

          # Set the list of target categories; convert them to DBKEY form first
          $this->targets = [];
          foreach ( $categories as $c ) {
              $ct = Title::makeTitleSafe( NS_CATEGORY, $c );
              if ( $ct ) {
                  $c = $ct->getDBkey();
                  $this->targets[$c] = $c;
              }
          }
      }

      /**
       * Iterates through the parent tree starting with the seed values,
       * then checks the articles if they match the conditions
       *
       * The first scan (the direct categories of the seeded articles) always runs
       * when there is something to scan; with a depth limit, up to $maxdepth
       * further layers are scanned after it.
       *
       * @return array Array of page_ids (those given to seed() that match the conditions)
       */
      public function run() {
          $this->dbr = wfGetDB( DB_REPLICA );

          $i = 0;
          $dig = true;
          while ( count( $this->next ) && $dig ) {
              $this->scanNextLayer();

              // Is there any depth limit?
              if ( $this->maxdepth !== -1 ) {
                  $dig = $i < $this->maxdepth;
                  $i++;
              }
          }

          # Now check if this applies to the individual articles
          $ret = [];
          foreach ( $this->articles as $article ) {
              # check() consumes the conditions by reference, so give every
              # article its own fresh copy of the targets
              $conds = $this->targets;
              if ( $this->check( $article, $conds ) ) {
                  # Matches the conditions
                  $ret[] = $article;
              }
          }

          return $ret;
      }

      /**
       * Get the parents. Only really useful if run() has been called already
       * @return array [ page ID => [ category DBKEY => categorylinks row ] ]
       */
      public function getParents() {
          return $this->parents;
      }

      /**
       * This function recurses through the parent representation, trying to match the conditions
       * @param int $id The article/category to check
       * @param array &$conds The array of categories to match, keyed by DBKEY.
       *  Matched entries are removed from it (all of them at once in "OR" mode).
       * @param array $path Used to check for recursion loops
       * @return bool Does this match the conditions?
       */
      private function check( $id, &$conds, $path = [] ) {
          // Check for loops and stop!
          // The comparison is deliberately loose: IDs may arrive as integers from
          // seed() and as numeric strings from database rows.
          if ( in_array( $id, $path ) ) {
              return false;
          }

          $path[] = $id;

          # Shortcut (runtime paranoia): No conditions=all matched
          if ( count( $conds ) === 0 ) {
              return true;
          }

          if ( !isset( $this->parents[$id] ) ) {
              return false;
          }

          # iterate through the parents
          foreach ( $this->parents[$id] as $p ) {
              $pname = $p->cl_to;

              # Is this a condition?
              if ( isset( $conds[$pname] ) ) {
                  # This key is in the category list!
                  if ( $this->mode === 'OR' ) {
                      # One found, that's enough!
                      $conds = [];
                      return true;
                  }

                  # Assuming "AND" as default
                  unset( $conds[$pname] );
                  if ( count( $conds ) === 0 ) {
                      # All conditions met, done
                      return true;
                  }
              }

              # Not done yet, try sub-parents
              if ( !isset( $this->name2id[$pname] ) ) {
                  # No sub-parent
                  continue;
              }

              $done = $this->check( $this->name2id[$pname], $conds, $path );
              if ( $done || count( $conds ) === 0 ) {
                  # Subparents have done it!
                  return true;
              }
          }

          return false;
      }

      /**
       * Scans a "parent layer" of the articles/categories in $this->next
       *
       * Records every category link of those pages in $this->parents, then
       * replaces $this->next with the page IDs of the newly seen categories that
       * have a page. Newly seen categories without a page are marked as dead ends.
       */
      private function scanNextLayer() {
          # Find all parents of the article currently in $this->next
          $layer = [];
          $res = $this->dbr->select(
              /* FROM   */ 'categorylinks',
              /* SELECT */ [ 'cl_to', 'cl_from' ],
              /* WHERE  */ [ 'cl_from' => $this->next ],
              __METHOD__ . '-1'
          );
          foreach ( $res as $row ) {
              $k = $row->cl_to;

              # Update parent tree
              if ( !isset( $this->parents[$row->cl_from] ) ) {
                  $this->parents[$row->cl_from] = [];
              }
              $this->parents[$row->cl_from][$k] = $row;

              # Ignore those we already have.
              # Both lookups are by array key: that is an exact match, whereas a
              # loose in_array() would treat numeric-looking names such as
              # "1e3" and "1000" as the same category.
              if ( isset( $this->deadend[$k] ) || isset( $this->name2id[$k] ) ) {
                  continue;
              }

              # Hey, new category!
              $layer[$k] = $k;
          }

          $this->next = [];

          # Find the IDs of all category pages in $layer, if they exist
          if ( count( $layer ) > 0 ) {
              $res = $this->dbr->select(
                  /* FROM   */ 'page',
                  /* SELECT */ [ 'page_id', 'page_title' ],
                  /* WHERE  */ [ 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ],
                  __METHOD__ . '-2'
              );
              foreach ( $res as $row ) {
                  $id = $row->page_id;
                  $name = $row->page_title;
                  $this->name2id[$name] = $id;
                  $this->next[] = $id;
                  unset( $layer[$name] );
              }
          }

          # Mark dead ends: whatever is left in $layer has no category page
          foreach ( $layer as $v ) {
              $this->deadend[$v] = $v;
          }
      }
  }