DerivedPageDataUpdater.php 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663
  1. <?php
  2. /**
  3. * A handle for managing updates for derived page data on edit, import, purge, etc.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  22. namespace MediaWiki\Storage;
  23. use CategoryMembershipChangeJob;
  24. use Content;
  25. use ContentHandler;
  26. use DataUpdate;
  27. use DeferrableUpdate;
  28. use DeferredUpdates;
  29. use Hooks;
  30. use IDBAccessObject;
  31. use InvalidArgumentException;
  32. use JobQueueGroup;
  33. use Language;
  34. use LinksDeletionUpdate;
  35. use LinksUpdate;
  36. use LogicException;
  37. use MediaWiki\Edit\PreparedEdit;
  38. use MediaWiki\MediaWikiServices;
  39. use MediaWiki\Revision\MutableRevisionRecord;
  40. use MediaWiki\Revision\RenderedRevision;
  41. use MediaWiki\Revision\RevisionRecord;
  42. use MediaWiki\Revision\RevisionRenderer;
  43. use MediaWiki\Revision\RevisionSlots;
  44. use MediaWiki\Revision\RevisionStore;
  45. use MediaWiki\Revision\SlotRoleRegistry;
  46. use MediaWiki\Revision\SlotRecord;
  47. use MediaWiki\User\UserIdentity;
  48. use MessageCache;
  49. use MWCallableUpdate;
  50. use ParserCache;
  51. use ParserOptions;
  52. use ParserOutput;
  53. use Psr\Log\LoggerAwareInterface;
  54. use Psr\Log\LoggerInterface;
  55. use Psr\Log\NullLogger;
  56. use RecentChangesUpdateJob;
  57. use ResourceLoaderWikiModule;
  58. use Revision;
  59. use SearchUpdate;
  60. use SiteStatsUpdate;
  61. use Title;
  62. use User;
  63. use Wikimedia\Assert\Assert;
  64. use Wikimedia\Rdbms\ILBFactory;
  65. use WikiPage;
  66. /**
  67. * A handle for managing updates for derived page data on edit, import, purge, etc.
  68. *
  69. * @note Avoid direct usage of DerivedPageDataUpdater.
  70. *
  71. * @todo Define interfaces for the different use cases of DerivedPageDataUpdater, particularly
  72. * providing access to post-PST content and ParserOutput to callbacks during revision creation,
  73. * which currently use WikiPage::prepareContentForEdit, and allowing updates to be triggered on
  74. * purge, import, and undeletion, which currently use WikiPage::doEditUpdates() and
  75. * Content::getSecondaryDataUpdates().
  76. *
  77. * DerivedPageDataUpdater instances are designed to be cached inside a WikiPage instance,
  78. * and re-used by callback code over the course of an update operation. It's a stepping stone
  79. * on the way to a more complete refactoring of WikiPage.
  80. *
  81. * When using a DerivedPageDataUpdater, the following life cycle must be observed:
  82. * grabCurrentRevision (optional), prepareContent (optional), prepareUpdate (required
  83. * for doUpdates). getCanonicalParserOutput, getSlots, and getSecondaryDataUpdates
  84. * require prepareContent or prepareUpdate to have been called first, to initialize the
  85. * DerivedPageDataUpdater.
  86. *
  87. * @see docs/pageupdater.txt for more information.
  88. *
  89. * MCR migration note: this replaces the relevant methods in WikiPage, and covers the use cases
  90. * of PreparedEdit.
  91. *
  92. * @internal
  93. *
  94. * @since 1.32
  95. * @ingroup Page
  96. */
  97. class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface {
  98. /**
  99. * @var UserIdentity|null
  100. */
  101. private $user = null;
  102. /**
  103. * @var WikiPage
  104. */
  105. private $wikiPage;
  106. /**
  107. * @var ParserCache
  108. */
  109. private $parserCache;
  110. /**
  111. * @var RevisionStore
  112. */
  113. private $revisionStore;
  114. /**
  115. * @var Language
  116. */
  117. private $contLang;
  118. /**
  119. * @var JobQueueGroup
  120. */
  121. private $jobQueueGroup;
  122. /**
  123. * @var MessageCache
  124. */
  125. private $messageCache;
  126. /**
  127. * @var ILBFactory
  128. */
  129. private $loadbalancerFactory;
  130. /**
  131. * @var LoggerInterface
  132. */
  133. private $logger;
  134. /**
  135. * @var string see $wgArticleCountMethod
  136. */
  137. private $articleCountMethod;
  138. /**
  139. * @var bool see $wgRCWatchCategoryMembership
  140. */
  141. private $rcWatchCategoryMembership = false;
  142. /**
  143. * Stores (most of) the $options parameter of prepareUpdate().
  144. * @see prepareUpdate()
  145. */
  146. private $options = [
  147. 'changed' => true,
  148. // newrev is true if prepareUpdate is handling the creation of a new revision,
  149. // as opposed to a null edit or a forced update.
  150. 'newrev' => false,
  151. 'created' => false,
  152. 'moved' => false,
  153. 'restored' => false,
  154. 'oldrevision' => null,
  155. 'oldcountable' => null,
  156. 'oldredirect' => null,
  157. 'triggeringUser' => null,
  158. // causeAction/causeAgent default to 'unknown' but that's handled where it's read,
  159. // to make the life of prepareUpdate() callers easier.
  160. 'causeAction' => null,
  161. 'causeAgent' => null,
  162. ];
  163. /**
  164. * The state of the relevant row in page table before the edit.
  165. * This is determined by the first call to grabCurrentRevision, prepareContent,
  166. * or prepareUpdate (so it is only accessible in 'knows-current' or a later stage).
  167. * If pageState was not initialized when prepareUpdate() is called, prepareUpdate() will
  168. * attempt to emulate the state of the page table before the edit.
  169. *
  170. * Contains the following fields:
  171. * - oldRevision (RevisionRecord|null): the revision that was current before the change
  172. * associated with this update. Might not be set, use getParentRevision().
  173. * - oldId (int|null): the id of the above revision. 0 if there is no such revision (the change
  174. * was about creating a new page); null if not known (that should not happen).
  175. * - oldIsRedirect (bool|null): whether the page was a redirect before the change. Lazy-loaded,
  176. * can be null; use wasRedirect() instead of direct access.
  177. * - oldCountable (bool|null): whether the page was countable before the change (or null
  178. * if we don't have that information)
  179. *
  180. * @var array|null
  181. */
  182. private $pageState = null;
  183. /**
  184. * @var RevisionSlotsUpdate|null
  185. */
  186. private $slotsUpdate = null;
  187. /**
  188. * @var RevisionRecord|null
  189. */
  190. private $parentRevision = null;
  191. /**
  192. * @var RevisionRecord|null
  193. */
  194. private $revision = null;
  195. /**
  196. * @var RenderedRevision|null
  197. */
  198. private $renderedRevision = null;
  199. /**
  200. * @var RevisionRenderer
  201. */
  202. private $revisionRenderer;
  203. /** @var SlotRoleRegistry */
  204. private $slotRoleRegistry;
  205. /**
  206. * A stage identifier for managing the life cycle of this instance.
  207. * Possible stages are 'new', 'knows-current', 'has-content', 'has-revision', and 'done'.
  208. *
  209. * @see docs/pageupdater.txt for documentation of the life cycle.
  210. *
  211. * @var string
  212. */
  213. private $stage = 'new';
  214. /**
  215. * Transition table for managing the life cycle of DerivedPageDataUpdater instances.
  216. *
  217. * XXX: Overkill. This is a linear order, we could just count. Names are nice though,
  218. * and constants are also overkill...
  219. *
  220. * @see docs/pageupdater.txt for documentation of the life cycle.
  221. *
  222. * @var array[]
  223. */
  224. private static $transitions = [
  225. 'new' => [
  226. 'new' => true,
  227. 'knows-current' => true,
  228. 'has-content' => true,
  229. 'has-revision' => true,
  230. ],
  231. 'knows-current' => [
  232. 'knows-current' => true,
  233. 'has-content' => true,
  234. 'has-revision' => true,
  235. ],
  236. 'has-content' => [
  237. 'has-content' => true,
  238. 'has-revision' => true,
  239. ],
  240. 'has-revision' => [
  241. 'has-revision' => true,
  242. 'done' => true,
  243. ],
  244. ];
  /**
   * Stores the target page and the services this updater collaborates with.
   *
   * The logger is initialised to a NullLogger; use setLogger() to replace it.
   *
   * @param WikiPage $wikiPage The page this updater is bound to
   * @param RevisionStore $revisionStore
   * @param RevisionRenderer $revisionRenderer
   * @param SlotRoleRegistry $slotRoleRegistry
   * @param ParserCache $parserCache
   * @param JobQueueGroup $jobQueueGroup
   * @param MessageCache $messageCache
   * @param Language $contLang
   * @param ILBFactory $loadbalancerFactory
   */
  public function __construct(
    WikiPage $wikiPage,
    RevisionStore $revisionStore,
    RevisionRenderer $revisionRenderer,
    SlotRoleRegistry $slotRoleRegistry,
    ParserCache $parserCache,
    JobQueueGroup $jobQueueGroup,
    MessageCache $messageCache,
    Language $contLang,
    ILBFactory $loadbalancerFactory
  ) {
    // The page being updated.
    $this->wikiPage = $wikiPage;

    // Revision access and rendering.
    $this->revisionStore = $revisionStore;
    $this->revisionRenderer = $revisionRenderer;
    $this->slotRoleRegistry = $slotRoleRegistry;

    // Caches, job queue and language.
    $this->parserCache = $parserCache;
    $this->messageCache = $messageCache;
    $this->jobQueueGroup = $jobQueueGroup;
    $this->contLang = $contLang;

    // XXX: the LB factory is only needed for waiting for replicas to catch up;
    // there should be a narrower interface for that.
    $this->loadbalancerFactory = $loadbalancerFactory;

    // Silent until a real logger is injected.
    $this->logger = new NullLogger();
  }
  /**
   * Replaces the logger used by this instance.
   *
   * @param LoggerInterface $logger
   */
  public function setLogger( LoggerInterface $logger ) {
    $this->logger = $logger;
  }
  /**
   * Moves this instance to a new life cycle stage.
   *
   * The transition is validated first, so the stage is left untouched when the
   * move is not allowed.
   *
   * @see docs/pageupdater.txt for documentation of the life cycle.
   *
   * @param string $newStage the stage to enter
   * @return string the stage this instance was in before the call
   *
   * @throws LogicException If a transition to the given stage is not possible in the current
   * stage.
   */
  private function doTransition( $newStage ) {
    // Throws before any state is modified if the move is illegal.
    $this->assertTransition( $newStage );

    $previousStage = $this->stage;
    $this->stage = $newStage;

    return $previousStage;
  }
  /**
   * Verifies that the given stage can be entered from the current one, without
   * actually changing the stage.
   *
   * @see docs/pageupdater.txt for documentation of the life cycle.
   *
   * @param string $newStage the stage to check
   *
   * @throws LogicException If the transition table has no truthy entry for moving
   * from the current stage to $newStage
   */
  private function assertTransition( $newStage ) {
    // empty() also covers stages that have no row in the table at all.
    $isAllowed = !empty( self::$transitions[$this->stage][$newStage] );
    if ( $isAllowed ) {
      return;
    }

    throw new LogicException( "Cannot transition from {$this->stage} to $newStage" );
  }
  /**
   * Decides whether this instance may be re-used for running updates that target
   * the given revision.
   *
   * Every argument is optional; a check is only applied when both the argument and
   * the corresponding state of this instance are available.
   *
   * @param UserIdentity|null $user The user creating the revision in question
   * @param RevisionRecord|null $revision New revision (after save, if already saved)
   * @param RevisionSlotsUpdate|null $slotsUpdate New content (before PST)
   * @param null|int $parentId Parent revision of the edit (use 0 for page creation)
   *
   * @return bool
   * @throws InvalidArgumentException If a non-zero $parentId disagrees with the parent
   * of $revision
   */
  public function isReusableFor(
    UserIdentity $user = null,
    RevisionRecord $revision = null,
    RevisionSlotsUpdate $slotsUpdate = null,
    $parentId = null
  ) {
    // Sanity check on the arguments themselves; a falsy $parentId (null or 0) is not checked.
    if ( $revision && $parentId ) {
      if ( $revision->getParentId() !== $parentId ) {
        throw new InvalidArgumentException( '$parentId should match the parent of $revision' );
      }
    }

    // NOTE: For null revisions, $user may be different from $this->revision->getUser
    // and also from $revision->getUser.
    // But $user should always match $this->user.
    if ( $user && $this->user ) {
      if ( $user->getName() !== $this->user->getName() ) {
        return false;
      }
    }

    // A revision ID already known to this instance must be the ID of $revision.
    if ( $revision && $this->revision ) {
      $knownId = $this->revision->getId();
      if ( $knownId && $knownId !== $revision->getId() ) {
        return false;
      }
    }

    // The parent recorded on $revision must be the revision that was current before the edit.
    if ( $this->pageState && $revision ) {
      $revisionParentId = $revision->getParentId();
      if ( $revisionParentId !== null && $this->pageState['oldId'] !== $revisionParentId ) {
        return false;
      }
    }

    // The same holds for an explicitly given parent ID.
    if ( $this->pageState && $parentId !== null ) {
      if ( $this->pageState['oldId'] !== $parentId ) {
        return false;
      }
    }

    // NOTE: this check is the primary reason for having the $this->slotsUpdate field!
    if ( $this->slotsUpdate && $slotsUpdate ) {
      if ( !$this->slotsUpdate->hasSameUpdates( $slotsUpdate ) ) {
        return false;
      }
    }

    // Finally, the slot content of both revisions has to agree.
    if ( $revision && $this->revision ) {
      if ( !$this->revision->getSlots()->hasSameContent( $revision->getSlots() ) ) {
        return false;
      }
    }

    return true;
  }
  /**
   * Sets the article counting method that isCountable() consults.
   *
   * @param string $articleCountMethod "any" or "link".
   * @see $wgArticleCountMethod
   */
  public function setArticleCountMethod( $articleCountMethod ) {
    $this->articleCountMethod = $articleCountMethod;
  }
  /**
   * Stores the category membership watching flag on this instance.
   *
   * @param bool $rcWatchCategoryMembership
   * @see $wgRCWatchCategoryMembership
   */
  public function setRcWatchCategoryMembership( $rcWatchCategoryMembership ) {
    $this->rcWatchCategoryMembership = $rcWatchCategoryMembership;
  }
  /**
   * Returns the title of the page this updater is bound to.
   *
   * @return Title
   */
  private function getTitle() {
    // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
    $page = $this->wikiPage;

    return $page->getTitle();
  }
  /**
   * Returns the WikiPage that was handed to the constructor.
   *
   * @return WikiPage
   */
  private function getWikiPage() {
    // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
    $page = $this->wikiPage;

    return $page;
  }
  /**
   * Tells whether the page already existed before the edit, i.e. whether the
   * recorded old revision ID is greater than zero.
   *
   * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()!
   *
   * @return bool
   * @throws LogicException if called before grabCurrentRevision
   */
  public function pageExisted() {
    $this->assertHasPageState( __METHOD__ );

    $previousId = $this->pageState['oldId'];

    return $previousId > 0;
  }
  /**
   * Returns the parent revision of the new revision wrapped by this update.
   *
   * For a null-edit this is the parent of the current (and new) revision. If the
   * revision wrapped by this update created the page, null is returned.
   * The looked-up revision is kept in $this->parentRevision for later calls.
   * Only defined after calling prepareContent() or prepareUpdate()!
   *
   * @return RevisionRecord|null the parent revision of the new revision, or null if
   * the update created the page.
   */
  private function getParentRevision() {
    $this->assertPrepared( __METHOD__ );

    if ( !$this->parentRevision ) {
      if ( !$this->pageState['oldId'] ) {
        // If there was no current revision, there is no parent revision,
        // since the page didn't exist.
        return null;
      }

      $parentId = $this->revision->getParentId();

      // Read from the master only if the page itself was loaded from there.
      $readFlags = 0;
      if ( $this->useMaster() ) {
        $readFlags = RevisionStore::READ_LATEST;
      }

      if ( $parentId ) {
        $this->parentRevision = $this->revisionStore->getRevisionById( $parentId, $readFlags );
      } else {
        $this->parentRevision = null;
      }
    }

    return $this->parentRevision;
  }
  /**
   * Returns the revision that was the page's current revision when this method
   * was first called, capturing the page state on that first call.
   *
   * During an edit, that revision will act as the logical parent of the new revision.
   *
   * Some updates are performed based on the difference between the database state at the
   * moment this method is first called, and the state after the edit.
   *
   * @see docs/pageupdater.txt for more information on when this method can and should be called.
   *
   * @note After prepareUpdate() was called, grabCurrentRevision() will throw an exception
   * to avoid confusion, since the page's current revision is then the new revision after
   * the edit, which was presumably passed to prepareUpdate() as the $revision parameter.
   * Use getParentRevision() instead to access the revision that is the parent of the
   * new revision.
   *
   * @return RevisionRecord|null the page's current revision, or null if the page does not
   * yet exist.
   */
  public function grabCurrentRevision() {
    if ( !$this->pageState ) {
      // Fail early, before any page data is loaded, if this stage cannot be entered.
      $this->assertTransition( 'knows-current' );

      // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
      $page = $this->getWikiPage();

      // Do not call WikiPage::clear(), since the caller may already have caused page data
      // to be loaded with SELECT FOR UPDATE. Just assert it's loaded now.
      $page->loadPageData( self::READ_LATEST );

      $legacyRevision = $page->getRevision();
      if ( $legacyRevision ) {
        $currentRecord = $legacyRevision->getRevisionRecord();
        $currentId = $legacyRevision->getId();
      } else {
        // The page does not exist yet.
        $currentRecord = null;
        $currentId = 0;
      }

      $this->pageState = [
        'oldRevision' => $currentRecord,
        'oldId' => $currentId,
        'oldIsRedirect' => $page->isRedirect(), // NOTE: uses page table
        'oldCountable' => $page->isCountable(), // NOTE: uses pagelinks table
      ];

      $this->doTransition( 'knows-current' );
    }

    return $this->pageState['oldRevision'];
  }
  /**
   * Whether prepareUpdate() or prepareContent() have been called on this instance,
   * as indicated by a revision object being present.
   *
   * @return bool
   */
  public function isContentPrepared() {
    $hasRevision = ( $this->revision !== null );

    return $hasRevision;
  }
  /**
   * Whether prepareUpdate() has been called on this instance, determined by the
   * wrapped revision having an ID.
   *
   * @note This will also return true in case of a null-edit, since prepareContent()
   * assigns the parent revision's ID to the wrapped revision in that case.
   *
   * @return bool
   */
  public function isUpdatePrepared() {
    if ( $this->revision === null ) {
      return false;
    }

    return $this->revision->getId() !== null;
  }
  /**
   * Returns the ID reported by the wrapped WikiPage.
   *
   * @return int
   */
  private function getPageId() {
    // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
    $page = $this->wikiPage;

    return $page->getId();
  }
  /**
   * Whether the content is deleted and thus not visible to the public.
   *
   * @return bool false if no revision is known yet, otherwise whether the revision's
   * text is flagged as deleted
   */
  public function isContentDeleted() {
    if ( !$this->revision ) {
      // If the content has not been saved yet, it cannot have been deleted yet.
      return false;
    }

    return $this->revision->isDeleted( RevisionRecord::DELETED_TEXT );
  }
  /**
   * Looks up a slot, modified or inherited, after PST, with no audience checks applied.
   *
   * @param string $role slot role name
   *
   * @throws PageUpdateException If the slot is neither set for update nor inherited from the
   * parent revision.
   * @return SlotRecord
   */
  public function getRawSlot( $role ) {
    $slots = $this->getSlots();

    return $slots->getSlot( $role );
  }
  /**
   * Fetches the content of the given slot, with no audience checks.
   *
   * @throws PageUpdateException If the slot is neither set for update nor inherited from the
   * parent revision.
   * @param string $role slot role name
   * @return Content
   */
  public function getRawContent( $role ) {
    $slot = $this->getRawSlot( $role );

    return $slot->getContent();
  }
  /**
   * Determines the content model of the given slot.
   *
   * @param string $role slot role name
   * @return string
   */
  private function getContentModel( $role ) {
    $slot = $this->getRawSlot( $role );

    return $slot->getModel();
  }
  /**
   * Resolves the ContentHandler for the content model of the given slot.
   *
   * @param string $role slot role name
   * @return ContentHandler
   */
  private function getContentHandler( $role ) {
    $modelId = $this->getContentModel( $role );

    // TODO: inject something like a ContentHandlerRegistry
    return ContentHandler::getForModelID( $modelId );
  }
  /**
   * Whether the wrapped WikiPage reports having been loaded with READ_LATEST.
   *
   * @return bool NOTE(review): assumes WikiPage::wasLoadedFrom() returns a bool - confirm
   */
  private function useMaster() {
    // TODO: can we just set a flag to true in prepareContent()?
    $page = $this->wikiPage;

    return $page->wasLoadedFrom( self::READ_LATEST );
  }
  /**
   * Whether the page, with the content held by this updater, counts as an article.
   *
   * Non-content pages, deleted content and redirects are never countable. Otherwise
   * the page is countable as soon as one slot whose role supports article counting
   * has countable content.
   *
   * @return bool
   */
  public function isCountable() {
    // NOTE: Keep in sync with WikiPage::isCountable.
    // The deleted-content case should be irrelevant: countability only applies to the
    // current revision, and the current revision is never suppressed.
    if ( !$this->getTitle()->isContentPage()
      || $this->isContentDeleted()
      || $this->isRedirect()
    ) {
      return false;
    }

    // NOTE: it would be more appropriate to determine for each slot separately
    // whether it has links, and use that information with that slot's
    // isCountable() method. However, that would break parity with
    // WikiPage::isCountable, which uses the pagelinks table to determine
    // whether the current revision has links.
    $hasLinks = $this->articleCountMethod === 'link'
      ? count( $this->getCanonicalParserOutput()->getLinks() ) > 0
      : null;

    foreach ( $this->getSlots()->getSlotRoles() as $role ) {
      if ( !$this->slotRoleRegistry->getRoleHandler( $role )->supportsArticleCount() ) {
        // This role does not take part in article counting.
        continue;
      }

      if ( $this->getRawContent( $role )->isCountable( $hasLinks ) ) {
        return true;
      }
    }

    return false;
  }
  /**
   * Whether the main slot content held by this updater is a redirect.
   *
   * @return bool
   */
  public function isRedirect() {
    // NOTE: main slot determines redirect status
    // TODO: MCR: this should be controlled by a PageTypeHandler
    return $this->getRawContent( SlotRecord::MAIN )->isRedirect();
  }
  /**
   * Whether the raw main slot content of the given revision is a redirect.
   *
   * @param RevisionRecord $rev
   *
   * @return bool
   */
  private function revisionIsRedirect( RevisionRecord $rev ) {
    // NOTE: main slot determines redirect status
    return $rev->getContent( SlotRecord::MAIN, RevisionRecord::RAW )->isRedirect();
  }
  619. /**
  620. * Prepare updates based on an update which has not yet been saved.
  621. *
  622. * This may be used to create derived data that is needed when creating a new revision;
  623. * particularly, this makes available the slots of the new revision via the getSlots()
  624. * method, after applying PST and slot inheritance.
  625. *
  626. * The derived data prepared for revision creation may then later be re-used by doUpdates(),
  627. * without the need to re-calculate.
  628. *
  629. * @see docs/pageupdater.txt for more information on when this method can and should be called.
  630. *
  631. * @note Calling this method more than once with the same $slotsUpdate
  632. * has no effect. Calling this method multiple times with different content will cause
  633. * an exception.
  634. *
  635. * @note Calling this method after prepareUpdate() has been called will cause an exception.
  636. *
  637. * @param User $user The user to act as context for pre-save transformation (PST).
  638. * Type hint should be reduced to UserIdentity at some point.
  639. * @param RevisionSlotsUpdate $slotsUpdate The new content of the slots to be updated
  640. * by this edit, before PST.
  641. * @param bool $useStash Whether to use stashed ParserOutput
  642. */
  643. public function prepareContent(
  644. User $user,
  645. RevisionSlotsUpdate $slotsUpdate,
  646. $useStash = true
  647. ) {
  648. if ( $this->slotsUpdate ) {
  649. if ( !$this->user ) {
  650. throw new LogicException(
  651. 'Unexpected state: $this->slotsUpdate was initialized, '
  652. . 'but $this->user was not.'
  653. );
  654. }
  655. if ( $this->user->getName() !== $user->getName() ) {
  656. throw new LogicException( 'Can\'t call prepareContent() again for different user! '
  657. . 'Expected ' . $this->user->getName() . ', got ' . $user->getName()
  658. );
  659. }
  660. if ( !$this->slotsUpdate->hasSameUpdates( $slotsUpdate ) ) {
  661. throw new LogicException(
  662. 'Can\'t call prepareContent() again with different slot content!'
  663. );
  664. }
  665. return; // prepareContent() already done, nothing to do
  666. }
  667. $this->assertTransition( 'has-content' );
  668. $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
  669. $title = $this->getTitle();
  670. $parentRevision = $this->grabCurrentRevision();
  671. // The edit may have already been prepared via api.php?action=stashedit
  672. $stashedEdit = false;
  673. // TODO: MCR: allow output for all slots to be stashed.
  674. if ( $useStash && $slotsUpdate->isModifiedSlot( SlotRecord::MAIN ) ) {
  675. $editStash = MediaWikiServices::getInstance()->getPageEditStash();
  676. $stashedEdit = $editStash->checkCache(
  677. $title,
  678. $slotsUpdate->getModifiedSlot( SlotRecord::MAIN )->getContent(),
  679. User::newFromIdentity( $user )
  680. );
  681. }
  682. $userPopts = ParserOptions::newFromUserAndLang( $user, $this->contLang );
  683. Hooks::run( 'ArticlePrepareTextForEdit', [ $wikiPage, $userPopts ] );
  684. $this->user = $user;
  685. $this->slotsUpdate = $slotsUpdate;
  686. if ( $parentRevision ) {
  687. $this->revision = MutableRevisionRecord::newFromParentRevision( $parentRevision );
  688. } else {
  689. $this->revision = new MutableRevisionRecord( $title );
  690. }
  691. // NOTE: user and timestamp must be set, so they can be used for
  692. // {{subst:REVISIONUSER}} and {{subst:REVISIONTIMESTAMP}} in PST!
  693. $this->revision->setTimestamp( wfTimestampNow() );
  694. $this->revision->setUser( $user );
  695. // Set up ParserOptions to operate on the new revision
  696. $oldCallback = $userPopts->getCurrentRevisionCallback();
  697. $userPopts->setCurrentRevisionCallback(
  698. function ( Title $parserTitle, $parser = false ) use ( $title, $oldCallback ) {
  699. if ( $parserTitle->equals( $title ) ) {
  700. $legacyRevision = new Revision( $this->revision );
  701. return $legacyRevision;
  702. } else {
  703. return call_user_func( $oldCallback, $parserTitle, $parser );
  704. }
  705. }
  706. );
  707. $pstContentSlots = $this->revision->getSlots();
  708. foreach ( $slotsUpdate->getModifiedRoles() as $role ) {
  709. $slot = $slotsUpdate->getModifiedSlot( $role );
  710. if ( $slot->isInherited() ) {
  711. // No PST for inherited slots! Note that "modified" slots may still be inherited
  712. // from an earlier version, e.g. for rollbacks.
  713. $pstSlot = $slot;
  714. } elseif ( $role === SlotRecord::MAIN && $stashedEdit ) {
  715. // TODO: MCR: allow PST content for all slots to be stashed.
  716. $pstSlot = SlotRecord::newUnsaved( $role, $stashedEdit->pstContent );
  717. } else {
  718. $content = $slot->getContent();
  719. $pstContent = $content->preSaveTransform( $title, $this->user, $userPopts );
  720. $pstSlot = SlotRecord::newUnsaved( $role, $pstContent );
  721. }
  722. $pstContentSlots->setSlot( $pstSlot );
  723. }
  724. foreach ( $slotsUpdate->getRemovedRoles() as $role ) {
  725. $pstContentSlots->removeSlot( $role );
  726. }
  727. $this->options['created'] = ( $parentRevision === null );
  728. $this->options['changed'] = ( $parentRevision === null
  729. || !$pstContentSlots->hasSameContent( $parentRevision->getSlots() ) );
  730. $this->doTransition( 'has-content' );
  731. if ( !$this->options['changed'] ) {
  732. // null-edit!
  733. // TODO: move this into MutableRevisionRecord
  734. // TODO: This needs to behave differently for a forced dummy edit!
  735. $this->revision->setId( $parentRevision->getId() );
  736. $this->revision->setTimestamp( $parentRevision->getTimestamp() );
  737. $this->revision->setPageId( $parentRevision->getPageId() );
  738. $this->revision->setParentId( $parentRevision->getParentId() );
  739. $this->revision->setUser( $parentRevision->getUser( RevisionRecord::RAW ) );
  740. $this->revision->setComment( $parentRevision->getComment( RevisionRecord::RAW ) );
  741. $this->revision->setMinorEdit( $parentRevision->isMinor() );
  742. $this->revision->setVisibility( $parentRevision->getVisibility() );
  743. // prepareUpdate() is redundant for null-edits
  744. $this->doTransition( 'has-revision' );
  745. } else {
  746. $this->parentRevision = $parentRevision;
  747. }
  748. $renderHints = [ 'use-master' => $this->useMaster(), 'audience' => RevisionRecord::RAW ];
  749. if ( $stashedEdit ) {
  750. /** @var ParserOutput $output */
  751. $output = $stashedEdit->output;
  752. // TODO: this should happen when stashing the ParserOutput, not now!
  753. $output->setCacheTime( $stashedEdit->timestamp );
  754. $renderHints['known-revision-output'] = $output;
  755. $this->logger->debug( __METHOD__ . ': using stashed edit output...' );
  756. }
  757. // NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
  758. // NOTE: the revision is either new or current, so we can bypass audience checks.
  759. $this->renderedRevision = $this->revisionRenderer->getRenderedRevision(
  760. $this->revision,
  761. null,
  762. null,
  763. $renderHints
  764. );
  765. }
  /**
   * Get the revision this update targets, i.e. the revision that is going to be the
   * page's current revision once the update has been applied.
   *
   * @note The content of the returned RevisionRecord must be treated as immutable by
   * callers, even when the object happens to be a MutableRevisionRecord. Other aspects
   * of such a record (for instance the user or the comment) may be modified, but such
   * modifications may not show up in ParserOutput before prepareUpdate() was called.
   *
   * @todo PageUpdater::makeNewRevision() currently relies on this method to build an
   * unsaved MutableRevisionRecord. Something like an UnsavedRevisionFactory service
   * should be introduced for that purpose instead!
   *
   * @return RevisionRecord
   */
  public function getRevision() {
      // Bails out with a LogicException unless a revision has been set on this updater.
      $this->assertPrepared( __METHOD__ );

      $targetRevision = $this->revision;

      return $targetRevision;
  }
  /**
   * Get the RenderedRevision held by this updater.
   *
   * Only available once the updater has been prepared; otherwise assertPrepared()
   * throws a LogicException.
   *
   * @return RenderedRevision
   */
  public function getRenderedRevision() {
      $this->assertPrepared( __METHOD__ );

      $rendered = $this->renderedRevision;

      return $rendered;
  }
  /**
   * Guard: make sure the page state has been recorded before $method proceeds.
   *
   * @param string $method Name of the calling method, appended to the error message
   * @throws LogicException if $this->pageState is empty
   */
  private function assertHasPageState( $method ) {
      if ( $this->pageState ) {
          return;
      }

      $message = 'Must call grabCurrentRevision() or prepareContent() '
          . 'or prepareUpdate() before calling ' . $method;

      throw new LogicException( $message );
  }
  /**
   * Guard: make sure a revision has been set on this updater before $method proceeds.
   *
   * @param string $method Name of the calling method, appended to the error message
   * @throws LogicException if $this->revision is not set
   */
  private function assertPrepared( $method ) {
      if ( $this->revision ) {
          return;
      }

      $message = 'Must call prepareContent() or prepareUpdate() before calling ' . $method;

      throw new LogicException( $message );
  }
  /**
   * Guard: make sure this updater is bound to a revision that has an ID
   * before $method proceeds.
   *
   * @param string $method Name of the calling method, appended to the error message
   * @throws LogicException if no revision is set, or if the revision has no ID yet
   */
  private function assertHasRevision( $method ) {
      // $this->revision may not be set at all yet (see assertPrepared()). Check for that
      // explicitly, so the caller gets the intended LogicException instead of a fatal
      // "Call to a member function getId() on null" error.
      if ( !$this->revision || !$this->revision->getId() ) {
          throw new LogicException(
              'Must call prepareUpdate() before calling ' . $method
          );
      }
  }
  /**
   * Tells whether this edit creates the page.
   *
   * Reports the 'created' option of this updater; requires the updater to be prepared.
   *
   * @return bool
   */
  public function isCreation() {
      $this->assertPrepared( __METHOD__ );

      $created = $this->options['created'];

      return $created;
  }
  /**
   * Tells whether the edit created, or is supposed to create, a new revision, i.e.
   * whether it is something other than a null-edit.
   *
   * Reports the 'changed' option of this updater; requires the updater to be prepared.
   *
   * @warning For now, "null-revisions" (revisions that have a revision record but do
   * not change any content) yield false after prepareContent(), but true after
   * prepareUpdate()! This should probably be fixed.
   *
   * @return bool
   */
  public function isChange() {
      $this->assertPrepared( __METHOD__ );

      $changed = $this->options['changed'];

      return $changed;
  }
  /**
   * Whether the page was a redirect before the edit.
   *
   * The answer is taken from $this->pageState['oldIsRedirect']. If that is still null,
   * it is derived from the old revision (if one is known) and stored back into the
   * page state, so the check is performed at most once.
   *
   * @return bool
   * @throws LogicException if the page state has not been initialized yet
   */
  public function wasRedirect() {
      $this->assertHasPageState( __METHOD__ );

      if ( $this->pageState['oldIsRedirect'] === null ) {
          // NOTE: 'oldRevision' is not always present in the page state: prepareUpdate()
          // only records it for null-edits, or when the 'oldrevision' option was given.
          // Use ?? to avoid an "undefined index" notice in that case.
          /** @var RevisionRecord|null $rev */
          $rev = $this->pageState['oldRevision'] ?? null;

          // Without a known old revision, the page is treated as not having been a redirect.
          $this->pageState['oldIsRedirect'] = $rev
              ? $this->revisionIsRedirect( $rev )
              : false;
      }

      return $this->pageState['oldIsRedirect'];
  }
  /**
   * Get the slots of the target revision, as they are after PST.
   *
   * @note The returned RevisionSlots object must be treated as immutable by callers,
   * even when it happens to be a MutableRevisionSlots instance.
   *
   * @return RevisionSlots
   */
  public function getSlots() {
      $this->assertPrepared( __METHOD__ );

      $slots = $this->revision->getSlots();

      return $slots;
  }
  /**
   * Get the RevisionSlotsUpdate describing this update.
   *
   * If no RevisionSlotsUpdate is known yet, one is derived by comparing the slots of the
   * target revision with the slots of the parent revision (if any), and is then kept
   * in $this->slotsUpdate for later calls.
   *
   * @return RevisionSlotsUpdate
   */
  private function getRevisionSlotsUpdate() {
      $this->assertPrepared( __METHOD__ );

      if ( $this->slotsUpdate ) {
          return $this->slotsUpdate;
      }

      $parent = $this->getParentRevision();
      $newSlots = $this->revision->getSlots();
      $parentSlots = $parent ? $parent->getSlots() : null;

      $this->slotsUpdate = RevisionSlotsUpdate::newFromRevisionSlots( $newSlots, $parentSlots );

      return $this->slotsUpdate;
  }
  /**
   * Get the names of all slot roles the new revision touches. This includes the
   * roles of removed slots.
   *
   * @return string[]
   */
  public function getTouchedSlotRoles() {
      $slotsUpdate = $this->getRevisionSlotsUpdate();

      return $slotsUpdate->getTouchedRoles();
  }
  /**
   * Get the names of all slot roles the new revision modifies. This does not include
   * the roles of removed slots.
   *
   * @return string[]
   */
  public function getModifiedSlotRoles() {
      $slotsUpdate = $this->getRevisionSlotsUpdate();

      return $slotsUpdate->getModifiedRoles();
  }
  /**
   * Get the names of all slot roles the new revision removes.
   *
   * @return string[]
   */
  public function getRemovedSlotRoles() {
      $slotsUpdate = $this->getRevisionSlotsUpdate();

      return $slotsUpdate->getRemovedRoles();
  }
  /**
   * Prepare derived data updates targeting the given Revision.
   *
   * Calling this method requires the given revision to be present in the database.
   * This may be right after a new revision has been created, or when re-generating
   * derived data e.g. in ApiPurge, RefreshLinksJob, and the refreshLinks
   * script.
   *
   * @see docs/pageupdater.txt for more information on when this method can and should be called.
   *
   * @note Calling this method more than once with the same revision has no effect.
   * $options are only used for the first call. Calling this method multiple times with
   * different revisions will cause an exception.
   *
   * @note If grabCurrentRevision() (or prepareContent()) has been called before
   * calling this method, $revision->getParentRevision() has to refer to the revision that
   * was the current revision at the time grabCurrentRevision() was called.
   *
   * @param RevisionRecord $revision The revision to bind this updater to; must have an ID.
   * @param array $options Array of options, following indexes are used:
   * - changed: bool, whether the revision changed the content (default true)
   * - created: bool, whether the revision created the page (default false)
   * - moved: bool, whether the page was moved (default false)
   * - restored: bool, whether the page was undeleted (default false)
   * - oldrevision: Revision (or RevisionRecord) object for the pre-update revision
   *   (default null)
   * - triggeringUser: The user triggering the update (UserIdentity, defaults to the
   *   user who created the revision)
   * - oldredirect: bool, null, or string 'no-change' (default null):
   *    - bool: whether the page was counted as a redirect before that
   *      revision, only used if changed is true and created is false
   *    - null or 'no-change': don't update the redirect status.
   * - oldcountable: bool, null, or string 'no-change' (default null):
   *    - bool: whether the page was counted as an article before that
   *      revision, only used if changed is true and created is false
   *    - null: if created is false, don't update the article count; if created
   *      is true, do update the article count
   *    - 'no-change': don't update the article count, ever
   *   When set to null, pageState['oldCountable'] will be used instead if available.
   * - causeAction: an arbitrary string identifying the reason for the update.
   *   See DataUpdate::getCauseAction(). (default 'unknown')
   * - causeAgent: name of the user who caused the update. See DataUpdate::getCauseAgent().
   *   (string, default 'unknown')
   * - known-revision-output: a combined canonical ParserOutput for the revision, perhaps
   *   from some cache. The caller is responsible for ensuring that the ParserOutput indeed
   *   matches the $revision and $options. This mechanism is intended as a temporary
   *   stop-gap, for the time until caches have been changed to store RenderedRevision
   *   states instead of ParserOutput objects. (default: null) (since 1.33)
   *
   * @throws InvalidArgumentException if $revision has no ID
   * @throws LogicException if the updater is already bound to a different revision, or
   *  if $revision mismatches the content, parent, or author established earlier
   */
  public function prepareUpdate( RevisionRecord $revision, array $options = [] ) {
      Assert::parameter(
          !isset( $options['oldrevision'] )
              || $options['oldrevision'] instanceof Revision
              || $options['oldrevision'] instanceof RevisionRecord,
          '$options["oldrevision"]',
          'must be a RevisionRecord (or Revision)'
      );
      Assert::parameter(
          !isset( $options['triggeringUser'] )
              || $options['triggeringUser'] instanceof UserIdentity,
          '$options["triggeringUser"]',
          'must be a UserIdentity'
      );

      if ( !$revision->getId() ) {
          throw new InvalidArgumentException(
              'Revision must have an ID set for it to be used with prepareUpdate()!'
          );
      }

      // Already bound to a saved revision: repeated calls are only OK for the same revision.
      if ( $this->revision && $this->revision->getId() ) {
          if ( $this->revision->getId() === $revision->getId() ) {
              return; // nothing to do!
          } else {
              throw new LogicException(
                  'Trying to re-use DerivedPageDataUpdater with revision '
                  . $revision->getId()
                  . ', but it\'s already bound to revision '
                  . $this->revision->getId()
              );
          }
      }

      // An unsaved revision was set up earlier (by prepareContent()): the saved revision
      // must carry the same content.
      if ( $this->revision
          && !$this->revision->getSlots()->hasSameContent( $revision->getSlots() )
      ) {
          throw new LogicException(
              'The Revision provided has mismatching content!'
          );
      }

      // Override fields defined in $this->options with values from $options.
      $this->options = array_intersect_key( $options, $this->options ) + $this->options;

      // Determine the ID of the revision that was current before this update.
      if ( $this->revision ) {
          $oldId = $this->pageState['oldId'] ?? 0;
          $this->options['newrev'] = ( $revision->getId() !== $oldId );
      } elseif ( isset( $this->options['oldrevision'] ) ) {
          /** @var Revision|RevisionRecord $oldRev */
          $oldRev = $this->options['oldrevision'];
          $oldId = $oldRev->getId();
          $this->options['newrev'] = ( $revision->getId() !== $oldId );
      } else {
          $oldId = $revision->getParentId();
      }

      if ( $oldId !== null ) {
          // XXX: what if $options['changed'] disagrees?
          // MovePage creates a dummy revision with changed = false!
          // We may want to explicitly distinguish between "no new revision" (null-edit)
          // and "new revision without new content" (dummy revision).

          if ( $oldId === $revision->getParentId() ) {
              // NOTE: this may still be a NullRevision!
              // New revision!
              $this->options['changed'] = true;
          } elseif ( $oldId === $revision->getId() ) {
              // Null-edit!
              $this->options['changed'] = false;
          } else {
              // This indicates that calling code has given us the wrong Revision object
              throw new LogicException(
                  'The Revision mismatches old revision ID: '
                  . 'Old ID is ' . $oldId
                  . ', parent ID is ' . $revision->getParentId()
                  . ', revision ID is ' . $revision->getId()
              );
          }
      }

      // If prepareContent() was used to generate the PST content (which is indicated by
      // $this->slotsUpdate being set), and this is not a null-edit, then the given
      // revision must have the acting user as the revision author. Otherwise, user
      // signatures generated by PST would mismatch the user in the revision record.
      if ( $this->user !== null && $this->options['changed'] && $this->slotsUpdate ) {
          // Bypass audience checks here, like everywhere else in this class: with the
          // default (public) audience, getUser() may return null for a revision whose
          // author is hidden, which would break the comparison below.
          $user = $revision->getUser( RevisionRecord::RAW );
          if ( !$this->user->equals( $user ) ) {
              throw new LogicException(
                  'The Revision provided has a mismatching actor: expected '
                  . $this->user->getName()
                  . ', got '
                  . $user->getName()
              );
          }
      }

      // If $this->pageState was not yet initialized by grabCurrentRevision or prepareContent,
      // emulate the state of the page table before the edit, as good as we can.
      if ( !$this->pageState ) {
          $this->pageState = [
              'oldIsRedirect' => isset( $this->options['oldredirect'] )
                  && is_bool( $this->options['oldredirect'] )
                      ? $this->options['oldredirect']
                      : null,
              'oldCountable' => isset( $this->options['oldcountable'] )
                  && is_bool( $this->options['oldcountable'] )
                      ? $this->options['oldcountable']
                      : null,
          ];

          if ( $this->options['changed'] ) {
              // The edit created a new revision
              $this->pageState['oldId'] = $revision->getParentId();

              // NOTE: 'oldRevision' stays unset if no 'oldrevision' option was given.
              if ( isset( $this->options['oldrevision'] ) ) {
                  $rev = $this->options['oldrevision'];
                  $this->pageState['oldRevision'] = $rev instanceof Revision
                      ? $rev->getRevisionRecord()
                      : $rev;
              }
          } else {
              // This is a null-edit, so the old revision IS the new revision!
              $this->pageState['oldId'] = $revision->getId();
              $this->pageState['oldRevision'] = $revision;
          }
      }

      // "created" is forced here
      $this->options['created'] = ( $this->options['created'] ||
          ( $this->pageState['oldId'] === 0 ) );

      $this->revision = $revision;

      $this->doTransition( 'has-revision' );

      // NOTE: in case we have a User object, don't override with a UserIdentity.
      // We already checked that $revision->getUser() matches $this->user;
      if ( !$this->user ) {
          $this->user = $revision->getUser( RevisionRecord::RAW );
      }

      // Prune any output that depends on the revision ID.
      if ( $this->renderedRevision ) {
          $this->renderedRevision->updateRevision( $revision );
      } else {
          // NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
          // NOTE: the revision is either new or current, so we can bypass audience checks.
          $this->renderedRevision = $this->revisionRenderer->getRenderedRevision(
              $this->revision,
              null,
              null,
              [
                  'use-master' => $this->useMaster(),
                  'audience' => RevisionRecord::RAW,
                  'known-revision-output' => $options['known-revision-output'] ?? null
              ]
          );

          // XXX: Since we presumably are dealing with the current revision,
          // we could try to get the ParserOutput from the parser cache.
      }

      // TODO: optionally get ParserOutput from the ParserCache here.
      // Move the logic used by RefreshLinksJob here!
  }
  /**
   * Build a legacy PreparedEdit object from the state of this updater.
   *
   * @deprecated This only exists for B/C, use the getters on DerivedPageDataUpdater directly!
   * @return PreparedEdit
   */
  public function getPreparedEdit() {
      $this->assertPrepared( __METHOD__ );

      $update = $this->getRevisionSlotsUpdate();
      $mainRole = SlotRecord::MAIN;

      $edit = new PreparedEdit();
      $edit->popts = $this->getCanonicalParserOptions();
      $edit->parserOutputCallback = [ $this, 'getCanonicalParserOutput' ];
      $edit->pstContent = $this->revision->getContent( $mainRole );

      if ( $update->isModifiedSlot( $mainRole ) ) {
          // The content as given by the update for the main slot
          $edit->newContent = $update->getModifiedSlot( $mainRole )->getContent();
      } else {
          // XXX: can we just remove this?
          $edit->newContent = $this->revision->getContent( $mainRole );
      }

      $edit->oldContent = null; // unused. // XXX: could get this from the parent revision

      // assertPrepared() above guarantees that $this->revision is set.
      $edit->revid = $this->revision->getId();

      // NOTE(review): 'output' is never assigned in this method; presumably PreparedEdit
      // resolves it on demand via parserOutputCallback - confirm against PreparedEdit.
      $edit->timestamp = $edit->output->getCacheTime();
      $edit->format = $edit->pstContent->getDefaultFormat();

      return $edit;
  }
  /**
   * Get the ParserOutput of a single slot from the rendered revision.
   *
   * @param string $role Role name of the slot
   * @param bool $generateHtml Passed to the rendered revision as the 'generate-html' hint
   * @return ParserOutput
   */
  public function getSlotParserOutput( $role, $generateHtml = true ) {
      $hints = [ 'generate-html' => $generateHtml ];
      $rendered = $this->getRenderedRevision();

      return $rendered->getSlotParserOutput( $role, $hints );
  }
  /**
   * Get the ParserOutput of the revision as a whole from the rendered revision.
   *
   * @return ParserOutput
   */
  public function getCanonicalParserOutput() {
      $rendered = $this->getRenderedRevision();

      return $rendered->getRevisionParserOutput();
  }
  /**
   * Get the ParserOptions used by the rendered revision.
   *
   * @return ParserOptions
   */
  public function getCanonicalParserOptions() {
      $rendered = $this->getRenderedRevision();

      return $rendered->getOptions();
  }
  /**
   * Collect the secondary data updates for the target revision.
   *
   * The result starts with a LinksUpdate for the combined canonical ParserOutput,
   * followed by the updates supplied by the content handlers (and, for B/C, by the
   * Content objects) of all slots of the revision, followed by deletion updates for
   * slots that were removed. The list is finally passed through the
   * RevisionDataUpdates hook, which may modify it.
   *
   * @param bool $recursive Passed on to LinksUpdate and to the legacy
   *  Content::getSecondaryDataUpdates()
   *
   * @return DeferrableUpdate[] Empty if the content is deleted
   */
  public function getSecondaryDataUpdates( $recursive = false ) {
      if ( $this->isContentDeleted() ) {
          // This shouldn't happen, since the current content is always public,
          // and DataUpdates are only needed for current content.
          return [];
      }

      $canonicalOutput = $this->getCanonicalParserOutput();

      // The list starts with a LinksUpdate for the combined canonical output.
      $allUpdates = [
          new LinksUpdate( $this->getTitle(), $canonicalOutput, $recursive )
      ];

      // HACK: predicates for dropping redundant and incomplete updates produced
      // by the legacy (B/C) code paths below.
      $notLinksUpdate = static function ( $candidate ) {
          return !( $candidate instanceof LinksUpdate );
      };
      $notLinksDeletionUpdate = static function ( $candidate ) {
          return !( $candidate instanceof LinksDeletionUpdate );
      };

      // NOTE: Updates are run for all slots, not just the modified ones! Otherwise,
      // info for an inherited slot may end up being removed. This is also needed
      // to ensure that purges are effective.
      $rendered = $this->getRenderedRevision();
      foreach ( $this->getSlots()->getSlotRoles() as $slotRole ) {
          $slotContent = $this->getRawSlot( $slotRole )->getContent();
          $contentHandler = $slotContent->getContentHandler();

          $handlerUpdates = $contentHandler->getSecondaryDataUpdates(
              $this->getTitle(),
              $slotContent,
              $slotRole,
              $rendered
          );

          // TODO: remove B/C hack in 1.32!
          // NOTE: we assume that the combined output contains all relevant meta-data for
          // all slots!
          $bcUpdates = $slotContent->getSecondaryDataUpdates(
              $this->getTitle(),
              null,
              $recursive,
              $canonicalOutput
          );

          $allUpdates = array_merge(
              $allUpdates,
              $handlerUpdates,
              array_filter( $bcUpdates, $notLinksUpdate )
          );
      }

      // XXX: if a slot was removed by an earlier edit, but deletion updates failed to run at
      // that time, we don't know for which slots to run deletion updates when purging a page.
      // We'd have to examine the entire history of the page to determine that. Perhaps there
      // could be a "try extra hard" mode for that case that would run a DB query to find all
      // roles/models ever used on the page. On the other hand, removing slots should be quite
      // rare, so perhaps this isn't worth the trouble.

      // TODO: consolidate with similar logic in WikiPage::getDeletionUpdates()
      $wikiPage = $this->getWikiPage();
      $parent = $this->getParentRevision();
      foreach ( $this->getRemovedSlotRoles() as $removedRole ) {
          // HACK: we should get the content model of the removed slot from a SlotRoleHandler!
          // For now, find the slot in the parent revision - if the slot was removed, it should
          // always exist in the parent revision.
          $removedContent = $parent->getSlot( $removedRole, RevisionRecord::RAW )->getContent();
          $contentHandler = $removedContent->getContentHandler();

          $handlerUpdates = $contentHandler->getDeletionUpdates(
              $this->getTitle(),
              $removedRole
          );

          // TODO: remove B/C hack in 1.32!
          $bcUpdates = $removedContent->getDeletionUpdates( $wikiPage );

          $allUpdates = array_merge(
              $allUpdates,
              $handlerUpdates,
              array_filter( $bcUpdates, $notLinksDeletionUpdate )
          );
      }

      // TODO: hard deprecate SecondaryDataUpdates in favor of RevisionDataUpdates in 1.33!
      Hooks::run(
          'RevisionDataUpdates',
          [ $this->getTitle(), $rendered, &$allUpdates ]
      );

      return $allUpdates;
  }
  /**
   * Do standard updates after page edit, purge, or import.
   * Update links tables, site stats, search index, title cache, message cache, etc.
   * Purges pages that depend on this page when appropriate.
   * With a 10% chance, triggers pruning the recent changes table.
   *
   * Most of the work is only scheduled here (as deferred updates or jobs) rather than
   * performed immediately. If the page no longer exists by the time this method runs,
   * it bails out after the legacy hooks have been called.
   *
   * @note prepareUpdate() must be called before calling this method!
   *
   * MCR migration note: this replaces WikiPage::doEditUpdates.
   */
  public function doUpdates() {
      $this->assertTransition( 'done' );

      // TODO: move logic into a PageEventEmitter service

      $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!

      $legacyUser = User::newFromIdentity( $this->user );
      $legacyRevision = new Revision( $this->revision );

      $userParserOptions = ParserOptions::newFromUser( $legacyUser );
      // Decide whether to save the final canonical parser output based on the fact that
      // users are typically redirected to viewing pages right after they edit those pages.
      // Due to vary-revision-id, getting/saving that output here might require a reparse.
      if ( $userParserOptions->matchesForCacheKey( $this->getCanonicalParserOptions() ) ) {
          // Whether getting the final output requires a reparse or not, the user will
          // need canonical output anyway, since that is what their parser options use.
          // A reparse now at least has the benefit of various warm process caches.
          $this->doParserCacheUpdate();
      } else {
          // If the user does not have canonical parse options, then don't risk another parse
          // to make output they cannot use on the page refresh that typically occurs after
          // editing. Doing the parser output save post-send will still benefit *other* users.
          DeferredUpdates::addCallableUpdate( function () {
              $this->doParserCacheUpdate();
          } );
      }

      // Defer the getCanonicalParserOutput() call triggered by getSecondaryDataUpdates()
      // by wrapping the code that schedules the secondary updates in a callback itself
      $wrapperUpdate = new MWCallableUpdate(
          function () {
              $this->doSecondaryDataUpdates( [
                  // T52785 do not update any other pages on a null edit
                  'recursive' => $this->options['changed']
              ] );
          },
          __METHOD__
      );
      $wrapperUpdate->setTransactionRoundRequirement( $wrapperUpdate::TRX_ROUND_ABSENT );
      DeferredUpdates::addUpdate( $wrapperUpdate );

      // TODO: MCR: check if *any* changed slot supports categories!
      if ( $this->rcWatchCategoryMembership
          && $this->getContentHandler( SlotRecord::MAIN )->supportsCategories() === true
          && ( $this->options['changed'] || $this->options['created'] )
          && !$this->options['restored']
      ) {
          // Note: jobs are pushed after deferred updates, so the job should be able to see
          // the recent change entry (also done via deferred updates) and carry over any
          // bot/deletion/IP flags, etc.
          $this->jobQueueGroup->lazyPush(
              CategoryMembershipChangeJob::newSpec(
                  $this->getTitle(),
                  $this->revision->getTimestamp()
              )
          );
      }

      // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
      // @note: Extensions should *avoid* calling getCanonicalParserOutput() when using
      // this hook whenever possible in order to avoid unnecessary additional parses.
      $editInfo = $this->getPreparedEdit();
      Hooks::run( 'ArticleEditUpdates',
          [ &$wikiPage, &$editInfo, $this->options['changed'] ] );

      // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
      if ( Hooks::run( 'ArticleEditUpdatesDeleteFromRecentchanges', [ &$wikiPage ] ) ) {
          // Flush old entries from the `recentchanges` table, with a chance of 1 in 10
          if ( mt_rand( 0, 9 ) === 0 ) {
              $this->jobQueueGroup->lazyPush( RecentChangesUpdateJob::newPurgeJob() );
          }
      }

      $id = $this->getPageId();
      $title = $this->getTitle();
      $shortTitle = $title->getDBkey();

      if ( !$title->exists() ) {
          wfDebug( __METHOD__ . ": Page doesn't exist any more, bailing out\n" );

          $this->doTransition( 'done' );
          return;
      }

      // Site statistics: determine how the number of "good" articles changes.
      DeferredUpdates::addCallableUpdate( function () {
          if (
              $this->options['oldcountable'] === 'no-change' ||
              ( !$this->options['changed'] && !$this->options['moved'] )
          ) {
              $good = 0;
          } elseif ( $this->options['created'] ) {
              $good = (int)$this->isCountable();
          } elseif ( $this->options['oldcountable'] !== null ) {
              $good = (int)$this->isCountable()
                  - (int)$this->options['oldcountable'];
          } elseif ( isset( $this->pageState['oldCountable'] ) ) {
              $good = (int)$this->isCountable()
                  - (int)$this->pageState['oldCountable'];
          } else {
              $good = 0;
          }
          $edits = $this->options['changed'] ? 1 : 0;
          $pages = $this->options['created'] ? 1 : 0;

          DeferredUpdates::addUpdate( SiteStatsUpdate::factory(
              [ 'edits' => $edits, 'articles' => $good, 'pages' => $pages ]
          ) );
      } );

      // TODO: make search infrastructure aware of slots!
      $mainSlot = $this->revision->getSlot( SlotRecord::MAIN );
      if ( !$mainSlot->isInherited() && !$this->isContentDeleted() ) {
          DeferredUpdates::addUpdate( new SearchUpdate( $id, $title, $mainSlot->getContent() ) );
      }

      // If this is another user's talk page, update newtalk.
      // Don't do this if $options['changed'] = false (null-edits) nor if
      // it's a minor edit and the user making the edit doesn't generate notifications for those.
      // NOTE: the title keys must be compared strictly: both are strings, and a loose
      // comparison treats numeric-looking names such as "00123" and "123" as equal,
      // which would wrongly suppress the notification.
      if ( $this->options['changed']
          && $title->getNamespace() == NS_USER_TALK
          && $shortTitle !== $legacyUser->getTitleKey()
          && !( $this->revision->isMinor() && MediaWikiServices::getInstance()
                  ->getPermissionManager()
                  ->userHasRight( $legacyUser, 'nominornewtalk' ) )
      ) {
          $recipient = User::newFromName( $shortTitle, false );
          if ( !$recipient ) {
              wfDebug( __METHOD__ . ": invalid username\n" );
          } else {
              // Allow extensions to prevent user notification
              // when a new message is added to their talk page
              // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
              if ( Hooks::run( 'ArticleEditUpdateNewTalk', [ &$wikiPage, $recipient ] ) ) {
                  // Notify anonymous (IP) users as well as logged-in users
                  if ( User::isIP( $shortTitle ) || $recipient->isLoggedIn() ) {
                      $recipient->setNewtalk( true, $legacyRevision );
                  } else {
                      wfDebug( __METHOD__ . ": don't need to notify a nonexistent user\n" );
                  }
              }
          }
      }

      if ( $title->getNamespace() == NS_MEDIAWIKI
          && $this->getRevisionSlotsUpdate()->isModifiedSlot( SlotRecord::MAIN )
      ) {
          $mainContent = $this->isContentDeleted() ? null : $this->getRawContent( SlotRecord::MAIN );

          $this->messageCache->updateMessageOverride( $title, $mainContent );
      }

      // TODO: move onArticleCreate and onArticle into a PageEventEmitter service
      if ( $this->options['created'] ) {
          WikiPage::onArticleCreate( $title );
      } elseif ( $this->options['changed'] ) { // T52785
          WikiPage::onArticleEdit( $title, $legacyRevision, $this->getTouchedSlotRoles() );
      }

      $oldRevision = $this->getParentRevision();
      $oldLegacyRevision = $oldRevision ? new Revision( $oldRevision ) : null;

      // TODO: In the wiring, register a listener for this on the new PageEventEmitter
      ResourceLoaderWikiModule::invalidateModuleCache(
          $title,
          $oldLegacyRevision,
          $legacyRevision,
          $this->loadbalancerFactory->getLocalDomainID()
      );

      $this->doTransition( 'done' );
  }
  /**
   * Do secondary data updates (such as updating link tables).
   *
   * MCR note: this method is temporarily exposed via WikiPage::doSecondaryDataUpdates.
   *
   * @param array $options
   *   - recursive: make the update recursive, i.e. also update pages which transclude the
   *     current page or otherwise depend on it (default: false)
   *   - defer: one of the DeferredUpdates constants, or false to run immediately after waiting
   *     for replication of the changes from the SecondaryDataUpdates hooks (default: false)
   * @throws InvalidArgumentException if the 'defer' option is neither false,
   *   DeferredUpdates::PRESEND nor DeferredUpdates::POSTSEND
   * @since 1.32
   */
  public function doSecondaryDataUpdates( array $options = [] ) {
      $this->assertHasRevision( __METHOD__ );
      $options += [ 'recursive' => false, 'defer' => false ];

      $deferValues = [ false, DeferredUpdates::PRESEND, DeferredUpdates::POSTSEND ];
      if ( !in_array( $options['defer'], $deferValues, true ) ) {
          // Describe non-scalar values by their type: concatenating an array or an object
          // directly would raise a string conversion notice/error instead of the intended
          // InvalidArgumentException. Scalar values keep producing the same message as before.
          $badValue = is_scalar( $options['defer'] )
              ? $options['defer']
              : gettype( $options['defer'] );
          throw new InvalidArgumentException( 'Invalid value for defer: ' . $badValue );
      }

      $updates = $this->getSecondaryDataUpdates( $options['recursive'] );

      // Fall back to $this->user when no 'triggeringUser' option was set, and make sure
      // we end up with a full User object.
      $triggeringUser = $this->options['triggeringUser'] ?? $this->user;
      if ( !$triggeringUser instanceof User ) {
          $triggeringUser = User::newFromIdentity( $triggeringUser );
      }

      $causeAction = $this->options['causeAction'] ?? 'unknown';
      $causeAgent = $this->options['causeAgent'] ?? 'unknown';
      $legacyRevision = new Revision( $this->revision );

      // Attach cause and revision/user context to the updates that accept it.
      foreach ( $updates as $update ) {
          if ( $update instanceof DataUpdate ) {
              $update->setCause( $causeAction, $causeAgent );
          }
          if ( $update instanceof LinksUpdate ) {
              $update->setRevision( $legacyRevision );
              $update->setTriggeringUser( $triggeringUser );
          }
      }

      if ( $options['defer'] === false ) {
          // T221577: flush any transaction; each update needs outer transaction scope
          $this->loadbalancerFactory->commitMasterChanges( __METHOD__ );

          foreach ( $updates as $update ) {
              DeferredUpdates::attemptUpdate( $update, $this->loadbalancerFactory );
          }
      } else {
          // Queue the updates for the requested stage (PRESEND or POSTSEND).
          foreach ( $updates as $update ) {
              DeferredUpdates::addUpdate( $update, $options['defer'] );
          }
      }
  }
  /**
   * Save the canonical parser output for the current revision to the parser cache.
   *
   * The cache entry is stamped with the revision's timestamp when the 'newrev' option
   * is set, and with the cache time of the parser output otherwise.
   */
  public function doParserCacheUpdate() {
      $this->assertHasRevision( __METHOD__ );

      // TODO: ParserCache should accept a RevisionRecord instead
      $page = $this->getWikiPage();

      // NOTE: this may trigger the first parsing of the new content after an edit (when not
      // using pre-generated stashed output).
      // XXX: we may want to use the PoolCounter here. This would perhaps allow the initial parse
      // to be performed post-send. The client could already follow a HTTP redirect to the
      // page view, but would then have to wait for a response until rendering is complete.
      $parserOutput = $this->getCanonicalParserOutput();

      // Use the revision timestamp in the case of a freshly saved edit, as that matches
      // page_touched and a mismatch would trigger an unnecessary reparse.
      if ( $this->options['newrev'] ) {
          $cacheTime = $this->revision->getTimestamp();
      } else {
          $cacheTime = $parserOutput->getCacheTime();
      }

      $parserOptions = $this->getCanonicalParserOptions();
      $revisionId = $this->revision->getId();

      // Save it to the parser cache.
      $this->parserCache->save( $parserOutput, $page, $parserOptions, $cacheTime, $revisionId );
  }
  1463. }