NameTableStore.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. <?php
  2. /**
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. * http://www.gnu.org/copyleft/gpl.html
  17. *
  18. * @file
  19. */
  20. namespace MediaWiki\Storage;
  21. use Exception;
  22. use IExpiringStore;
  23. use Psr\Log\LoggerInterface;
  24. use WANObjectCache;
  25. use Wikimedia\Assert\Assert;
  26. use Wikimedia\Rdbms\Database;
  27. use Wikimedia\Rdbms\IDatabase;
  28. use Wikimedia\Rdbms\ILoadBalancer;
  29. /**
  30. * @author Addshore
  31. * @since 1.31
  32. */
  33. class NameTableStore {
  34. /** @var ILoadBalancer */
  35. private $loadBalancer;
  36. /** @var WANObjectCache */
  37. private $cache;
  38. /** @var LoggerInterface */
  39. private $logger;
  40. /** @var string[] */
  41. private $tableCache = null;
  42. /** @var bool|string */
  43. private $domain = false;
  44. /** @var int */
  45. private $cacheTTL;
  46. /** @var string */
  47. private $table;
  48. /** @var string */
  49. private $idField;
  50. /** @var string */
  51. private $nameField;
  52. /** @var null|callable */
  53. private $normalizationCallback = null;
  54. /** @var null|callable */
  55. private $insertCallback = null;
  56. /**
  57. * @param ILoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
  58. * @param WANObjectCache $cache A cache manager for caching data. This can be the local
  59. * wiki's default instance even if $dbDomain refers to a different wiki, since
  60. * makeGlobalKey() is used to constructed a key that allows cached names from
  61. * the same database to be re-used between wikis. For example, enwiki and frwiki will
  62. * use the same cache keys for names from the wikidatawiki database, regardless
  63. * of the cache's default key space.
  64. * @param LoggerInterface $logger
  65. * @param string $table
  66. * @param string $idField
  67. * @param string $nameField
  68. * @param callable|null $normalizationCallback Normalization to be applied to names before being
  69. * saved or queried. This should be a callback that accepts and returns a single string.
  70. * @param bool|string $dbDomain Database domain ID. Use false for the local database domain.
  71. * @param callable|null $insertCallback Callback to change insert fields accordingly.
  72. * This parameter was introduced in 1.32
  73. */
  74. public function __construct(
  75. ILoadBalancer $dbLoadBalancer,
  76. WANObjectCache $cache,
  77. LoggerInterface $logger,
  78. $table,
  79. $idField,
  80. $nameField,
  81. callable $normalizationCallback = null,
  82. $dbDomain = false,
  83. callable $insertCallback = null
  84. ) {
  85. $this->loadBalancer = $dbLoadBalancer;
  86. $this->cache = $cache;
  87. $this->logger = $logger;
  88. $this->table = $table;
  89. $this->idField = $idField;
  90. $this->nameField = $nameField;
  91. $this->normalizationCallback = $normalizationCallback;
  92. $this->domain = $dbDomain;
  93. $this->cacheTTL = IExpiringStore::TTL_MONTH;
  94. $this->insertCallback = $insertCallback;
  95. }
  96. /**
  97. * @param int $index A database index, like DB_MASTER or DB_REPLICA
  98. * @param int $flags Database connection flags
  99. *
  100. * @return IDatabase
  101. */
  102. private function getDBConnection( $index, $flags = 0 ) {
  103. return $this->loadBalancer->getConnectionRef( $index, [], $this->domain, $flags );
  104. }
  105. /**
  106. * Gets the cache key for names.
  107. *
  108. * The cache key is constructed based on the wiki ID passed to the constructor, and allows
  109. * sharing of name tables cached for a specific database between wikis.
  110. *
  111. * @return string
  112. */
  113. private function getCacheKey() {
  114. return $this->cache->makeGlobalKey(
  115. 'NameTableSqlStore',
  116. $this->table,
  117. $this->loadBalancer->resolveDomainID( $this->domain )
  118. );
  119. }
  120. /**
  121. * @param string $name
  122. * @return string
  123. */
  124. private function normalizeName( $name ) {
  125. if ( $this->normalizationCallback === null ) {
  126. return $name;
  127. }
  128. return call_user_func( $this->normalizationCallback, $name );
  129. }
  130. /**
  131. * Acquire the id of the given name.
  132. * This creates a row in the table if it doesn't already exist.
  133. *
  134. * @note If called within an atomic section, there is a chance for the acquired ID
  135. * to be lost on rollback. A best effort is made to re-insert the mapping
  136. * in this case, and consistency of the cache with the database table is ensured
  137. * by re-loading the map after a failed atomic section. However, there is no guarantee
  138. * that an ID returned by this method is valid outside the transaction in which it
  139. * was produced. This means that calling code should not retain the return value beyond
  140. * the scope of a transaction, but rather call acquireId() again after the transaction
  141. * is complete. In some rare cases, this may produce an ID different from the first call.
  142. *
  143. * @param string $name
  144. * @throws NameTableAccessException
  145. * @return int
  146. */
  147. public function acquireId( $name ) {
  148. Assert::parameterType( 'string', $name, '$name' );
  149. $name = $this->normalizeName( $name );
  150. $table = $this->getTableFromCachesOrReplica();
  151. $searchResult = array_search( $name, $table, true );
  152. if ( $searchResult === false ) {
  153. $id = $this->store( $name );
  154. if ( $id === null ) {
  155. // RACE: $name was already in the db, probably just inserted, so load from master.
  156. // Use DBO_TRX to avoid missing inserts due to other threads or REPEATABLE-READs.
  157. $table = $this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT );
  158. $searchResult = array_search( $name, $table, true );
  159. if ( $searchResult === false ) {
  160. // Insert failed due to IGNORE flag, but DB_MASTER didn't give us the data
  161. $m = "No insert possible but master didn't give us a record for " .
  162. "'{$name}' in '{$this->table}'";
  163. $this->logger->error( $m );
  164. throw new NameTableAccessException( $m );
  165. }
  166. } else {
  167. if ( isset( $table[$id] ) ) {
  168. // This can happen when a transaction is rolled back and acquireId is called in
  169. // an onTransactionResolution() callback, which gets executed before retryStore()
  170. // has a chance to run. The right thing to do in this case is to discard the old
  171. // value. According to the contract of acquireId, the caller should not have
  172. // used it outside the transaction, so it should not be persisted anywhere after
  173. // the rollback.
  174. $m = "Got ID $id for '$name' from insert"
  175. . " into '{$this->table}', but ID $id was previously associated with"
  176. . " the name '{$table[$id]}'. Overriding the old value, which presumably"
  177. . " has been removed from the database due to a transaction rollback.";
  178. $this->logger->warning( $m );
  179. }
  180. $table[$id] = $name;
  181. $searchResult = $id;
  182. // As store returned an ID we know we inserted so delete from WAN cache
  183. $dbw = $this->getDBConnection( DB_MASTER );
  184. $dbw->onTransactionPreCommitOrIdle( function () {
  185. $this->cache->delete( $this->getCacheKey() );
  186. } );
  187. }
  188. $this->tableCache = $table;
  189. }
  190. return $searchResult;
  191. }
  192. /**
  193. * Reloads the name table from the master database, and purges the WAN cache entry.
  194. *
  195. * @note This should only be called in situations where the local cache has been detected
  196. * to be out of sync with the database. There should be no reason to call this method
  197. * from outside the NameTabelStore during normal operation. This method may however be
  198. * useful in unit tests.
  199. *
  200. * @param int $connFlags ILoadBalancer::CONN_XXX flags. Optional.
  201. *
  202. * @return string[] The freshly reloaded name map
  203. */
  204. public function reloadMap( $connFlags = 0 ) {
  205. if ( $connFlags !== 0 && defined( 'MW_PHPUNIT_TEST' ) ) {
  206. // HACK: We can't use $connFlags while doing PHPUnit tests, because the
  207. // fake database tables are bound to a single connection.
  208. $connFlags = 0;
  209. }
  210. $dbw = $this->getDBConnection( DB_MASTER, $connFlags );
  211. $this->tableCache = $this->loadTable( $dbw );
  212. $dbw->onTransactionPreCommitOrIdle( function () {
  213. $this->cache->reap( $this->getCacheKey(), INF );
  214. } );
  215. return $this->tableCache;
  216. }
  217. /**
  218. * Get the id of the given name.
  219. * If the name doesn't exist this will throw.
  220. * This should be used in cases where we believe the name already exists or want to check for
  221. * existence.
  222. *
  223. * @param string $name
  224. * @throws NameTableAccessException The name does not exist
  225. * @return int Id
  226. */
  227. public function getId( $name ) {
  228. Assert::parameterType( 'string', $name, '$name' );
  229. $name = $this->normalizeName( $name );
  230. $table = $this->getTableFromCachesOrReplica();
  231. $searchResult = array_search( $name, $table, true );
  232. if ( $searchResult !== false ) {
  233. return $searchResult;
  234. }
  235. throw NameTableAccessException::newFromDetails( $this->table, 'name', $name );
  236. }
  237. /**
  238. * Get the name of the given id.
  239. * If the id doesn't exist this will throw.
  240. * This should be used in cases where we believe the id already exists.
  241. *
  242. * Note: Calls to this method will result in a master select for non existing IDs.
  243. *
  244. * @param int $id
  245. * @throws NameTableAccessException The id does not exist
  246. * @return string name
  247. */
  248. public function getName( $id ) {
  249. Assert::parameterType( 'integer', $id, '$id' );
  250. $table = $this->getTableFromCachesOrReplica();
  251. if ( array_key_exists( $id, $table ) ) {
  252. return $table[$id];
  253. }
  254. $fname = __METHOD__;
  255. $table = $this->cache->getWithSetCallback(
  256. $this->getCacheKey(),
  257. $this->cacheTTL,
  258. function ( $oldValue, &$ttl, &$setOpts ) use ( $id, $fname ) {
  259. // Check if cached value is up-to-date enough to have $id
  260. if ( is_array( $oldValue ) && array_key_exists( $id, $oldValue ) ) {
  261. // Completely leave the cache key alone
  262. $ttl = WANObjectCache::TTL_UNCACHEABLE;
  263. // Use the old value
  264. return $oldValue;
  265. }
  266. // Regenerate from replica DB, and master DB if needed
  267. foreach ( [ DB_REPLICA, DB_MASTER ] as $source ) {
  268. // Log a fallback to master
  269. if ( $source === DB_MASTER ) {
  270. $this->logger->info(
  271. $fname . ' falling back to master select from ' .
  272. $this->table . ' with id ' . $id
  273. );
  274. }
  275. $db = $this->getDBConnection( $source );
  276. $cacheSetOpts = Database::getCacheSetOptions( $db );
  277. $table = $this->loadTable( $db );
  278. if ( array_key_exists( $id, $table ) ) {
  279. break; // found it
  280. }
  281. }
  282. // Use the value from last source checked
  283. $setOpts += $cacheSetOpts;
  284. return $table;
  285. },
  286. [ 'minAsOf' => INF ] // force callback run
  287. );
  288. $this->tableCache = $table;
  289. if ( array_key_exists( $id, $table ) ) {
  290. return $table[$id];
  291. }
  292. throw NameTableAccessException::newFromDetails( $this->table, 'id', $id );
  293. }
  294. /**
  295. * Get the whole table, in no particular order as a map of ids to names.
  296. * This method could be subject to DB or cache lag.
  297. *
  298. * @return string[] keys are the name ids, values are the names themselves
  299. * Example: [ 1 => 'foo', 3 => 'bar' ]
  300. */
  301. public function getMap() {
  302. return $this->getTableFromCachesOrReplica();
  303. }
  304. /**
  305. * @return string[]
  306. */
  307. private function getTableFromCachesOrReplica() {
  308. if ( $this->tableCache !== null ) {
  309. return $this->tableCache;
  310. }
  311. $table = $this->cache->getWithSetCallback(
  312. $this->getCacheKey(),
  313. $this->cacheTTL,
  314. function ( $oldValue, &$ttl, &$setOpts ) {
  315. $dbr = $this->getDBConnection( DB_REPLICA );
  316. $setOpts += Database::getCacheSetOptions( $dbr );
  317. return $this->loadTable( $dbr );
  318. }
  319. );
  320. $this->tableCache = $table;
  321. return $table;
  322. }
  323. /**
  324. * Gets the table from the db
  325. *
  326. * @param IDatabase $db
  327. *
  328. * @return string[]
  329. */
  330. private function loadTable( IDatabase $db ) {
  331. $result = $db->select(
  332. $this->table,
  333. [
  334. 'id' => $this->idField,
  335. 'name' => $this->nameField
  336. ],
  337. [],
  338. __METHOD__,
  339. [ 'ORDER BY' => 'id' ]
  340. );
  341. $assocArray = [];
  342. foreach ( $result as $row ) {
  343. $assocArray[$row->id] = $row->name;
  344. }
  345. return $assocArray;
  346. }
  347. /**
  348. * Stores the given name in the DB, returning the ID when an insert occurs.
  349. *
  350. * @param string $name
  351. * @return int|null int if we know the ID, null if we don't
  352. */
  353. private function store( $name ) {
  354. Assert::parameterType( 'string', $name, '$name' );
  355. Assert::parameter( $name !== '', '$name', 'should not be an empty string' );
  356. // Note: this is only called internally so normalization of $name has already occurred.
  357. $dbw = $this->getDBConnection( DB_MASTER );
  358. $id = null;
  359. $dbw->doAtomicSection(
  360. __METHOD__,
  361. function ( IDatabase $unused, $fname )
  362. use ( $name, &$id, $dbw ) {
  363. // NOTE: use IDatabase from the parent scope here, not the function parameter.
  364. // If $dbw is a wrapper around the actual DB, we need to call the wrapper here,
  365. // not the inner instance.
  366. $dbw->insert(
  367. $this->table,
  368. $this->getFieldsToStore( $name ),
  369. $fname,
  370. [ 'IGNORE' ]
  371. );
  372. if ( $dbw->affectedRows() === 0 ) {
  373. $this->logger->info(
  374. 'Tried to insert name into table ' . $this->table . ', but value already existed.'
  375. );
  376. return;
  377. }
  378. $id = $dbw->insertId();
  379. // Any open transaction may still be rolled back. If that happens, we have to re-try the
  380. // insertion and restore a consistent state of the cached table.
  381. $dbw->onAtomicSectionCancel(
  382. function ( $trigger, IDatabase $unused ) use ( $name, $id, $dbw ) {
  383. $this->retryStore( $dbw, $name, $id );
  384. },
  385. $fname );
  386. },
  387. IDatabase::ATOMIC_CANCELABLE
  388. );
  389. return $id;
  390. }
  391. /**
  392. * After the initial insertion got rolled back, this can be used to try the insertion again,
  393. * and ensure a consistent state of the cache.
  394. *
  395. * @param IDatabase $dbw
  396. * @param string $name
  397. * @param int $id
  398. */
  399. private function retryStore( IDatabase $dbw, $name, $id ) {
  400. // NOTE: in the closure below, use the IDatabase from the original method call,
  401. // not the one passed to the closure as a parameter.
  402. // If $dbw is a wrapper around the actual DB, we need to call the wrapper,
  403. // not the inner instance.
  404. try {
  405. $dbw->doAtomicSection(
  406. __METHOD__,
  407. function ( IDatabase $unused, $fname ) use ( $name, $id, &$ok, $dbw ) {
  408. // Try to insert a row with the ID we originally got.
  409. // If that fails (because of a key conflict), we will just try to get another ID again later.
  410. $dbw->insert(
  411. $this->table,
  412. $this->getFieldsToStore( $name, $id ),
  413. $fname
  414. );
  415. // Make sure we re-load the map in case this gets rolled back again.
  416. // We could re-try once more, but that bears the risk of an infinite loop.
  417. // So let's just give up on the ID.
  418. $dbw->onAtomicSectionCancel(
  419. function ( $trigger, IDatabase $unused ) use ( $name, $id, $dbw ) {
  420. $this->logger->warning(
  421. 'Re-insertion of name into table ' . $this->table
  422. . ' was rolled back. Giving up and reloading the cache.'
  423. );
  424. $this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT );
  425. },
  426. $fname
  427. );
  428. $this->logger->info(
  429. 'Re-insert name into table ' . $this->table . ' after failed transaction.'
  430. );
  431. },
  432. IDatabase::ATOMIC_CANCELABLE
  433. );
  434. } catch ( Exception $ex ) {
  435. $this->logger->error(
  436. 'Re-insertion of name into table ' . $this->table . ' failed: ' . $ex->getMessage()
  437. );
  438. } finally {
  439. // NOTE: we reload regardless of whether the above insert succeeded. There is
  440. // only three possibilities: the insert succeeded, so the new map will have
  441. // the desired $id/$name mapping. Or the insert failed because another
  442. // process already inserted that same $id/$name mapping, in which case the
  443. // new map will also have it. Or another process grabbed the desired ID for
  444. // another name, or the database refuses to insert the given ID into the
  445. // auto increment field - in that case, the new map will not have a mapping
  446. // for $name (or has a different mapping for $name). In that last case, we can
  447. // only hope that the ID produced within the failed transaction has not been
  448. // used outside that transaction.
  449. $this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT );
  450. }
  451. }
  452. /**
  453. * @param string $name
  454. * @param int|null $id
  455. * @return array
  456. */
  457. private function getFieldsToStore( $name, $id = null ) {
  458. $fields = [];
  459. $fields[$this->nameField] = $name;
  460. if ( $id !== null ) {
  461. $fields[$this->idField] = $id;
  462. }
  463. if ( $this->insertCallback !== null ) {
  464. $fields = call_user_func( $this->insertCallback, $fields );
  465. }
  466. return $fields;
  467. }
  468. }