HashRing.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. <?php
  2. /**
  3. * Convenience class for weighted consistent hash rings.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  22. /**
  23. * Convenience class for weighted consistent hash rings
  24. *
  25. * This deterministically maps "keys" to a set of "locations" while avoiding clumping
  26. *
  27. * Each location is represented by a number of nodes on a ring proportionate to the ratio
  28. * of its weight compared to the total location weight. Note positions are deterministically
  29. * derived from the hash of the location name. Nodes are responsible for the portion of the
  30. * ring, counter-clockwise, up until the next node. Locations are responsible for all portions
  31. * of the ring that the location's nodes are responsible for.
  32. *
  33. * A location that is temporarily "ejected" is said to be absent from the "live" ring.
  34. * If no location ejections are active, then the base ring and live ring are identical.
  35. *
  36. * @since 1.22
  37. */
  38. class HashRing implements Serializable {
  39. /** @var string Hashing algorithm for hash() */
  40. protected $algo;
  41. /** @var int[] Non-empty (location => integer weight) */
  42. protected $weightByLocation;
  43. /** @var int[] Map of (location => UNIX timestamp) */
  44. protected $ejectExpiryByLocation;
  45. /** @var array[] Non-empty position-ordered list of (position, location name) */
  46. protected $baseRing;
  47. /** @var array[] Non-empty position-ordered list of (position, location name) */
  48. protected $liveRing;
  49. /** @var float Number of positions on the ring */
  50. const RING_SIZE = 4294967296.0; // 2^32
  51. /** @var integer Overall number of node groups per server */
  52. const HASHES_PER_LOCATION = 40;
  53. /** @var integer Number of nodes in a node group */
  54. const SECTORS_PER_HASH = 4;
  55. const KEY_POS = 0;
  56. const KEY_LOCATION = 1;
  57. /** @var int Consider all locations */
  58. const RING_ALL = 0;
  59. /** @var int Only consider "live" locations */
  60. const RING_LIVE = 1;
  61. /**
  62. * Make a consistent hash ring given a set of locations and their weight values
  63. *
  64. * @param int[] $map Map of (location => weight)
  65. * @param string $algo Hashing algorithm listed in hash_algos() [optional]
  66. * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries
  67. * @since 1.31
  68. */
  69. public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) {
  70. $this->init( $map, $algo, $ejections );
  71. }
  72. /**
  73. * @param int[] $map Map of (location => integer)
  74. * @param string $algo Hashing algorithm
  75. * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires
  76. */
  77. protected function init( array $map, $algo, array $ejections ) {
  78. if ( !in_array( $algo, hash_algos(), true ) ) {
  79. throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." );
  80. }
  81. $weightByLocation = array_filter( $map );
  82. if ( $weightByLocation === [] ) {
  83. throw new UnexpectedValueException( "No locations with non-zero weight." );
  84. } elseif ( min( $map ) < 0 ) {
  85. throw new InvalidArgumentException( "Location weight cannot be negative." );
  86. }
  87. $this->algo = $algo;
  88. $this->weightByLocation = $weightByLocation;
  89. $this->ejectExpiryByLocation = $ejections;
  90. $this->baseRing = $this->buildLocationRing( $this->weightByLocation );
  91. }
  92. /**
  93. * Get the location of an item on the ring
  94. *
  95. * @param string $item
  96. * @return string Location
  97. * @throws UnexpectedValueException
  98. */
  99. final public function getLocation( $item ) {
  100. return $this->getLocations( $item, 1 )[0];
  101. }
  102. /**
  103. * Get the location of an item on the ring followed by the next ring locations
  104. *
  105. * @param string $item
  106. * @param int $limit Maximum number of locations to return
  107. * @param int $from One of the RING_* class constants
  108. * @return string[] List of locations
  109. * @throws InvalidArgumentException
  110. * @throws UnexpectedValueException
  111. */
  112. public function getLocations( $item, $limit, $from = self::RING_ALL ) {
  113. if ( $from === self::RING_ALL ) {
  114. $ring = $this->baseRing;
  115. } elseif ( $from === self::RING_LIVE ) {
  116. $ring = $this->getLiveRing();
  117. } else {
  118. throw new InvalidArgumentException( "Invalid ring source specified." );
  119. }
  120. // Short-circuit for the common single-location case. Note that if there was only one
  121. // location and it was ejected from the live ring, getLiveRing() would have error out.
  122. if ( count( $this->weightByLocation ) == 1 ) {
  123. return ( $limit > 0 ) ? [ $ring[0][self::KEY_LOCATION] ] : [];
  124. }
  125. // Locate the node index for this item's position on the hash ring
  126. $itemIndex = $this->findNodeIndexForPosition( $this->getItemPosition( $item ), $ring );
  127. $locations = [];
  128. $currentIndex = null;
  129. while ( count( $locations ) < $limit ) {
  130. if ( $currentIndex === null ) {
  131. $currentIndex = $itemIndex;
  132. } else {
  133. $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring );
  134. if ( $currentIndex === $itemIndex ) {
  135. break; // all nodes visited
  136. }
  137. }
  138. $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION];
  139. if ( !in_array( $nodeLocation, $locations, true ) ) {
  140. // Ignore other nodes for the same locations already added
  141. $locations[] = $nodeLocation;
  142. }
  143. }
  144. return $locations;
  145. }
  146. /**
  147. * @param float $position
  148. * @param array[] $ring Either the base or live ring
  149. * @return int|null
  150. */
  151. private function findNodeIndexForPosition( $position, $ring ) {
  152. $count = count( $ring );
  153. if ( $count === 0 ) {
  154. return null;
  155. }
  156. $index = null;
  157. $lowPos = 0;
  158. $highPos = $count;
  159. while ( true ) {
  160. $midPos = (int)( ( $lowPos + $highPos ) / 2 );
  161. if ( $midPos === $count ) {
  162. $index = 0;
  163. break;
  164. }
  165. $midVal = $ring[$midPos][self::KEY_POS];
  166. $midMinusOneVal = ( $midPos === 0 ) ? 0 : $ring[$midPos - 1][self::KEY_POS];
  167. if ( $position <= $midVal && $position > $midMinusOneVal ) {
  168. $index = $midPos;
  169. break;
  170. }
  171. if ( $midVal < $position ) {
  172. $lowPos = $midPos + 1;
  173. } else {
  174. $highPos = $midPos - 1;
  175. }
  176. if ( $lowPos > $highPos ) {
  177. $index = 0;
  178. break;
  179. }
  180. }
  181. return $index;
  182. }
  183. /**
  184. * Get the map of locations to weight (does not include zero weight items)
  185. *
  186. * @return int[]
  187. */
  188. public function getLocationWeights() {
  189. return $this->weightByLocation;
  190. }
  191. /**
  192. * Remove a location from the "live" hash ring
  193. *
  194. * @param string $location
  195. * @param int $ttl Seconds
  196. * @return bool Whether some non-ejected locations are left
  197. * @throws UnexpectedValueException
  198. */
  199. public function ejectFromLiveRing( $location, $ttl ) {
  200. if ( !isset( $this->weightByLocation[$location] ) ) {
  201. throw new UnexpectedValueException( "No location '$location' in the ring." );
  202. }
  203. $expiry = $this->getCurrentTime() + $ttl;
  204. $this->ejectExpiryByLocation[$location] = $expiry;
  205. $this->liveRing = null; // invalidate ring cache
  206. return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) );
  207. }
  208. /**
  209. * Get the location of an item on the "live" ring
  210. *
  211. * @param string $item
  212. * @return string Location
  213. * @throws UnexpectedValueException
  214. */
  215. final public function getLiveLocation( $item ) {
  216. return $this->getLocations( $item, 1, self::RING_LIVE )[0];
  217. }
  218. /**
  219. * Get the location of an item on the "live" ring, as well as the next locations
  220. *
  221. * @param string $item
  222. * @param int $limit Maximum number of locations to return
  223. * @return string[] List of locations
  224. * @throws UnexpectedValueException
  225. */
  226. final public function getLiveLocations( $item, $limit ) {
  227. return $this->getLocations( $item, $limit, self::RING_LIVE );
  228. }
  229. /**
  230. * Get the map of "live" locations to weight (does not include zero weight items)
  231. *
  232. * @return int[]
  233. * @throws UnexpectedValueException
  234. */
  235. public function getLiveLocationWeights() {
  236. $now = $this->getCurrentTime();
  237. return array_diff_key(
  238. $this->weightByLocation,
  239. array_filter(
  240. $this->ejectExpiryByLocation,
  241. function ( $expiry ) use ( $now ) {
  242. return ( $expiry > $now );
  243. }
  244. )
  245. );
  246. }
  247. /**
  248. * @param int[] $weightByLocation
  249. * @return array[]
  250. */
  251. private function buildLocationRing( array $weightByLocation ) {
  252. $locationCount = count( $weightByLocation );
  253. $totalWeight = array_sum( $weightByLocation );
  254. $ring = [];
  255. // Assign nodes to all locations based on location weight
  256. $claimed = []; // (position as string => (node, index))
  257. foreach ( $weightByLocation as $location => $weight ) {
  258. $ratio = $weight / $totalWeight;
  259. // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available;
  260. // assign a few groups of nodes to this location based on its weight.
  261. $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount );
  262. for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) {
  263. // For efficiency, get 4 points per hash call and 4X node count.
  264. // If $algo is MD5, then this matches that of with libketama.
  265. // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
  266. $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" );
  267. foreach ( $positions as $gi => $position ) {
  268. $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location";
  269. $posKey = (string)$position; // large integer
  270. if ( isset( $claimed[$posKey] ) ) {
  271. // Disallow duplicates for sanity (name decides precedence)
  272. if ( $claimed[$posKey]['node'] > $node ) {
  273. continue;
  274. } else {
  275. unset( $ring[$claimed[$posKey]['index']] );
  276. }
  277. }
  278. $ring[] = [
  279. self::KEY_POS => $position,
  280. self::KEY_LOCATION => $location
  281. ];
  282. $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ];
  283. }
  284. }
  285. }
  286. // Sort the locations into clockwise order based on the hash ring position
  287. usort( $ring, function ( $a, $b ) {
  288. if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) {
  289. throw new UnexpectedValueException( 'Duplicate node positions.' );
  290. }
  291. return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 );
  292. } );
  293. return $ring;
  294. }
  295. /**
  296. * @param string $item Key
  297. * @return float Ring position; integral number in [0, self::RING_SIZE - 1]
  298. */
  299. private function getItemPosition( $item ) {
  300. // If $algo is MD5, then this matches that of with libketama.
  301. // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
  302. $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 );
  303. if ( strlen( $octets ) != 4 ) {
  304. throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." );
  305. }
  306. $pos = unpack( 'V', $octets )[1];
  307. if ( $pos < 0 ) {
  308. // Most-significant-bit is set, causing unpack() to return a negative integer due
  309. // to the fact that it returns a signed int. Cast it to an unsigned integer string.
  310. $pos = sprintf( '%u', $pos );
  311. }
  312. return (float)$pos;
  313. }
  314. /**
  315. * @param string $nodeGroupName
  316. * @return float[] Four ring positions on [0, self::RING_SIZE - 1]
  317. */
  318. private function getNodePositionQuartet( $nodeGroupName ) {
  319. $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 );
  320. if ( strlen( $octets ) != 16 ) {
  321. throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." );
  322. }
  323. $positions = [];
  324. foreach ( unpack( 'V4', $octets ) as $signed ) {
  325. $positions[] = (float)sprintf( '%u', $signed );
  326. }
  327. return $positions;
  328. }
  329. /**
  330. * @param int $i Valid index for a node in the ring
  331. * @param array[] $ring Either the base or live ring
  332. * @return int Valid index for a node in the ring
  333. */
  334. private function getNextClockwiseNodeIndex( $i, $ring ) {
  335. if ( !isset( $ring[$i] ) ) {
  336. throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
  337. }
  338. $next = $i + 1;
  339. return ( $next < count( $ring ) ) ? $next : 0;
  340. }
  341. /**
  342. * Get the "live" hash ring (which does not include ejected locations)
  343. *
  344. * @return array[]
  345. * @throws UnexpectedValueException
  346. */
  347. protected function getLiveRing() {
  348. if ( !$this->ejectExpiryByLocation ) {
  349. return $this->baseRing; // nothing ejected
  350. }
  351. $now = $this->getCurrentTime();
  352. if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) {
  353. // Live ring needs to be regerenated...
  354. $this->ejectExpiryByLocation = array_filter(
  355. $this->ejectExpiryByLocation,
  356. function ( $expiry ) use ( $now ) {
  357. return ( $expiry > $now );
  358. }
  359. );
  360. if ( count( $this->ejectExpiryByLocation ) ) {
  361. // Some locations are still ejected from the ring
  362. $liveRing = [];
  363. foreach ( $this->baseRing as $i => $nodeInfo ) {
  364. $location = $nodeInfo[self::KEY_LOCATION];
  365. if ( !isset( $this->ejectExpiryByLocation[$location] ) ) {
  366. $liveRing[] = $nodeInfo;
  367. }
  368. }
  369. } else {
  370. $liveRing = $this->baseRing;
  371. }
  372. $this->liveRing = $liveRing;
  373. }
  374. if ( !$this->liveRing ) {
  375. throw new UnexpectedValueException( "The live ring is currently empty." );
  376. }
  377. return $this->liveRing;
  378. }
  379. /**
  380. * @return int UNIX timestamp
  381. */
  382. protected function getCurrentTime() {
  383. return time();
  384. }
  385. public function serialize() {
  386. return serialize( [
  387. 'algorithm' => $this->algo,
  388. 'locations' => $this->weightByLocation,
  389. 'ejections' => $this->ejectExpiryByLocation
  390. ] );
  391. }
  392. public function unserialize( $serialized ) {
  393. $data = unserialize( $serialized );
  394. if ( is_array( $data ) ) {
  395. $this->init( $data['locations'], $data['algorithm'], $data['ejections'] );
  396. } else {
  397. throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." );
  398. }
  399. }
  400. }