/* reducer_impl.cpp                  -*-C++-*-
 *
 *************************************************************************
 *
 *  @copyright
 *  Copyright (C) 2009-2013, Intel Corporation
 *  All rights reserved.
 *
 *  @copyright
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  @copyright
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 *
 *  Patents Pending, Intel Corporation.
 **************************************************************************/

/**
 * Support for reducers
 */

// ICL: Don't complain about conversion from pointer to same-sized integral type
// in hashfun. That's why we're using size_t
#ifdef _WIN32
#   pragma warning(disable: 1684)
#endif

#include "reducer_impl.h"
#include "scheduler.h"
#include "bug.h"
#include "os.h"
#include "global_state.h"
#include "frame_malloc.h"

#include "cilk/hyperobject_base.h"
#include "cilktools/cilkscreen.h"
#include "internal/abi.h"

#if REDPAR_DEBUG > 0
#include <stdio.h>
#include <stdlib.h>
#endif

#define DBG if(0) // if(1) enables some internal checks

// Check that w is the currently executing worker.  This method is a
// no-op unless the debug level is set high enough.
static inline void verify_current_wkr(__cilkrts_worker *w)
{
#if REDPAR_DEBUG >= 5
    __cilkrts_worker* tmp = __cilkrts_get_tls_worker();
    if (w != tmp) {
        fprintf(stderr, "W=%d, actual=%d... missing a refresh....\n",
                w->self,
                tmp->self);
    }
    CILK_ASSERT(w == tmp);
#endif
}

// Suppress clang warning that the expression result is unused
#if defined(__clang__) && (! defined(__INTEL_COMPILER))
#   pragma clang diagnostic push
#   pragma clang diagnostic ignored "-Wunused-value"
#endif // __clang__

/// Helper class to disable and re-enable Cilkscreen
struct DisableCilkscreen
{
    DisableCilkscreen () { __cilkscreen_disable_checking(); }
    ~DisableCilkscreen () { __cilkscreen_enable_checking(); }
};

/// Helper class to enable and re-disable Cilkscreen
struct EnableCilkscreen
{
    EnableCilkscreen () { __cilkscreen_enable_checking(); }
    ~EnableCilkscreen () { __cilkscreen_disable_checking(); }
};

#if defined(__clang__) && (! defined(__INTEL_COMPILER))
#   pragma clang diagnostic pop
#endif // __clang__

/**
 * @brief Element for a hyperobject
 */
struct elem {
    void                       *key;  ///< Shared key for this hyperobject
    __cilkrts_hyperobject_base *hb;   ///< Base of the hyperobject.
    void                       *view; ///< Strand-private view of this hyperobject

    /// Destroy and deallocate the view object for this element and set view
    /// to null.
    void destroy();

    /// Returns true if this element contains a leftmost view.
    bool is_leftmost() const;
};
/** Bucket containing at most NMAX elements */
struct bucket {
    /// Size of the array of elements for this bucket
    size_t nmax;

    /**
     * We use the ``struct hack'' to allocate an array of variable
     * dimension at the end of the struct.  However, we allocate a
     * total of NMAX+1 elements instead of NMAX.  The last one always
     * has key == 0, which we use as a termination criterion.
     */
    elem el[1];
};
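
// Illustration of the struct hack above (a sketch only; it assumes a plain
// malloc for clarity, whereas the runtime uses __cilkrts_frame_malloc):
//
//     // sizeof(bucket) already includes el[0], so adding nmax*sizeof(elem)
//     // yields storage for nmax+1 elements in total.
//     bucket *b = (bucket *)malloc(sizeof(bucket) + nmax * sizeof(elem));
//     b->el[nmax].key = 0;  // sentinel: iteration stops at key == 0
//
// sizeof_bucket() below computes exactly this size, and grow() preserves the
// sentinel invariant when copying into a larger bucket.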
/**
 * Class that implements the map for reducers so we can find the
 * view for a strand.
 */
struct cilkred_map {
    /** Handy pointer to the global state */
    global_state_t *g;

    /** Number of elements in table */
    size_t nelem;

    /** Number of buckets */
    size_t nbuckets;

    /** Array of pointers to buckets */
    bucket **buckets;

    /** Set true if merging (for debugging purposes) */
    bool merging;

    /** Set true for leftmost reducer map */
    bool is_leftmost;

    /** @brief Return element mapped to 'key' or null if not found. */
    elem *lookup(void *key);

    /**
     * @brief Insert key/value element into hash map without rehashing.
     * Does not check for duplicate key.
     */
    elem *insert_no_rehash(__cilkrts_worker           *w,
                           void                       *key,
                           __cilkrts_hyperobject_base *hb,
                           void                       *value);

    /**
     * @brief Insert key/value element into hash map, rehashing if necessary.
     * Does not check for duplicate key.
     */
    inline elem *rehash_and_insert(__cilkrts_worker           *w,
                                   void                       *key,
                                   __cilkrts_hyperobject_base *hb,
                                   void                       *value);

    /** @brief Grow bucket by one element, reallocating bucket if necessary */
    static elem *grow(__cilkrts_worker *w, bucket **bp);

    /** @brief Rehash a worker's reducer map */
    void rehash(__cilkrts_worker *);

    /**
     * @brief Returns true if a rehash is needed due to the number of
     * elements that have been inserted.
     */
    inline bool need_rehash_p() const;

    /** @brief Allocate and initialize the buckets */
    void make_buckets(__cilkrts_worker *w, size_t nbuckets);

    /**
     * Specify behavior when the same key is present in both maps passed
     * into merge().
     */
    enum merge_kind
    {
        MERGE_UNORDERED,  ///< Assertion fails
        MERGE_INTO_LEFT,  ///< Merges the argument from the right into the left
        MERGE_INTO_RIGHT  ///< Merges the argument from the left into the right
    };

    /**
     * @brief Merge another reducer map into this one, destroying the other
     * map in the process.
     */
    __cilkrts_worker* merge(__cilkrts_worker *current_wkr,
                            cilkred_map      *other_map,
                            enum merge_kind   kind);

    /** @brief check consistency of a reducer map */
    void check(bool allow_null_view);

    /** @brief Test whether the cilkred_map is empty */
    bool is_empty() { return nelem == 0; }
};
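
// How the pieces below fit together, sketched for orientation (the scheduler
// drives these calls; this is not a public API):
//
//     cilkred_map *h = __cilkrts_make_reducer_map(w);  // 1 bucket, empty
//     h->rehash_and_insert(w, key, hb, view);          // grows/rehashes as needed
//     elem *e = h->lookup(key);                        // NULL if key absent
//     w = h->merge(w, other_map, cilkred_map::MERGE_INTO_LEFT);
//     __cilkrts_destroy_reducer_map(w, h);             // frees views and buckets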
static inline struct cilkred_map* install_new_reducer_map(__cilkrts_worker *w) {
    cilkred_map *h;
    h = __cilkrts_make_reducer_map(w);
    w->reducer_map = h;
    return h;
}

static size_t sizeof_bucket(size_t nmax)
{
    bucket *b = 0;
    return (sizeof(*b) + nmax * sizeof(b->el[0]));
}

static bucket *alloc_bucket(__cilkrts_worker *w, size_t nmax)
{
    bucket *b = (bucket *)
        __cilkrts_frame_malloc(w, sizeof_bucket(nmax));
    b->nmax = nmax;
    return b;
}

static void free_bucket(__cilkrts_worker *w, bucket **bp)
{
    bucket *b = *bp;
    if (b) {
        __cilkrts_frame_free(w, b, sizeof_bucket(b->nmax));
        *bp = 0;
    }
}

/* round up nmax to fill a memory allocator block completely */
static size_t roundup(size_t nmax)
{
    size_t sz = sizeof_bucket(nmax);

    /* round up size to a full malloc block */
    sz = __cilkrts_frame_malloc_roundup(sz);

    /* invert sizeof_bucket() */
    nmax = ((sz - sizeof(bucket)) / sizeof(elem));

    return nmax;
}
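
// Worked example for roundup() (block sizes here are made up for
// illustration): if sizeof_bucket(4) were 120 bytes and the frame
// allocator's smallest covering block 160 bytes, the 40 spare bytes are
// converted back into whole elems -- nmax becomes
// (160 - sizeof(bucket)) / sizeof(elem) -- so the bucket fills the entire
// block instead of wasting the slack.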
static bool is_power_of_2(size_t n)
{
    return (n & (n - 1)) == 0;
}

void cilkred_map::make_buckets(__cilkrts_worker *w,
                               size_t            new_nbuckets)
{
    nbuckets = new_nbuckets;

    CILK_ASSERT(is_power_of_2(nbuckets));
#if defined __GNUC__ && defined __ICC
    /* bug workaround -- suppress calls to _intel_fast_memset */
    bucket *volatile*new_buckets = (bucket *volatile*)
#else
    bucket **new_buckets = (bucket **)
#endif
        __cilkrts_frame_malloc(w, nbuckets * sizeof(*(buckets)));

#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, desc=make_buckets, new_buckets=%p, new_nbuckets=%zd\n",
            w->self, new_buckets, new_nbuckets);
#endif

    for (size_t i = 0; i < new_nbuckets; ++i)
        new_buckets[i] = 0;
#if defined __GNUC__ && defined __ICC
    buckets = (bucket **)new_buckets;
#else
    buckets = new_buckets;
#endif
    nelem = 0;
}

static void free_buckets(__cilkrts_worker *w,
                         bucket          **buckets,
                         size_t            nbuckets)
{
    size_t i;

#if REDPAR_DEBUG >= 1
    verify_current_wkr(w);
    fprintf(stderr, "W=%d, desc=free_buckets, buckets=%p, size=%zd\n",
            w->self, buckets,
            nbuckets * sizeof(*buckets));
#endif

    for (i = 0; i < nbuckets; ++i)
        free_bucket(w, buckets + i);

    __cilkrts_frame_free(w, buckets, nbuckets * sizeof(*buckets));
}

static size_t minsz(size_t nelem)
{
    return 1U + nelem + nelem / 8U;
}

static size_t nextsz(size_t nelem)
{
    return 2 * nelem;
}

bool cilkred_map::need_rehash_p() const
{
    return minsz(nelem) > nbuckets;
}
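
// Example of the growth policy above: minsz() demands roughly one bucket
// per element plus ~12% headroom.  With nbuckets == 8, minsz(nelem) first
// exceeds 8 at nelem == 8 (1 + 8 + 8/8 == 10), so rehash_and_insert()
// doubles the table to nextsz(8) == 16 buckets before storing a ninth
// element, keeping the load factor at or below about 1.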
static inline size_t hashfun(const cilkred_map *h, void *key)
{
    size_t k = (size_t) key;

    k ^= k >> 21;
    k ^= k >> 8;
    k ^= k >> 3;

    return k & (h->nbuckets - 1);
}
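
// Sketch of what hashfun() does with a typical key (values illustrative):
// keys are view addresses, so their low bits carry alignment patterns.
// The xor-shift cascade folds higher-entropy bits downward before the
// power-of-2 mask selects a bucket:
//
//     size_t k = 0x7ffd2a40;        // e.g. a 16-byte-aligned address
//     k ^= k >> 21;                 // mix high bits into the middle
//     k ^= k >> 8;
//     k ^= k >> 3;                  // low bits now depend on many input bits
//     size_t slot = k & (8 - 1);    // nbuckets == 8, always a power of 2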
// Given a __cilkrts_hyperobject_base, return the key to that hyperobject in
// the reducer map.
static inline void* get_hyperobject_key(__cilkrts_hyperobject_base *hb)
{
    // The current implementation uses the address of the leftmost view as
    // the key.
    return reinterpret_cast<char*>(hb) + hb->__view_offset;
}

// Given a hyperobject key, return a pointer to the leftmost object.  In the
// current implementation, the address of the leftmost object IS the key, so
// this function is effectively a no-op.
static inline void* get_leftmost_view(void *key)
{
    return key;
}
/* debugging support: check consistency of a reducer map */
void cilkred_map::check(bool allow_null_view)
{
    size_t count = 0;

    CILK_ASSERT(buckets);
    for (size_t i = 0; i < nbuckets; ++i) {
        bucket *b = buckets[i];
        if (b)
            for (elem *el = b->el; el->key; ++el) {
                CILK_ASSERT(allow_null_view || el->view);
                ++count;
            }
    }
    CILK_ASSERT(nelem == count);
    /*global_reducer_map::check();*/
}

/* grow bucket by one element, reallocating bucket if necessary */
elem *cilkred_map::grow(__cilkrts_worker *w,
                        bucket          **bp)
{
    size_t i, nmax, nnmax;
    bucket *b, *nb;

    b = *bp;
    if (b) {
        nmax = b->nmax;
        /* find empty element if any */
        for (i = 0; i < nmax; ++i)
            if (b->el[i].key == 0)
                return &(b->el[i]);
        /* do not use the last one even if empty */
    } else {
        nmax = 0;
    }

    verify_current_wkr(w);
    /* allocate a new bucket */
    nnmax = roundup(2 * nmax);
    nb = alloc_bucket(w, nnmax);

    /* copy old bucket into new */
    for (i = 0; i < nmax; ++i)
        nb->el[i] = b->el[i];

    free_bucket(w, bp); *bp = nb;

    /* zero out extra elements */
    for (; i < nnmax; ++i)
        nb->el[i].key = 0;

    /* zero out the last one */
    nb->el[i].key = 0;

    return &(nb->el[nmax]);
}

elem *cilkred_map::insert_no_rehash(__cilkrts_worker           *w,
                                    void                       *key,
                                    __cilkrts_hyperobject_base *hb,
                                    void                       *view)
{
#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=insert_no_rehash, this_map=%p]\n",
            w->self, this);
    verify_current_wkr(w);
#endif

    CILK_ASSERT((w == 0 && g == 0) || w->g == g);
    CILK_ASSERT(key != 0);
    CILK_ASSERT(view != 0);

    elem *el = grow(w, &(buckets[hashfun(this, key)]));

#if REDPAR_DEBUG >= 3
    fprintf(stderr, "[W=%d, this=%p, inserting key=%p, view=%p, el = %p]\n",
            w->self, this, key, view, el);
#endif

    el->key  = key;
    el->hb   = hb;
    el->view = view;
    ++nelem;

    return el;
}

void cilkred_map::rehash(__cilkrts_worker *w)
{
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "[W=%d, desc=rehash, this_map=%p, g=%p, w->g=%p]\n",
            w->self, this, g, w->g);
    verify_current_wkr(w);
#endif
    CILK_ASSERT((w == 0 && g == 0) || w->g == g);

    size_t onbuckets = nbuckets;
    size_t onelem = nelem;
    bucket **obuckets = buckets;
    size_t i;
    bucket *b;

    make_buckets(w, nextsz(nbuckets));

    for (i = 0; i < onbuckets; ++i) {
        b = obuckets[i];
        if (b) {
            elem *oel;
            for (oel = b->el; oel->key; ++oel)
                insert_no_rehash(w, oel->key, oel->hb, oel->view);
        }
    }

    CILK_ASSERT(nelem == onelem);

    free_buckets(w, obuckets, onbuckets);
}

elem *cilkred_map::rehash_and_insert(__cilkrts_worker           *w,
                                     void                       *key,
                                     __cilkrts_hyperobject_base *hb,
                                     void                       *view)
{
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, this_map=%p, inserting key=%p, view=%p\n",
            w->self, this, key, view);
    verify_current_wkr(w);
#endif

    if (need_rehash_p())
        rehash(w);

    return insert_no_rehash(w, key, hb, view);
}

elem *cilkred_map::lookup(void *key)
{
    bucket *b = buckets[hashfun(this, key)];

    if (b) {
        elem *el;
        for (el = b->el; el->key; ++el) {
            if (el->key == key) {
                CILK_ASSERT(el->view);
                return el;
            }
        }
    }

    return 0;
}

void elem::destroy()
{
    if (! is_leftmost()) {

        // Call destroy_fn and deallocate_fn on the view, but not if it's the
        // leftmost view.
        cilk_c_monoid *monoid = &(hb->__c_monoid);
        cilk_c_reducer_destroy_fn_t    destroy_fn    = monoid->destroy_fn;
        cilk_c_reducer_deallocate_fn_t deallocate_fn = monoid->deallocate_fn;

        destroy_fn((void*)hb, view);
        deallocate_fn((void*)hb, view);
    }
    view = 0;
}

inline
bool elem::is_leftmost() const
{
    // implementation uses the address of the leftmost view as the key, so if
    // key == view, then this element refers to the leftmost view.
    return key == view;
}
/* remove the reducer from the current reducer map.  If the reducer
   exists in maps other than the current one, the behavior is
   undefined. */
extern "C"
CILK_EXPORT void __CILKRTS_STRAND_STALE(
    __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *hb))
{
    // Disable Cilkscreen for the duration of this call.  The destructor for
    // this class will re-enable Cilkscreen when the method returns.  This
    // will prevent Cilkscreen from reporting apparent races in reducers
    DisableCilkscreen x;

    __cilkrts_worker* w = __cilkrts_get_tls_worker();
    if (! w) {
        // If no worker, then Cilk is not running and there is no reducer
        // map.  Do nothing.  The reducer's destructor will take care of
        // destroying the leftmost view.
        return;
    }

    const char *UNSYNCED_REDUCER_MSG =
        "Destroying a reducer while it is visible to unsynced child tasks, or\n"
        "calling CILK_C_UNREGISTER_REDUCER() on an unregistered reducer.\n"
        "Did you forget a _Cilk_sync or CILK_C_REGISTER_REDUCER()?";

    cilkred_map* h = w->reducer_map;
    if (NULL == h)
        cilkos_error(UNSYNCED_REDUCER_MSG); // Does not return

    if (h->merging) {
        verify_current_wkr(w);
        __cilkrts_bug("User error: hyperobject used by another hyperobject");
    }

    void* key = get_hyperobject_key(hb);
    elem *el = h->lookup(key);

    // Verify that the reducer is being destroyed from the leftmost strand
    // for which the reducer is defined.
    if (! (el && el->is_leftmost()))
        cilkos_error(UNSYNCED_REDUCER_MSG);

#if REDPAR_DEBUG >= 3
    fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n",
            w->self, key, h, el);
#endif

    // Remove the element from the hash bucket.  Do not bother shrinking
    // the bucket.  Note that the destroy() function does not actually
    // call the destructor for the leftmost view.
    el->destroy();
    do {
        el[0] = el[1];
        ++el;
    } while (el->key);
    --h->nelem;

#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=hyper_destroy_finish, key=%p, w->reducer_map=%p]\n",
            w->self, key, w->reducer_map);
#endif
}
  502. extern "C"
  503. CILK_EXPORT
  504. void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb)
  505. {
  506. // This function registers the specified hyperobject in the current
  507. // reducer map and registers the initial value of the hyperobject as the
  508. // leftmost view of the reducer.
  509. __cilkrts_worker *w = __cilkrts_get_tls_worker();
  510. if (! w) {
  511. // If there is no worker, then there is nothing to do: The iniitial
  512. // value will automatically be used as the left-most view when we
  513. // enter Cilk.
  514. return;
  515. }
  516. // Disable Cilkscreen for the duration of this call. The destructor for
  517. // this class will re-enable Cilkscreen when the method returns. This
  518. // will prevent Cilkscreen from reporting apparent races in reducers
  519. DisableCilkscreen x;
  520. void* key = get_hyperobject_key(hb);
  521. void* view = get_leftmost_view(key);
  522. cilkred_map *h = w->reducer_map;
  523. if (__builtin_expect(!h, 0)) {
  524. h = install_new_reducer_map(w);
  525. #if REDPAR_DEBUG >= 2
  526. fprintf(stderr, "[W=%d, hb=%p, hyper_create, isntalled new map %p, view=%p]\n",
  527. w->self, hb, h, view);
  528. #endif
  529. }
  530. /* Must not exist. */
  531. CILK_ASSERT(h->lookup(key) == NULL);
  532. #if REDPAR_DEBUG >= 3
  533. verify_current_wkr(w);
  534. fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of view %p, should be null]\n",
  535. w->self, hb, h, view);
  536. fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
  537. w->self,
  538. h,
  539. &(hb->__c_monoid),
  540. view);
  541. #endif
  542. if (h->merging)
  543. __cilkrts_bug("User error: hyperobject used by another hyperobject");
  544. CILK_ASSERT(w->reducer_map == h);
  545. // The address of the leftmost value is the same as the key for lookup.
  546. (void) h->rehash_and_insert(w, view, hb, view);
  547. }
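
// For orientation: user code does not call __cilkrts_hyper_create()
// directly.  A C++ cilk::reducer constructor, or the C macro
// CILK_C_REGISTER_REDUCER(), invokes it through the hyperobject base.
// A typical C-style use looks roughly like this (sketch):
//
//     CILK_C_REDUCER_OPADD(sum, long, 0);   // declare reducer 'sum'
//     CILK_C_REGISTER_REDUCER(sum);         // -> __cilkrts_hyper_create
//     /* ... spawn work that updates REDUCER_VIEW(sum) ... */
//     CILK_C_UNREGISTER_REDUCER(sum);       // -> __cilkrts_hyper_destroy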
  548. extern "C"
  549. CILK_EXPORT void* __CILKRTS_STRAND_PURE(
  550. __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *hb))
  551. {
  552. __cilkrts_worker* w = __cilkrts_get_tls_worker_fast();
  553. void* key = get_hyperobject_key(hb);
  554. if (! w)
  555. return get_leftmost_view(key);
  556. // Disable Cilkscreen for the duration of this call. This will
  557. // prevent Cilkscreen from reporting apparent races in reducers
  558. DisableCilkscreen dguard;
  559. if (__builtin_expect(w->g->force_reduce, 0))
  560. __cilkrts_promote_own_deque(w);
  561. cilkred_map* h = w->reducer_map;
  562. if (__builtin_expect(!h, 0)) {
  563. h = install_new_reducer_map(w);
  564. }
  565. if (h->merging)
  566. __cilkrts_bug("User error: hyperobject used by another hyperobject");
  567. elem* el = h->lookup(key);
  568. if (! el) {
  569. /* lookup failed; insert a new default element */
  570. void *rep;
  571. {
  572. /* re-enable cilkscreen while calling the constructor */
  573. EnableCilkscreen eguard;
  574. if (h->is_leftmost)
  575. {
  576. // This special case is called only if the reducer was not
  577. // registered using __cilkrts_hyper_create, e.g., if this is a
  578. // C reducer in global scope or if there is no bound worker.
  579. rep = get_leftmost_view(key);
  580. }
  581. else
  582. {
  583. rep = hb->__c_monoid.allocate_fn((void*)hb,
  584. hb->__view_size);
  585. // TBD: Handle exception on identity function
  586. hb->__c_monoid.identity_fn((void*)hb, rep);
  587. }
  588. }
  589. #if REDPAR_DEBUG >= 3
  590. fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
  591. w->self,
  592. h,
  593. &(hb->__c_monoid),
  594. rep);
  595. CILK_ASSERT(w->reducer_map == h);
  596. #endif
  597. el = h->rehash_and_insert(w, key, hb, rep);
  598. }
  599. return el->view;
  600. }
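
// For orientation: every strand-local access to a reducer funnels through
// the lookup above.  REDUCER_VIEW(r) in C, or reducer<M>::view() in C++,
// expands to roughly the following (sketch; member names abbreviated):
//
//     long *view = (long *)__cilkrts_hyper_lookup(&sum.__cilkrts_hyperbase);
//     *view += n;   // operate on this strand's private view
//
// so a miss on the current map lazily creates an identity view, as coded
// in the branch above.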
  601. extern "C" CILK_EXPORT
  602. void* __cilkrts_hyperobject_alloc(void* ignore, std::size_t bytes)
  603. {
  604. return std::malloc(bytes);
  605. }
  606. extern "C" CILK_EXPORT
  607. void __cilkrts_hyperobject_dealloc(void* ignore, void* view)
  608. {
  609. std::free(view);
  610. }
  611. /* No-op destroy function */
  612. extern "C" CILK_EXPORT
  613. void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2)
  614. {
  615. }
  616. cilkred_map *__cilkrts_make_reducer_map(__cilkrts_worker *w)
  617. {
  618. CILK_ASSERT(w);
  619. cilkred_map *h;
  620. size_t nbuckets = 1; /* default value */
  621. h = (cilkred_map *)__cilkrts_frame_malloc(w, sizeof(*h));
  622. #if REDPAR_DEBUG >= 1
  623. fprintf(stderr, "[W=%d, desc=make_reducer_frame_malloc_reducer_map, h=%p]\n",
  624. w->self, h);
  625. #endif
  626. h->g = w ? w->g : 0;
  627. h->make_buckets(w, nbuckets);
  628. h->merging = false;
  629. h->is_leftmost = false;
  630. return h;
  631. }
/* Destroy a reducer map.  The map must have been allocated
   from the worker's global context and should have been
   allocated from the same worker. */
void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h)
{
    CILK_ASSERT((w == 0 && h->g == 0) || w->g == h->g);
    verify_current_wkr(w);

    /* the reducer map is allowed to contain el->view == NULL here (and
       only here).  We set el->view == NULL only when we know that the
       map will be destroyed immediately afterwards. */
    DBG h->check(/*allow_null_view=*/true);

    bucket *b;
    size_t i;

    for (i = 0; i < h->nbuckets; ++i) {
        b = h->buckets[i];
        if (b) {
            elem *el;
            for (el = b->el; el->key; ++el) {
                if (el->view)
                    el->destroy();
            }
        }
    }

    free_buckets(w, h->buckets, h->nbuckets);

#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, destroy_red_map, freeing map h=%p, size=%zd\n",
            w->self, h, sizeof(*h));
#endif

    __cilkrts_frame_free(w, h, sizeof(*h));
}
/* Set the specified reducer map as the leftmost map if is_leftmost is true,
   otherwise, set it to not be the leftmost map. */
void __cilkrts_set_leftmost_reducer_map(cilkred_map *h, int is_leftmost)
{
    h->is_leftmost = is_leftmost;
}
__cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
                                     cilkred_map      *other_map,
                                     enum merge_kind   kind)
{
    // Disable Cilkscreen while we merge the maps.  The destructor for
    // the guard class will re-enable Cilkscreen when it goes out of scope.
    // This will prevent Cilkscreen from reporting apparent races between
    // the reduce function and the reducer operations.  The Cilk runtime
    // guarantees that a pair of reducer maps will only be merged when no
    // other strand will access them.
    DisableCilkscreen guard;

#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=merge, this_map=%p, other_map=%p]\n",
            w->self,
            this, other_map);
#endif
    // Remember the current stack frame.
    __cilkrts_stack_frame *current_sf = w->current_stack_frame;
    merging = true;
    other_map->merging = true;

    // Merging to the leftmost view is a special case because every leftmost
    // element must be initialized before the merge.
    CILK_ASSERT(!other_map->is_leftmost /* || kind == MERGE_UNORDERED */);
    bool merge_to_leftmost = (this->is_leftmost
                              /* && !other_map->is_leftmost */);

    DBG check(/*allow_null_view=*/false);
    DBG other_map->check(/*allow_null_view=*/false);

    for (size_t i = 0; i < other_map->nbuckets; ++i) {
        bucket *b = other_map->buckets[i];
        if (b) {
            for (elem *other_el = b->el; other_el->key; ++other_el) {
                /* Steal the value from the other map, which will be
                   destroyed at the end of this operation. */
                void *other_view = other_el->view;
                CILK_ASSERT(other_view);

                void *key = other_el->key;
                __cilkrts_hyperobject_base *hb = other_el->hb;
                elem *this_el = lookup(key);

                if (this_el == 0 && merge_to_leftmost) {
                    /* Initialize leftmost view before merging. */
                    void* leftmost = get_leftmost_view(key);
                    // leftmost == other_view can be true if the initial view
                    // was created in other than the leftmost strand of the
                    // spawn tree, but then made visible to subsequent
                    // strands (e.g., the reducer was allocated on the heap
                    // and the pointer was returned to the caller).  In such
                    // cases, parallel semantics says that syncing with
                    // earlier strands will always result in 'this_el' being
                    // null, thus propagating the initial view up the spawn
                    // tree until it reaches the leftmost strand.  When
                    // syncing with the leftmost strand, leftmost ==
                    // other_view will be true and we must avoid reducing
                    // the initial view with itself.
                    if (leftmost != other_view)
                        this_el = rehash_and_insert(w, key, hb, leftmost);
                }

                if (this_el == 0) {
                    /* move object from other map into this one */
                    rehash_and_insert(w, key, hb, other_view);
                    other_el->view = 0;
                    continue; /* No element-level merge necessary */
                }

                /* The same key is present in both maps with values
                   A and B.  Three choices: fail, A OP B, B OP A. */
                switch (kind)
                {
                case MERGE_UNORDERED:
                    __cilkrts_bug("TLS Reducer race");
                    break;
                case MERGE_INTO_RIGHT:
                    /* Swap elements in order to preserve object
                       identity */
                    other_el->view = this_el->view;
                    this_el->view = other_view;
                    /* FALL THROUGH */
                case MERGE_INTO_LEFT: {
                    /* Stealing should be disabled during reduce
                       (even if force-reduce is enabled). */
#if DISABLE_PARALLEL_REDUCERS
                    __cilkrts_stack_frame * volatile *saved_protected_tail;
                    saved_protected_tail = __cilkrts_disallow_stealing(w, NULL);
#endif

                    {
                        CILK_ASSERT(current_sf->worker == w);
                        CILK_ASSERT(w->current_stack_frame == current_sf);

                        /* TBD: if reduce throws an exception we need to
                           stop it here. */
                        hb->__c_monoid.reduce_fn((void*)hb,
                                                 this_el->view,
                                                 other_el->view);
                        w = current_sf->worker;

#if REDPAR_DEBUG >= 2
                        verify_current_wkr(w);
                        CILK_ASSERT(w->current_stack_frame == current_sf);
#endif
                    }

#if DISABLE_PARALLEL_REDUCERS
                    /* Restore stealing */
                    __cilkrts_restore_stealing(w, saved_protected_tail);
#endif
                } break;
                }
            }
        }
    }
    this->is_leftmost = this->is_leftmost || other_map->is_leftmost;
    merging = false;
    other_map->merging = false;
    verify_current_wkr(w);
    __cilkrts_destroy_reducer_map(w, other_map);
    return w;
}
/**
 * Print routine for debugging the merging of reducer maps.
 * A no-op unless REDPAR_DEBUG is set high enough.
 */
static inline
void debug_map_merge(__cilkrts_worker  *w,
                     cilkred_map       *left_map,
                     cilkred_map       *right_map,
                     __cilkrts_worker **final_wkr)
{
#if REDPAR_DEBUG >= 2
    fprintf(stderr,
            "[W=%d, desc=finish_merge, left_map=%p, right_map=%p, w->reducer_map=%p, right_ans=%p, final_wkr=%d]\n",
            w->self, left_map, right_map, w->reducer_map, right_map,
            (*final_wkr)->self);
#endif
}
/**
 * merge RIGHT into LEFT;
 * return whichever map allows for faster merge, and destroy the other one.
 *
 * *w_ptr should be the currently executing worker.
 * *w_ptr may change during execution if the reduction is parallel.
 */
cilkred_map*
merge_reducer_maps(__cilkrts_worker **w_ptr,
                   cilkred_map       *left_map,
                   cilkred_map       *right_map)
{
    __cilkrts_worker *w = *w_ptr;
    if (!left_map) {
        debug_map_merge(w, left_map, right_map, w_ptr);
        return right_map;
    }

    if (!right_map) {
        debug_map_merge(w, left_map, right_map, w_ptr);
        return left_map;
    }

    /* Special case: if left_map is leftmost, then always merge into it.
       For C reducers this forces lazy creation of the leftmost views. */
    if (left_map->is_leftmost || left_map->nelem > right_map->nelem) {
        *w_ptr = left_map->merge(w, right_map, cilkred_map::MERGE_INTO_LEFT);
        debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
        return left_map;
    } else {
        *w_ptr = right_map->merge(w, left_map, cilkred_map::MERGE_INTO_RIGHT);
        debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
        return right_map;
    }
}
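
// Why pick the larger map as the destination: merge() walks every element
// of the donor map, so folding a 3-element map into a 1000-element map
// costs 3 lookups/reductions instead of 1000.  Serial semantics are
// preserved either way -- MERGE_INTO_RIGHT swaps the two views first so
// that reduce_fn always receives the earlier-in-serial-order view as its
// left operand, and the surviving element keeps that reduced view.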
/**
 * Merges RIGHT into LEFT, and then repeatedly calls
 * merge_reducer_maps() until (*w_ptr)->reducer_map is NULL.
 *
 * *w_ptr may change as reductions execute.
 */
cilkred_map*
repeated_merge_reducer_maps(__cilkrts_worker **w_ptr,
                            cilkred_map       *left_map,
                            cilkred_map       *right_map)
{
    // Note: if right_map == NULL but w->reducer_map != NULL, then
    // this loop will reduce w->reducer_map into left_map.
    do {
        left_map = merge_reducer_maps(w_ptr, left_map, right_map);
        verify_current_wkr(*w_ptr);

        // Pull any newly created reducer map and loop around again.
        right_map = (*w_ptr)->reducer_map;
        (*w_ptr)->reducer_map = NULL;
    } while (right_map);

    return left_map;
}

/* End reducer_impl.cpp */