full_frame.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /* full_frame.h -*-C++-*-
  2. *
  3. *************************************************************************
  4. *
  5. * @copyright
  6. * Copyright (C) 2009-2013, Intel Corporation
  7. * All rights reserved.
  8. *
  9. * @copyright
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * * Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in
  18. * the documentation and/or other materials provided with the
  19. * distribution.
  20. * * Neither the name of Intel Corporation nor the names of its
  21. * contributors may be used to endorse or promote products derived
  22. * from this software without specific prior written permission.
  23. *
  24. * @copyright
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  29. * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  30. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  31. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  32. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  33. * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  35. * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36. * POSSIBILITY OF SUCH DAMAGE.
  37. **************************************************************************/
  38. #ifndef INCLUDED_FULL_FRAME_DOT_H
  39. #define INCLUDED_FULL_FRAME_DOT_H
  40. #include "rts-common.h"
  41. #include "worker_mutex.h"
  42. #include <cilk/common.h>
  43. #include <internal/abi.h>
  44. #include <stddef.h>
  45. #include "cilk_fiber.h"
  46. __CILKRTS_BEGIN_EXTERN_C
  47. /** Magic numbers for full_frame, used for debugging */
  48. typedef unsigned long long ff_magic_t;
  49. /* COMMON_SYSDEP */ struct pending_exception_info; /* opaque */
  50. /*************************************************************
  51. Full frames
  52. *************************************************************/
  53. /**
  54. * @file full_frame.h
  55. * @brief A full frame includes additional information such as a join
  56. * counter and parent frame.
  57. * @defgroup FullFrames Full Frames
  58. * A full frame includes additional information such as a join
  59. * counter and parent frame.
  60. * @{
  61. */
  62. /**
  63. * Convenience typedef so we don't have to specify "struct full_frame"
  64. * all over the code. Putting it before the structure definition allows
  65. * us to use the typedef within the structure itself
  66. */
  67. typedef struct full_frame full_frame;
  68. /**
  69. * @brief A full frame includes additional information such as a join
  70. * counter and parent frame.
  71. *
  72. * The frame at the top of a worker's stack is promoted into a "full"
  73. * frame, which carries additional information, such as join counter
  74. * and parent frame. Full frames can be suspended at a sync, in which
  75. * case they lie somewhere in memory and do not belong to any
  76. * worker.
  77. *
  78. * Full frames are in contrast to the entries in the worker's deque which
  79. * are only represented by a pointer to their __cilkrts_stack_frame.
  80. *
  81. * At any instant, we say that a full frame ff is either "suspended",
  82. * or "owned" by some worker w.
  83. *
  84. * More precisely, we say that a worker w owns a frame ff under one of
  85. * the following conditions:
  86. *
  87. * 1. Creation: Worker w has just created ff, but not yet linked ff
  88. * into the tree of full frames. This situation can occur when a
  89. * worker is unrolling a call stack to promote a
  90. * __cilkrts_stack_frame to a full_frame.
  91. * 2. Executing frame: We have w->l->frame_ff == ff, i.e,. ff is the
  92. * currently executing frame for w.
  93. * 3. Next frame: We have w->l->next_frame_ff == ff, i.e,. ff is the
  94. * next frame that w is about to execute.
  95. * 4. Resume execution: Worker w has popped ff from
  96. * w->l->next_frame_ff, and is about to resume execution of ff.
  97. * 5. Dying leaf: Worker w has finished executing a frame ff
  98. * that is a leaf the tree of full frames, and is in the process
  99. * of unlinking "ff" from the tree.
  100. *
  101. * Otherwise, the frame ff is suspended, and has no owner.
  102. * Note that work-stealing changes the owner of a full frame from the
  103. * victim to the thief.
  104. *
  105. * Using this notion of ownership, we classify the fields of a full
  106. * frame into one of several categories:
  107. *
  108. * 1. Local:
  109. * These fields are accessed only by the owner of the full frame.
  110. * Because a frame can have only one owner at a time, these fields
  111. * can be modified without any (additional) locking or
  112. * synchronization, assuming the correct synchronization for
  113. * changing the ownership of full frame (e.g., on a successful
  114. * steal) is already in place.
  115. *
  116. * 2. Constant (i.e., read-only):
  117. * This field is constant for the lifetime of the full frame.
  118. * No locks are needed to access this field.
  119. * Technically, a field could be read-only and local, but we assume
  120. * it is shared.
  121. *
  122. * 3. Self-locked:
  123. * To access this field in the frame ff, a worker should acquire
  124. * the lock on ff.
  125. * A self-locked field is conceptually "shared" between the worker
  126. * that owns frame ff (which is a child) and the worker that
  127. * owns the frame ff->parent (which is the parent of ff).
  128. *
  129. * 4. Parent-locked:
  130. * To access this field in the frame ff, a worker should
  131. * acquire the lock on ff->parent.
  132. * A parent-locked field is conceptually "shared" between the worker
  133. * that owns frame ff, and a worker that is either owns the
  134. * parent frame (ff->parent) or owns a sibling frame of ff (i.e.,
  135. * any child of ff->parent).
  136. *
  137. * 5. Synchronization
  138. * A field used explicitly for synchronization (i.e., locks).
  139. */
  140. /* COMMON_PORTABLE */
  141. struct full_frame
  142. {
  143. /**
  144. * Value to detect writes off the beginning of a full_frame.
  145. */
  146. # define FULL_FRAME_MAGIC_0 ((ff_magic_t)0x361e710b9597d553ULL)
  147. /**
  148. * Field to detect writes off the beginning of a full_frame. Must be
  149. * FULL_FRAME_MAGIC_0.
  150. * [constant]
  151. */
  152. ff_magic_t full_frame_magic_0;
  153. /**
  154. * Used to serialize access to this full_frame
  155. * [synchronization]
  156. */
  157. struct mutex lock;
  158. /**
  159. * Count of outstanding children running in parallel
  160. * [self-locked]
  161. */
  162. int join_counter;
  163. /**
  164. * If TRUE: frame was called by the parent.
  165. * If FALSE: frame was spawned by parent.
  166. * [constant]
  167. */
  168. int is_call_child;
  169. /**
  170. * TRUE if this frame is the loot of a simulated steal.
  171. *
  172. * This situation never happens in normal execution. However,
  173. * when running under cilkscreen, a worker may promote frames and
  174. * then immediately suspend them, in order to simulate an
  175. * execution on an infinite number of processors where all spawns
  176. * are stolen. In this case, the frame is marked as the loot of a fake
  177. * steal.
  178. * [local]
  179. */
  180. int simulated_stolen;
  181. /**
  182. * Caller of this full_frame
  183. * [constant]
  184. */
  185. full_frame *parent;
  186. /**
  187. * Doubly-linked list of children. The serial execution order is
  188. * by definition from left to right. Because of how we do work
  189. * stealing, the parent is always to the right of all its
  190. * children.
  191. *
  192. * For a frame ff, we lock the ff->parent to follow the sibling
  193. * links for ff.
  194. *
  195. * [parent-locked]
  196. */
  197. full_frame *left_sibling;
  198. /**
  199. * @copydoc left_sibling
  200. */
  201. full_frame *right_sibling;
  202. /**
  203. * Pointer to rightmost child
  204. *
  205. * [self-locked]
  206. */
  207. full_frame *rightmost_child;
  208. /**
  209. * Call stack associated with this frame.
  210. * Set and reset in make_unrunnable and make_runnable
  211. *
  212. * [self-locked]
  213. */
  214. __cilkrts_stack_frame *call_stack;
  215. /**
  216. * Accumulated reducers of children
  217. *
  218. * [self-locked]
  219. */
  220. struct cilkred_map *children_reducer_map;
  221. /**
  222. * Accumulated reducers of right siblings that have already
  223. * terminated
  224. *
  225. * [parent-locked]
  226. */
  227. struct cilkred_map *right_reducer_map;
  228. /**
  229. * Exception that needs to be pass to our parent
  230. *
  231. * [local]
  232. *
  233. * TBD: verify that the exception code satisfies this requirement.
  234. */
  235. struct pending_exception_info *pending_exception;
  236. /**
  237. * Exception from one of our children
  238. *
  239. * [self-locked]
  240. */
  241. struct pending_exception_info *child_pending_exception;
  242. /**
  243. * Exception from any right siblings
  244. *
  245. * [parent-locked]
  246. */
  247. struct pending_exception_info *right_pending_exception;
  248. /**
  249. * Stack pointer to restore on sync.
  250. * [local]
  251. */
  252. char *sync_sp;
  253. #ifdef _WIN32
  254. /**
  255. * Stack pointer to restore on exception.
  256. * [local]
  257. */
  258. char *exception_sp;
  259. /**
  260. * Exception trylevel at steal
  261. * [local]
  262. *
  263. * TBD: this field is set but not read?
  264. */
  265. unsigned long trylevel;
  266. /**
  267. * Exception registration head pointer to restore on sync.
  268. * [local]
  269. */
  270. unsigned long registration;
  271. #endif
  272. /**
  273. * Size of frame to match sync sp
  274. * [local]
  275. * TBD: obsolete field only used in debugging?
  276. */
  277. ptrdiff_t frame_size;
  278. /**
  279. * Allocated fibers that need to be freed. The fibers work
  280. * like a reducer. The leftmost frame may have @c fiber_self
  281. * null and owner non-null.
  282. *
  283. * [local]
  284. * TBD: verify exception code satisfies this requirement.
  285. */
  286. cilk_fiber *fiber_self;
  287. /**
  288. * Allocated fibers that need to be freed. The fibers work
  289. * like a reducer. The leftmost frame may have @c fiber_self
  290. * null and owner non-null.
  291. *
  292. * [self-locked]
  293. */
  294. cilk_fiber *fiber_child;
  295. /**
  296. * If the sync_master is set, this function can only be sync'd by the team
  297. * leader, who first entered Cilk. This is set by the first worker to steal
  298. * from the user worker.
  299. *
  300. * [self-locked]
  301. */
  302. __cilkrts_worker *sync_master;
  303. /**
  304. * Value to detect writes off the end of a full_frame.
  305. */
  306. # define FULL_FRAME_MAGIC_1 ((ff_magic_t)0x189986dcc7aee1caULL)
  307. /**
  308. * Field to detect writes off the end of a full_frame. Must be
  309. * FULL_FRAME_MAGIC_1.
  310. *
  311. * [constant]
  312. */
  313. ff_magic_t full_frame_magic_1;
  314. };
  315. /* The functions __cilkrts_put_stack and __cilkrts_take_stack keep track of
  316. * changes in the stack's depth between when the point at which a frame is
  317. * stolen and when it is resumed at a sync. A stolen frame typically goes
  318. * through the following phase changes:
  319. *
  320. * 1. Suspend frame while stealing it.
  321. * 2. Resume stolen frame at begining of continuation
  322. * 3. Suspend stolen frame at a sync
  323. * 4. Resume frame (no longer marked stolen) after the sync
  324. *
  325. * When the frame is suspended (steps 1 and 3), __cilkrts_put_stack is called to
  326. * establish the stack pointer for the sync. When the frame is resumed (steps
  327. * 2 and 4), __cilkrts_take_stack is called to indicate the stack pointer
  328. * (which may be on a different stack) at
  329. * the point of resume. If the stack pointer changes between steps 2 and 3,
  330. * e.g., as a result of pushing 4 bytes onto the stack,
  331. * the offset is reflected in the value of ff->sync_sp after step 3 relative to
  332. * its value after step 1 (e.g., the value of ff->sync_sp after step 3 would be
  333. * 4 less than its value after step 1, for a down-growing stack).
  334. *
  335. * Imp detail: The actual call chains for each of these phase-change events is:
  336. *
  337. * 1. unroll_call_stack -> make_unrunnable -> __cilkrts_put_stack
  338. * 2. do_work -> __cilkrts_resume -> __cilkrts_take_stack
  339. * 3. do_sync -> disown -> make_runnable -> __cilkrts_put_stack
  340. * 4. __cilkrts_resume -> __cilkrts_take_stack
  341. *
  342. * (The above is a changeable implementation detail. The resume, sequence, in
  343. * particular, is more complex on some operating systems.)
  344. */
  345. /**
  346. * @brief Records the stack pointer within the @c sf stack frame as the
  347. * current stack pointer at the point of suspending full frame @c ff.
  348. *
  349. * @pre @c ff->sync_sp must be either null or contain the result of a prior call to
  350. * @c __cilkrts_take_stack().
  351. * @pre If @c ff->sync_sp is not null, then @c SP(sf) must refer to the same stack as
  352. * the @c sp argument to the prior call to @c __cilkrts_take_stack().
  353. *
  354. * @post If @c ff->sync_sp was null before the call, then @c
  355. * ff->sync_sp will be set to @c SP(sf).
  356. * @post Otherwise, @c ff->sync_sp will be restored to the value it had just prior
  357. * to the last call to @c __cilkrts_take_stack(), except offset by any change
  358. * in the stack pointer between the call to @c __cilkrts_take_stack() and
  359. * this call to @c __cilkrts_put_stack().
  360. *
  361. * @param ff The full frame that is being suspended.
  362. * @param sf The @c __cilkrts_stack_frame that is being suspended. The stack
  363. * pointer will be taken from the jmpbuf contained within this
  364. * @c __cilkrts_stack_frame.
  365. */
  366. COMMON_PORTABLE void __cilkrts_put_stack(full_frame *ff,
  367. __cilkrts_stack_frame *sf);
  368. /**
  369. * @brief Records the stack pointer @c sp as the stack pointer at the point of
  370. * resuming execution on full frame @c ff.
  371. *
  372. * The value of @c sp may be on a different stack than the original
  373. * value recorded for the stack pointer using __cilkrts_put_stack().
  374. *
  375. * @pre @c ff->sync_sp must contain a value set by @c __cilkrts_put_stack().
  376. *
  377. * @post @c ff->sync_sp contains an *integer* value used to compute a change in the
  378. * stack pointer upon the next call to @c __cilkrts_take_stack().
  379. * @post If @c sp equals @c ff->sync_sp, then @c ff->sync_sp is set to null.
  380. *
  381. * @param ff The full frame that is being resumed.
  382. * @param sp The stack pointer for the stack the function is being resumed on.
  383. */
  384. COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp);
  385. /*
  386. * @brief Adjust the stack for to deallocate a Variable Length Array
  387. *
  388. * @param ff The full frame that is being adjusted.
  389. * @param size The size of the array being deallocated from the stack
  390. */
  391. COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size);
  392. /**
  393. * @brief Allocates and initailizes a full_frame.
  394. *
  395. * @param w The memory for the full_frame will be allocated out of the
  396. * worker's pool.
  397. * @param sf The @c __cilkrts_stack_frame which will be saved as the call_stack
  398. * for this full_frame.
  399. *
  400. * @return The newly allocated and initialized full_frame.
  401. */
  402. COMMON_PORTABLE
  403. full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w,
  404. __cilkrts_stack_frame *sf);
  405. /**
  406. * @brief Deallocates a full_frame.
  407. *
  408. * @param w The memory for the full_frame will be returned to the worker's pool.
  409. * @param ff The full_frame to be deallocated.
  410. */
  411. COMMON_PORTABLE
  412. void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff);
  413. /**
  414. * @brief Performs sanity checks to check the integrity of a full_frame.
  415. *
  416. * @param ff The full_frame to be validated.
  417. */
  418. COMMON_PORTABLE void validate_full_frame(full_frame *ff);
  419. /**
  420. * @brief Locks the mutex contained in a full_frame.
  421. *
  422. * The full_frame is validated before the runtime attempts to lock it.
  423. *
  424. * @post @c ff->lock will be owned by @c w.
  425. *
  426. * @param w The worker that will own the full_frame. If the runtime is
  427. * collecting stats, the intervals will be attributed to the worker.
  428. * @param ff The full_frame containing the mutex to be locked.
  429. */
  430. COMMON_PORTABLE void __cilkrts_frame_lock(__cilkrts_worker *w,
  431. full_frame *ff);
  432. /**
  433. * @brief Unlocks the mutex contained in a full_frame.
  434. *
  435. * @pre @c ff->lock must must be owned by @c w.
  436. *
  437. * @param w The worker that currently owns the full_frame.
  438. * @param ff The full_frame containing the mutex to be unlocked.
  439. */
  440. COMMON_PORTABLE void __cilkrts_frame_unlock(__cilkrts_worker *w,
  441. full_frame *ff);
  442. /** @} */
  443. __CILKRTS_END_EXTERN_C
  444. #endif // ! defined(INCLUDED_FULL_FRAME_DOT_H)