record-replay.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. /* record_replay.h -*-C++-*-
  2. *
  3. *************************************************************************
  4. *
  5. * @copyright
  6. * Copyright (C) 2012-2013, Intel Corporation
  7. * All rights reserved.
  8. *
  9. * @copyright
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * * Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in
  18. * the documentation and/or other materials provided with the
  19. * distribution.
  20. * * Neither the name of Intel Corporation nor the names of its
  21. * contributors may be used to endorse or promote products derived
  22. * from this software without specific prior written permission.
  23. *
  24. * @copyright
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  29. * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  30. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  31. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  32. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  33. * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  35. * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36. * POSSIBILITY OF SUCH DAMAGE.
  37. *
  38. **************************************************************************/
  39. /**
  40. * @file record-replay.h
  41. *
  42. * @brief record-replay.h and .cpp encapsulate most of the functionality to
  43. * record and play back a Cilk Plus application.
  44. *
  45. * Recording is directed by the setting of the CILK_RECORD_LOG environment
  46. * variable. If it's defined, the value specifies the root we'll use to
  47. * generate files for each worker using the following format string:
  48. * "%s%d.cilklog", where the integer is the value of w->self.
  49. *
  50. * Replay is directed by the setting of the CILK_REPLAY_LOG environment
  51. * variable, interpreted the same way as CILK_RECORD_LOG. If both
  52. * CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given
  53. * and the attempt to record a log will be ignored.
  54. *
  55. * Recording is relatively straightforward. We write all information about a
  56. * worker to a per-worker file.
  57. *
  58. * Each pedigree record consists of the following fields. All fields must be
  59. * present in every record to make parsing easy.
  60. * - Type - A string identifying the pedigree record. See the PED_TYPE_STR_
  61. * macros for the currently defined values.
  62. * - Pedigree - A string of pedigree values, with underscores between
  63. * adjacent values.
  64. * - i1 - Record type-specific value. -1 if not used.
  65. * - i2 - Record type-specific value. -1 if not used.
  66. *
  67. * WORKERS record - only written to the file for worker 0. Note that this is
  68. * the first worker in the workers array. Worker 0 is the first system worker,
  69. * *NOT* a user worker.
  70. * - Type: "Workers"
  71. * - Pedigree: Always "0" - ignored
  72. * - i1: Number of workers (g->P) when we recorded the log. A mismatch when
  73. * we attempt to replay the log will result in aborting the execution.
  74. * - i2: Log version number - Specified by PED_VERSION in record-replay.cpp
  75. *
  76. * STEAL record - written after a successful steal.
  77. * - Type: "Steal"
  78. * - Pedigree: Pedigree of stolen frame
  79. * - i1: Worker the frame was stolen from
  80. * - i2: -1
  81. *
  82. * SYNC record - written after a worker continues from a sync.
  83. * - Type: "Sync"
  84. * - Pedigree: Pedigree of sync. Note that this is the pedigree *before*
  85. * the pedigree is incremented in setup_for_execution_pedigree().
  86. * - i1: -1
  87. * - i2: -1
  88. *
  89. * ORPHANED record - saved on a return to a stolen parent.
  90. * - Type: "Orphaned"
  91. * - Pedigree: Pedigree of the parent frame *before* the pedigree is
  92. * incremented by the return
  93. * - i1: -1
  94. * - i2: -1
  95. *
  96. * On replay, the data is loaded into a per-worker array, and the data is
  97. * consumed in order as needed.
  98. */
  99. #ifndef INCLUDED_RECORD_REPLAY_DOT_H
  100. #define INCLUDED_RECORD_REPLAY_DOT_H
  101. #include "cilk/common.h"
  102. #include "global_state.h"
  103. /**
  104. * Define CILK_RECORD_REPLAY to enable record/replay functionality. If
  105. * CILK_RECORD_REPLAY is not defined, all of the record/replay functions in
  106. * record-replay.h will be stubbed out. Since they're declared as inline
  107. * functions, the resulting build should have no performance impact due to
  108. * the implementation of record/replay.
  109. */
  110. #define CILK_RECORD_REPLAY 1
  111. /**
  112. * Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log. This
  113. * should only be needed when debugging the replay functionality. This should
  114. * always be defined as 0 when record-replay.h is checked in.
  115. */
  116. #define RECORD_ON_REPLAY 0
  117. __CILKRTS_BEGIN_EXTERN_C
  118. #ifdef CILK_RECORD_REPLAY
  119. // Declarations of internal record/replay functions. The inlined versions
  120. // further down do some preliminary testing (like if we're not recording or
  121. // replaying) and will stub out the functionality if we've compiled out the
  122. // record/replay feature
  123. int replay_match_sync_pedigree_internal(__cilkrts_worker *w);
  124. void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w);
  125. void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id);
  126. void replay_record_sync_internal(__cilkrts_worker *w);
  127. void replay_record_orphaned_internal(__cilkrts_worker *w);
  128. int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim);
  129. void replay_advance_from_sync_internal (__cilkrts_worker *w);
  130. int replay_get_next_recorded_victim_internal(__cilkrts_worker *w);
  131. #endif // CILK_RECORD_REPLAY
  132. // Publicly defined record/replay API
  133. /**
  134. * If we're replaying a log, wait for our parent to be stolen if it was when
  135. * the log was recorded. If record/replay is compiled out, this is a noop.
  136. *
  137. * @param w The __cilkrts_worker we're executing on. The worker's replay
  138. * list will be checked for a ORPHANED record with a matching pedigree. If
  139. * there is a match, the ORPHANED record will be consumed.
  140. */
  141. #ifdef CILK_RECORD_REPLAY
  142. __CILKRTS_INLINE
  143. void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
  144. {
  145. // Only check if we're replaying a log
  146. if (REPLAY_LOG == w->g->record_or_replay)
  147. replay_wait_for_steal_if_parent_was_stolen_internal(w);
  148. }
  149. #else
  150. __CILKRTS_INLINE
  151. void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
  152. {
  153. // If record/replay is disabled, we never wait
  154. }
  155. #endif // CILK_RECORD_REPLAY
  156. /**
  157. * Called from random_steal() to override the ID of the randomly chosen victim
  158. * worker which this worker will attempt to steal from. Returns the worker id
  159. * of the next victim this worker was recorded stealing from, or -1 if the
  160. * next record in the log is not a STEAL.
  161. *
  162. * @note This call does NOT attempt to match the pedigree. That will be done
  163. * by replay_match_victim_pedigree() after random_steal() has locked the victim
  164. * worker.
  165. *
  166. * @param w The __cilkrts_worker we're executing on. The worker's replay log
  167. * is checked for a STEAL record. If we've got one, the stolen worker ID is
  168. * returned.
  169. * @param id The randomly chosen victim worker ID. If we're not replaying a
  170. * log, or if record/replay has been compiled out, this is the value that
  171. * will be returned.
  172. *
  173. * @return id if we're not replaying a log
  174. * @return -1 if the next record is not a STEAL
  175. * @return recorded stolen worker ID if we've got a matching STEAL record
  176. */
  177. #ifdef CILK_RECORD_REPLAY
  178. __CILKRTS_INLINE
  179. int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
  180. {
  181. // Only check if we're replaying a log
  182. if (REPLAY_LOG == w->g->record_or_replay)
  183. return replay_get_next_recorded_victim_internal(w);
  184. else
  185. return id;
  186. }
  187. #else
  188. __CILKRTS_INLINE
  189. int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
  190. {
  191. // Record/replay is disabled. Always return the original worker id
  192. return id;
  193. }
  194. #endif // CILK_RECORD_REPLAY
  195. /**
  196. * Initialize per-worker data for record/replay. A noop if record/replay
  197. * is disabled, or if we're not recording or replaying anything.
  198. *
  199. * If we're recording a log, this will ready us to create the per-worker
  200. * logs.
  201. *
  202. * If we're replaying a log, this will read the logs into the per-worker
  203. * structures.
  204. *
  205. * @param g Cilk runtime global state
  206. */
  207. void replay_init_workers(global_state_t *g);
  208. /**
  209. * Record a record on a successful steal. A noop if record/replay is
  210. * diabled, or if we're not recording anything
  211. *
  212. * @param w The __cilkrts_worker we're executing on. The pedigree of
  213. * the stolen frame will be walked to generate the STEAL record.
  214. *
  215. * @param victim_id The worker ID of the worker w stole from.
  216. */
  217. #ifdef CILK_RECORD_REPLAY
  218. __CILKRTS_INLINE
  219. void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
  220. {
  221. #if RECORD_ON_REPLAY
  222. // If we're recording on replay, write the record if we're recording or
  223. // replaying
  224. if (RECORD_REPLAY_NONE == w->g->record_or_replay)
  225. return;
  226. #else
  227. // Only write the record if we're recording
  228. if (RECORD_LOG != w->g->record_or_replay)
  229. return;
  230. #endif
  231. replay_record_steal_internal(w, victim_id);
  232. }
  233. #else
  234. __CILKRTS_INLINE
  235. void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
  236. {
  237. }
  238. #endif // CILK_RECORD_REPLAY
  239. /**
  240. * Record a record when continuing after a sync. A noop if record/replay is
  241. * diabled, or if we're not recording anything, or if the sync was abandoned,
  242. * meaning this isn't the worker that continues from the sync.
  243. *
  244. * @param w The __cilkrts_worker for we're executing on. The pedigree of
  245. * the sync-ing frame will be walked to generate the SYNC record.
  246. *
  247. * @param continuing True if this worker will be continuing from the
  248. * cilk_sync. A SYNC record will only be generated if continuing is true.
  249. */
  250. #ifdef CILK_RECORD_REPLAY
  251. __CILKRTS_INLINE
  252. void replay_record_sync(__cilkrts_worker *w, int continuing)
  253. {
  254. // If this was not the last worker to the syn, return
  255. if (! continuing)
  256. return;
  257. #if RECORD_ON_REPLAY
  258. // If we're recording on replay, write the record if we're recording or
  259. // replaying
  260. if (RECORD_REPLAY_NONE == w->g->record_or_replay)
  261. return;
  262. #else
  263. // Only write the record if we're recording
  264. if (RECORD_LOG != w->g->record_or_replay)
  265. return;
  266. #endif
  267. replay_record_sync_internal(w);
  268. }
  269. #else
  270. __CILKRTS_INLINE
  271. void replay_record_sync(__cilkrts_worker *w, int abandoned)
  272. {
  273. }
  274. #endif // CILK_RECORD_REPLAY
  275. /**
  276. * Record a record on a return to a stolen parent. A noop if record/replay is
  277. * diabled, or if we're not recording anything.
  278. *
  279. * @param w The __cilkrts_worker for we're executing on. The pedigree of the
  280. * frame that has discovered that its parent has been stolken will be walked
  281. * to generate the ORPHANED record.
  282. */
  283. #ifdef CILK_RECORD_REPLAY
  284. __CILKRTS_INLINE
  285. void replay_record_orphaned(__cilkrts_worker *w)
  286. {
  287. #if RECORD_ON_REPLAY
  288. // If we're recording on replay, write the record if we're recording or
  289. // replaying
  290. if (RECORD_REPLAY_NONE == w->g->record_or_replay)
  291. return;
  292. #else
  293. // Only write the record if we're recording
  294. if (RECORD_LOG != w->g->record_or_replay)
  295. return;
  296. #endif
  297. replay_record_orphaned_internal(w);
  298. }
  299. #else
  300. __CILKRTS_INLINE
  301. void replay_record_orphaned(__cilkrts_worker *w)
  302. {
  303. }
  304. #endif // CILK_RECORD_REPLAY
  305. /**
  306. * Test whether the frame at the head of the victim matches the pedigree of
  307. * the frame that was recorded being stolen. Called in random steal to verify
  308. * that we're about to steal the correct frame.
  309. *
  310. * @param w The __cilkrts_worker for we're executing on. The current worker
  311. * is needed to find the replay entry to be checked.
  312. *
  313. * @param victim The __cilkrts_worker for we're proposing to steal a frame
  314. * from. The victim's head entry is
  315. * is needed to find the replay entry to be checked.
  316. *
  317. * @return 0 if we're replaying a log and the victim's pedigree does NOT match
  318. * the next frame the worker is expected to steal.
  319. *
  320. * @return 1 in all other cases to indicate that the steal attempt should
  321. * continue
  322. */
  323. #ifdef CILK_RECORD_REPLAY
  324. __CILKRTS_INLINE
  325. int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
  326. {
  327. // We're not replaying a log. The victim is always acceptable
  328. if (REPLAY_LOG != w->g->record_or_replay)
  329. return 1;
  330. // Return 1 if the victim's pedigree matches the frame the worker stole
  331. // when we recorded the log
  332. return replay_match_victim_pedigree_internal(w, victim);
  333. }
  334. #else
  335. __CILKRTS_INLINE
  336. int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
  337. {
  338. // Record/replay is disabled. The victim is always acceptable
  339. return 1;
  340. }
  341. #endif // CILK_RECORD_REPLAY
  342. /**
  343. * Test whether the current replay entry is a sync record matching the
  344. * worker's pedigree.
  345. *
  346. * @param w The __cilkrts_worker for we're executing on.
  347. *
  348. * @return 1 if the current replay entry matches the current pedigree.
  349. * @return 0 if there's no match, or if we're not replaying a log.
  350. */
  351. #ifdef CILK_RECORD_REPLAY
  352. __CILKRTS_INLINE
  353. int replay_match_sync_pedigree(__cilkrts_worker *w)
  354. {
  355. // If we're not replaying, assume no match
  356. if (REPLAY_LOG != w->g->record_or_replay)
  357. return 0;
  358. return replay_match_sync_pedigree_internal(w);
  359. }
  360. #else
  361. __CILKRTS_INLINE
  362. int replay_match_sync_pedigree(__cilkrts_worker *w)
  363. {
  364. // Record/replay is disabled. Assume no match
  365. return 0;
  366. }
  367. #endif
  368. /**
  369. * Marks a sync record seen, advancing to the next record in the replay list.
  370. *
  371. * This function will only advance to the next record if:
  372. * - Record/replay hasn't been compiled out AND
  373. * - We're replaying a log AND
  374. * - A match was found AND
  375. * - The sync is not being abandoned
  376. *
  377. * @param w The __cilkrts_worker for we're executing on.
  378. * @param match_found The value returned by replay_match_sync_pedigree(). If
  379. * match_found is false, nothing is done.
  380. * @param continuing Flag indicating whether this worker will continue from
  381. * the sync (it's the last worker to the sync) or if it will abandon the work
  382. * and go to the scheduling loop to look for more work it can steal.
  383. */
  384. #ifdef CILK_RECORD_REPLAY
  385. __CILKRTS_INLINE
  386. void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
  387. {
  388. // If we're replaying a log, and the current sync wasn't abandoned, and we
  389. // found a match in the log, mark the sync record seen.
  390. if ((REPLAY_LOG == w->g->record_or_replay) && match_found && continuing)
  391. replay_advance_from_sync_internal(w);
  392. }
  393. #else
  394. __CILKRTS_INLINE
  395. void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
  396. {
  397. }
  398. #endif
  399. /**
  400. * Release any resources used to read or write a replay log.
  401. *
  402. * @param g Cilk runtime global state
  403. */
  404. void replay_term(global_state_t *g);
  405. __CILKRTS_END_EXTERN_C
  406. #endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H)