global_state.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /* global_state.h -*-C++-*-
  2. *
  3. *************************************************************************
  4. *
  5. * @copyright
  6. * Copyright (C) 2009-2013, Intel Corporation
  7. * All rights reserved.
  8. *
  9. * @copyright
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * * Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in
  18. * the documentation and/or other materials provided with the
  19. * distribution.
  20. * * Neither the name of Intel Corporation nor the names of its
  21. * contributors may be used to endorse or promote products derived
  22. * from this software without specific prior written permission.
  23. *
  24. * @copyright
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  29. * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  30. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  31. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  32. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  33. * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  35. * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36. * POSSIBILITY OF SUCH DAMAGE.
  37. **************************************************************************/
  38. /**
  39. * @file global_state.h
  40. *
  41. * @brief The global_state_t structure contains most of the global context
  42. * maintained by the Intel Cilk runtime.
  43. */
  44. #ifndef INCLUDED_GLOBAL_STATE_DOT_H
  45. #define INCLUDED_GLOBAL_STATE_DOT_H
  46. #include <cilk/common.h>
  47. #include "frame_malloc.h"
  48. #include "stats.h"
  49. #include "bug.h"
  50. #include "cilk_fiber.h"
  51. __CILKRTS_BEGIN_EXTERN_C
  52. /**
  53. * Non-null place-holder for a stack handle that has no meaningful value.
  54. */
  55. #define PLACEHOLDER_FIBER ((cilk_fiber *) -2)
  56. /**
  57. * States for record_or_replay
  58. */
  59. enum record_replay_t {
  60. RECORD_REPLAY_NONE,
  61. RECORD_LOG,
  62. REPLAY_LOG
  63. };
  64. /**
  65. * @brief The global state is a structure that is shared by all workers in
  66. * Cilk.
  67. *
  68. * Make the structure ready for use by calling
  69. * cilkg_init_global_state() and then cilkg_publish_global_state().
  70. *
  71. * The same global lock should be held while both of these methods are
  72. * called. These methods are split because it is useful to execute
  73. * other runtime initialization code in between.
  74. *
  75. * After cilkg_publish_global_state() has completed, Cilk runtime
  76. * methods may call cilkg_get_global_state() to look at the published
  77. * value without holding the global lock.
  78. *
  79. * Finally, clean up the global state by calling
  80. * cilkg_deinit_global_state(). This method should be called only
  81. * after all calls to cilkg_get_global_state() have completed, and
  82. * while holding the global lock.
  83. *
  84. * Before initialization and after deinitialization, the fields in the
  85. * global state have unspecified values, except for a few special
  86. * fields labeled "USER SETTING", which can be read and written before
  87. * initialization and after deinitialization.
  88. */
  89. struct global_state_t { /* COMMON_PORTABLE */
  90. /* Fields described as "(fixed)" should not be changed after
  91. * initialization.
  92. */
  93. /*************************************************************************
  94. * Note that debugger integration must reach into the
  95. * global state! The debugger integration is depending on the
  96. * offsets of the addr_size, system_workers, total_workers,
  97. * stealing_disabled, sysdep, and workers. If these offsets change, the
  98. * debugger integration library will need to be changed to match!!!
  99. *************************************************************************/
  100. int addr_size; ///< Number of bytes for an address, used by debugger (fixed)
  101. int system_workers; ///< Number of system workers (fixed)
  102. /**
  103. * @brief USER SETTING: Maximum number of user workers that can be
  104. * bound to cilk workers.
  105. *
  106. * 0 unless set by user. Call cilkg_calc_max_user_workers to get
  107. * the value.
  108. */
  109. int max_user_workers;
  110. int total_workers; ///< Total number of worker threads allocated (fixed)
  111. int workers_running; ///< True when system workers have beens started */
  112. /// Set by debugger to disable stealing (fixed)
  113. int stealing_disabled;
  114. /// System-dependent part of the global state
  115. struct global_sysdep_state *sysdep;
  116. /// Array of worker structures.
  117. __cilkrts_worker **workers;
  118. /******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
  119. /// Number of frames in each worker's lazy task queue
  120. __STDNS size_t ltqsize;
  121. /**
  122. * @brief USER SETTING: Force all possible reductions.
  123. *
  124. * TRUE if running a p-tool that requires reducers to call the reduce()
  125. * method even if no actual stealing occurs.
  126. *
  127. * When set to TRUE, runtime will simulate steals, forcing calls to the
  128. * the reduce() methods of reducers.
  129. *
  130. */
  131. int force_reduce;
  132. /// USER SETTING: Per-worker fiber pool size
  133. int fiber_pool_size;
  134. /// USER SETTING: Global fiber pool size
  135. int global_fiber_pool_size;
  136. /**
  137. * @brief TRUE when workers should exit scheduling loop so we can
  138. * shut down the runtime and free the global state.
  139. *
  140. * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
  141. * by idle workers. We need to ensure that it's not in a cache line which
  142. * may be invalidated by other cores. The surrounding fields are either
  143. * constant after initialization or not used until shutdown (stats) so we
  144. * should be OK.
  145. */
  146. volatile int work_done;
  147. int under_ptool; ///< True when running under a serial PIN tool
  148. statistics stats; ///< Statistics on use of runtime
  149. /**
  150. * @brief USER SETTING: Maximum number of stacks the runtime will
  151. * allocate (apart from those created by the OS when worker
  152. * threads are created).
  153. *
  154. * If max_stacks == 0,there is no pre-defined maximum.
  155. */
  156. unsigned max_stacks;
  157. /// Size of each stack
  158. size_t stack_size;
  159. /// Global cache for per-worker memory
  160. struct __cilkrts_frame_cache frame_malloc;
  161. /// Global fiber pool
  162. cilk_fiber_pool fiber_pool;
  163. /**
  164. * @brief Track whether the runtime has failed to allocate a
  165. * stack.
  166. *
  167. * Setting this flag prevents multiple warnings from being
  168. * issued.
  169. */
  170. int failure_to_allocate_stack;
  171. /**
  172. * @brief USER SETTING: indicate record or replay log.
  173. * Set to NULL if not used in this run.
  174. */
  175. char *record_replay_file_name;
  176. /**
  177. * @brief Record/replay state.
  178. * Valid states are:
  179. * RECORD_REPLAY_NONE - Not recording or replaying a log
  180. * RECORD_LOG - Recording a log for replay later
  181. * REPLAY_LOG - Replay a log recorded earlier
  182. */
  183. enum record_replay_t record_or_replay;
  184. /**
  185. * @brief Buffer to force max_steal_failures to appear on a
  186. * different cache line from the previous member variables.
  187. *
  188. * This padding is needed because max_steal_failures is read
  189. * constantly and other modified values in the global state will
  190. * cause thrashing.
  191. */
  192. char cache_buf[64];
  193. /**
  194. * @brief Maximum number of times a thread should fail to steal
  195. * before checking if Cilk is shutting down.
  196. */
  197. unsigned int max_steal_failures;
  198. /// Pointer to scheduler entry point
  199. void (*scheduler)(__cilkrts_worker *w);
  200. /**
  201. * @brief Buffer to force P and Q to appear on a different cache
  202. * line from the previous member variables.
  203. */
  204. char cache_buf_2[64];
  205. int P; ///< USER SETTING: number of system workers + 1 (fixed)
  206. int Q; ///< Number of user threads currently bound to workers
  207. };
  208. /**
  209. * @brief Initialize the global state object. This method must both
  210. * complete before referencing any fields in the global state, except
  211. * those specified as "user-settable values".
  212. */
  213. global_state_t* cilkg_init_global_state();
  214. /**
  215. * @brief Publish the global state object, so that
  216. * cilkg_is_published can return true.
  217. *
  218. * @param g - the global state created by cilkg_init_global_state() to
  219. * publish.
  220. *
  221. * After the global state object has been published, a thread should
  222. * not modify this state unless it has exclusive access (i.e., holds
  223. * the global lock).
  224. */
  225. void cilkg_publish_global_state(global_state_t* g);
  226. /**
  227. * @brief Return true if the global state has been fully initialized
  228. * and published, and has not been deinitialized.
  229. */
  230. int cilkg_is_published(void);
  231. /**
  232. * @brief De-initializes the global state object. Must be called to free
  233. * resources when the global state is no longer needed.
  234. */
  235. void cilkg_deinit_global_state(void);
  236. /**
  237. * @brief Returns the global state object. Result is valid only if the
  238. * global state has been published (see cilkg_publish_global_state()).
  239. */
  240. static inline
  241. global_state_t* cilkg_get_global_state(void)
  242. {
  243. // "private" extern declaration:
  244. extern global_state_t *cilkg_singleton_ptr;
  245. __CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
  246. return cilkg_singleton_ptr;
  247. }
  248. /**
  249. * @brief Implementation of __cilkrts_set_params.
  250. *
  251. * Set user controllable parameters
  252. * @param param - string specifying parameter to be set
  253. * @param value - string specifying new value
  254. * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
  255. * CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
  256. * CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
  257. *
  258. * @attention The wide character version __cilkrts_set_param_w() is available
  259. * only on Windows.
  260. *
  261. * Allowable parameter names:
  262. *
  263. * - "nworkers" - number of processors that should run Cilk code.
  264. * The value is a string of digits to be parsed by strtol.
  265. *
  266. * - "force reduce" - test reducer callbacks by allocating new views
  267. * for every spawn within which a reducer is accessed. This can
  268. * significantly reduce performance. The value is "1" or "true"
  269. * to enable, "0" or "false" to disable.
  270. * @warning Enabling "force reduce" when running with more than a single
  271. * worker is currently broken.
  272. *
  273. * - "max user workers" - (Not publicly documented) Sets the number of slots
  274. * allocated for user worker threads
  275. *
  276. * - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
  277. * the per-worker stack cache. Range 1 .. 42. See
  278. * cilkg_init_global_state for details.
  279. *
  280. * - "shared stacks" - (Not publicly documented) Maximum number of stacks
  281. * we'll hold in the global stack cache. Maximum value is 42. See
  282. * __cilkrts_make_global_state for details
  283. *
  284. * - "nstacks" - (Not publicly documented at this time, though it may be
  285. * exposed in the future) Sets the maximum number of stacks permitted at one
  286. * time. If the runtime reaches this maximum, it will cease to allocate
  287. * stacks and the app will lose parallelism. 0 means unlimited. Default is
  288. * unlimited. Minimum is twice the number of worker threads, though that
  289. * cannot be tested at this time.
  290. */
  291. int cilkg_set_param(const char* param, const char* value);
  292. #ifdef _WIN32
  293. /**
  294. * @brief Implementation of __cilkrts_set_params for Unicode characters on
  295. * Windows. See the documentation on @ref cilkg_set_param for more details.
  296. *
  297. * Set user controllable parameters
  298. * @param param - string specifying parameter to be set
  299. * @param value - string specifying new value
  300. * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
  301. * CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
  302. * CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
  303. */
  304. int cilkg_set_param_w(const wchar_t* param, const wchar_t* value);
  305. #endif
  306. /**
  307. * @brief implementation of __cilkrts_get_nworkers()
  308. */
  309. static inline
  310. int cilkg_get_nworkers(void)
  311. {
  312. // "private" extern declaration
  313. extern global_state_t* cilkg_get_user_settable_values(void);
  314. return cilkg_get_user_settable_values()->P;
  315. }
  316. /**
  317. * @brief implementation of __cilkrts_get_total_workers()
  318. */
  319. static inline
  320. int cilkg_get_total_workers(void)
  321. {
  322. // "private" extern declaration
  323. extern int cilkg_calc_total_workers(void);
  324. // This number can fluctate until initialization so we
  325. // compute it from scratch
  326. return cilkg_calc_total_workers();
  327. }
  328. /**
  329. * @brief implementation of __cilkrts_get_force_reduce()
  330. */
  331. static inline
  332. int cilkg_get_force_reduce(void)
  333. {
  334. // "private" extern declaration
  335. extern global_state_t* cilkg_get_user_settable_values(void);
  336. return cilkg_get_user_settable_values()->force_reduce;
  337. }
  338. /**
  339. * @brief implementation of __cilkrts_get_stack_size()
  340. */
  341. static inline
  342. size_t cilkg_get_stack_size(void)
  343. {
  344. // "private" extern declaration
  345. extern global_state_t* cilkg_get_user_settable_values(void);
  346. return cilkg_get_user_settable_values()->stack_size;
  347. }
  348. /**
  349. * @brief Run the scheduler function stored in the global_state
  350. *
  351. * Look up the scheduler function in global_state and run it. Report a fatal
  352. * error if an exception escapes the scheduler function.
  353. *
  354. * @param w - Worker structure to associate with the current thread.
  355. *
  356. * @attention The scheduler field of the global state must be set before this
  357. * function is called.
  358. */
  359. void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
  360. __CILKRTS_END_EXTERN_C
  361. #endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)