global_state.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629
  1. /* global_state.cpp -*-C++-*-
  2. *
  3. *************************************************************************
  4. *
  5. * @copyright
  6. * Copyright (C) 2009-2013, Intel Corporation
  7. * All rights reserved.
  8. *
  9. * @copyright
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * * Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in
  18. * the documentation and/or other materials provided with the
  19. * distribution.
  20. * * Neither the name of Intel Corporation nor the names of its
  21. * contributors may be used to endorse or promote products derived
  22. * from this software without specific prior written permission.
  23. *
  24. * @copyright
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  28. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  29. * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  30. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  31. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  32. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  33. * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  35. * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  36. * POSSIBILITY OF SUCH DAMAGE.
  37. **************************************************************************/
  38. #include "global_state.h"
  39. #include "os.h"
  40. #include "bug.h"
  41. #include "metacall_impl.h"
  42. #include "stats.h"
  43. #include "cilk/cilk_api.h"
  44. #include "cilk_malloc.h"
  45. #include "record-replay.h"
  46. #include <algorithm> // For max()
  47. #include <cstring>
  48. #include <cstdlib>
  49. #include <climits>
  50. #include <cerrno>
  51. #ifdef _WIN32
  52. # include <wchar.h>
  53. #endif
// Set to true once the user-settable values portion of the global state
// singleton has been initialized, even if the rest of the singleton has not
// been initialized yet.
//
// TBD: There may be a race when multiple threads try to initialize the
// user-settable values at the same time; nothing here synchronizes access
// to this flag.
int cilkg_user_settable_values_initialized = false;
  61. namespace {
// Single copy of the global state.  Zero-filled until
// cilkg_get_user_settable_values() is called and partially zero-filled until
// cilkg_init_global_state() is called.  The first field holds the size of a
// void* for the debugger and must be valid before initialization, which is
// why it is the only member given an explicit initializer here.
global_state_t global_state_singleton =
{
    sizeof(void *), // addr_size
};
// Variables that need to export C-style names
extern "C"
{
    // Pointer to the global state singleton.  Starts out NULL; it is set by
    // cilkg_publish_global_state() and cleared again by
    // cilkg_deinit_global_state().  A non-NULL value is what
    // cilkg_is_published() reports as "published".
    global_state_t *cilkg_singleton_ptr = NULL;

    // __cilkrts_global_state is exported and referenced by the debugger.
    // The debugger expects it to be valid when the module loads, so it is
    // pointed at the singleton by its static initializer.
    // CILK_EXPORT_DATA
    global_state_t *__cilkrts_global_state = &global_state_singleton;
}
  80. // Returns true if 'a' and 'b' are equal null-terminated strings
  81. inline bool strmatch(const char* a, const char* b)
  82. {
  83. return 0 == std::strcmp(a, b);
  84. }
  85. // Returns the integer value represented by the null-terminated string at 's'.
  86. inline long to_long(const char* s)
  87. {
  88. char *end;
  89. errno = 0;
  90. return std::strtol(s, &end, 0);
  91. }
  92. #ifdef _WIN32
  93. // Returns true if 'a' and 'b' are equal null-terminated wide-char strings
  94. inline bool strmatch(const wchar_t* a, const wchar_t* b)
  95. {
  96. return 0 == wcscmp(a, b);
  97. }
  98. // Returns true if the multi-byte character string at 'a' represents the same
  99. // character sequence as the wide-character string at 'b'. The behavior is
  100. // undefined if 'a' contains more than 30 multi-byte characters.
  101. bool strmatch(const char* a, const wchar_t* b)
  102. {
  103. // Convert 'a' to wide-characters, then compare.
  104. wchar_t wa[31];
  105. std::size_t count;
  106. errno_t err = mbstowcs_s(&count, wa, a, 30);
  107. CILK_ASSERT(0 == err);
  108. if (err) return false;
  109. return strmatch(wa, b);
  110. }
  111. // Returns true if the wide-character string at 'a' represents the same
  112. // character sequence as the multi-byte character string at 'b'. The behavior
  113. // id undefined if 'b' contains more than 30 multi-byte characters.
  114. inline
  115. bool strmatch(const wchar_t* a, const char* b)
  116. {
  117. return strmatch(b, a);
  118. }
  119. // Returns the integer value represented by the null-terminated wide-char
  120. // string at 's'.
  121. inline long to_long(const wchar_t* s)
  122. {
  123. wchar_t *end;
  124. errno = 0;
  125. return wcstol(s, &end, 0);
  126. }
  127. #endif
// Check if Cilkscreen or other sequential ptool wants to force reducers.
//
// Returns true if the flag handed to the metacall is zero afterwards, and
// false if it still holds its initial non-zero value.
bool always_force_reduce()
{
    // Metacall *looks* like a no-op. volatile needed to keep compiler from
    // optimizing away variable.
    //
    // The flag starts out non-zero ('\377').  Judging by the metacall code
    // HYPER_ZERO_IF_FORCE_REDUCE, an attached tool that wants reducers forced
    // presumably zeroes it through the pointer -- TODO confirm against the
    // tool side of the metacall.
    volatile char not_force_reduce = '\377';
    __cilkrts_metacall(METACALL_TOOL_SYSTEM, HYPER_ZERO_IF_FORCE_REDUCE,
                       const_cast<char*>(&not_force_reduce));
    return ! not_force_reduce;
}
  138. // Stores the boolean value represented by the null-terminated string at 'val'
  139. // into the integer object at 'out'. Returns '__CILKRTS_SET_PARAM_SUCCESS' if
  140. // 'val' is "true", "false", "0" or "1" and '__CILKRTS_SET_PARAM_INVALID'
  141. // otherwise.
  142. template <typename INT_T, typename CHAR_T>
  143. int store_bool(INT_T *out, const CHAR_T *val)
  144. {
  145. static const char* const s_zero = "0";
  146. static const char* const s_one = "1";
  147. static const char* const s_true = "true";
  148. static const char* const s_false = "false";
  149. if (val == 0)
  150. return __CILKRTS_SET_PARAM_INVALID;
  151. if (strmatch(s_false, val) || strmatch(s_zero, val)) {
  152. *out = 0;
  153. return __CILKRTS_SET_PARAM_SUCCESS;
  154. }
  155. if (strmatch(s_true, val) || strmatch(s_one, val)) {
  156. *out = 1;
  157. return __CILKRTS_SET_PARAM_SUCCESS;
  158. }
  159. return __CILKRTS_SET_PARAM_INVALID;
  160. }
  161. // Stores the integer value represented by the null-terminated string at 'val'
  162. // into the integer object at 'out', restricting the result to the range 'min'
  163. // to 'max', inclusive. Returns '__CILKRTS_SET_PARAM_SUCCESS' if the conversion
  164. // succeeds and is in range, '__CILKRTS_SET_PARAM_XRANGE' if the conversion
  165. // succeeds but is out of range, and '__CILKRTS_SET_PARAM_INVALID' otherwise. In
  166. // the case of any error, '*out' is unchanged.
  167. template <typename INT_T, typename CHAR_T>
  168. int store_int(INT_T *out, const CHAR_T *val, INT_T min, INT_T max)
  169. {
  170. errno = 0;
  171. long val_as_long = to_long(val);
  172. if (val_as_long == 0 && errno != 0)
  173. return __CILKRTS_SET_PARAM_INVALID;
  174. if (val_as_long < min || val_as_long == LONG_MIN)
  175. return __CILKRTS_SET_PARAM_XRANGE;
  176. else if (val_as_long > max || val_as_long == LONG_MAX)
  177. return __CILKRTS_SET_PARAM_XRANGE;
  178. *out = val_as_long;
  179. return __CILKRTS_SET_PARAM_SUCCESS;
  180. }
  181. // Implementaton of cilkg_set_param templatized on character type.
  182. // Windows will instantiate with both char and wchar_t.
  183. // Note that g must have its user settable values set, but need not be fully
  184. // initialized.
  185. template <class CHAR_T>
  186. int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value)
  187. {
  188. static const char* const s_force_reduce = "force reduce";
  189. static const char* const s_nworkers = "nworkers";
  190. static const char* const s_max_user_workers = "max user workers";
  191. static const char* const s_local_stacks = "local stacks";
  192. static const char* const s_shared_stacks = "shared stacks";
  193. static const char* const s_nstacks = "nstacks";
  194. static const char* const s_stack_size = "stack size";
  195. // We must have a parameter and a value
  196. if (0 == param)
  197. return __CILKRTS_SET_PARAM_INVALID;
  198. if (0 == value)
  199. return __CILKRTS_SET_PARAM_INVALID;
  200. if (strmatch(param, s_force_reduce))
  201. {
  202. // Sets whether we force a reduce operation at every sync. Useful for
  203. // debugging reducers. Off by default. Overridden by Cilkscreen
  204. //
  205. // Documented in cilk_api_<os>.h
  206. if (always_force_reduce())
  207. // Force reduce is set by cilkscreen. User cannot change it.
  208. return __CILKRTS_SET_PARAM_LATE;
  209. return store_bool(&g->force_reduce, value);
  210. }
  211. else if (strmatch(param, s_nworkers))
  212. {
  213. // Set the total number of workers. Overrides count of cores we get
  214. // from the OS and the setting of the CILK_NWORKERS environment
  215. // variable. Setting to 0 indicates that the default worker count
  216. // should be used.
  217. //
  218. // Documented in cilk_api_<os>.h
  219. if (cilkg_singleton_ptr)
  220. return __CILKRTS_SET_PARAM_LATE;
  221. // Fetch the number of cores. There must be at last 1, since we're
  222. // executing on *something*, aren't we!?
  223. int hardware_cpu_count = __cilkrts_hardware_cpu_count();
  224. CILK_ASSERT(hardware_cpu_count > 0);
  225. int max_cpu_count = 16 * hardware_cpu_count;
  226. if (__cilkrts_running_under_sequential_ptool())
  227. {
  228. hardware_cpu_count = 1;
  229. max_cpu_count = 1;
  230. }
  231. // Allow a value of 0, which means "set to hardware thread count".
  232. int ret = store_int(&g->P, value, 0, max_cpu_count);
  233. if (0 == g->P)
  234. g->P = hardware_cpu_count;
  235. return ret;
  236. }
  237. else if (strmatch(param, s_max_user_workers))
  238. {
  239. // ** UNDOCUMENTED **
  240. //
  241. // Sets the number of slots allocated for user worker threads
  242. int hardware_cpu_count = __cilkrts_hardware_cpu_count();
  243. CILK_ASSERT (hardware_cpu_count > 0);
  244. return store_int(&g->max_user_workers, value, 1,
  245. 16 * hardware_cpu_count);
  246. }
  247. else if (strmatch(param, s_local_stacks))
  248. {
  249. // ** UNDOCUMENTED **
  250. //
  251. // Number of stacks we'll hold in the per-worker stack cache. Maximum
  252. // value is 42. See __cilkrts_make_global_state for details.
  253. return store_int(&g->fiber_pool_size, value, 0, 42);
  254. }
  255. else if (strmatch(param, s_shared_stacks))
  256. {
  257. // ** UNDOCUMENTED **
  258. //
  259. // Maximum number of stacks we'll hold in the global stack
  260. // cache. Maximum value is 42. See __cilkrts_make_global_state for
  261. // details.
  262. return store_int(&g->global_fiber_pool_size, value, 0, 42);
  263. }
  264. else if (strmatch(param, s_nstacks))
  265. {
  266. // Sets the maximum number of stacks permitted at one time. If the
  267. // runtime reaches this maximum, it will cease to allocate stacks and
  268. // the app will lose parallelism. 0 means unlimited. Default is
  269. // unlimited. Minimum is twice the number of worker threads, though
  270. // that cannot be tested at this time.
  271. //
  272. // Undocumented at this time, though there are plans to expose it.
  273. // The current implentation is for Linux debugging only and is not
  274. // robust enough for users.
  275. if (cilkg_singleton_ptr)
  276. return __CILKRTS_SET_PARAM_LATE;
  277. return store_int<unsigned>(&g->max_stacks, value, 0, INT_MAX);
  278. }
  279. else if (strmatch(param, s_stack_size))
  280. {
  281. // ** UNDOCUMENTED **
  282. //
  283. // Sets the size (in bytes) of the stacks that Cilk creates.
  284. // Can only be set before the runtime starts.
  285. if (cilkg_singleton_ptr)
  286. return __CILKRTS_SET_PARAM_LATE;
  287. // Maximum value that can be parsed is MAX_INT (32-bit).
  288. int ret = store_int<size_t>(&g->stack_size, value, 0, INT_MAX);
  289. // Process the value the user set (or 0 if the user didn't set
  290. // anything) into something nice for the current OS. This
  291. // processing is done immediately and stored into
  292. // g->stack_size so that a call to get stack size will return
  293. // the value that the runtime will actually use.
  294. g->stack_size = cilkos_validate_stack_size(g->stack_size);
  295. return ret;
  296. }
  297. // If got here, then didn't match any of the strings
  298. return __CILKRTS_SET_PARAM_UNIMP;
  299. }
  300. inline
  301. int calc_max_user_workers(global_state_t *g)
  302. {
  303. // If it's been set by the user, give back what we got
  304. if (g->max_user_workers > 0)
  305. return g->max_user_workers;
  306. // Calculate it
  307. return std::max(3, g->P * 2);
  308. }
  309. } // end unnamed namespace
  310. __CILKRTS_BEGIN_EXTERN_C
  311. /**
  312. * @brief Returns the global state object. If called for the first time,
  313. * initializes the user-settable values in the global state, but does not
  314. * initialize the rest of the structure.
  315. */
  316. global_state_t* cilkg_get_user_settable_values()
  317. {
  318. // Environment variable value. More than big enough for a 64-bit signed
  319. // integer.
  320. char envstr[24];
  321. // Abbreviating &global_state_singleton as g is not only shorter, it also
  322. // facilitates grepping for the string "g->", which appears ubiquitously
  323. // in the runtime code.
  324. global_state_t* g = &global_state_singleton;
  325. // TBD: We need synchronization around this loop to prevent
  326. // multiple threads from initializing this data.
  327. if (! cilkg_user_settable_values_initialized)
  328. {
  329. size_t len;
  330. // Preserve stealing disabled since it may have been set by the
  331. // debugger
  332. int stealing_disabled = g->stealing_disabled;
  333. // All fields will be zero until set. In particular
  334. std::memset(g, 0, sizeof(global_state_t));
  335. // Fetch the number of cores. There must be at last 1, since we're
  336. // executing on *something*, aren't we!?
  337. int hardware_cpu_count = __cilkrts_hardware_cpu_count();
  338. CILK_ASSERT(hardware_cpu_count > 0);
  339. bool under_ptool = __cilkrts_running_under_sequential_ptool();
  340. if (under_ptool)
  341. hardware_cpu_count = 1;
  342. g->stealing_disabled = stealing_disabled;
  343. g->under_ptool = under_ptool;
  344. g->force_reduce = 0; // Default Off
  345. g->P = hardware_cpu_count; // Defaults to hardware CPU count
  346. g->max_user_workers = 0; // 0 unless set by user
  347. g->fiber_pool_size = 7; // Arbitrary default
  348. g->global_fiber_pool_size = 3 * 3* g->P; // Arbitrary default
  349. // 3*P was the default size of the worker array (including
  350. // space for extra user workers). This parameter was chosen
  351. // to match previous versions of the runtime.
  352. if (4 == sizeof(void *))
  353. g->max_stacks = 1200; // Only 1GB on 32-bit machines
  354. else
  355. g->max_stacks = 2400; // 2GB on 64-bit machines
  356. // If we have 2400 1MB stacks, that is 2 gb. If we reach this
  357. // limit on a single-socket machine, we may have other
  358. // problems. Is 2400 too small for large multicore machines?
  359. // TBD(jsukha, 11/27/2012): I set this limit on stacks to be a
  360. // value independent of P. When running on a Xeon Phi with
  361. // small values of P, I recall seeing a few microbenchmarks
  362. // (e.g., fib) where a limit of 10*P seemed to be
  363. // unnecessarily slowing things down.
  364. //
  365. // That being said, the code has changed sufficiently that
  366. // this observation may no longer be true.
  367. //
  368. // Note: in general, the worst-case number of stacks required
  369. // for a Cilk computation with spawn depth "d" on P workers is
  370. // O(Pd). Code with unbalanced recursion may run into issues
  371. // with this stack usage.
  372. g->max_steal_failures = 128; // TBD: depend on max_workers?
  373. g->stack_size = 0; // 0 unless set by the user
  374. // Assume no record or replay log for now
  375. g->record_replay_file_name = NULL;
  376. g->record_or_replay = RECORD_REPLAY_NONE; // set by user
  377. if (always_force_reduce())
  378. g->force_reduce = true;
  379. else if (cilkos_getenv(envstr, sizeof(envstr), "CILK_FORCE_REDUCE"))
  380. store_bool(&g->force_reduce, envstr);
  381. if (under_ptool)
  382. g->P = 1; // Ignore environment variable if under cilkscreen
  383. else if (cilkos_getenv(envstr, sizeof(envstr), "CILK_NWORKERS"))
  384. // Set P to environment variable, but limit to no less than 1
  385. // and no more than 16 times the number of hardware threads.
  386. store_int(&g->P, envstr, 1, 16 * hardware_cpu_count);
  387. if (cilkos_getenv(envstr, sizeof(envstr), "CILK_MAX_USER_WORKERS"))
  388. // Set max_user_workers to environment variable, but limit to no
  389. // less than 1 and no more 16 times the number of hardware
  390. // threads. If not specified, defaults (somewhat arbitrarily) to
  391. // the larger of 3 and twice the number of hardware threads.
  392. store_int(&g->max_user_workers, envstr, 1, 16*hardware_cpu_count);
  393. if (cilkos_getenv(envstr, sizeof(envstr), "CILK_STEAL_FAILURES"))
  394. // Set the number of times a worker should fail to steal before
  395. // it looks to see whether it should suspend itself.
  396. store_int<unsigned>(&g->max_steal_failures, envstr, 1, INT_MAX);
  397. // Compute the total number of workers to allocate. Subtract one from
  398. // nworkers and user workers so that the first user worker isn't
  399. // factored in twice.
  400. //
  401. // total_workers must be computed now to support __cilkrts_get_total_workers
  402. g->total_workers = g->P + calc_max_user_workers(g) - 1;
  403. #ifdef CILK_RECORD_REPLAY
  404. // RecordReplay: See if we've been asked to replay a log
  405. len = cilkos_getenv(envstr, 0, "CILK_REPLAY_LOG");
  406. if (len > 0)
  407. {
  408. len += 1; // Allow for trailing NUL
  409. g->record_or_replay = REPLAY_LOG;
  410. g->record_replay_file_name = (char *)__cilkrts_malloc(len);
  411. cilkos_getenv(g->record_replay_file_name, len, "CILK_REPLAY_LOG");
  412. }
  413. // RecordReplay: See if we've been asked to record a log
  414. len = cilkos_getenv(envstr, 0, "CILK_RECORD_LOG");
  415. if (len > 0)
  416. {
  417. if (RECORD_REPLAY_NONE != g->record_or_replay)
  418. cilkos_warning("CILK_RECORD_LOG ignored since CILK_REPLAY_LOG is defined.\n");
  419. else
  420. {
  421. len += 1; // Allow for trailing NUL
  422. g->record_or_replay = RECORD_LOG;
  423. g->record_replay_file_name = (char *)__cilkrts_malloc(len);
  424. cilkos_getenv(g->record_replay_file_name, len, "CILK_RECORD_LOG");
  425. }
  426. }
  427. #endif
  428. cilkg_user_settable_values_initialized = true;
  429. }
  430. return g;
  431. }
  432. int cilkg_calc_total_workers()
  433. {
  434. global_state_t* g = cilkg_get_user_settable_values();
  435. // Compute the total number of workers to allocate. Subtract one from
  436. // nworkers and user workers so that the first user worker isn't
  437. // factored in twice.
  438. return g->P + calc_max_user_workers(g) - 1;
  439. }
  440. // Should be called while holding the global lock.
  441. global_state_t* cilkg_init_global_state()
  442. {
  443. if (cilkg_singleton_ptr)
  444. return cilkg_singleton_ptr;
  445. // Get partially-initialized global state.
  446. global_state_t* g = cilkg_get_user_settable_values();
  447. if (g->max_stacks > 0) {
  448. // nstacks is currently honored on non-Windows systems only.
  449. // Set an upper bound on the number of stacks that are allocated. If
  450. // nstacks is set, each worker gets up to one stack in its cache so that
  451. // no one worker can hog all of the free stacks and keep work from being
  452. // stolen by the other workers.
  453. // nstacks corresponds to the number of stacks that will be allocated by
  454. // the runtime apart from the initial stack created for each thread by
  455. // the system. Therefore, if a user asks for n stacks, and there are
  456. // p workers created, the total number of stacks is actually n + p.
  457. // This feature is primarily for MIC which has flat memory
  458. // instead of virtual addresses and tends to run out really quickly.
  459. // It is not implemented for Windows and it's non-intuitive
  460. // interaction with the local stack cache is specifically to help out
  461. // MIC.
  462. // About max_stacks / P stacks, except we require at least 1
  463. // per pool.
  464. if (((int)g->max_stacks / g->P) < g->fiber_pool_size)
  465. g->fiber_pool_size = g->max_stacks / g->P;
  466. if (g->fiber_pool_size <= 0) {
  467. g->fiber_pool_size = 1;
  468. }
  469. if ((int)g->max_stacks < g->P)
  470. g->max_stacks = g->P;
  471. g->global_fiber_pool_size = g->P * (g->fiber_pool_size+1);
  472. }
  473. // Number of bytes/address - validation for debugger integration
  474. g->addr_size = sizeof(void *);
  475. __cilkrts_init_stats(&g->stats);
  476. __cilkrts_frame_malloc_global_init(g);
  477. g->Q = 0;
  478. g->total_workers = cilkg_calc_total_workers();
  479. g->system_workers = g->P - 1; // system_workers is here for the debugger.
  480. g->work_done = 0;
  481. g->workers_running = 0;
  482. g->ltqsize = 1024; /* FIXME */
  483. g->stack_size = cilkos_validate_stack_size(g->stack_size);
  484. g->failure_to_allocate_stack = 0;
  485. return g;
  486. }
// Publishes the global state 'g': stores it in the debugger-visible
// __cilkrts_global_state, issues a fence, and then stores it in
// cilkg_singleton_ptr, which is what cilkg_is_published() tests.
void cilkg_publish_global_state(global_state_t* g)
{
    // TBD: which one of these needs to be executed first?  I say
    // cilkg_singleton_ptr needs to be set last, with a mfence in
    // between, since it is the flag that cilkg_is_published() is
    // checking for.
    __cilkrts_global_state = g;
    __cilkrts_fence();
    cilkg_singleton_ptr = g;
}
// Un-publishes the global state by resetting both exported pointers to NULL.
// After this call cilkg_is_published() returns 0.  The singleton object
// itself is not modified here.
void cilkg_deinit_global_state()
{
    // Clear the "published" flag pointer first, then the debugger-visible one.
    cilkg_singleton_ptr = NULL;
    __cilkrts_global_state = NULL;
}
  502. int cilkg_is_published(void)
  503. {
  504. return NULL != cilkg_singleton_ptr;
  505. }
  506. int cilkg_set_param(const char* param, const char* value)
  507. {
  508. return set_param_imp(cilkg_get_user_settable_values(), param, value);
  509. }
  510. #ifdef _WIN32
  511. int cilkg_set_param_w(const wchar_t* param, const wchar_t* value)
  512. {
  513. return set_param_imp(cilkg_get_user_settable_values(), param, value);
  514. }
  515. #endif
// This typedef is declared with C++ language linkage even though it sits
// inside the surrounding extern "C" region.
extern "C++" {
    // C++ scheduler function (that may throw exceptions)
    typedef void cpp_scheduler_t(__cilkrts_worker *w);
}
  520. void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w)
  521. {
  522. global_state_t* g = cilkg_get_global_state();
  523. CILK_ASSERT(g->scheduler);
  524. cpp_scheduler_t* scheduler = (cpp_scheduler_t*) g->scheduler;
  525. try {
  526. scheduler(w);
  527. } catch (...) {
  528. __cilkrts_bug("Exception escaped Cilk context");
  529. }
  530. }
  531. __CILKRTS_END_EXTERN_C
  532. /* End global_state.cpp */