/* global_state.h                  -*-C++-*-
 *
 *************************************************************************
 *
 *  @copyright
 *  Copyright (C) 2009-2013, Intel Corporation
 *  All rights reserved.
 *
 *  @copyright
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  @copyright
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 **************************************************************************/
/**
 * @file global_state.h
 *
 * @brief The global_state_t structure contains most of the global context
 * maintained by the Intel Cilk runtime.
 */
- #ifndef INCLUDED_GLOBAL_STATE_DOT_H
- #define INCLUDED_GLOBAL_STATE_DOT_H
- #include <cilk/common.h>
- #include "frame_malloc.h"
- #include "stats.h"
- #include "bug.h"
- #include "cilk_fiber.h"
- __CILKRTS_BEGIN_EXTERN_C
/**
 * Sentinel fiber handle: a non-null pointer value used where a stack
 * handle is required but has no meaningful value.
 */
#define PLACEHOLDER_FIBER  ((cilk_fiber *)(-2))
/**
 * Modes stored in the record_or_replay field of the global state.
 */
enum record_replay_t {
    RECORD_REPLAY_NONE = 0,   ///< Not recording or replaying a log
    RECORD_LOG,               ///< Recording a log for replay later
    REPLAY_LOG                ///< Replaying a log recorded earlier
};
- /**
- * @brief The global state is a structure that is shared by all workers in
- * Cilk.
- *
- * Make the structure ready for use by calling
- * cilkg_init_global_state() and then cilkg_publish_global_state().
- *
- * The same global lock should be held while both of these methods are
- * called. These methods are split because it is useful to execute
- * other runtime initialization code in between.
- *
- * After cilkg_publish_global_state() has completed, Cilk runtime
- * methods may call cilkg_get_global_state() to look at the published
- * value without holding the global lock.
- *
- * Finally, clean up the global state by calling
- * cilkg_deinit_global_state(). This method should be called only
- * after all calls to cilkg_get_global_state() have completed, and
- * while holding the global lock.
- *
- * Before initialization and after deinitialization, the fields in the
- * global state have unspecified values, except for a few special
- * fields labeled "USER SETTING", which can be read and written before
- * initialization and after deinitialization.
- */
- struct global_state_t { /* COMMON_PORTABLE */
- /* Fields described as "(fixed)" should not be changed after
- * initialization.
- */
- /*************************************************************************
- * Note that debugger integration must reach into the
- * global state! The debugger integration is depending on the
- * offsets of the addr_size, system_workers, total_workers,
- * stealing_disabled, sysdep, and workers. If these offsets change, the
- * debugger integration library will need to be changed to match!!!
- *************************************************************************/
- int addr_size; ///< Number of bytes for an address, used by debugger (fixed)
- int system_workers; ///< Number of system workers (fixed)
- /**
- * @brief USER SETTING: Maximum number of user workers that can be
- * bound to cilk workers.
- *
- * 0 unless set by user. Call cilkg_calc_max_user_workers to get
- * the value.
- */
- int max_user_workers;
- int total_workers; ///< Total number of worker threads allocated (fixed)
- int workers_running; ///< True when system workers have beens started */
- /// Set by debugger to disable stealing (fixed)
- int stealing_disabled;
- /// System-dependent part of the global state
- struct global_sysdep_state *sysdep;
- /// Array of worker structures.
- __cilkrts_worker **workers;
- /******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
- /// Number of frames in each worker's lazy task queue
- __STDNS size_t ltqsize;
- /**
- * @brief USER SETTING: Force all possible reductions.
- *
- * TRUE if running a p-tool that requires reducers to call the reduce()
- * method even if no actual stealing occurs.
- *
- * When set to TRUE, runtime will simulate steals, forcing calls to the
- * the reduce() methods of reducers.
- *
- */
- int force_reduce;
- /// USER SETTING: Per-worker fiber pool size
- int fiber_pool_size;
- /// USER SETTING: Global fiber pool size
- int global_fiber_pool_size;
- /**
- * @brief TRUE when workers should exit scheduling loop so we can
- * shut down the runtime and free the global state.
- *
- * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
- * by idle workers. We need to ensure that it's not in a cache line which
- * may be invalidated by other cores. The surrounding fields are either
- * constant after initialization or not used until shutdown (stats) so we
- * should be OK.
- */
- volatile int work_done;
- int under_ptool; ///< True when running under a serial PIN tool
- statistics stats; ///< Statistics on use of runtime
- /**
- * @brief USER SETTING: Maximum number of stacks the runtime will
- * allocate (apart from those created by the OS when worker
- * threads are created).
- *
- * If max_stacks == 0,there is no pre-defined maximum.
- */
- unsigned max_stacks;
- /// Size of each stack
- size_t stack_size;
- /// Global cache for per-worker memory
- struct __cilkrts_frame_cache frame_malloc;
- /// Global fiber pool
- cilk_fiber_pool fiber_pool;
- /**
- * @brief Track whether the runtime has failed to allocate a
- * stack.
- *
- * Setting this flag prevents multiple warnings from being
- * issued.
- */
- int failure_to_allocate_stack;
- /**
- * @brief USER SETTING: indicate record or replay log.
- * Set to NULL if not used in this run.
- */
- char *record_replay_file_name;
- /**
- * @brief Record/replay state.
- * Valid states are:
- * RECORD_REPLAY_NONE - Not recording or replaying a log
- * RECORD_LOG - Recording a log for replay later
- * REPLAY_LOG - Replay a log recorded earlier
- */
- enum record_replay_t record_or_replay;
- /**
- * @brief Buffer to force max_steal_failures to appear on a
- * different cache line from the previous member variables.
- *
- * This padding is needed because max_steal_failures is read
- * constantly and other modified values in the global state will
- * cause thrashing.
- */
- char cache_buf[64];
- /**
- * @brief Maximum number of times a thread should fail to steal
- * before checking if Cilk is shutting down.
- */
- unsigned int max_steal_failures;
- /// Pointer to scheduler entry point
- void (*scheduler)(__cilkrts_worker *w);
- /**
- * @brief Buffer to force P and Q to appear on a different cache
- * line from the previous member variables.
- */
- char cache_buf_2[64];
- int P; ///< USER SETTING: number of system workers + 1 (fixed)
- int Q; ///< Number of user threads currently bound to workers
- };
- /**
- * @brief Initialize the global state object. This method must both
- * complete before referencing any fields in the global state, except
- * those specified as "user-settable values".
- */
- global_state_t* cilkg_init_global_state();
- /**
- * @brief Publish the global state object, so that
- * cilkg_is_published can return true.
- *
- * @param g - the global state created by cilkg_init_global_state() to
- * publish.
- *
- * After the global state object has been published, a thread should
- * not modify this state unless it has exclusive access (i.e., holds
- * the global lock).
- */
- void cilkg_publish_global_state(global_state_t* g);
/**
 * @brief Report whether the global state is currently usable.
 *
 * @return True if the global state has been fully initialized and
 * published, and has not been deinitialized.
 */
int cilkg_is_published(void);
/**
 * @brief Tear down the global state object.
 *
 * Must be called to free resources when the global state is no longer
 * needed.
 */
void cilkg_deinit_global_state(void);
- /**
- * @brief Returns the global state object. Result is valid only if the
- * global state has been published (see cilkg_publish_global_state()).
- */
- static inline
- global_state_t* cilkg_get_global_state(void)
- {
- // "private" extern declaration:
- extern global_state_t *cilkg_singleton_ptr;
- __CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
- return cilkg_singleton_ptr;
- }
/**
 * @brief Implementation of __cilkrts_set_param().
 *
 * Sets a user-controllable runtime parameter.
 *
 * @param param - string specifying parameter to be set
 * @param value - string specifying new value
 * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
 *    CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
 *    CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
 *
 * @attention The wide character version __cilkrts_set_param_w() is available
 * only on Windows.
 *
 * Allowable parameter names:
 *
 * - "nworkers" - number of processors that should run Cilk code.
 *   The value is a string of digits to be parsed by strtol.
 *
 * - "force reduce" - test reducer callbacks by allocating new views
 *   for every spawn within which a reducer is accessed.  This can
 *   significantly reduce performance.  The value is "1" or "true"
 *   to enable, "0" or "false" to disable.
 *   @warning Enabling "force reduce" when running with more than a single
 *   worker is currently broken.
 *
 * - "max user workers" - (Not publicly documented) Sets the number of slots
 *   allocated for user worker threads.
 *
 * - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
 *   the per-worker stack cache.  Range 1 .. 42.  See
 *   cilkg_init_global_state for details.
 *
 * - "shared stacks" - (Not publicly documented) Maximum number of stacks
 *   we'll hold in the global stack cache.  Maximum value is 42.  See
 *   __cilkrts_make_global_state for details.
 *
 * - "nstacks" - (Not publicly documented at this time, though it may be
 *   exposed in the future) Sets the maximum number of stacks permitted at one
 *   time.  If the runtime reaches this maximum, it will cease to allocate
 *   stacks and the app will lose parallelism.  0 means unlimited.  Default is
 *   unlimited.  Minimum is twice the number of worker threads, though that
 *   cannot be tested at this time.
 */
int cilkg_set_param(const char *param, const char *value);
#ifdef _WIN32
/**
 * @brief Wide-character (Unicode) variant of cilkg_set_param(), declared
 * only when building for Windows.  See the documentation on
 * @ref cilkg_set_param for more details.
 *
 * Sets a user-controllable runtime parameter.
 *
 * @param param - string specifying parameter to be set
 * @param value - string specifying new value
 * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
 *    CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
 *    CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
 */
int cilkg_set_param_w(const wchar_t *param, const wchar_t *value);
#endif  // _WIN32
- /**
- * @brief implementation of __cilkrts_get_nworkers()
- */
- static inline
- int cilkg_get_nworkers(void)
- {
- // "private" extern declaration
- extern global_state_t* cilkg_get_user_settable_values(void);
- return cilkg_get_user_settable_values()->P;
- }
/**
 * @brief Implementation of __cilkrts_get_total_workers().
 *
 * @return The value computed by cilkg_calc_total_workers().
 */
static inline
int cilkg_get_total_workers(void)
{
    // Calculator declared locally ("private" extern declaration).
    extern int cilkg_calc_total_workers(void);

    // The number can fluctuate until initialization, so it is computed
    // from scratch on every call.
    int total = cilkg_calc_total_workers();
    return total;
}
- /**
- * @brief implementation of __cilkrts_get_force_reduce()
- */
- static inline
- int cilkg_get_force_reduce(void)
- {
- // "private" extern declaration
- extern global_state_t* cilkg_get_user_settable_values(void);
- return cilkg_get_user_settable_values()->force_reduce;
- }
- /**
- * @brief implementation of __cilkrts_get_stack_size()
- */
- static inline
- size_t cilkg_get_stack_size(void)
- {
- // "private" extern declaration
- extern global_state_t* cilkg_get_user_settable_values(void);
- return cilkg_get_user_settable_values()->stack_size;
- }
- /**
- * @brief Run the scheduler function stored in the global_state
- *
- * Look up the scheduler function in global_state and run it. Report a fatal
- * error if an exception escapes the scheduler function.
- *
- * @param w - Worker structure to associate with the current thread.
- *
- * @attention The scheduler field of the global state must be set before this
- * function is called.
- */
- void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
- __CILKRTS_END_EXTERN_C
- #endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)
|