- /* scheduler.c -*-C-*-
- *
- *************************************************************************
- *
- * @copyright
- * Copyright (C) 2007-2013, Intel Corporation
- * All rights reserved.
- *
- * @copyright
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * @copyright
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
- * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- **************************************************************************/
- /*
- * Cilk scheduler
- */
- #include "scheduler.h"
- #include "bug.h"
- #include "os.h"
- #include "os_mutex.h"
- #include "local_state.h"
- #include "signal_node.h"
- #include "full_frame.h"
- #include "sysdep.h"
- #include "except.h"
- #include "cilk_malloc.h"
- #include "pedigrees.h"
- #include "record-replay.h"
- #include <limits.h>
- #include <string.h> /* memcpy */
- #include <stdio.h> // sprintf
- #include <stdlib.h> // malloc, free, abort
- #ifdef _WIN32
- # pragma warning(disable:1786) // disable warning: sprintf is deprecated
- # include "sysdep-win.h"
- # include "except-win32.h"
- #endif // _WIN32
- // ICL: Don't complain about conversion from pointer to same-sized integral
- // type in __cilkrts_put_stack. That's why we're using ptrdiff_t
- #ifdef _WIN32
- # pragma warning(disable: 1684)
- #endif
- #include "cilk/cilk_api.h"
- #include "frame_malloc.h"
- #include "metacall_impl.h"
- #include "reducer_impl.h"
- #include "cilk-tbb-interop.h"
- #include "cilk-ittnotify.h"
- #include "stats.h"
- // ICL: Don't complain about loss of precision in myrand
- // I tried restoring the warning after the function, but it didn't
- // suppress it
- #ifdef _WIN32
- # pragma warning(disable: 2259)
- #endif
- #ifndef _WIN32
- # include <unistd.h>
- #endif
- #ifdef __VXWORKS__
- // redeclare longjmp() with noreturn to stop warnings
- extern __attribute__((noreturn))
- void longjmp(jmp_buf, int);
- #endif
- //#define DEBUG_LOCKS 1
- #ifdef DEBUG_LOCKS
- // The currently executing worker must own this worker's lock
- # define ASSERT_WORKER_LOCK_OWNED(w) \
- { \
- __cilkrts_worker *tls_worker = __cilkrts_get_tls_worker(); \
- CILK_ASSERT((w)->l->lock.owner == tls_worker); \
- }
- #else
- # define ASSERT_WORKER_LOCK_OWNED(w)
- #endif // DEBUG_LOCKS
- // Options for the scheduler.
- enum schedule_t { SCHEDULE_RUN,
- SCHEDULE_WAIT,
- SCHEDULE_EXIT };
- // Return values for provably_good_steal()
- enum provably_good_steal_t
- {
- ABANDON_EXECUTION, // Not the last child to the sync - attempt to steal work
- CONTINUE_EXECUTION, // Last child to the sync - continue executing on this worker
- WAIT_FOR_CONTINUE // The replay log indicates that this was the worker
- // which continued. Loop until we are the last worker
- // to the sync.
- };
- // Verify that "w" is the worker we are currently executing on.
- // Because this check is expensive, this method is usually a no-op.
- static inline void verify_current_wkr(__cilkrts_worker *w)
- {
- #if ((REDPAR_DEBUG >= 3) || (FIBER_DEBUG >= 1))
- // Lookup the worker from TLS and compare to w.
- __cilkrts_worker* tmp = __cilkrts_get_tls_worker();
- if (w != tmp) {
- fprintf(stderr, "Error. W=%d, actual worker =%d...\n",
- w->self,
- tmp->self);
- }
- CILK_ASSERT(w == tmp);
- #endif
- }
- static enum schedule_t worker_runnable(__cilkrts_worker *w);
- // Scheduling-fiber functions:
- static void do_return_from_spawn (__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf);
- static void do_sync (__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf);
- // max is defined on Windows and VxWorks
- #if (! defined(_WIN32)) && (! defined(__VXWORKS__))
- // TBD: definition of max() for Linux.
- # define max(a, b) ((a) < (b) ? (b) : (a))
- #endif
- void __cilkrts_dump_stats_to_stderr(global_state_t *g)
- {
- #ifdef CILK_PROFILE
- int i;
- for (i = 0; i < g->total_workers; ++i) {
- // Print out statistics for each worker. We collected them,
- // so why not print them out?
- fprintf(stderr, "Stats for worker %d\n", i);
- dump_stats_to_file(stderr, g->workers[i]->l->stats);
- __cilkrts_accum_stats(&g->stats, g->workers[i]->l->stats);
- }
- // Also print out aggregate statistics.
- dump_stats_to_file(stderr, &g->stats);
- #endif
- fprintf(stderr,
- "CILK PLUS Thread Info: P=%d, Q=%d\n",
- g->P,
- g->Q);
- fprintf(stderr,
- "CILK PLUS RUNTIME MEMORY USAGE: %lld bytes",
- (long long)g->frame_malloc.allocated_from_os);
- #ifdef CILK_PROFILE
- if (g->stats.stack_hwm)
- fprintf(stderr, ", %ld stacks", g->stats.stack_hwm);
- #endif
- fputc('\n', stderr);
- }
- static void validate_worker(__cilkrts_worker *w)
- {
- /* check the magic numbers, for debugging purposes */
- if (w->l->worker_magic_0 != WORKER_MAGIC_0 ||
- w->l->worker_magic_1 != WORKER_MAGIC_1)
- abort_because_rts_is_corrupted();
- }
- static void double_link(full_frame *left_ff, full_frame *right_ff)
- {
- if (left_ff)
- left_ff->right_sibling = right_ff;
- if (right_ff)
- right_ff->left_sibling = left_ff;
- }
- /* add CHILD to the right of all children of PARENT */
- static void push_child(full_frame *parent_ff, full_frame *child_ff)
- {
- double_link(parent_ff->rightmost_child, child_ff);
- double_link(child_ff, 0);
- parent_ff->rightmost_child = child_ff;
- }
- /* unlink CHILD from the list of all children of PARENT */
- static void unlink_child(full_frame *parent_ff, full_frame *child_ff)
- {
- double_link(child_ff->left_sibling, child_ff->right_sibling);
- if (!child_ff->right_sibling) {
- /* this is the rightmost child -- update parent link */
- CILK_ASSERT(parent_ff->rightmost_child == child_ff);
- parent_ff->rightmost_child = child_ff->left_sibling;
- }
- child_ff->left_sibling = child_ff->right_sibling = 0; /* paranoia */
- }
- static void incjoin(full_frame *ff)
- {
- ++ff->join_counter;
- }
- static int decjoin(full_frame *ff)
- {
- CILK_ASSERT(ff->join_counter > 0);
- return (--ff->join_counter);
- }
- static int simulate_decjoin(full_frame *ff)
- {
- CILK_ASSERT(ff->join_counter > 0);
- return (ff->join_counter - 1);
- }
- /*
- * Pseudo-random generator defined by the congruence S' = 69070 * S
- * mod (2^32 - 5). Marsaglia (CACM July 1993) says on page 107 that
- * this is a ``good one''. There you go.
- *
- * The literature makes a big fuss about avoiding the division, but
- * for us it is not worth the hassle.
- */
- static const unsigned RNGMOD = ((1ULL << 32) - 5);
- static const unsigned RNGMUL = 69070U;
- static unsigned myrand(__cilkrts_worker *w)
- {
- unsigned state = w->l->rand_seed;
- state = (unsigned)((RNGMUL * (unsigned long long)state) % RNGMOD);
- w->l->rand_seed = state;
- return state;
- }
- static void mysrand(__cilkrts_worker *w, unsigned seed)
- {
- seed %= RNGMOD;
- seed += (seed == 0); /* 0 does not belong to the multiplicative
- group. Use 1 instead */
- w->l->rand_seed = seed;
- }
- /* W grabs its own lock */
- void __cilkrts_worker_lock(__cilkrts_worker *w)
- {
- validate_worker(w);
- CILK_ASSERT(w->l->do_not_steal == 0);
- /* tell thieves to stay out of the way */
- w->l->do_not_steal = 1;
- __cilkrts_fence(); /* probably redundant */
- __cilkrts_mutex_lock(w, &w->l->lock);
- }
- void __cilkrts_worker_unlock(__cilkrts_worker *w)
- {
- __cilkrts_mutex_unlock(w, &w->l->lock);
- CILK_ASSERT(w->l->do_not_steal == 1);
- /* The fence is probably redundant. Use a release
- operation when supported (gcc and compatible);
- that is faster on x86, which serializes normal stores. */
- #if defined __GNUC__ && (__GNUC__ * 10 + __GNUC_MINOR__ > 43 || __ICC >= 1110)
- __sync_lock_release(&w->l->do_not_steal);
- #else
- w->l->do_not_steal = 0;
- __cilkrts_fence(); /* store-store barrier, redundant on x86 */
- #endif
- }
- /* try to acquire the lock of some *other* worker */
- static int worker_trylock_other(__cilkrts_worker *w,
- __cilkrts_worker *other)
- {
- int status = 0;
- validate_worker(other);
- /* This protocol guarantees that, after setting the DO_NOT_STEAL
- flag, worker W can enter its critical section after waiting for
- the thief currently in the critical section (if any) and at
- most one other thief.
- This requirement is overly paranoid, but it should protect us
- against future nonsense from OS implementors.
- */
- /* compete for the right to disturb OTHER */
- if (__cilkrts_mutex_trylock(w, &other->l->steal_lock)) {
- if (other->l->do_not_steal) {
- /* leave it alone */
- } else {
- status = __cilkrts_mutex_trylock(w, &other->l->lock);
- }
- __cilkrts_mutex_unlock(w, &other->l->steal_lock);
- }
- return status;
- }
- static void worker_unlock_other(__cilkrts_worker *w,
- __cilkrts_worker *other)
- {
- __cilkrts_mutex_unlock(w, &other->l->lock);
- }
- /* Lock macro Usage:
- BEGIN_WITH_WORKER_LOCK(w) {
- statement;
- statement;
- BEGIN_WITH_FRAME_LOCK(w, ff) {
- statement;
- statement;
- } END_WITH_FRAME_LOCK(w, ff);
- } END_WITH_WORKER_LOCK(w);
- */
- #define BEGIN_WITH_WORKER_LOCK(w) __cilkrts_worker_lock(w); do
- #define END_WITH_WORKER_LOCK(w) while (__cilkrts_worker_unlock(w), 0)
- // TBD(jsukha): These are worker lock acquisitions on
- // a worker whose deque is empty. My conjecture is that we
- // do not need to hold the worker lock at these points.
- // I have left them in for now, however.
- //
- // #define REMOVE_POSSIBLY_OPTIONAL_LOCKS
- #ifdef REMOVE_POSSIBLY_OPTIONAL_LOCKS
- #define BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) do
- #define END_WITH_WORKER_LOCK_OPTIONAL(w) while (0)
- #else
- #define BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) __cilkrts_worker_lock(w); do
- #define END_WITH_WORKER_LOCK_OPTIONAL(w) while (__cilkrts_worker_unlock(w), 0)
- #endif
- #define BEGIN_WITH_FRAME_LOCK(w, ff) \
- do { full_frame *_locked_ff = ff; __cilkrts_frame_lock(w, _locked_ff); do
- #define END_WITH_FRAME_LOCK(w, ff) \
- while (__cilkrts_frame_unlock(w, _locked_ff), 0); } while (0)
- /* W becomes the owner of F and F can be stolen from W */
- static void make_runnable(__cilkrts_worker *w, full_frame *ff)
- {
- w->l->frame_ff = ff;
- /* CALL_STACK is invalid (the information is stored implicitly in W) */
- ff->call_stack = 0;
- }
- /*
- * The worker parameter is unused, except for print-debugging purposes.
- */
- static void make_unrunnable(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf,
- int is_loot,
- const char *why)
- {
- /* CALL_STACK becomes valid again */
- ff->call_stack = sf;
- if (sf) {
- #if CILK_LIB_DEBUG
- if (__builtin_expect(sf->flags & CILK_FRAME_EXITING, 0))
- __cilkrts_bug("W%d suspending exiting frame %p/%p\n", w->self, ff, sf);
- #endif
- sf->flags |= CILK_FRAME_STOLEN | CILK_FRAME_SUSPENDED;
- sf->worker = 0;
- if (is_loot)
- __cilkrts_put_stack(ff, sf);
- /* perform any system-dependent action, such as saving the
- state of the stack */
- __cilkrts_make_unrunnable_sysdep(w, ff, sf, is_loot, why);
- }
- }
- /* Push the next full frame to be made active in this worker and increment its
- * join counter. __cilkrts_push_next_frame and pop_next_frame work on a
- * one-element queue. This queue is used to communicate across the runtime
- * from the code that wants to activate a frame to the code that can actually
- * begin execution on that frame. They are asymmetrical in that push
- * increments the join counter but pop does not decrement it. Rather, a
- * single push/pop combination makes a frame active and increments its join
- * counter once. */
- void __cilkrts_push_next_frame(__cilkrts_worker *w, full_frame *ff)
- {
- CILK_ASSERT(ff);
- CILK_ASSERT(!w->l->next_frame_ff);
- incjoin(ff);
- w->l->next_frame_ff = ff;
- }
- /* Get the next full-frame to be made active in this worker. The join count
- * of the full frame will have been incremented by the corresponding push
- * event. See __cilkrts_push_next_frame, above.
- */
- static full_frame *pop_next_frame(__cilkrts_worker *w)
- {
- full_frame *ff;
- ff = w->l->next_frame_ff;
- // Remove the frame from the next_frame field.
- //
- // If this is a user worker, then there is a chance that another worker
- // from our team could push work into our next_frame (if it is the last
- // worker doing work for this team). The other worker's setting of the
- // next_frame could race with our setting of next_frame to NULL. This is
- // the only possible race condition on next_frame. However, if next_frame
- // has a non-NULL value, then it means the team still has work to do, and
- // there is no chance of another team member populating next_frame. Thus,
- // it is safe to set next_frame to NULL, if it was populated. There is no
- // need for an atomic op.
- if (NULL != ff) {
- w->l->next_frame_ff = NULL;
- }
- return ff;
- }
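- /*
-  * Illustrative pairing only (kept out of the build with #if 0; the
-  * sketch_* name is hypothetical and the scheduling-loop details are
-  * simplified): one push/pop round trip through the one-element queue
-  * described above. Note the asymmetry: push increments the join
-  * counter, pop does not decrement it.
-  */
- #if 0
- static void sketch_activate_frame(__cilkrts_worker *w, full_frame *ff)
- {
-     /* Producer side: hand ff to w (takes one join-counter reference). */
-     __cilkrts_push_next_frame(w, ff);
-     /* Consumer side (normally the scheduling loop running on w). */
-     full_frame *next = pop_next_frame(w);
-     CILK_ASSERT(next == ff);   /* the queue holds at most one frame */
-     /* The reference taken by push is released later by decjoin(),
-        e.g. at a provably good steal or when the frame returns. */
- }
- #endif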
- /*
- * Identify the single worker that is allowed to cross a sync in this frame. A
- * thief should call this function when it is the first to steal work from a
- * user worker. "First to steal work" may mean that there has been parallelism
- * in the user worker before, but the whole team sync'd, and this is the first
- * steal after that.
- *
- * This should happen while holding the worker and frame lock.
- */
- static void set_sync_master(__cilkrts_worker *w, full_frame *ff)
- {
- w->l->last_full_frame = ff;
- ff->sync_master = w;
- }
- /*
- * The sync that ends all parallelism for a particular user worker is about to
- * be crossed. Decouple the worker and frame.
- *
- * No locks need to be held since the user worker isn't doing anything, and none
- * of the system workers can steal from it. But unset_sync_master() should be
- * called before the user worker knows about this work (i.e., before
- * w->l->next_frame_ff is set).
- */
- static void unset_sync_master(__cilkrts_worker *w, full_frame *ff)
- {
- CILK_ASSERT(WORKER_USER == w->l->type);
- CILK_ASSERT(ff->sync_master == w);
- ff->sync_master = NULL;
- w->l->last_full_frame = NULL;
- }
- /********************************************************************
- * THE protocol:
- ********************************************************************/
- /*
- * This is a protocol for work stealing that minimizes the overhead on
- * the victim.
- *
- * The protocol uses three shared pointers into the worker's deque:
- * - T - the "tail"
- * - H - the "head"
- * - E - the "exception" NB: In this case, "exception" has nothing to do
- * with C++ throw-catch exceptions -- it refers only to a non-normal return,
- * i.e., a steal or similar scheduling exception.
- *
- * with H <= E, H <= T.
- *
- * Stack frames SF, where H <= E < T, are available for stealing.
- *
- * The worker operates on the T end of the stack. The frame being
- * worked on is not on the stack. To make a continuation available for
- * stealing, the worker pushes a frame onto the stack: stores *T++ = SF.
- * To return, it pops the frame off the stack: obtains SF = *--T.
- *
- * After decrementing T, the condition E > T signals to the victim that
- * it should invoke the runtime system's "THE" exception handler. The
- * pointer E can become INFINITY, in which case the victim must invoke
- * the THE exception handler as soon as possible.
- *
- * See "The implementation of the Cilk-5 multithreaded language", PLDI 1998,
- * http://portal.acm.org/citation.cfm?doid=277652.277725, for more information
- * on the THE protocol.
- */
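- /*
-  * Illustrative sketch only (kept out of the build with #if 0; the
-  * sketch_* names are hypothetical). It shows how the two halves of
-  * the THE protocol interact: the victim popping from the T end and a
-  * thief claiming the H end. The real victim-side code is emitted by
-  * the compiler and lives in the worker headers; the real thief side
-  * is increment_E()/can_steal_from()/decrement_E() below.
-  */
- #if 0
- /* Victim side: pop the frame most recently pushed at T. */
- static __cilkrts_stack_frame *sketch_pop(__cilkrts_worker *w)
- {
-     __cilkrts_stack_frame *volatile *t = w->tail - 1;
-     w->tail = t;
-     __cilkrts_fence();           /* publish the decrement of T */
-     if (w->exc > t)              /* E > T: a thief (or E == infinity) */
-         return NULL;             /* defer to the THE exception handler */
-     return *t;                   /* uncontended fast path */
- }
- /* Thief side: claim the oldest frame at H, mirroring
-    dekker_protocol() + detach_for_steal(), which must run while
-    holding the victim's lock. */
- static __cilkrts_stack_frame *sketch_steal(__cilkrts_worker *victim)
- {
-     __cilkrts_stack_frame *sf = NULL;
-     victim->exc = victim->exc + 1;      /* increment_E */
-     __cilkrts_fence();
-     if (victim->head < victim->tail) {
-         sf = *victim->head;             /* H <= E < T: steal *H */
-         victim->head = victim->head + 1;
-     } else {
-         victim->exc = victim->exc - 1;  /* decrement_E: give up */
-     }
-     return sf;
- }
- #endif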
- /* the infinity value of E */
- #define EXC_INFINITY ((__cilkrts_stack_frame **) (-1))
- static void increment_E(__cilkrts_worker *victim)
- {
- __cilkrts_stack_frame *volatile *tmp;
- // The currently executing worker must own the worker lock to touch
- // victim->exc
- ASSERT_WORKER_LOCK_OWNED(victim);
- tmp = victim->exc;
- if (tmp != EXC_INFINITY) {
- /* On most x86 this pair of operations would be slightly faster
- as an atomic exchange due to the implicit memory barrier in
- an atomic instruction. */
- victim->exc = tmp + 1;
- __cilkrts_fence();
- }
- }
- static void decrement_E(__cilkrts_worker *victim)
- {
- __cilkrts_stack_frame *volatile *tmp;
- // The currently executing worker must own the worker lock to touch
- // victim->exc
- ASSERT_WORKER_LOCK_OWNED(victim);
- tmp = victim->exc;
- if (tmp != EXC_INFINITY) {
- /* On most x86 this pair of operations would be slightly faster
- as an atomic exchange due to the implicit memory barrier in
- an atomic instruction. */
- victim->exc = tmp - 1;
- __cilkrts_fence(); /* memory fence not really necessary */
- }
- }
- #if 0
- /* for now unused, will be necessary if we implement abort */
- static void signal_THE_exception(__cilkrts_worker *wparent)
- {
- wparent->exc = EXC_INFINITY;
- __cilkrts_fence();
- }
- #endif
- static void reset_THE_exception(__cilkrts_worker *w)
- {
- // The currently executing worker must own the worker lock to touch
- // w->exc
- ASSERT_WORKER_LOCK_OWNED(w);
- w->exc = w->head;
- __cilkrts_fence();
- }
- /* conditions under which victim->head can be stolen: */
- static int can_steal_from(__cilkrts_worker *victim)
- {
- return ((victim->head < victim->tail) &&
- (victim->head < victim->protected_tail));
- }
- /* Return TRUE if the frame can be stolen, false otherwise */
- static int dekker_protocol(__cilkrts_worker *victim)
- {
- // increment_E and decrement_E are going to touch victim->exc. The
- // currently executing worker must own victim's lock before they can
- // modify it
- ASSERT_WORKER_LOCK_OWNED(victim);
- /* ASSERT(E >= H); */
- increment_E(victim);
- /* ASSERT(E >= H + 1); */
- if (can_steal_from(victim)) {
- /* success, we can steal victim->head and set H <- H + 1
- in detach() */
- return 1;
- } else {
- /* failure, restore previous state */
- decrement_E(victim);
- return 0;
- }
- }
- /* Link PARENT and CHILD in the spawn tree */
- static full_frame *make_child(__cilkrts_worker *w,
- full_frame *parent_ff,
- __cilkrts_stack_frame *child_sf,
- cilk_fiber *fiber)
- {
- full_frame *child_ff = __cilkrts_make_full_frame(w, child_sf);
- child_ff->parent = parent_ff;
- push_child(parent_ff, child_ff);
- //DBGPRINTF("%d- make_child - child_frame: %p, parent_frame: %p, child_sf: %p\n"
- // " parent - parent: %p, left_sibling: %p, right_sibling: %p, rightmost_child: %p\n"
- // " child - parent: %p, left_sibling: %p, right_sibling: %p, rightmost_child: %p\n",
- // w->self, child, parent, child_sf,
- // parent->parent, parent->left_sibling, parent->right_sibling, parent->rightmost_child,
- // child->parent, child->left_sibling, child->right_sibling, child->rightmost_child);
- CILK_ASSERT(parent_ff->call_stack);
- child_ff->is_call_child = (fiber == NULL);
- /* PLACEHOLDER_FIBER is used as a non-null marker indicating that
- the child should be treated as a spawn child even though we have not
- yet assigned a real fiber to its parent. */
- if (fiber == PLACEHOLDER_FIBER)
- fiber = NULL; /* Parent actually gets a null fiber, for now */
- /* perform any system-dependent actions, such as capturing
- parameter passing information */
- /*__cilkrts_make_child_sysdep(child, parent);*/
- /* Child gets reducer map and stack of parent.
- Parent gets a new map and new stack. */
- child_ff->fiber_self = parent_ff->fiber_self;
- child_ff->sync_master = NULL;
- if (child_ff->is_call_child) {
- /* Cause segfault on any attempted access. The parent gets
- the child map and stack when the child completes. */
- parent_ff->fiber_self = 0;
- } else {
- parent_ff->fiber_self = fiber;
- }
- incjoin(parent_ff);
- return child_ff;
- }
- static inline __cilkrts_stack_frame *__cilkrts_advance_frame(__cilkrts_stack_frame *sf)
- {
- __cilkrts_stack_frame *p = sf->call_parent;
- sf->call_parent = 0;
- return p;
- }
- /* w should be the currently executing worker.
- * loot_sf is the youngest stack frame in the call stack being
- * unrolled (i.e., the most deeply nested stack frame.)
- *
- * When this method is called for a steal, loot_sf should be on a
- * victim worker which is different from w.
- * For CILK_FORCE_REDUCE, the victim worker will equal w.
- *
- * Before execution, the __cilkrts_stack_frame's have pointers from
- * older to younger, i.e., a __cilkrts_stack_frame points to parent.
- *
- * This method creates a full frame for each __cilkrts_stack_frame in
- * the call stack, with each full frame also pointing to its parent.
- *
- * The method returns the full frame created for loot_sf, i.e., the
- * youngest full frame.
- */
- static full_frame *unroll_call_stack(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *const loot_sf)
- {
- __cilkrts_stack_frame *sf = loot_sf;
- __cilkrts_stack_frame *rev_sf = 0;
- __cilkrts_stack_frame *t_sf;
- CILK_ASSERT(sf);
- /*CILK_ASSERT(sf->call_parent != sf);*/
- /* The leafmost frame is unsynched. */
- if (sf->worker != w)
- sf->flags |= CILK_FRAME_UNSYNCHED;
- /* Reverse the call stack to make a linked list ordered from parent
- to child. sf->call_parent points to the child of SF instead of
- the parent. */
- do {
- t_sf = (sf->flags & (CILK_FRAME_DETACHED|CILK_FRAME_STOLEN|CILK_FRAME_LAST))? 0 : sf->call_parent;
- sf->call_parent = rev_sf;
- rev_sf = sf;
- sf = t_sf;
- } while (sf);
- sf = rev_sf;
- /* Promote each stack frame to a full frame in order from parent
- to child, following the reversed list we just built. */
- make_unrunnable(w, ff, sf, sf == loot_sf, "steal 1");
- /* T is the *child* of SF, because we have reversed the list */
- for (t_sf = __cilkrts_advance_frame(sf); t_sf;
- sf = t_sf, t_sf = __cilkrts_advance_frame(sf)) {
- ff = make_child(w, ff, t_sf, NULL);
- make_unrunnable(w, ff, t_sf, t_sf == loot_sf, "steal 2");
- }
- /* XXX What if the leafmost frame does not contain a sync
- and this steal is from promoting our own deque? */
- /*sf->flags |= CILK_FRAME_UNSYNCHED;*/
- CILK_ASSERT(!sf->call_parent);
- return ff;
- }
- /* detach the top of the deque frame from the VICTIM and install a new
- CHILD frame in its place */
- static void detach_for_steal(__cilkrts_worker *w,
- __cilkrts_worker *victim,
- cilk_fiber* fiber)
- {
- /* ASSERT: we own victim->lock */
- full_frame *parent_ff, *child_ff, *loot_ff;
- __cilkrts_stack_frame *volatile *h;
- __cilkrts_stack_frame *sf;
- w->l->team = victim->l->team;
- CILK_ASSERT(w->l->frame_ff == 0 || w == victim);
- h = victim->head;
- CILK_ASSERT(*h);
- victim->head = h + 1;
- parent_ff = victim->l->frame_ff;
- BEGIN_WITH_FRAME_LOCK(w, parent_ff) {
- /* parent no longer referenced by victim */
- decjoin(parent_ff);
- /* obtain the victim call stack */
- sf = *h;
- /* perform system-dependent normalizations */
- /*__cilkrts_normalize_call_stack_on_steal(sf);*/
- /* unroll PARENT_FF with call stack SF, adopt the youngest
- frame LOOT. If loot_ff == parent_ff, then we hold loot_ff->lock,
- otherwise, loot_ff is newly created and we can modify it without
- holding its lock. */
- loot_ff = unroll_call_stack(w, parent_ff, sf);
- #if REDPAR_DEBUG >= 3
- fprintf(stderr, "[W=%d, victim=%d, desc=detach, parent_ff=%p, loot=%p]\n",
- w->self, victim->self,
- parent_ff, loot_ff);
- #endif
- if (WORKER_USER == victim->l->type &&
- NULL == victim->l->last_full_frame) {
- // Mark this looted frame as special: only the original user worker
- // may cross the sync.
- //
- // This call is a shared access to
- // victim->l->last_full_frame.
- set_sync_master(victim, loot_ff);
- }
- /* LOOT is the next frame that the thief W is supposed to
- run, unless the thief is stealing from itself, in which
- case the thief W == VICTIM executes CHILD and nobody
- executes LOOT. */
- if (w == victim) {
- /* Pretend that frame has been stolen */
- loot_ff->call_stack->flags |= CILK_FRAME_UNSYNCHED;
- loot_ff->simulated_stolen = 1;
- }
- else
- __cilkrts_push_next_frame(w, loot_ff);
- // After this "push_next_frame" call, w now owns loot_ff.
- child_ff = make_child(w, loot_ff, 0, fiber);
- BEGIN_WITH_FRAME_LOCK(w, child_ff) {
- /* install child in the victim's work queue, taking
- the parent_ff's place */
- /* child is referenced by victim */
- incjoin(child_ff);
- // With this call, w is bestowing ownership of the newly
- // created frame child_ff to the victim, and victim is
- // giving up ownership of parent_ff.
- //
- // Worker w will either take ownership of parent_ff
- // if parent_ff == loot_ff, or parent_ff will be
- // suspended.
- //
- // Note that this call changes the victim->frame_ff
- // while the victim may be executing.
- make_runnable(victim, child_ff);
- } END_WITH_FRAME_LOCK(w, child_ff);
- } END_WITH_FRAME_LOCK(w, parent_ff);
- }
- /**
- * @brief cilk_fiber_proc that resumes user code after a successful
- * random steal.
- * This function longjmps back into the user code whose state is
- * stored in cilk_fiber_get_data(fiber)->resume_sf. The stack pointer
- * is adjusted so that the code resumes on the specified fiber stack
- * instead of its original stack.
- *
- * This method gets executed only on a fiber freshly allocated from a
- * pool.
- *
- * @param fiber The fiber being used to resume user code.
- * @param arg Unused.
- */
- static
- void fiber_proc_to_resume_user_code_for_random_steal(cilk_fiber *fiber)
- {
- cilk_fiber_data *data = cilk_fiber_get_data(fiber);
- __cilkrts_stack_frame* sf = data->resume_sf;
- full_frame *ff;
- CILK_ASSERT(sf);
- // When we pull the resume_sf out of the fiber to resume it, clear
- // the old value.
- data->resume_sf = NULL;
- CILK_ASSERT(sf->worker == data->owner);
- ff = sf->worker->l->frame_ff;
- // For Win32, we need to overwrite the default exception handler
- // in this function, so that when the OS exception handling code
- // walks off the top of the current Cilk stack, it reaches our stub
- // handler.
-
- // Also, this function needs to be wrapped into a try-catch block
- // so the compiler generates the appropriate exception information
- // in this frame.
-
- // TBD: IS THIS HANDLER IN THE WRONG PLACE? Can we longjmp out of
- // this function (and does it matter?)
- #if defined(_WIN32) && !defined(_WIN64)
- install_exception_stub_handler();
- __try
- #endif
- {
- char* new_sp = sysdep_reset_jump_buffers_for_resume(fiber, ff, sf);
-
- // Notify the Intel tools that we're stealing code
- ITT_SYNC_ACQUIRED(sf->worker);
- NOTIFY_ZC_INTRINSIC("cilk_continue", sf);
- // TBD: We'd like to move TBB-interop methods into the fiber
- // eventually.
- cilk_fiber_invoke_tbb_stack_op(fiber, CILK_TBB_STACK_ADOPT);
-
- sf->flags &= ~CILK_FRAME_SUSPENDED;
- // longjmp to user code. Don't process exceptions here,
- // because we are resuming a stolen frame.
- sysdep_longjmp_to_sf(new_sp, sf, NULL);
- /*NOTREACHED*/
- // Intel's C compiler respects the preceding lint pragma
- }
- #if defined(_WIN32) && !defined(_WIN64)
- __except (CILK_ASSERT(!"should not execute the the stub filter"),
- EXCEPTION_EXECUTE_HANDLER)
- {
- // If we are here, that means something very wrong
- // has happened in our exception processing...
- CILK_ASSERT(! "should not be here!");
- }
- #endif
- }
- static void random_steal(__cilkrts_worker *w)
- {
- __cilkrts_worker *victim = NULL;
- cilk_fiber *fiber = NULL;
- int n;
- int success = 0;
- int32_t victim_id;
- // Nothing's been stolen yet. When true, this will flag
- // setup_for_execution_pedigree to increment the pedigree
- w->l->work_stolen = 0;
- /* If the user has disabled stealing (using the debugger) we fail */
- if (__builtin_expect(w->g->stealing_disabled, 0))
- return;
- CILK_ASSERT(w->l->type == WORKER_SYSTEM || w->l->team == w);
- /* If there is only one processor, work can still be stolen.
- There must be only one worker to prevent stealing. */
- CILK_ASSERT(w->g->total_workers > 1);
- /* pick random *other* victim */
- n = myrand(w) % (w->g->total_workers - 1);
- if (n >= w->self)
- ++n;
- // If we're replaying a log, override the victim. -1 indicates that
- // we've exhausted the list of things this worker stole when we recorded
- // the log so just return. If we're not replaying a log,
- // replay_get_next_recorded_victim() just returns the victim ID passed in.
- n = replay_get_next_recorded_victim(w, n);
- if (-1 == n)
- return;
- victim = w->g->workers[n];
- START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) {
- /* Verify that we can get a stack. If not, no need to continue. */
- fiber = cilk_fiber_allocate(&w->l->fiber_pool);
- } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE);
- if (NULL == fiber) {
- #if FIBER_DEBUG >= 2
- fprintf(stderr, "w=%d: failed steal because we could not get a fiber\n",
- w->self);
- #endif
- return;
- }
- /* do not steal from self */
- CILK_ASSERT (victim != w);
- /* Execute a quick check before engaging in the THE protocol.
- Avoid grabbing locks if there is nothing to steal. */
- if (!can_steal_from(victim)) {
- NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_EMPTYQ);
- START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
- int ref_count = cilk_fiber_remove_reference(fiber, &w->l->fiber_pool);
- // Fibers we use when trying to steal should not be active,
- // and thus should not have any other references.
- CILK_ASSERT(0 == ref_count);
- } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
- return;
- }
-
- /* Attempt to steal work from the victim */
- if (worker_trylock_other(w, victim)) {
- if (w->l->type == WORKER_USER && victim->l->team != w) {
- // Fail to steal if this is a user worker and the victim is not
- // on this team. If a user worker were allowed to steal work
- // descended from another user worker, the former might not be
- // done with its work by the time it was needed to resume and
- // unbind. Therefore, user workers are not permitted to change
- // teams.
- // There is no race on the victim's team because the victim cannot
- // change its team until it runs out of work to do, at which point
- // it will try to take out its own lock, and this worker already
- // holds it.
- NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_USER_WORKER);
- } else if (victim->l->frame_ff) {
- // A successful steal will change victim->frame_ff, even
- // though the victim may be executing. Thus, the lock on
- // the victim's deque is also protecting victim->frame_ff.
- if (dekker_protocol(victim)) {
- int proceed_with_steal = 1; // optimistic
- // If we're replaying a log, verify that this is the correct frame
- // to steal from the victim
- if (! replay_match_victim_pedigree(w, victim))
- {
- // Abort the steal attempt. decrement_E(victim) to
- // counter the increment_E(victim) done by the
- // dekker protocol
- decrement_E(victim);
- proceed_with_steal = 0;
- }
- if (proceed_with_steal)
- {
- START_INTERVAL(w, INTERVAL_STEAL_SUCCESS) {
- success = 1;
- detach_for_steal(w, victim, fiber);
- victim_id = victim->self;
- #if REDPAR_DEBUG >= 1
- fprintf(stderr, "Wkr %d stole from victim %d, fiber = %p\n",
- w->self, victim->self, fiber);
- #endif
- // The use of victim->self contradicts our
- // classification of the "self" field as
- // local. But since this code is only for
- // debugging, it is ok.
- DBGPRINTF ("%d-%p: Stealing work from worker %d\n"
- " sf: %p, call parent: %p\n",
- w->self, GetCurrentFiber(), victim->self,
- w->l->next_frame_ff->call_stack,
- w->l->next_frame_ff->call_stack->call_parent);
- } STOP_INTERVAL(w, INTERVAL_STEAL_SUCCESS);
- } // end if(proceed_with_steal)
- } else {
- NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_DEKKER);
- }
- } else {
- NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_EMPTYQ);
- }
- worker_unlock_other(w, victim);
- } else {
- NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_LOCK);
- }
- // Record whether work was stolen. When true, this will flag
- // setup_for_execution_pedigree to increment the pedigree
- w->l->work_stolen = success;
- if (0 == success) {
- // failed to steal work. Return the fiber to the pool.
- START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
- int ref_count = cilk_fiber_remove_reference(fiber, &w->l->fiber_pool);
- // Fibers we use when trying to steal should not be active,
- // and thus should not have any other references.
- CILK_ASSERT(0 == ref_count);
- } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
- }
- else
- {
- // Since our steal was successful, finish initialization of
- // the fiber.
- cilk_fiber_reset_state(fiber,
- fiber_proc_to_resume_user_code_for_random_steal);
- // Record the pedigree of the frame that w has stolen.
- // record only if CILK_RECORD_LOG is set
- replay_record_steal(w, victim_id);
- }
- }
- /**
- * At a provably good steal, we need to transfer the child reducer map
- * from ff->children_reducer_map into v->reducer_map, where v is the
- * worker that resumes execution of ff.
- *
- * Normally, we have v == w, where w is the currently executing
- * worker. In the case where we are resuming a team leader on a user
- * worker, however, v might differ from w.
- * Thus, this operation is a no-op, since we can't really move
- * ff->children_reducer_map into w here.
- *
- * Instead, this work is done in setup_for_execution_reducers().
- */
- static inline void provably_good_steal_reducers(__cilkrts_worker *w,
- full_frame *ff)
- {
- // No-op.
- }
- /* at a provably good steal, incorporate the accumulated exceptions of
- children into the parent's exception */
- static void provably_good_steal_exceptions(__cilkrts_worker *w,
- full_frame *ff)
- {
- // ASSERT: we own ff->lock
- ff->pending_exception =
- __cilkrts_merge_pending_exceptions(w,
- ff->child_pending_exception,
- ff->pending_exception);
- ff->child_pending_exception = NULL;
- }
- /* At sync discard the frame's old stack and take the leftmost child's. */
- static void provably_good_steal_stacks(__cilkrts_worker *w, full_frame *ff)
- {
- CILK_ASSERT(NULL == ff->fiber_self);
- ff->fiber_self = ff->fiber_child;
- ff->fiber_child = NULL;
- }
- static void __cilkrts_mark_synched(full_frame *ff)
- {
- ff->call_stack->flags &= ~CILK_FRAME_UNSYNCHED;
- ff->simulated_stolen = 0;
- }
- static
- enum provably_good_steal_t provably_good_steal(__cilkrts_worker *w,
- full_frame *ff)
- {
- // ASSERT: we hold w->lock and ff->lock
- enum provably_good_steal_t result = ABANDON_EXECUTION;
- // If the current replay entry is a sync record matching the worker's
- // pedigree, AND this isn't the last child to the sync, return
- // WAIT_FOR_CONTINUE to indicate that the caller should loop until
- // we find the right frame to steal and CONTINUE_EXECUTION is returned.
- int match_found = replay_match_sync_pedigree(w);
- if (match_found && (0 != simulate_decjoin(ff)))
- return WAIT_FOR_CONTINUE;
- START_INTERVAL(w, INTERVAL_PROVABLY_GOOD_STEAL) {
- if (decjoin(ff) == 0) {
- provably_good_steal_reducers(w, ff);
- provably_good_steal_exceptions(w, ff);
- provably_good_steal_stacks(w, ff);
- __cilkrts_mark_synched(ff);
- // If the original owner wants this frame back (to resume
- // it on its original thread) pass it back now.
- if (NULL != ff->sync_master) {
- // The frame wants to go back and be executed by the original
- // user thread. We can throw caution to the wind and push the
- // frame straight onto its queue because the only way we have
- // gotten to this point of being able to continue execution of
- // the frame is if the original user worker is spinning without
- // work.
- unset_sync_master(w->l->team, ff);
- __cilkrts_push_next_frame(w->l->team, ff);
- // If this is the team leader we're not abandoning the work
- if (w == w->l->team)
- result = CONTINUE_EXECUTION;
- } else {
- __cilkrts_push_next_frame(w, ff);
- result = CONTINUE_EXECUTION; // Continue working on this thread
- }
- // The __cilkrts_push_next_frame() call changes ownership
- // of ff to the specified worker.
- }
- } STOP_INTERVAL(w, INTERVAL_PROVABLY_GOOD_STEAL);
- // Only write a SYNC record if:
- // - We're recording a log *AND*
- // - We're the worker continuing from this sync
- replay_record_sync(w, result == CONTINUE_EXECUTION);
- // If we're replaying a log, and matched a sync from the log, mark the
- // sync record seen if the sync isn't going to be abandoned.
- replay_advance_from_sync (w, match_found, result == CONTINUE_EXECUTION);
- return result;
- }
- static void unconditional_steal(__cilkrts_worker *w,
- full_frame *ff)
- {
- // ASSERT: we hold ff->lock
- START_INTERVAL(w, INTERVAL_UNCONDITIONAL_STEAL) {
- decjoin(ff);
- __cilkrts_push_next_frame(w, ff);
- } STOP_INTERVAL(w, INTERVAL_UNCONDITIONAL_STEAL);
- }
- /* CHILD is about to die. Give its exceptions to a sibling or to the
- parent. */
- static inline void splice_exceptions_for_call(__cilkrts_worker *w,
- full_frame *parent_ff,
- full_frame *child_ff)
- {
- // ASSERT: We own parent_ff->lock
- CILK_ASSERT(child_ff->is_call_child);
- CILK_ASSERT(NULL == child_ff->right_pending_exception);
- CILK_ASSERT(NULL == parent_ff->pending_exception);
-
- parent_ff->pending_exception = child_ff->pending_exception;
- child_ff->pending_exception = NULL;
- }
- /**
- * Merge exceptions for a dying child.
- *
- * @param w The currently executing worker.
- * @param ff The child frame that is dying.
- * @param left_exception_ptr Pointer to the exception that is to our left.
- */
- static inline
- void splice_exceptions_for_spawn(__cilkrts_worker *w,
- full_frame *ff,
- struct pending_exception_info **left_exception_ptr)
- {
- // ASSERT: parent_ff == child_ff->parent.
- // ASSERT: We own parent_ff->lock
- // Merge current exception into the slot where the left
- // exception should go.
- *left_exception_ptr =
- __cilkrts_merge_pending_exceptions(w,
- *left_exception_ptr,
- ff->pending_exception);
- ff->pending_exception = NULL;
- // Merge right exception into the slot where the left exception
- // should go.
- *left_exception_ptr =
- __cilkrts_merge_pending_exceptions(w,
- *left_exception_ptr,
- ff->right_pending_exception);
- ff->right_pending_exception = NULL;
- }
- static inline void splice_stacks_for_call(__cilkrts_worker *w,
- full_frame *parent_ff,
- full_frame *child_ff)
- {
- #if CILK_LIB_DEBUG
- if (parent_ff->call_stack)
- CILK_ASSERT(!(parent_ff->call_stack->flags & CILK_FRAME_MBZ));
- #endif
- /* A synched frame does not have accumulated child reducers. */
- CILK_ASSERT(!child_ff->fiber_child);
- CILK_ASSERT(child_ff->is_call_child);
- /* An attached parent has no self fiber. It may have
- accumulated child fibers or child owners, which should be
- ignored until sync. */
- CILK_ASSERT(!parent_ff->fiber_self);
- parent_ff->fiber_self = child_ff->fiber_self;
- child_ff->fiber_self = NULL;
- }
- static void finalize_child_for_call(__cilkrts_worker *w,
- full_frame *parent_ff,
- full_frame *child_ff)
- {
- // ASSERT: we hold w->lock and parent_ff->lock
-
- START_INTERVAL(w, INTERVAL_FINALIZE_CHILD) {
- CILK_ASSERT(child_ff->is_call_child);
- CILK_ASSERT(child_ff->join_counter == 0);
- CILK_ASSERT(!child_ff->rightmost_child);
- CILK_ASSERT(child_ff == parent_ff->rightmost_child);
- // CHILD is about to die.
- // Splicing out reducers is a no-op for a call since
- // w->reducer_map should already store the correct
- // reducer map.
-
- // ASSERT there are no maps left to reduce.
- CILK_ASSERT(NULL == child_ff->children_reducer_map);
- CILK_ASSERT(NULL == child_ff->right_reducer_map);
-
- splice_exceptions_for_call(w, parent_ff, child_ff);
- splice_stacks_for_call(w, parent_ff, child_ff);
- /* remove CHILD from list of children of PARENT */
- unlink_child(parent_ff, child_ff);
- /* continue with the parent. */
- unconditional_steal(w, parent_ff);
- __cilkrts_destroy_full_frame(w, child_ff);
- } STOP_INTERVAL(w, INTERVAL_FINALIZE_CHILD);
- }
- /**
- * The invariant on ff->children_reducer_map is that when ff is
- * synched and when we are about to resume execution of ff, at least
- * one of ff->children_reducer_map and w->reducer_map must be NULL.
- *
- * Consider the two possibilities before resuming execution of ff:
- *
- * 1. Suppose ff is synched and suspended. Then either
- *
- * (a) ff->children_reducer_map stores the reducer map that w
- * should use, where w is the worker resuming execution of ff,
- * OR
- * (b) w already has a user map, and ff->children_reducer_map is NULL.
- *
- * Case (a) happens when we are resuming execution of ff as a
- * provably good steal. In this case, w->reducer_map should be
- * NULL and ff->children_reducer_map is valid. To resume
- * execution of ff on w, set w->reducer_map to
- * ff->children_reducer_map.
- *
- * Case (b) occurs when we resume execution of ff because ff is a
- * called child. Then, ff->children_reducer_map should be NULL,
- * and w should already have a valid reducer map when resuming
- * execution of ff. We resume execution of ff without changing
- * w->reducer_map.
- *
- * 2. Suppose frame ff is not synched (i.e., it is active and might have
- * active children). Then ff->children_reducer_map is the slot for
- * storing the reducer map from ff's leftmost child, as in the reducer
- * protocol. The runtime may resume execution of ff while it is not
- * synched only because of a steal.
- * In this case, while we are resuming ff, ff->children_reducer_map
- * may be non-NULL (because one of ff's children has completed).
- * We resume execution of ff without changing w->reducer_map.
- */
- static void setup_for_execution_reducers(__cilkrts_worker *w,
- full_frame *ff)
- {
- // We only need to move ff->children_reducer_map into
- // w->reducer_map in case 1(a).
- //
- // First check whether ff is synched.
- __cilkrts_stack_frame *sf = ff->call_stack;
- if (!(sf->flags & CILK_FRAME_UNSYNCHED)) {
- // In this case, ff is synched. (Case 1).
- CILK_ASSERT(!ff->rightmost_child);
- // Test whether we are in case 1(a) and have
- // something to do. Note that if both
- // ff->children_reducer_map and w->reducer_map are NULL, we
- // can't distinguish between cases 1(a) and 1(b) here.
- if (ff->children_reducer_map) {
- // We are in Case 1(a).
- CILK_ASSERT(!w->reducer_map);
- w->reducer_map = ff->children_reducer_map;
- ff->children_reducer_map = NULL;
- }
- }
- }
- static void setup_for_execution_exceptions(__cilkrts_worker *w,
- full_frame *ff)
- {
- CILK_ASSERT(NULL == w->l->pending_exception);
- w->l->pending_exception = ff->pending_exception;
- ff->pending_exception = NULL;
- }
- #if 0 /* unused */
- static void setup_for_execution_stack(__cilkrts_worker *w,
- full_frame *ff)
- {
- }
- #endif
- /*
- * setup_for_execution_pedigree
- *
- * Copies the pedigree information from the frame we're resuming to the
- * worker. Increments the pedigree if this is work that has been stolen
- * to match the increment on a return from a spawn helper.
- */
- static void setup_for_execution_pedigree(__cilkrts_worker *w)
- {
- int pedigree_unsynched;
- __cilkrts_stack_frame *sf = w->current_stack_frame;
- CILK_ASSERT(NULL != sf);
- // If this isn't an ABI 1 or later frame, there's no pedigree information
- if (0 == CILK_FRAME_VERSION_VALUE(sf->flags))
- return;
- // Note whether the pedigree is unsynched and clear the flag before
- // we forget
- pedigree_unsynched = sf->flags & CILK_FRAME_SF_PEDIGREE_UNSYNCHED;
- sf->flags &= ~CILK_FRAME_SF_PEDIGREE_UNSYNCHED;
- // If the work was stolen, increment the rank to match the increment
- // that a return from the spawn helper performs in the sequential
- // execution. If we're just re-marshalling onto this worker (pedigree
- // unsynched but nothing stolen), leave the rank unchanged, since no
- // increment would happen in a sequential execution.
- if (w->l->work_stolen || pedigree_unsynched)
- {
- if (w->l->work_stolen)
- w->pedigree.rank = sf->parent_pedigree.rank + 1;
- else
- w->pedigree.rank = sf->parent_pedigree.rank;
- }
- w->pedigree.parent = sf->parent_pedigree.parent;
- w->l->work_stolen = 0;
- }
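- /* A minimal standalone sketch (illustrative only, not part of the runtime
-    build; all sketch_* names are hypothetical) of the rank-update rule
-    implemented above: a stolen frame resumes with parent rank + 1, matching
-    the increment a return from a spawn helper performs in the sequential
-    execution, while a merely unsynched pedigree keeps the parent rank. */
- #if 0 /* illustrative only */
- #include <assert.h>
- #include <stdint.h>
- static uint64_t sketch_resumed_rank(uint64_t current_rank,
-                                     uint64_t parent_rank,
-                                     int work_stolen,
-                                     int pedigree_unsynched)
- {
-     if (work_stolen)
-         return parent_rank + 1;   /* match the serial post-spawn increment */
-     if (pedigree_unsynched)
-         return parent_rank;       /* re-marshalling: no extra increment */
-     return current_rank;          /* nothing to do */
- }
- static void sketch_rank_examples(void)
- {
-     assert(sketch_resumed_rank(7, 3, /*stolen*/1, 0) == 4);
-     assert(sketch_resumed_rank(7, 3, 0, /*unsynched*/1) == 3);
-     assert(sketch_resumed_rank(7, 3, 0, 0) == 7);
- }
- #endif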
- static void setup_for_execution(__cilkrts_worker *w,
- full_frame *ff,
- int is_return_from_call)
- {
- // ASSERT: We own w->lock and ff->lock || P == 1
- setup_for_execution_reducers(w, ff);
- setup_for_execution_exceptions(w, ff);
- /*setup_for_execution_stack(w, ff);*/
- ff->call_stack->worker = w;
- w->current_stack_frame = ff->call_stack;
- // If this is a return from a call, leave the pedigree alone
- if (! is_return_from_call)
- setup_for_execution_pedigree(w);
- __cilkrts_setup_for_execution_sysdep(w, ff);
- w->head = w->tail = w->l->ltq;
- reset_THE_exception(w);
- make_runnable(w, ff);
- }
- /*
- * Called by the scheduling fiber, right before
- * resuming a sf/ff for user code.
- *
- * This method associates the specified sf with the worker.
- *
- * It also asserts that w, ff, and sf all have the expected properties
- * for resuming user code.
- */
- void scheduling_fiber_prepare_to_resume_user_code(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf)
- {
- w->current_stack_frame = sf;
- sf->worker = w;
- // Lots of debugging checks on the state of the fiber we might be
- // resuming.
- #if FIBER_DEBUG >= 1
- # if FIBER_DEBUG >= 3
- {
- fprintf(stderr, "w=%d: ff=%p, sf=%p. about to resume user code\n",
- w->self, ff, sf);
- }
- # endif
- const int flags = sf->flags;
- CILK_ASSERT(flags & CILK_FRAME_SUSPENDED);
- CILK_ASSERT(!sf->call_parent);
- CILK_ASSERT(w->head == w->tail);
- /* A frame can not be resumed unless it was suspended. */
- CILK_ASSERT(ff->sync_sp != NULL);
- /* The leftmost frame has no allocated stack */
- if (ff->simulated_stolen)
- CILK_ASSERT(flags & CILK_FRAME_UNSYNCHED);
- else if (flags & CILK_FRAME_UNSYNCHED)
- /* XXX By coincidence sync_sp could be null. */
- CILK_ASSERT(ff->fiber_self != NULL);
- else
- /* XXX This frame could be resumed unsynched on the leftmost stack */
- CILK_ASSERT((ff->sync_master == 0 || ff->sync_master == w));
- CILK_ASSERT(w->l->frame_ff == ff);
- #endif
- }
- /**
- * This method is the first method that should execute after we've
- * switched to a scheduling fiber from user code.
- *
- * @param fiber The scheduling fiber for the current worker. The worker
- * itself is recovered from the fiber via cilk_fiber_get_owner().
- */
- static void enter_runtime_transition_proc(cilk_fiber *fiber)
- {
- // We can execute this method for one of three reasons:
- // 1. Undo-detach finds parent stolen.
- // 2. Sync suspends frame.
- // 3. Return from Cilk entry point.
- //
- //
- // In cases 1 and 2, the frame may be truly suspended or
- // may be immediately executed by this worker after provably_good_steal.
- //
- //
- // There is a fourth case, which may, but need not, execute
- // this function:
- // 4. Starting up the scheduling loop on a user or
- // system worker. In this case, we won't have
- // a scheduling stack function to run.
- __cilkrts_worker* w = cilk_fiber_get_owner(fiber);
- if (w->l->post_suspend) {
- // Run the continuation function passed to longjmp_into_runtime
- run_scheduling_stack_fcn(w);
- // After we have jumped into the runtime and run the
- // scheduling function, any reducer map the worker had before entering the runtime
- // should have already been saved into the appropriate full
- // frame.
- CILK_ASSERT(NULL == w->reducer_map);
- // There shouldn't be any uncaught exceptions.
- //
- // In Windows, the OS catches any exceptions not caught by the
- // user code. Thus, we are omitting the check on Windows.
- //
- // On Android, calling std::uncaught_exception with the stlport
- // library causes a seg fault. Since we're not supporting
- // exceptions there at this point, just don't do the check
- //
- // TBD: Is this check also safe to do on Windows?
- CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION();
- }
- }
- /**
- * Method called to jump back to executing user code.
- *
- * A normal return from the runtime back to resuming user code calls
- * this method. A computation executed using force_reduce also calls
- * this method to return to user code.
- *
- * This function should not contain any code that depends on a fiber.
- * In a force-reduce case, the user worker may not have a fiber. In
- * the force-reduce case, we call this method directly instead of
- * calling @c user_code_resume_after_switch_into_runtime.
- */
- static inline NORETURN
- cilkrts_resume(__cilkrts_stack_frame *sf, full_frame *ff)
- {
- // Save the sync stack pointer, and do the bookkeeping
- char* sync_sp = ff->sync_sp;
- __cilkrts_take_stack(ff, sync_sp); // leaves ff->sync_sp null
- sf->flags &= ~CILK_FRAME_SUSPENDED;
- // Actually longjmp to the user code.
- // We may have exceptions to deal with, since we are resuming
- // a previously suspended frame.
- sysdep_longjmp_to_sf(sync_sp, sf, ff);
- }
- /**
- * Called by the user-code fiber right before resuming a full frame
- * (sf/ff).
- *
- * This method pulls sf/ff out of the worker, and then calls
- * cilkrts_resume to jump to user code.
- */
- static NORETURN
- user_code_resume_after_switch_into_runtime(cilk_fiber *fiber)
- {
- __cilkrts_worker *w = cilk_fiber_get_owner(fiber);
- __cilkrts_stack_frame *sf;
- full_frame *ff;
- sf = w->current_stack_frame;
- ff = sf->worker->l->frame_ff;
- #if FIBER_DEBUG >= 1
- CILK_ASSERT(ff->fiber_self == fiber);
- cilk_fiber_data *fdata = cilk_fiber_get_data(fiber);
- DBGPRINTF ("%d-%p: resume_after_switch_into_runtime, fiber=%p\n",
- w->self, w, fiber);
- CILK_ASSERT(sf == fdata->resume_sf);
- #endif
- // Notify the Intel tools that we're stealing code
- ITT_SYNC_ACQUIRED(sf->worker);
- NOTIFY_ZC_INTRINSIC("cilk_continue", sf);
- cilk_fiber_invoke_tbb_stack_op(fiber, CILK_TBB_STACK_ADOPT);
- // Actually jump to user code.
- cilkrts_resume(sf, ff);
- }
- /* The current stack is about to either be suspended or destroyed. This
- * function will switch to the stack on which the scheduler is suspended and
- * resume running the scheduler within function do_work(). Upon waking up,
- * the scheduler will run the 'cont' function, using the supplied worker and
- * frame.
- */
- static NORETURN
- longjmp_into_runtime(__cilkrts_worker *w,
- scheduling_stack_fcn_t fcn,
- __cilkrts_stack_frame *sf)
- {
- full_frame *ff, *ff2;
- CILK_ASSERT(!w->l->post_suspend);
- ff = w->l->frame_ff;
- // With only one worker, a real steal is impossible, so this must be a
- // simulated steal or a return from spawn in a force-reduce case.
- // We don't have a scheduling stack to switch to, so call the continuation
- // function directly.
- if (1 == w->g->P) {
- fcn(w, ff, sf);
- /* The call to fcn() above will have pushed ff as the next frame. If
- * this were a normal (non-forced-reduce) execution, there would have
- * been a pop_next_frame call in a separate part of the runtime. We
- * must call pop_next_frame here to complete the push/pop cycle. */
- ff2 = pop_next_frame(w);
- setup_for_execution(w, ff2, 0);
- scheduling_fiber_prepare_to_resume_user_code(w, ff2, w->current_stack_frame);
- cilkrts_resume(w->current_stack_frame, ff2);
-
- // Suppress clang warning that the expression result is unused
- #if defined(__clang__) && (! defined(__INTEL_COMPILER))
- # pragma clang diagnostic push
- # pragma clang diagnostic ignored "-Wunused-value"
- #endif // __clang__
- /* no return */
- CILK_ASSERT(((void)"returned from __cilkrts_resume", 0));
- #if defined(__clang__) && (! defined(__INTEL_COMPILER))
- # pragma clang diagnostic pop
- #endif // __clang__
- }
- w->l->post_suspend = fcn;
- w->l->suspended_stack = sf;
- ITT_SYNC_RELEASING(w);
- ITT_SYNC_PREPARE(w);
- #if FIBER_DEBUG >= 2
- fprintf(stderr, "ThreadId=%p, W=%d: about to switch into runtime... w->l->frame_ff = %p, sf=%p\n",
- cilkos_get_current_thread_id(),
- w->self, w->l->frame_ff,
- sf);
- #endif
- // The current fiber is either (1) the one we are about to free,
- // or (2) one that has already been passed up to the parent.
- cilk_fiber *current_fiber = ( w->l->fiber_to_free ?
- w->l->fiber_to_free :
- w->l->frame_ff->parent->fiber_child );
- cilk_fiber_data* fdata = cilk_fiber_get_data(current_fiber);
- CILK_ASSERT(NULL == w->l->frame_ff->fiber_self);
- // Clear the sf in the current fiber for cleanliness, to prevent
- // us from accidentally resuming a bad sf.
- // Technically, resume_sf gets overwritten for a fiber when
- // we are about to resume it anyway.
- fdata->resume_sf = NULL;
- CILK_ASSERT(fdata->owner == w);
- // Set the function to execute immediately after switching to the
- // scheduling fiber, but before freeing any fibers.
- cilk_fiber_set_post_switch_proc(w->l->scheduling_fiber,
- enter_runtime_transition_proc);
- cilk_fiber_invoke_tbb_stack_op(current_fiber, CILK_TBB_STACK_ORPHAN);
-
- if (w->l->fiber_to_free) {
- // Case 1: we are freeing this fiber. We never
- // resume this fiber again after jumping into the runtime.
- w->l->fiber_to_free = NULL;
- // Extra check. Normally, the fiber we are about to switch to
- // should have a NULL owner.
- CILK_ASSERT(NULL == cilk_fiber_get_data(w->l->scheduling_fiber)->owner);
- #if FIBER_DEBUG >= 4
- fprintf(stderr, "ThreadId=%p, W=%d: about to switch into runtime.. current_fiber = %p, deallcoate, switch to fiber %p\n",
- cilkos_get_current_thread_id(),
- w->self,
- current_fiber, w->l->scheduling_fiber);
- #endif
- cilk_fiber_invoke_tbb_stack_op(current_fiber, CILK_TBB_STACK_RELEASE);
- NOTE_INTERVAL(w, INTERVAL_DEALLOCATE_RESUME_OTHER);
- cilk_fiber_remove_reference_from_self_and_resume_other(current_fiber,
- &w->l->fiber_pool,
- w->l->scheduling_fiber);
- // We should never come back here!
- CILK_ASSERT(0);
- }
- else {
- // Case 2: We are passing the fiber to our parent because we
- // are leftmost. We should come back later to
- // resume execution of user code.
- //
- // If we are not freeing a fiber, then we must be
- // returning from a spawn or processing an exception. The
- // "sync" path always frees a fiber.
- //
- // We must be the leftmost child, and by left holder logic, we
- // have already moved the current fiber into our parent full
- // frame.
- #if FIBER_DEBUG >= 2
- fprintf(stderr, "ThreadId=%p, W=%d: about to suspend self into runtime.. current_fiber = %p, deallcoate, switch to fiber %p\n",
- cilkos_get_current_thread_id(),
- w->self,
- current_fiber, w->l->scheduling_fiber);
- #endif
- NOTE_INTERVAL(w, INTERVAL_SUSPEND_RESUME_OTHER);
- cilk_fiber_suspend_self_and_resume_other(current_fiber,
- w->l->scheduling_fiber);
- // Resuming this fiber returns control back to
- // this function because our implementation uses OS fibers.
- //
- // On Unix, we could instead pass
- // user_code_resume_after_switch_into_runtime as an extra "resume_proc"
- // that resumes execution of user code directly, instead of
- // jumping back here and then jumping back to user code.
- #if FIBER_DEBUG >= 2
- CILK_ASSERT(fdata->owner == __cilkrts_get_tls_worker());
- #endif
- user_code_resume_after_switch_into_runtime(current_fiber);
- }
- }
- /*
- * Send a message to the children of the specified worker: run or wait.
- */
- static void notify_children(__cilkrts_worker *w, unsigned int msg)
- {
- int child_num;
- __cilkrts_worker *child;
- int num_sys_workers = w->g->P - 1;
- // If worker is "n", then its children are 2n + 1, and 2n + 2.
- child_num = (w->self << 1) + 1;
- if (child_num < num_sys_workers) {
- child = w->g->workers[child_num];
- CILK_ASSERT(child->l->signal_node);
- signal_node_msg(child->l->signal_node, msg);
- child_num++;
- if (child_num < num_sys_workers) {
- child = w->g->workers[child_num];
- CILK_ASSERT(child->l->signal_node);
- signal_node_msg(child->l->signal_node, msg);
- }
- }
- }
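- /* A worked example (standalone sketch, illustrative only and not part of
-    the runtime build; sketch_* names are hypothetical) of the binary-tree
-    fan-out above: system worker n signals workers 2n+1 and 2n+2, provided
-    those indices are still system workers (i.e., less than P - 1). With
-    P = 8 there are 7 system workers, 0..6: worker 0 signals 1 and 2,
-    worker 1 signals 3 and 4, worker 2 signals 5 and 6, and workers 3..6
-    have no children to signal. */
- #if 0 /* illustrative only */
- #include <stdio.h>
- static void sketch_print_signal_tree(int P)
- {
-     int num_sys_workers = P - 1;
-     for (int n = 0; n < num_sys_workers; ++n) {
-         int c1 = 2 * n + 1, c2 = 2 * n + 2;
-         printf("worker %d signals:", n);
-         if (c1 < num_sys_workers) printf(" %d", c1);
-         if (c2 < num_sys_workers) printf(" %d", c2);
-         printf("\n");
-     }
- }
- #endif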
- /*
- * Notify this worker's children that they need to wait.
- */
- static void notify_children_wait(__cilkrts_worker *w)
- {
- notify_children(w, 0);
- }
- /*
- * Notify this worker's children to run and start trying to steal.
- */
- static void notify_children_run(__cilkrts_worker *w)
- {
- notify_children(w, 1);
- }
- /**
- * A single "check" to find work, either on our queue or through a
- * steal attempt. This method checks our local queue once and, if it
- * finds nothing there, performs one steal attempt.
- */
- static full_frame* check_for_work(__cilkrts_worker *w)
- {
- full_frame *ff = NULL;
- ff = pop_next_frame(w);
- // If there is no work on the queue, try to steal some.
- if (NULL == ff) {
- START_INTERVAL(w, INTERVAL_STEALING) {
- if (w->l->type != WORKER_USER && w->l->team != NULL) {
- // At this point, the worker knows for certain that it has run
- // out of work. Therefore, it loses its team affiliation. User
- // workers never change teams, of course.
- __cilkrts_worker_lock(w);
- w->l->team = NULL;
- __cilkrts_worker_unlock(w);
- }
- // If we are about to do a random steal, we should have no
- // full frame...
- CILK_ASSERT(NULL == w->l->frame_ff);
- random_steal(w);
- } STOP_INTERVAL(w, INTERVAL_STEALING);
- // If the steal was successful, then the worker has populated its next
- // frame with the work to resume.
- ff = pop_next_frame(w);
- if (NULL == ff) {
- // Punish the worker for failing to steal.
- // No quantum for you!
- __cilkrts_yield();
- w->l->steal_failure_count++;
- } else {
- // Reset steal_failure_count since there is obviously still work to
- // be done.
- w->l->steal_failure_count = 0;
- }
- }
- return ff;
- }
- /**
- * Keep stealing or looking on our queue.
- *
- * Returns a full frame when one is found, or NULL if the
- * computation is done.
- */
- static full_frame* search_until_work_found_or_done(__cilkrts_worker *w)
- {
- full_frame *ff = NULL;
- // Find a full frame to execute (either through random stealing,
- // or because we pull it off w's 1-element queue).
- while (!ff) {
- // Check worker state to figure out our next action.
- switch (worker_runnable(w))
- {
- case SCHEDULE_RUN: // One attempt at checking for work.
- ff = check_for_work(w);
- break;
- case SCHEDULE_WAIT: // go into wait-mode.
- CILK_ASSERT(WORKER_SYSTEM == w->l->type);
- // If we are about to wait, then we better not have
- // a frame that we should execute...
- CILK_ASSERT(NULL == w->l->next_frame_ff);
- notify_children_wait(w);
- signal_node_wait(w->l->signal_node);
- // ...
- // Runtime is waking up.
- notify_children_run(w);
- w->l->steal_failure_count = 0;
- break;
- case SCHEDULE_EXIT: // exit the scheduler.
- CILK_ASSERT(WORKER_USER != w->l->type);
- return NULL;
- default:
- CILK_ASSERT(0);
- abort();
- }
- }
- return ff;
- }
- /**
- * The proc method for a scheduling fiber on a user worker.
- *
- * When a user worker jumps into the runtime, it jumps into this
- * method by either starting it if the scheduling fiber has never run
- * before, or resuming the fiber if it was previously suspended.
- */
- COMMON_PORTABLE
- void scheduler_fiber_proc_for_user_worker(cilk_fiber *fiber)
- {
- __cilkrts_worker* w = cilk_fiber_get_owner(fiber);
- CILK_ASSERT(w);
- // This must be a user worker
- CILK_ASSERT(WORKER_USER == w->l->type);
- // If we aren't the current worker, then something is very wrong
- // here..
- verify_current_wkr(w);
- __cilkrts_run_scheduler_with_exceptions(w);
- }
- /**
- * The body of the runtime scheduling loop. This function executes in
- * 4 stages:
- *
- * 1. Transitions from the user code into the runtime by
- * executing any scheduling-stack functions.
- * 2. Looks for a full frame enqueued from a successful provably
- * good steal.
- * 3. If no full frame is found in step 2, steal until
- * a frame is found or we are done. If we are done, finish
- * the scheduling loop.
- * 4. When a frame is found, setup to resume user code.
- * In particular, suspend the current fiber and resume the
- * user fiber to execute the frame.
- *
- * Returns a fiber object that we should switch to after completing
- * the body of the loop, or NULL if we should continue executing on
- * this fiber.
- *
- * @pre @c current_fiber should equal @c wptr->l->scheduling_fiber
- *
- * @param current_fiber The currently executing (scheduling) fiber.
- * @param wptr The currently executing worker.
- * @return The next fiber we should switch to.
- */
- static cilk_fiber* worker_scheduling_loop_body(cilk_fiber* current_fiber,
- void* wptr)
- {
- __cilkrts_worker *w = (__cilkrts_worker*) wptr;
- CILK_ASSERT(current_fiber == w->l->scheduling_fiber);
- // Stage 1: Transition from executing user code to the runtime code.
- // We don't need to do this call here any more, because
- // every switch to the scheduling fiber should make this call
- // using a post_switch_proc on the fiber.
- //
- // enter_runtime_transition_proc(w->l->scheduling_fiber, wptr);
- // After Stage 1 is complete, w should no longer have
- // an associated full frame.
- CILK_ASSERT(NULL == w->l->frame_ff);
- // Stage 2. First do a quick check of our 1-element queue.
- full_frame *ff = pop_next_frame(w);
- if (!ff) {
- // Stage 3. We didn't find anything from our 1-element
- // queue. Now go through the steal loop to find work.
- ff = search_until_work_found_or_done(w);
- if (!ff) {
- CILK_ASSERT(w->g->work_done);
- return NULL;
- }
- }
- // Stage 4. Now that we have found a full frame to work on,
- // actually execute it.
- __cilkrts_stack_frame *sf;
- // There shouldn't be any uncaught exceptions.
- //
- // In Windows, the OS catches any exceptions not caught by the
- // user code. Thus, we are omitting the check on Windows.
- //
- // On Android, calling std::uncaught_exception with the stlport
- // library causes a seg fault. Since we're not supporting
- // exceptions there at this point, just don't do the check
- CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION();
- BEGIN_WITH_WORKER_LOCK(w) {
- CILK_ASSERT(!w->l->frame_ff);
- BEGIN_WITH_FRAME_LOCK(w, ff) {
- sf = ff->call_stack;
- CILK_ASSERT(sf && !sf->call_parent);
- setup_for_execution(w, ff, 0);
- } END_WITH_FRAME_LOCK(w, ff);
- } END_WITH_WORKER_LOCK(w);
- /* run it */
- //
- // Prepare to run the full frame. To do so, we need to:
- // (a) Execute some code on this fiber (the scheduling
- // fiber) to set up data structures, and
- // (b) Suspend the scheduling fiber, and resume the
- // user-code fiber.
- // Part (a). Set up data structures.
- scheduling_fiber_prepare_to_resume_user_code(w, ff, sf);
- cilk_fiber *other = w->l->frame_ff->fiber_self;
- cilk_fiber_data* other_data = cilk_fiber_get_data(other);
- cilk_fiber_data* current_fiber_data = cilk_fiber_get_data(current_fiber);
- // I believe two cases are possible here, both of which
- // should have other_data->resume_sf as NULL.
- //
- // 1. Resuming a fiber that was previously executing
- // user code (i.e., a provably-good-steal).
- // In this case, resume_sf should have been
- // set to NULL when it was suspended.
- //
- // 2. Resuming code on a steal. In this case, since we
- // grabbed a new fiber, resume_sf should be NULL.
- CILK_ASSERT(NULL == other_data->resume_sf);
-
- #if FIBER_DEBUG >= 2
- fprintf(stderr, "W=%d: other fiber=%p, setting resume_sf to %p\n",
- w->self, other, other_data->resume_sf);
- #endif
- // Update our own fiber's data.
- current_fiber_data->resume_sf = NULL;
- // The scheduling fiber should have the right owner from before.
- CILK_ASSERT(current_fiber_data->owner == w);
- other_data->resume_sf = sf;
-
- #if FIBER_DEBUG >= 3
- fprintf(stderr, "ThreadId=%p (about to suspend self resume other), W=%d: current_fiber=%p, other=%p, current_fiber->resume_sf = %p, other->resume_sf = %p\n",
- cilkos_get_current_thread_id(),
- w->self,
- current_fiber, other,
- current_fiber_data->resume_sf,
- other_data->resume_sf);
- #endif
- return other;
- }
- /**
- * This function is executed once by each worker, to initialize its
- * scheduling loop.
- */
- static void worker_scheduler_init_function(__cilkrts_worker *w)
- {
- // First, execute the startup tasks that must happen for all
- // worker types.
- ITT_SYNC_PREPARE(w);
- /* Notify tools about the new worker. Inspector needs this, but we
- don't want to confuse Cilkscreen with system threads. User threads
- do this notification in bind_thread */
- if (! w->g->under_ptool)
- __cilkrts_cilkscreen_establish_worker(w);
- // Seed the initial random number generator.
- // If we forget to do this, then the worker always steals from 0.
- // Programs will still execute correctly, but
- // you may see a subtle performance bug...
- mysrand(w, (w->self + 1));
- // The startup work varies, depending on the worker type.
- switch (w->l->type) {
- case WORKER_USER:
- // Stop working once we've entered the scheduler.
- // For user workers, INTERVAL_IN_SCHEDULER counts the time
- // since we called bind_thread.
- break;
- case WORKER_SYSTEM:
- // If a system worker is starting, we must also be starting
- // the runtime.
- // Runtime begins in a wait-state and is woken up by the first user
- // worker when the runtime is ready.
- signal_node_wait(w->l->signal_node);
- // ...
- // Runtime is waking up.
- notify_children_run(w);
- w->l->steal_failure_count = 0;
- // For system threads, count all the time this thread is
- // alive in the scheduling loop.
- START_INTERVAL(w, INTERVAL_IN_SCHEDULER);
- START_INTERVAL(w, INTERVAL_WORKING);
- break;
- default:
- __cilkrts_bug("Unknown worker %p of type %d entering scheduling loop\n",
- w, w->l->type);
- }
- }
- /**
- * This function is executed once by each worker, to finish its
- * scheduling loop.
- *
- * @note Currently, only system workers finish their loops. User
- * workers will jump away to user code without exiting their
- * scheduling loop.
- */
- static void worker_scheduler_terminate_function(__cilkrts_worker *w)
- {
- // A user worker should never finish by falling through the
- // scheduling loop.
- CILK_ASSERT(WORKER_USER != w->l->type);
- STOP_INTERVAL(w, INTERVAL_IN_RUNTIME);
- STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER);
- }
- /**
- * The main scheduler function executed by a worker's scheduling
- * fiber.
- *
- * This method is started by either a new system worker, or a user
- * worker that has stalled and just been imported into the runtime.
- */
- static void worker_scheduler_function(__cilkrts_worker *w)
- {
- worker_scheduler_init_function(w);
- // The main scheduling loop body.
- while (!w->g->work_done) {
- // Set intervals. Now we are in the runtime instead of working.
- START_INTERVAL(w, INTERVAL_IN_RUNTIME);
- STOP_INTERVAL(w, INTERVAL_WORKING);
- // Execute the "body" of the scheduling loop, and figure
- // out the fiber to jump to next.
- cilk_fiber* fiber_to_resume
- = worker_scheduling_loop_body(w->l->scheduling_fiber, w);
- if (fiber_to_resume) {
- // Suspend the current fiber and resume next one.
- NOTE_INTERVAL(w, INTERVAL_SUSPEND_RESUME_OTHER);
- STOP_INTERVAL(w, INTERVAL_IN_RUNTIME);
- START_INTERVAL(w, INTERVAL_WORKING);
- cilk_fiber_suspend_self_and_resume_other(w->l->scheduling_fiber,
- fiber_to_resume);
- // Return here only when this (scheduling) fiber is
- // resumed (i.e., this worker wants to reenter the runtime).
- }
- }
- // Finish the scheduling loop.
- worker_scheduler_terminate_function(w);
- }
- /*************************************************************
- Forward declarations for reduction protocol.
- *************************************************************/
- static __cilkrts_worker*
- execute_reductions_for_sync(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf_at_sync);
- static __cilkrts_worker*
- execute_reductions_for_spawn_return(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *returning_sf);
-
- /*************************************************************
- Scheduler functions that are callable by client code
- *************************************************************/
- static full_frame *disown(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf,
- const char *why)
- {
- CILK_ASSERT(ff);
- make_unrunnable(w, ff, sf, sf != 0, why);
- w->l->frame_ff = 0;
- return ff->parent;
- }
- /**
- * Called when ff is returning from a spawn, and we need to execute a
- * reduction.
- *
- * @param w The currently executing worker.
- * @param ff The full frame for w.
- * @param returning_sf The stack frame for the spawn helper that is returning.
- *
- * Normally, by the time we gain control in the runtime, the worker
- * has already popped off the __cilkrts_stack_frame "returning_sf"
- * from its call chain.
- *
- * When we have only serial reductions, w->current_stack_frame is not
- * needed any more, because w is about to enter the runtime scheduling
- * loop anyway. Similarly, the frame "ff" is slated to be destroyed
- * after the runtime finishes the return from spawn and splices ff out
- * of the tree of full frames.
- *
- * To execute a parallel reduction, however, we still want
- * w->current_stack_frame == returning_sf, and we are going to use the
- * frame ff for a little bit longer.
- *
- * This method:
- *
- * 1. Puts returning_sf back as w's current stack frame.
- * 2. Makes "ff" runnable again on w.
- */
- static inline
- void restore_frame_for_spawn_return_reduction(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *returning_sf) {
- #if REDPAR_DEBUG >= 2
- CILK_ASSERT(returning_sf);
- CILK_ASSERT(returning_sf->worker == w);
- #endif
- // Change w's current stack frame back to "returning_sf".
- //
- // Intuitively, w->current_stack_frame should be
- // returning_sf->call_parent at this point.
- //
- // We can not assert this, however, because the pop of
- // returning_sf from the call chain has already cleared
- // returning_sf->call_parent. We don't want to restore the call
- // parent of returning_sf, because its parent has been stolen, and
- // the runtime assumes that steals break this link.
- // We cannot assert call_parent is NULL either, since that's not true for
- // Win64 exception handling
- // CILK_ASSERT(returning_sf->call_parent == NULL);
- w->current_stack_frame = returning_sf;
- // Make the full frame "ff" runnable again, in preparation for
- // executing the reduction.
- make_runnable(w, ff);
- }
- NORETURN __cilkrts_c_sync(__cilkrts_worker *w,
- __cilkrts_stack_frame *sf_at_sync)
- {
- full_frame *ff;
- // Claim: This read of w->l->frame_ff can occur without
- // holding the worker lock because when w has reached a sync
- // and entered the runtime (because it stalls), w's deque is empty
- // and no one else can steal and change w->l->frame_ff.
- ff = w->l->frame_ff;
- #ifdef _WIN32
- __cilkrts_save_exception_state(w, ff);
- #else
- // Move any pending exceptions into the full frame
- CILK_ASSERT(NULL == ff->pending_exception);
- ff->pending_exception = w->l->pending_exception;
- w->l->pending_exception = NULL;
- #endif
-
- w = execute_reductions_for_sync(w, ff, sf_at_sync);
- #if FIBER_DEBUG >= 3
- fprintf(stderr, "ThreadId=%p, w->self = %d. about to longjmp_into_runtim[c_sync] with ff=%p\n",
- cilkos_get_current_thread_id(), w->self, ff);
- #endif
- longjmp_into_runtime(w, do_sync, sf_at_sync);
- }
- static void do_sync(__cilkrts_worker *w, full_frame *ff,
- __cilkrts_stack_frame *sf)
- {
- //int abandoned = 1;
- enum provably_good_steal_t steal_result = ABANDON_EXECUTION;
- START_INTERVAL(w, INTERVAL_SYNC_CHECK) {
- BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
- CILK_ASSERT(ff);
- BEGIN_WITH_FRAME_LOCK(w, ff) {
- CILK_ASSERT(sf->call_parent == 0);
- CILK_ASSERT(sf->flags & CILK_FRAME_UNSYNCHED);
- // Before switching into the scheduling fiber, we should have
- // already taken care of deallocating the current
- // fiber.
- CILK_ASSERT(NULL == ff->fiber_self);
- // Update the frame's pedigree information if this is an ABI 1
- // or later frame
- if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1)
- {
- sf->parent_pedigree.rank = w->pedigree.rank;
- sf->parent_pedigree.parent = w->pedigree.parent;
- // Note that the pedigree rank needs to be updated
- // when setup_for_execution_pedigree runs
- sf->flags |= CILK_FRAME_SF_PEDIGREE_UNSYNCHED;
- }
- /* the decjoin() occurs in provably_good_steal() */
- steal_result = provably_good_steal(w, ff);
- } END_WITH_FRAME_LOCK(w, ff);
- // set w->l->frame_ff = NULL after checking abandoned
- if (WAIT_FOR_CONTINUE != steal_result) {
- w->l->frame_ff = NULL;
- }
- } END_WITH_WORKER_LOCK_OPTIONAL(w);
- } STOP_INTERVAL(w, INTERVAL_SYNC_CHECK);
- // Now, if we are in a replay situation and provably_good_steal() returned
- // WAIT_FOR_CONTINUE, we should sleep, reacquire locks, call
- // provably_good_steal(), and release locks until we get a value other
- // than WAIT_FOR_CONTINUE from the function.
- #ifdef CILK_RECORD_REPLAY
- // We don't have to explicitly check for REPLAY_LOG below because
- // steal_result can only be set to WAIT_FOR_CONTINUE during replay
- while(WAIT_FOR_CONTINUE == steal_result)
- {
- __cilkrts_sleep();
- BEGIN_WITH_WORKER_LOCK_OPTIONAL(w)
- {
- ff = w->l->frame_ff;
- BEGIN_WITH_FRAME_LOCK(w, ff)
- {
- steal_result = provably_good_steal(w, ff);
- } END_WITH_FRAME_LOCK(w, ff);
- if (WAIT_FOR_CONTINUE != steal_result)
- w->l->frame_ff = NULL;
- } END_WITH_WORKER_LOCK_OPTIONAL(w);
- }
- #endif // CILK_RECORD_REPLAY
- #ifdef ENABLE_NOTIFY_ZC_INTRINSIC
- // If we can't make any further progress on this thread, tell Inspector
- // that we're abandoning the work and will go find something else to do.
- if (ABANDON_EXECUTION == steal_result)
- {
- NOTIFY_ZC_INTRINSIC("cilk_sync_abandon", 0);
- }
- #endif // defined ENABLE_NOTIFY_ZC_INTRINSIC
- return; /* back to scheduler loop */
- }
- /* worker W completely promotes its own deque, simulating the case
- where the whole deque is stolen. We use this mechanism to force
- the allocation of new storage for reducers for race-detection
- purposes. */
- void __cilkrts_promote_own_deque(__cilkrts_worker *w)
- {
- // Remember the fiber we start this method on.
- CILK_ASSERT(w->l->frame_ff);
- cilk_fiber* starting_fiber = w->l->frame_ff->fiber_self;
-
- BEGIN_WITH_WORKER_LOCK(w) {
- while (dekker_protocol(w)) {
- /* PLACEHOLDER_FIBER is used as non-null marker to tell detach()
- and make_child() that this frame should be treated as a spawn
- parent, even though we have not assigned it a stack. */
- detach_for_steal(w, w, PLACEHOLDER_FIBER);
- }
- } END_WITH_WORKER_LOCK(w);
- // TBD: The management of full frames and fibers is a bit
- // sketchy here. We are promoting stack frames into full frames,
- // and pretending they are stolen away, but no other worker is
- // actually working on them. Some runtime invariants
- // may be broken here.
- //
- // Technically, if we are simulating a steal from w,
- // w should get a new full frame but
- // keep the same fiber. A real thief would take the
- // loot frame away, get a new fiber, and start executing the
- // loot frame.
- //
- // What should a fake thief do? Where does the frame go?
- // In any case, we should finish the promotion process on
- // the same fiber we started with.
- CILK_ASSERT(w->l->frame_ff);
- CILK_ASSERT(w->l->frame_ff->fiber_self == starting_fiber);
- }
- /* The client code calls this function after a spawn when the dekker
- protocol fails. The function may either return or longjmp
- into the rts.
- This function takes a "returning_sf" argument which corresponds
- to the __cilkrts_stack_frame that we are finishing (i.e., the
- argument to __cilkrts_leave_frame).
- */
- void __cilkrts_c_THE_exception_check(__cilkrts_worker *w,
- __cilkrts_stack_frame *returning_sf)
- {
- full_frame *ff;
- int stolen_p;
- __cilkrts_stack_frame *saved_sf = NULL;
- START_INTERVAL(w, INTERVAL_THE_EXCEPTION_CHECK);
- BEGIN_WITH_WORKER_LOCK(w) {
- ff = w->l->frame_ff;
- CILK_ASSERT(ff);
- /* This code is called only upon a normal return and never
- upon an exceptional return. Assert that this is the
- case. */
- CILK_ASSERT(!w->l->pending_exception);
- reset_THE_exception(w);
- stolen_p = !(w->head < (w->tail + 1)); /* +1 because tail was
- speculatively
- decremented by the
- compiled code */
- if (stolen_p) {
- /* XXX This will be charged to THE for accounting purposes */
- __cilkrts_save_exception_state(w, ff);
- // Save the value of the current stack frame.
- saved_sf = w->current_stack_frame;
- // Reverse the decrement from undo_detach.
- // This update effectively resets the deque to be
- // empty (i.e., changes w->tail back to equal w->head).
- // We need to reset the deque to execute parallel
- // reductions. When we have only serial reductions, it
- // does not matter, since serial reductions do not
- // change the deque.
- w->tail++;
- #if REDPAR_DEBUG > 1
- // ASSERT our deque is empty.
- CILK_ASSERT(w->head == w->tail);
- #endif
- }
- } END_WITH_WORKER_LOCK(w);
- STOP_INTERVAL(w, INTERVAL_THE_EXCEPTION_CHECK);
- if (stolen_p)
- {
- w = execute_reductions_for_spawn_return(w, ff, returning_sf);
- // "Mr. Policeman? My parent always told me that if I was in trouble
- // I should ask a nice policeman for help. I can't find my parent
- // anywhere..."
- //
- // Write a record to the replay log for an attempt to return to a stolen parent
- replay_record_orphaned(w);
- // Update the pedigree only after we've finished the
- // reductions.
- update_pedigree_on_leave_frame(w, returning_sf);
- // Notify Inspector that the parent has been stolen and we're
- // going to abandon this work and go do something else. This
- // will match the cilk_leave_begin in the compiled code
- NOTIFY_ZC_INTRINSIC("cilk_leave_stolen", saved_sf);
- DBGPRINTF ("%d: longjmp_into_runtime from __cilkrts_c_THE_exception_check\n", w->self);
- longjmp_into_runtime(w, do_return_from_spawn, 0);
- DBGPRINTF ("%d: returned from longjmp_into_runtime from __cilkrts_c_THE_exception_check?!\n", w->self);
- }
- else
- {
- NOTE_INTERVAL(w, INTERVAL_THE_EXCEPTION_CHECK_USELESS);
- return;
- }
- }
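- /* A worked example (standalone sketch, illustrative only and not part of
-    the runtime build; plain integers stand in for the deque slot pointers)
-    of the stolen_p test above. The compiled epilogue has already
-    decremented tail speculatively, so the parent was stolen exactly when
-    head has advanced past the slot we tried to pop, i.e., when
-    !(head < tail + 1). */
- #if 0 /* illustrative only */
- #include <assert.h>
- static int sketch_stolen_p(int head, int tail_after_decrement)
- {
-     return !(head < tail_after_decrement + 1);
- }
- static void sketch_the_examples(void)
- {
-     /* Deque slots 0..4 were full: head == 0, tail == 5.  The epilogue
-        decrements tail to 4, trying to pop the frame in slot 4. */
-     assert(sketch_stolen_p(/*head*/0, /*tail*/4) == 0); /* no thief: pop wins   */
-     /* Thieves advanced head all the way to 5 before the pop: the frame in
-        slot 4 is gone, so the parent was stolen. */
-     assert(sketch_stolen_p(/*head*/5, /*tail*/4) == 1); /* parent was stolen    */
- }
- #endif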
- /* Return an exception to a stolen parent. */
- NORETURN __cilkrts_exception_from_spawn(__cilkrts_worker *w,
- __cilkrts_stack_frame *returning_sf)
- {
- full_frame *ff = w->l->frame_ff;
- // This is almost the same as THE_exception_check, except that
- // the detach didn't happen, so we don't need to undo the tail
- // update.
- CILK_ASSERT(w->head == w->tail);
- w = execute_reductions_for_spawn_return(w, ff, returning_sf);
- longjmp_into_runtime(w, do_return_from_spawn, 0);
- CILK_ASSERT(0);
- }
- static void do_return_from_spawn(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf)
- {
- full_frame *parent_ff;
- enum provably_good_steal_t steal_result = ABANDON_EXECUTION;
- BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
- CILK_ASSERT(ff);
- CILK_ASSERT(!ff->is_call_child);
- CILK_ASSERT(sf == NULL);
- parent_ff = ff->parent;
-
- BEGIN_WITH_FRAME_LOCK(w, ff) {
- decjoin(ff);
- } END_WITH_FRAME_LOCK(w, ff);
- BEGIN_WITH_FRAME_LOCK(w, parent_ff) {
- if (parent_ff->simulated_stolen)
- unconditional_steal(w, parent_ff);
- else
- steal_result = provably_good_steal(w, parent_ff);
- } END_WITH_FRAME_LOCK(w, parent_ff);
- } END_WITH_WORKER_LOCK_OPTIONAL(w);
- // Loop here in replay mode
- #ifdef CILK_RECORD_REPLAY
- // We don't have to explicitly check for REPLAY_LOG below because
- // steal_result can only get set to WAIT_FOR_CONTINUE during replay.
- // We also don't have to worry about the simulated_stolen flag
- // because steal_result can only be set to WAIT_FOR_CONTINUE by
- // provably_good_steal().
- while(WAIT_FOR_CONTINUE == steal_result)
- {
- __cilkrts_sleep();
- BEGIN_WITH_WORKER_LOCK_OPTIONAL(w)
- {
- BEGIN_WITH_FRAME_LOCK(w, parent_ff)
- {
- steal_result = provably_good_steal(w, parent_ff);
- } END_WITH_FRAME_LOCK(w, parent_ff);
- } END_WITH_WORKER_LOCK_OPTIONAL(w);
- }
- #endif // CILK_RECORD_REPLAY
- // Cleanup the child frame.
- __cilkrts_destroy_full_frame(w, ff);
- return;
- }
- #ifdef _WIN32
- /* migrate an exception across fibers. Call this function when an exception has
- * been thrown and has to traverse across a steal. The exception has already
- * been wrapped up, so all that remains is to longjmp() into the continuation,
- * sync, and re-raise it.
- */
- void __cilkrts_migrate_exception(__cilkrts_stack_frame *sf) {
- __cilkrts_worker *w = sf->worker;
- full_frame *ff;
- BEGIN_WITH_WORKER_LOCK(w) {
- ff = w->l->frame_ff;
- reset_THE_exception(w);
- /* there is no need to check for a steal because we wouldn't be here if
- there weren't a steal. */
- __cilkrts_save_exception_state(w, ff);
- CILK_ASSERT(w->head == w->tail);
- } END_WITH_WORKER_LOCK(w);
- {
- // TBD(jsukha): This function emulates
- // the "do_return_from_spawn" path.
- w = execute_reductions_for_spawn_return(w, ff, sf);
- }
- longjmp_into_runtime(w, do_return_from_spawn, 0); /* does not return. */
- CILK_ASSERT(! "Shouldn't be here...");
- }
- #endif
- /* Pop a call stack from TAIL. Return the call stack, or NULL if the
- queue is empty */
- __cilkrts_stack_frame *__cilkrts_pop_tail(__cilkrts_worker *w)
- {
- __cilkrts_stack_frame *sf;
- BEGIN_WITH_WORKER_LOCK(w) {
- __cilkrts_stack_frame *volatile *tail = w->tail;
- if (w->head < tail) {
- --tail;
- sf = *tail;
- w->tail = tail;
- } else {
- sf = 0;
- }
- } END_WITH_WORKER_LOCK(w);
- return sf;
- }
- #ifdef CILK_RECORD_REPLAY
- __cilkrts_stack_frame *simulate_pop_tail(__cilkrts_worker *w)
- {
- __cilkrts_stack_frame *sf;
- BEGIN_WITH_WORKER_LOCK(w) {
- if (w->head < w->tail) {
- sf = *(w->tail-1);
- } else {
- sf = 0;
- }
- } END_WITH_WORKER_LOCK(w);
- return sf;
- }
- #endif
- /* Return from a call, not a spawn. */
- void __cilkrts_return(__cilkrts_worker *w)
- {
- full_frame *ff, *parent_ff;
- START_INTERVAL(w, INTERVAL_RETURNING);
- BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
- ff = w->l->frame_ff;
- CILK_ASSERT(ff);
- CILK_ASSERT(ff->join_counter == 1);
- /* This path is not used to return from spawn. */
- CILK_ASSERT(ff->is_call_child);
- BEGIN_WITH_FRAME_LOCK(w, ff) {
- // After this call, w->l->frame_ff != ff.
- // Technically, w will "own" ff until ff is freed,
- // however, because ff is a dying leaf full frame.
- parent_ff = disown(w, ff, 0, "return");
- decjoin(ff);
- #ifdef _WIN32
- __cilkrts_save_exception_state(w, ff);
- #else
- // Move the pending exceptions into the full frame
- // This should always be NULL if this isn't a
- // return with an exception
- CILK_ASSERT(NULL == ff->pending_exception);
- ff->pending_exception = w->l->pending_exception;
- w->l->pending_exception = NULL;
- #endif // _WIN32
- } END_WITH_FRAME_LOCK(w, ff);
- __cilkrts_fence(); /* redundant */
- CILK_ASSERT(parent_ff);
- BEGIN_WITH_FRAME_LOCK(w, parent_ff) {
- finalize_child_for_call(w, parent_ff, ff);
- } END_WITH_FRAME_LOCK(w, parent_ff);
- ff = pop_next_frame(w);
- /* ff will be non-null except when the parent frame is owned
- by another worker.
- CILK_ASSERT(ff)
- */
- CILK_ASSERT(!w->l->frame_ff);
- if (ff) {
- BEGIN_WITH_FRAME_LOCK(w, ff) {
- __cilkrts_stack_frame *sf = ff->call_stack;
- CILK_ASSERT(sf && !sf->call_parent);
- setup_for_execution(w, ff, 1);
- } END_WITH_FRAME_LOCK(w, ff);
- }
- } END_WITH_WORKER_LOCK_OPTIONAL(w);
- STOP_INTERVAL(w, INTERVAL_RETURNING);
- }
- static void __cilkrts_unbind_thread()
- {
- int stop_cilkscreen = 0;
- global_state_t *g;
- // Take out the global OS mutex to protect accesses to the table of workers
- global_os_mutex_lock();
- if (cilkg_is_published()) {
- __cilkrts_worker *w = __cilkrts_get_tls_worker();
- if (w) {
- g = w->g;
- // If there's only 1 worker, the counts will be stopped in
- // __cilkrts_scheduler
- if (g->P > 1)
- {
- STOP_INTERVAL(w, INTERVAL_WORKING);
- STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER);
- }
- __cilkrts_set_tls_worker(0);
- if (w->self == -1) {
- // This worker is an overflow worker. I.e., it was created on-
- // demand when the global pool ran out of workers.
- destroy_worker(w);
- __cilkrts_free(w);
- } else {
- // This is a normal user worker and needs to be counted by the
- // global state for the purposes of throttling system workers.
- w->l->type = WORKER_FREE;
- __cilkrts_leave_cilk(g);
- }
- stop_cilkscreen = (0 == g->Q);
- }
- }
- global_os_mutex_unlock();
- /* Turn off Cilkscreen. This needs to be done when we are NOT holding the
- * os mutex. */
- if (stop_cilkscreen)
- __cilkrts_cilkscreen_disable_instrumentation();
- }
- /* special return from the initial frame */
- void __cilkrts_c_return_from_initial(__cilkrts_worker *w)
- {
- struct cilkred_map *rm;
- /* This is only called on a user thread worker. */
- CILK_ASSERT(w->l->type == WORKER_USER);
- #if REDPAR_DEBUG >= 3
- fprintf(stderr, "[W=%d, desc=cilkrts_c_return_from_initial, ff=%p]\n",
- w->self, w->l->frame_ff);
- #endif
-
- BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
- full_frame *ff = w->l->frame_ff;
- CILK_ASSERT(ff);
- CILK_ASSERT(ff->join_counter == 1);
- w->l->frame_ff = 0;
- CILK_ASSERT(ff->fiber_self);
- // Save any TBB interop data for the next time this thread enters Cilk
- cilk_fiber_tbb_interop_save_info_from_stack(ff->fiber_self);
- // Deallocate cilk_fiber that mapped to the user stack. The stack
- // itself does not get deallocated (of course) but our data
- // structure becomes divorced from it.
- #if FIBER_DEBUG >= 1
- fprintf(stderr, "ThreadId=%p: w=%d: We are about to deallocate ff->fiber_self = %p here. w->l->scheduling_fiber = %p. w->l->type = %d\n",
- cilkos_get_current_thread_id(),
- w->self,
- ff->fiber_self,
- w->l->scheduling_fiber,
- w->l->type);
- #endif
- // The fiber in ff is a user-code fiber. The fiber in
- // w->l->scheduling_fiber is a scheduling fiber. These fibers should
- // never be equal. When a user worker returns (and will unbind), we
- // should destroy only the fiber in ff. The scheduling fiber will be
- // re-used.
- CILK_ASSERT(ff->fiber_self != w->l->scheduling_fiber);
- START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
- // This fiber might not be deallocated here if there
- // is a pending exception on Windows that refers
- // to this fiber.
- //
- // First "suspend" the fiber, and then try to delete it.
- cilk_fiber_deallocate_from_thread(ff->fiber_self);
- } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
- ff->fiber_self = NULL;
- /* Detach the reducer map from the worker; it is destroyed below,
- once the worker lock has been released. */
- rm = w->reducer_map;
- w->reducer_map = NULL;
- #if REDPAR_DEBUG >= 3
- fprintf(stderr, "W=%d, reducer_map_to_delete=%p, was in ff=%p\n",
- w->self,
- rm,
- ff);
- #endif
- __cilkrts_destroy_full_frame(w, ff);
- /* Work is never done. w->g->work_done = 1; __cilkrts_fence(); */
- } END_WITH_WORKER_LOCK_OPTIONAL(w);
- save_pedigree_leaf_from_user_worker(w);
- // Workers can have NULL reducer maps now.
- if (rm) {
- __cilkrts_destroy_reducer_map(w, rm);
- }
- #if FIBER_DEBUG >= 1
- __cilkrts_worker* tmp = w;
- int tmp_id = w->self;
- fprintf(stderr, "w=%d: We are about unbind thread (w= %p)\n",
- w->self,
- w);
- #endif
- w = NULL;
-
- __cilkrts_unbind_thread();
- #if FIBER_DEBUG >= 1
-
- fprintf(stderr, "w=%p, %d: Finished unbind\n",
- tmp, tmp_id);
- #endif
- /* Other workers will stop trying to steal if this was the last worker. */
- return;
- }
- /*
- * __cilkrts_restore_stealing
- *
- * Restore the protected_tail to a previous state, possibly allowing frames
- * to be stolen. The dekker_protocol has been extended to steal only if
- * head+1 is < protected_tail.
- */
- void __cilkrts_restore_stealing(
- __cilkrts_worker *w,
- __cilkrts_stack_frame *volatile *saved_protected_tail)
- {
- /* On most x86 this pair of operations would be slightly faster
- as an atomic exchange due to the implicit memory barrier in
- an atomic instruction. */
- w->protected_tail = saved_protected_tail;
- __cilkrts_fence();
- }
- /*
- * __cilkrts_disallow_stealing
- *
- * Move the protected_tail to NEW_PROTECTED_TAIL, preventing any
- * frames from being stolen. If NEW_PROTECTED_TAIL is NULL, prevent
- * stealing from the whole queue. The dekker_protocol has been
- * extended to only steal if head+1 is also < protected_tail.
- */
- __cilkrts_stack_frame *volatile *__cilkrts_disallow_stealing(
- __cilkrts_worker *w,
- __cilkrts_stack_frame *volatile *new_protected_tail)
- {
- __cilkrts_stack_frame *volatile *saved_protected_tail = w->protected_tail;
- if (!new_protected_tail)
- new_protected_tail = w->l->ltq;
- if (w->protected_tail > new_protected_tail) {
- w->protected_tail = new_protected_tail;
- /* Issue a store-store barrier. The update to protected_tail
- here must precede the update to tail in the next spawn.
- On x86 this is probably not needed. */
- #if defined __GNUC__ && __ICC >= 1200 && !(__MIC__ ||__MIC2__)
- _mm_sfence();
- #else
- __cilkrts_fence();
- #endif
- }
- return saved_protected_tail;
- }
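- /* A worked example (standalone sketch, illustrative only and not part of
-    the runtime build; integers stand in for deque slot pointers) of the
-    extended dekker condition mentioned above: a thief may take a frame only
-    while head + 1 < protected_tail, in addition to the usual head-versus-
-    tail check (simplified here). Moving protected_tail down to the base of
-    the queue therefore blocks all stealing, and restoring it to ltq_limit
-    re-enables it. */
- #if 0 /* illustrative only */
- #include <assert.h>
- static int sketch_can_steal(int head, int tail, int protected_tail)
- {
-     return (head < tail) && (head + 1 < protected_tail);
- }
- static void sketch_protected_tail_examples(void)
- {
-     /* Frames in slots 0..3: head == 0, tail == 4. */
-     assert(sketch_can_steal(0, 4, /*ltq_limit*/8) == 1); /* stealing allowed   */
-     assert(sketch_can_steal(0, 4, /*ltq base*/ 0) == 0); /* whole deque fenced */
-     assert(sketch_can_steal(2, 4, /*fence at*/ 3) == 0); /* head+1 not < 3     */
- }
- #endif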
- /*************************************************************
- Initialization and startup
- *************************************************************/
- __cilkrts_worker *make_worker(global_state_t *g,
- int self, __cilkrts_worker *w)
- {
- w->self = self;
- w->g = g;
- w->pedigree.rank = 0; // Initial rank is 0
- w->pedigree.parent = NULL;
- w->l = (local_state *)__cilkrts_malloc(sizeof(*w->l));
- __cilkrts_frame_malloc_per_worker_init(w);
- w->reducer_map = NULL;
- w->current_stack_frame = NULL;
- w->reserved = NULL;
-
- w->l->worker_magic_0 = WORKER_MAGIC_0;
- w->l->team = NULL;
- w->l->type = WORKER_FREE;
-
- __cilkrts_mutex_init(&w->l->lock);
- __cilkrts_mutex_init(&w->l->steal_lock);
- w->l->do_not_steal = 0;
- w->l->frame_ff = 0;
- w->l->next_frame_ff = 0;
- w->l->last_full_frame = NULL;
- w->l->ltq = (__cilkrts_stack_frame **)
- __cilkrts_malloc(g->ltqsize * sizeof(*w->l->ltq));
- w->ltq_limit = w->l->ltq + g->ltqsize;
- w->head = w->tail = w->l->ltq;
-
- cilk_fiber_pool_init(&w->l->fiber_pool,
- &g->fiber_pool,
- g->stack_size,
- g->fiber_pool_size,
- 0, // alloc_max is 0. We don't allocate from the heap directly without checking the parent pool.
- 0);
- #if FIBER_DEBUG >= 2
- fprintf(stderr, "ThreadId=%p: Making w=%d (%p), pool = %p\n",
- cilkos_get_current_thread_id(),
- w->self, w,
- &w->l->fiber_pool);
- #endif
- w->l->scheduling_fiber = NULL;
- w->l->original_pedigree_leaf = NULL;
- w->l->rand_seed = 0; /* the scheduler will overwrite this field */
- w->l->post_suspend = 0;
- w->l->suspended_stack = 0;
- w->l->fiber_to_free = NULL;
- w->l->pending_exception = NULL;
- #if CILK_PROFILE
- w->l->stats = __cilkrts_malloc(sizeof(statistics));
- __cilkrts_init_stats(w->l->stats);
- #else
- w->l->stats = NULL;
- #endif
- w->l->steal_failure_count = 0;
- // Nothing's been stolen yet
- w->l->work_stolen = 0;
- // Initialize record/replay assuming we're doing neither
- w->l->record_replay_fptr = NULL;
- w->l->replay_list_root = NULL;
- w->l->replay_list_entry = NULL;
- w->l->signal_node = NULL;
- w->l->worker_magic_1 = WORKER_MAGIC_1;
- /*w->parallelism_disabled = 0;*/
- // Allow stealing of all frames. Sets w->protected_tail.
- __cilkrts_restore_stealing(w, w->ltq_limit);
-
- __cilkrts_init_worker_sysdep(w);
- reset_THE_exception(w);
- return w;
- }
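- /* A minimal sketch (illustrative only, not part of the runtime build;
-    sketch_* names and toy types are hypothetical) of the deque layout set
-    up above: ltq is an array of ltqsize slots, ltq_limit points one past
-    the end, and head == tail == ltq means the deque is empty. The owner
-    pushes by advancing tail; thieves take work by advancing head. */
- #if 0 /* illustrative only */
- #include <stddef.h>
- struct sketch_deque {
-     void **ltq;        /* base of the slot array                  */
-     void **ltq_limit;  /* one past the last slot                  */
-     void **head;       /* next slot a thief would steal from      */
-     void **tail;       /* next free slot for the owner to push to */
- };
- static void sketch_deque_init(struct sketch_deque *d, void **slots, size_t n)
- {
-     d->ltq = slots;
-     d->ltq_limit = slots + n;
-     d->head = d->tail = slots;       /* empty: nothing to pop or steal */
- }
- static int sketch_deque_empty(const struct sketch_deque *d)
- {
-     return !(d->head < d->tail);
- }
- #endif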
- void destroy_worker(__cilkrts_worker *w)
- {
- CILK_ASSERT (NULL == w->l->pending_exception);
- // Deallocate the scheduling fiber
- if (NULL != w->l->scheduling_fiber)
- {
- // The scheduling fiber is the main fiber for system workers and must
- // be deallocated by the thread that created it. Thus, we can
- // deallocate only free workers' (formerly user workers) scheduling
- // fibers here.
- CILK_ASSERT(WORKER_FREE == w->l->type);
- #if FIBER_DEBUG >=1
- fprintf(stderr, "ThreadId=%p, w=%p, %d, deallocating scheduling fiber = %p, \n",
- cilkos_get_current_thread_id(),
- w,
- w->self,
- w->l->scheduling_fiber);
- #endif
- int ref_count = cilk_fiber_remove_reference(w->l->scheduling_fiber, NULL);
- // Scheduling fiber should never have extra references because of exceptions.
- CILK_ASSERT(0 == ref_count);
- w->l->scheduling_fiber = NULL;
- }
- #if CILK_PROFILE
- if (w->l->stats) {
- __cilkrts_free(w->l->stats);
- }
- #else
- CILK_ASSERT(NULL == w->l->stats);
- #endif
-
- /* Free any cached fibers. */
- cilk_fiber_pool_destroy(&w->l->fiber_pool);
- __cilkrts_destroy_worker_sysdep(w);
- if (w->l->signal_node) {
- CILK_ASSERT(WORKER_SYSTEM == w->l->type);
- signal_node_destroy(w->l->signal_node);
- }
- __cilkrts_free(w->l->ltq);
- __cilkrts_mutex_destroy(0, &w->l->lock);
- __cilkrts_mutex_destroy(0, &w->l->steal_lock);
- __cilkrts_frame_malloc_per_worker_cleanup(w);
- __cilkrts_free(w->l);
- // The caller is responsible for freeing the worker memory
- }
- /*
- * Make a worker into a system worker.
- */
- static void make_worker_system(__cilkrts_worker *w) {
- CILK_ASSERT(WORKER_FREE == w->l->type);
- w->l->type = WORKER_SYSTEM;
- w->l->signal_node = signal_node_create();
- }
- void __cilkrts_deinit_internal(global_state_t *g)
- {
- int i;
- __cilkrts_worker *w;
- // If there's no global state then we're done
- if (NULL == g)
- return;
- #ifdef CILK_PROFILE
- __cilkrts_dump_stats_to_stderr(g);
- #endif
- w = g->workers[0];
- if (w->l->frame_ff) {
- __cilkrts_destroy_full_frame(w, w->l->frame_ff);
- w->l->frame_ff = 0;
- }
- // Release any resources used for record/replay
- replay_term(g);
- // Destroy any system dependent global state
- __cilkrts_destroy_global_sysdep(g);
- for (i = 0; i < g->total_workers; ++i)
- destroy_worker(g->workers[i]);
- // Free memory for all worker blocks which were allocated contiguously
- __cilkrts_free(g->workers[0]);
- __cilkrts_free(g->workers);
- cilk_fiber_pool_destroy(&g->fiber_pool);
- __cilkrts_frame_malloc_global_cleanup(g);
- cilkg_deinit_global_state();
- }
- /*
- * Wake the runtime by notifying the system workers that they can steal. The
- * first user worker into the runtime should call this.
- */
- static void wake_runtime(global_state_t *g)
- {
- __cilkrts_worker *root;
- if (g->P > 1) {
- // Send a message to the root node. The message will propagate.
- root = g->workers[0];
- CILK_ASSERT(root->l->signal_node);
- signal_node_msg(root->l->signal_node, 1);
- }
- }
- /*
- * Put the runtime to sleep. The last user worker out of the runtime should
- * call this. Like Dad always said, turn out the lights when nobody's in the
- * room.
- */
- static void sleep_runtime(global_state_t *g)
- {
- __cilkrts_worker *root;
- if (g->P > 1) {
- // Send a message to the root node. The message will propagate.
- root = g->workers[0];
- CILK_ASSERT(root->l->signal_node);
- signal_node_msg(root->l->signal_node, 0);
- }
- }
- /* Called when a user thread joins Cilk.
- Global lock must be held. */
- void __cilkrts_enter_cilk(global_state_t *g)
- {
- if (g->Q++ == 0) {
- // If this is the first user thread to enter Cilk wake
- // up all the workers.
- wake_runtime(g);
- }
- }
- /* Called when a user thread leaves Cilk.
- Global lock must be held. */
- void __cilkrts_leave_cilk(global_state_t *g)
- {
- if (--g->Q == 0) {
- // Put the runtime to sleep.
- sleep_runtime(g);
- }
- }
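- /* A small sketch (illustrative only, not part of the runtime build;
-    sketch_* names are hypothetical) of the Q bookkeeping used by the two
-    functions above: Q counts user threads currently inside Cilk, the
-    0 -> 1 transition wakes the system workers, and the 1 -> 0 transition
-    puts them back to sleep. The global lock held by the callers makes the
-    increment/decrement safe. */
- #if 0 /* illustrative only */
- static int sketch_Q = 0;
- static void sketch_wake(void)  { /* stands in for signal_node_msg(root, 1) */ }
- static void sketch_sleep(void) { /* stands in for signal_node_msg(root, 0) */ }
- static void sketch_enter_cilk(void)
- {
-     if (sketch_Q++ == 0)
-         sketch_wake();   /* first user thread in: wake the runtime */
- }
- static void sketch_leave_cilk(void)
- {
-     if (--sketch_Q == 0)
-         sketch_sleep();  /* last user thread out: lights off */
- }
- #endif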
- /*
- * worker_runnable
- *
- * Return SCHEDULE_RUN if the worker should keep trying to steal,
- * SCHEDULE_WAIT if it should wait for a signal, or SCHEDULE_EXIT if the
- * runtime is shutting down.
- */
- NOINLINE
- static enum schedule_t worker_runnable(__cilkrts_worker *w)
- {
- global_state_t *g = w->g;
- /* If this worker has something to do, do it.
- Otherwise the work would be lost. */
- if (w->l->next_frame_ff)
- return SCHEDULE_RUN;
- // If Cilk has explicitly (by the user) been told to exit (i.e., by
- // __cilkrts_end_cilk() -> __cilkrts_stop_workers(g)), then return SCHEDULE_EXIT.
- if (g->work_done)
- return SCHEDULE_EXIT;
- if (0 == w->self) {
- // This worker is the root node and is the only one that may query the
- // global state to see if there are still any user workers in Cilk.
- if (w->l->steal_failure_count > g->max_steal_failures) {
- if (signal_node_should_wait(w->l->signal_node)) {
- return SCHEDULE_WAIT;
- } else {
- // Reset the steal_failure_count since we have verified that
- // user workers are still in Cilk.
- w->l->steal_failure_count = 0;
- }
- }
- } else if (WORKER_SYSTEM == w->l->type &&
- signal_node_should_wait(w->l->signal_node)) {
- // This worker has been notified by its parent that it should stop
- // trying to steal.
- return SCHEDULE_WAIT;
- }
- return SCHEDULE_RUN;
- }
- // Initialize the worker structs, but don't start the workers themselves.
- static void init_workers(global_state_t *g)
- {
- int total_workers = g->total_workers;
- int i;
- struct CILK_ALIGNAS(256) buffered_worker {
- __cilkrts_worker w;
- char buf[64];
- } *workers_memory;
- /* not needed if only one worker */
- cilk_fiber_pool_init(&g->fiber_pool,
- NULL,
- g->stack_size,
- g->global_fiber_pool_size, // buffer_size
- g->max_stacks, // maximum # to allocate
- 1);
- cilk_fiber_pool_set_fiber_limit(&g->fiber_pool,
- (g->max_stacks ? g->max_stacks : INT_MAX));
- g->workers = (__cilkrts_worker **)
- __cilkrts_malloc(total_workers * sizeof(*g->workers));
- // Allocate 1 block of memory for workers to make life easier for tools
- // like Inspector which run multithreaded and need to know the memory
- // range for all the workers that will be accessed in a user's program
- workers_memory = (struct buffered_worker*)
- __cilkrts_malloc(sizeof(*workers_memory) * total_workers);
-
- // Notify any tools that care (Cilkscreen and Inspector) that they should
- // ignore memory allocated for the workers
- __cilkrts_cilkscreen_ignore_block(&workers_memory[0],
- &workers_memory[total_workers]);
- // Initialize worker structs, including unused worker slots.
- for (i = 0; i < total_workers; ++i) {
- g->workers[i] = make_worker(g, i, &workers_memory[i].w);
- }
- // Set the workers in the first P - 1 slots to be system workers.
- // Remaining worker structs already have type == 0.
- for (i = 0; i < g->system_workers; ++i) {
- make_worker_system(g->workers[i]);
- }
- }
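- /* A short sketch (illustrative only, not part of the runtime build; the
-    toy struct below is hypothetical) of the alignment guarantee provided by
-    the 256-byte-aligned, padded worker block above, plausibly there to
-    avoid false sharing between workers: every array element starts on its
-    own 256-byte boundary, so no two elements share a 64-byte cache line,
-    and tools like Inspector can be told to ignore one contiguous block. */
- #if 0 /* illustrative only */
- #include <assert.h>
- #include <stdalign.h>
- struct sketch_padded_slot {
-     alignas(256) char payload[1];   /* stands in for a worker plus padding */
- };
- static void sketch_layout_example(void)
- {
-     struct sketch_padded_slot slots[2];
-     /* Consecutive elements are at least 256 bytes apart, i.e., on
-        different cache lines for any line size up to 256 bytes. */
-     assert((char *)&slots[1] - (char *)&slots[0] >= 256);
- }
- #endif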
- void __cilkrts_init_internal(int start)
- {
- global_state_t *g = NULL;
- if (cilkg_is_published()) {
- g = cilkg_init_global_state();
- }
- else {
- // We think the state has not been published yet.
- // Grab the lock and try to initialize/publish.
- global_os_mutex_lock();
- if (cilkg_is_published()) {
- // Some other thread must have snuck in and published.
- g = cilkg_init_global_state();
- }
- else {
- // Initialize and retrieve global state
- g = cilkg_init_global_state();
- // Set the scheduler pointer
- g->scheduler = worker_scheduler_function;
- // If we're running under a sequential P-Tool (Cilkscreen or
- // Cilkview) then there's only one worker and we need to tell
- // the tool about the extent of the stack
- if (g->under_ptool)
- __cilkrts_establish_c_stack();
- init_workers(g);
- // Initialize per-worker record/replay logging
- replay_init_workers(g);
- // Initialize any system dependent global state
- __cilkrts_init_global_sysdep(g);
- cilkg_publish_global_state(g);
- }
- global_os_mutex_unlock();
- }
- CILK_ASSERT(g);
- if (start && !g->workers_running)
- {
- // Acquire the global OS mutex while we're starting the workers
- global_os_mutex_lock();
- if (!g->workers_running)
- // Start P - 1 system workers since P includes the first user
- // worker.
- __cilkrts_start_workers(g, g->P - 1);
- global_os_mutex_unlock();
- }
- }
- /************************************************************************
- Methods for reducer protocol.
- Reductions occur in two places:
- A. A full frame "ff" is returning from a spawn with a stolen parent.
- B. A full frame "ff" is stalling at a sync.
- To support parallel reductions, reduction functions need to be
- executed while control is on a user stack, before jumping into the
- runtime. These reductions can not occur while holding a worker or
- frame lock.
- Before a worker w executes a reduction in either Case A or B, w's
- deque is empty.
- Since parallel reductions push work onto the deque, we must do extra
- work to set up runtime data structures properly before reductions
- begin to allow stealing. ( Normally, when we have only serial
- reductions, once a worker w starts a reduction, its deque remains
- empty until w either steals another frame or resumes a suspended
- frame. Thus, we don't care about the state of the deque, since w
- will reset its deque when setting up execution of a frame. )
- To allow for parallel reductions, we coerce the runtime data
- structures so that, from their perspective, it looks as though we
- have spliced in an "execute_reductions()" function. Consider the
- two cases for reductions:
- Case A: Return from a spawn with a stolen parent.
- Consider a spawned function g that is returning on a worker w.
- Assume:
- - g was spawned from a parent function f,
- - ff is the full frame for g's spawn helper, and
- - sf is the __cilkrts_stack_frame for g's spawn helper.
- We are conceptually splicing "execute_reductions()" so that it
- occurs immediately before the spawn helper of g returns to f.
- We do so by creating two different world views --- one for the
- runtime data structures, and one for the actual control flow.
- - Before reductions begin, the runtime data structures should
- look as though the spawn helper of g is calling
- "execute_reductions()", in terms of both the user stack and
- worker deque. More precisely, w should satisfy the
- following properties:
- (a) w has ff as its full frame,
- (b) w has sf as its __cilkrts_stack_frame, and
- (c) w has an empty deque.
- If the runtime satisfies these properties, then if w
- encounters a spawn in a parallel reduction, it can push onto
- a valid deque. Also, a thief that steals from w will
- build the correct tree of full frames.
- - In actual control flow, however, once the
- "execute_reductions()" function returns, it is actually
- returning to runtime code instead of g's spawn helper.
- At the point a worker w began executing reductions, the
- control flow / compiled code had already finished g's spawn
- helper, and w was about to enter the runtime. With parallel
- reductions, some worker v (which might be different from w)
- is the one returning to the runtime.
- The reduction logic consists of 4 steps:
- A1. Restore runtime data structures to make it look as though
- the spawn helper of g() is still the currently executing
- frame for w.
- A2. Execute reductions on the user stack. Reductions also
- include the logic for exceptions and stacks. Note that
- reductions start on w, but may finish on a different
- worker if there is parallelism in the reduce.
- A3. Splice out ff from the tree of full frames.
- A4. Jump into the runtime/scheduling stack and execute
- "do_return_from_spawn". This method
- (a) Frees the user stack we were just on if it is no longer needed.
- (b) Decrements the join counter on ff->parent and tries to do a
- provably good steal.
- (c) Cleans up the full frame ff.
- Case B: Stalling at a sync.
- Consider a function g(), with full frame ff and
- __cilkrts_stack_frame sf. Suppose g() stalls at a sync, and we
- are executing reductions.
- Conceptually, we are splicing in an "execute_reductions()"
- function into g() as the last action that g() takes immediately
- before it executes the cilk_sync.
- The reduction logic for this case is similar to Case A.
- B1. Restore the runtime data structures.
- The main difference from Case A is that ff/sf is still a
- frame that needs to be executed later (since it is stalling
- at a cilk_sync). Thus, we also need to save the current
- stack information into "ff" so that we can correctly resume
- execution of "ff" after the sync.
- B2. Execute reductions on the user stack.
- B3. No frame to splice out of the tree.
- B4. Jump into the runtime/scheduling stack and execute "do_sync".
- This method:
- (a) Frees the user stack we were just on if it is no longer needed.
- (b) Tries to execute a provably good steal.
- Finally, for the reducer protocol, we consider two reduction paths,
- namely a "fast" and "slow" path. On a fast path, only trivial
- merges of reducer maps happen (i.e., one or both of the maps are
- NULL). Otherwise, on the slow path, a reduction actually needs to
- happen.
- *****************************************************************/
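- /*
-  * Rough sketch of how the two cases above map onto the functions in
-  * this section (a conceptual outline, not literal call sites):
-  *
-  *   Case A (spawn return, stolen parent):
-  *     w = execute_reductions_for_spawn_return(w, ff, returning_sf);
-  *         // steps A1-A3: restore frame, run reductions, splice out ff
-  *     ...jump to the scheduling fiber and run do_return_from_spawn()
-  *         // step A4
-  *
-  *   Case B (stalling at a sync):
-  *     w = execute_reductions_for_sync(w, ff, sf_at_sync);
-  *         // steps B1-B2: restore frame state, run reductions
-  *     ...jump to the scheduling fiber and run do_sync()
-  *         // step B4
-  */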
- /**
- * @brief Locations to store the result of a reduction.
- *
- * Struct storing pointers to the fields in our "left" sibling that we
- * should update when splicing out a full frame or stalling at a sync.
- */
- typedef struct {
- /** A pointer to the location of our left reducer map. */
- struct cilkred_map **map_ptr;
- /** A pointer to the location of our left exception. */
- struct pending_exception_info **exception_ptr;
- } splice_left_ptrs;
- /**
- * For a full frame returning from a spawn, calculate the pointers to
- * the maps and exceptions to my left.
- *
- * @param w The currently executing worker.
- * @param ff Full frame that is dying
- * @return Pointers to our "left" for reducers and exceptions.
- */
- static inline
- splice_left_ptrs compute_left_ptrs_for_spawn_return(__cilkrts_worker *w,
- full_frame *ff)
- {
- // ASSERT: we hold the lock on ff->parent
- splice_left_ptrs left_ptrs;
- if (ff->left_sibling) {
- left_ptrs.map_ptr = &ff->left_sibling->right_reducer_map;
- left_ptrs.exception_ptr = &ff->left_sibling->right_pending_exception;
- }
- else {
- full_frame *parent_ff = ff->parent;
- left_ptrs.map_ptr = &parent_ff->children_reducer_map;
- left_ptrs.exception_ptr = &parent_ff->child_pending_exception;
- }
- return left_ptrs;
- }
- /**
- * For a full frame at a sync, calculate the pointers to the maps and
- * exceptions to my left.
- *
- * @param w The currently executing worker.
- * @param ff Full frame that is stalling at a sync.
- * @return Pointers to our "left" for reducers and exceptions.
- */
- static inline
- splice_left_ptrs compute_left_ptrs_for_sync(__cilkrts_worker *w,
- full_frame *ff)
- {
- // ASSERT: we hold the lock on ff
- splice_left_ptrs left_ptrs;
- // Figure out which map to the left we should merge into.
- if (ff->rightmost_child) {
- CILK_ASSERT(ff->rightmost_child->parent == ff);
- left_ptrs.map_ptr = &(ff->rightmost_child->right_reducer_map);
- left_ptrs.exception_ptr = &(ff->rightmost_child->right_pending_exception);
- }
- else {
- // We have no children, so we should be the last
- // worker arriving at the sync; "left" is our own child map.
- left_ptrs.map_ptr = &(ff->children_reducer_map);
- left_ptrs.exception_ptr = &(ff->child_pending_exception);
- }
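- // Example: if ff has children and ck is the rightmost one, the "left"
- // slots are ck->right_reducer_map and ck->right_pending_exception;
- // with no children they are ff's own children_reducer_map and
- // child_pending_exception.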
- return left_ptrs;
- }
- /**
- * After we have completed all reductions on a spawn return, call this
- * method to finish up before jumping into the runtime.
- *
- * 1. Perform the "reduction" on stacks, i.e., execute the left
- * holder logic to pass the leftmost stack up.
- *
- * w->l->fiber_to_free holds any stack that needs to be freed
- * when control switches into the runtime fiber.
- *
- * 2. Unlink and remove child_ff from the tree of full frames.
- *
- * @param w The currently executing worker.
- * @param parent_ff The parent of child_ff.
- * @param child_ff The full frame returning from a spawn.
- */
- static inline
- void finish_spawn_return_on_user_stack(__cilkrts_worker *w,
- full_frame *parent_ff,
- full_frame *child_ff)
- {
- CILK_ASSERT(w->l->fiber_to_free == NULL);
- // Execute left-holder logic for stacks.
- if (child_ff->left_sibling || parent_ff->fiber_child) {
- // Case where we are not the leftmost stack.
- CILK_ASSERT(parent_ff->fiber_child != child_ff->fiber_self);
- // Remember any fiber we need to free in the worker.
- // After we jump into the runtime, we will actually do the
- // free.
- w->l->fiber_to_free = child_ff->fiber_self;
- }
- else {
- // We are leftmost, pass stack/fiber up to parent.
- // Thus, no stack/fiber to free.
- parent_ff->fiber_child = child_ff->fiber_self;
- w->l->fiber_to_free = NULL;
- }
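- // Summary of the left-holder logic above: the returning child donates
- // its fiber to parent_ff only when it is currently the leftmost
- // outstanding child and no fiber has been passed up yet; otherwise the
- // fiber is queued on w->l->fiber_to_free and freed after we jump back
- // into the runtime.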
- child_ff->fiber_self = NULL;
- unlink_child(parent_ff, child_ff);
- }
- /**
- * Executes any fast reductions necessary to splice ff out of the tree
- * of full frames.
- *
- * This "fast" path performs only trivial merges of reducer maps,
- * i.e., when one of them is NULL.
- * (See slow_path_reductions_for_spawn_return() for slow path.)
- *
- * Returns NULL if we finished all reductions.
- * Returns a non-NULL pointer to the left map location if there are
- * still reductions to execute, and we should take the slow path.
- *
- * This method assumes w holds the frame lock on parent_ff.
- * After this method completes:
- * 1. We have spliced ff out of the tree of full frames.
- * 2. The reducer maps of child_ff have been deposited
- * "left" according to the reducer protocol.
- * 3. w->l->fiber_to_free stores the fiber
- * that needs to be freed once we jump into the runtime.
- *
- * We have not, however, decremented the join counter on ff->parent.
- * This prevents any other workers from resuming execution of the parent.
- *
- * @param w The currently executing worker.
- * @param ff The full frame returning from a spawn.
- * @return NULL if we finished all reductions.
- * @return The address where the left map is stored (which should be passed to
- * slow_path_reductions_for_spawn_return()) if there are
- * still reductions to execute.
- */
- struct cilkred_map**
- fast_path_reductions_for_spawn_return(__cilkrts_worker *w,
- full_frame *ff)
- {
- // ASSERT: we hold ff->parent->lock.
- splice_left_ptrs left_ptrs;
- CILK_ASSERT(NULL == w->l->pending_exception);
- // Figure out the pointers to the left where I want
- // to put reducers and exceptions.
- left_ptrs = compute_left_ptrs_for_spawn_return(w, ff);
-
- // Go ahead and merge exceptions while holding the lock.
- splice_exceptions_for_spawn(w, ff, left_ptrs.exception_ptr);
- // Now check if we have any reductions to perform.
- //
- // Consider all the cases of left, middle and right maps.
- // 0. (-, -, -) : finish and return NULL.
- // 1. (L, -, -) : finish and return NULL.
- // 2. (-, M, -) : slide over to left, finish, and return NULL.
- // 3. (L, M, -) : return the left map pointer (slow path).
- // 4. (-, -, R) : slide over to left, finish, and return NULL.
- // 5. (L, -, R) : return the left map pointer (slow path).
- // 6. (-, M, R) : return the left map pointer (slow path).
- // 7. (L, M, R) : return the left map pointer (slow path).
- //
- // In terms of code:
- // L == *left_ptrs.map_ptr
- // M == w->reducer_map
- // R == ff->right_reducer_map.
- //
- // The goal of the code below is to execute the fast path with
- // as few branches and writes as possible.
-
- int case_value = (*(left_ptrs.map_ptr) != NULL);
- case_value += ((w->reducer_map != NULL) << 1);
- case_value += ((ff->right_reducer_map != NULL) << 2);
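- // Example: if only w->reducer_map is non-NULL, case_value == 2
- // (binary 010); the switch below just slides the middle map into the
- // left slot and returns NULL. If both the left and middle maps are
- // non-NULL (case_value == 3), a real merge is needed, so we return
- // left_ptrs.map_ptr and take the slow path.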
- // Fastest path is case_value == 0 or 1.
- if (case_value >= 2) {
- switch (case_value) {
- case 2:
- *(left_ptrs.map_ptr) = w->reducer_map;
- w->reducer_map = NULL;
- return NULL;
- case 4:
- *(left_ptrs.map_ptr) = ff->right_reducer_map;
- ff->right_reducer_map = NULL;
- return NULL;
- default:
- // If we have to execute the slow path, then
- // return the pointer to the place to deposit the left
- // map.
- return left_ptrs.map_ptr;
- }
- }
- // Cases 0 and 1: nothing to do.
- return NULL;
- }
- /**
- * Executes any reductions necessary to splice the frame "ff" out of
- * the steal tree.
- *
- * This method executes the "slow" path for reductions on a spawn
- * return, i.e., there are non-NULL maps that need to be merged
- * together.
- *
- * This method should execute only if
- * fast_path_reductions_for_spawn_return() returns a non-NULL
- * left_map_ptr.
- *
- * Upon entry, left_map_ptr should be the location of the left map
- * at the start of the reduction, as calculated by
- * fast_path_reductions_for_spawn_return().
- *
- * After this method completes:
- * 1. We have spliced ff out of the tree of full frames.
- * 2. The reducer maps of child_ff have been deposited
- * "left" according to the reducer protocol.
- * 3. w->l->fiber_to_free stores the fiber
- * that needs to be freed once we jump into the runtime.
- * We have not, however, decremented the join counter on ff->parent,
- * so no one can resume execution of the parent yet.
- *
- * WARNING:
- * This method assumes the lock on ff->parent is held upon entry, and
- * upon exit, the worker that returns still holds the lock on ff->parent.
- * This method can, however, release and reacquire the lock on ff->parent.
- *
- * @param w The currently executing worker.
- * @param ff The full frame returning from a spawn.
- * @param left_map_ptr Pointer to our initial left map.
- * @return The worker that this method returns on.
- */
- static __cilkrts_worker*
- slow_path_reductions_for_spawn_return(__cilkrts_worker *w,
- full_frame *ff,
- struct cilkred_map **left_map_ptr)
- {
- // CILK_ASSERT: w is holding frame lock on parent_ff.
- #if REDPAR_DEBUG > 0
- CILK_ASSERT(!ff->rightmost_child);
- CILK_ASSERT(!ff->is_call_child);
- #endif
- // Loop invariant:
- // When beginning this loop, we should
- // 1. Be holding the lock on ff->parent.
- // 2. left_map_ptr should be the address of the pointer to the left map.
- // 3. All maps should be slid over left by one, if possible.
- // 4. All exceptions should be merged so far.
- while (1) {
-
- // Slide middle map left if possible.
- if (!(*left_map_ptr)) {
- *left_map_ptr = w->reducer_map;
- w->reducer_map = NULL;
- }
- // Slide right map to middle if possible.
- if (!w->reducer_map) {
- w->reducer_map = ff->right_reducer_map;
- ff->right_reducer_map = NULL;
- }
- // Since we slid everything left by one,
- // we are finished if there is no middle map.
- if (!w->reducer_map) {
- verify_current_wkr(w);
- return w;
- }
- else {
- struct cilkred_map* left_map;
- struct cilkred_map* middle_map;
- struct cilkred_map* right_map;
- // Take all the maps from their respective locations.
- // We can't leave them in place and execute a reduction because these fields
- // might change once we release the lock.
- left_map = *left_map_ptr;
- *left_map_ptr = NULL;
- middle_map = w->reducer_map;
- w->reducer_map = NULL;
- right_map = ff->right_reducer_map;
- ff->right_reducer_map = NULL;
-
- // WARNING!!! Lock release here.
- // We have reductions to execute (and we can't hold locks).
- __cilkrts_frame_unlock(w, ff->parent);
- // Merge all reducers into the left map.
- left_map = repeated_merge_reducer_maps(&w,
- left_map,
- middle_map);
- verify_current_wkr(w);
- left_map = repeated_merge_reducer_maps(&w,
- left_map,
- right_map);
- verify_current_wkr(w);
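- // Note: the reduce functions run above may themselves contain
- // parallelism, so the worker pointed to by w can change across each
- // merge; that is why this function returns the worker it finishes on.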
- CILK_ASSERT(NULL == w->reducer_map);
- // Put the final answer back into w->reducer_map.
- w->reducer_map = left_map;
-
- // Save any exceptions generated because of the reduction
- // process from the returning worker. These get merged
- // the next time around the loop.
- CILK_ASSERT(NULL == ff->pending_exception);
- ff->pending_exception = w->l->pending_exception;
- w->l->pending_exception = NULL;
- // Lock ff->parent for the next loop around.
- __cilkrts_frame_lock(w, ff->parent);
- // Once we have the lock again, recompute who is to our
- // left.
- splice_left_ptrs left_ptrs;
- left_ptrs = compute_left_ptrs_for_spawn_return(w, ff);
- // Update the pointer for the left map.
- left_map_ptr = left_ptrs.map_ptr;
- // Splice the exceptions for spawn.
- splice_exceptions_for_spawn(w, ff, left_ptrs.exception_ptr);
- }
- }
- // We should never break out of this loop.
-
- CILK_ASSERT(0);
- return NULL;
- }
- /**
- * Execute reductions when returning from a spawn whose parent has
- * been stolen.
- *
- * Execution may start on w, but may finish on a different worker.
- * This method acquires/releases the lock on ff->parent.
- *
- * @param w The currently executing worker.
- * @param ff The full frame of the spawned function that is returning.
- * @param returning_sf The __cilkrts_stack_frame for this returning function.
- * @return The worker returning from this method.
- */
- static __cilkrts_worker*
- execute_reductions_for_spawn_return(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *returning_sf)
- {
- // Step A1 from reducer protocol described above.
- //
- // Coerce the runtime into thinking that
- // ff/returning_sf are still on the bottom of
- // w's deque.
- restore_frame_for_spawn_return_reduction(w, ff, returning_sf);
- // Step A2 and A3: Execute reductions on user stack.
- BEGIN_WITH_FRAME_LOCK(w, ff->parent) {
- struct cilkred_map **left_map_ptr;
- left_map_ptr = fast_path_reductions_for_spawn_return(w, ff);
- // Pointer will be non-NULL if there are
- // still reductions to execute.
- if (left_map_ptr) {
- // WARNING: This method call may release the lock
- // on ff->parent and re-acquire it (possibly on a
- // different worker).
- // We can't hold locks while actually executing
- // reduce functions.
- w = slow_path_reductions_for_spawn_return(w,
- ff,
- left_map_ptr);
- verify_current_wkr(w);
- }
- finish_spawn_return_on_user_stack(w, ff->parent, ff);
- // WARNING: the use of this lock macro is deceptive.
- // The worker may have changed here.
- } END_WITH_FRAME_LOCK(w, ff->parent);
- return w;
- }
- /**
- * Execute fast "reductions" when ff stalls at a sync.
- *
- * @param w The currently executing worker.
- * @param ff The full frame stalling at a sync.
- * @return 1 if we are finished with all reductions after calling this method.
- * @return 0 if we still need to execute the slow path reductions.
- */
- static inline
- int fast_path_reductions_for_sync(__cilkrts_worker *w,
- full_frame *ff)
- {
- // Return 0 if there is some reduction that needs to happen.
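- // A reduction is needed only if w still holds a reducer map to
- // deposit to the left or ff has a pending exception to merge; if
- // both are NULL there is nothing to do.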
- return !(w->reducer_map || ff->pending_exception);
- }
- /**
- * Executes slow reductions when ff stalls at a sync.
- * This method should execute only if
- * fast_path_reductions_for_sync(w, ff) returned 0.
- *
- * After this method completes:
- * 1. ff's current reducer map has been deposited into
- * right_reducer_map of ff's rightmost child, or
- * ff->children_reducer_map if ff has no children.
- * 2. Similarly for ff's current exception.
- * 3. Nothing to calculate for stacks --- if we are stalling
- * we will always free a stack.
- *
- * This method may repeatedly acquire/release the lock on ff.
- *
- * @param w The currently executing worker.
- * @param ff The full frame stalling at a sync.
- * @return The worker returning from this method.
- */
- static __cilkrts_worker*
- slow_path_reductions_for_sync(__cilkrts_worker *w,
- full_frame *ff)
- {
- struct cilkred_map *left_map;
- struct cilkred_map *middle_map;
-
- #if (REDPAR_DEBUG > 0)
- CILK_ASSERT(ff);
- CILK_ASSERT(w->head == w->tail);
- #endif
- middle_map = w->reducer_map;
- w->reducer_map = NULL;
- // Loop invariant: middle_map should be valid (the current map to reduce).
- // left_map is junk.
- // w->reducer_map == NULL.
- while (1) {
- BEGIN_WITH_FRAME_LOCK(w, ff) {
- splice_left_ptrs left_ptrs = compute_left_ptrs_for_sync(w, ff);
-
- // Grab the "left" map and store pointers to those locations.
- left_map = *(left_ptrs.map_ptr);
- *(left_ptrs.map_ptr) = NULL;
-
- // Slide the maps in our struct left as far as possible.
- if (!left_map) {
- left_map = middle_map;
- middle_map = NULL;
- }
- *(left_ptrs.exception_ptr) =
- __cilkrts_merge_pending_exceptions(w,
- *left_ptrs.exception_ptr,
- ff->pending_exception);
- ff->pending_exception = NULL;
- // If there is no middle map, then we are done.
- // Deposit left and return.
- if (!middle_map) {
- *(left_ptrs.map_ptr) = left_map;
- #if (REDPAR_DEBUG > 0)
- CILK_ASSERT(NULL == w->reducer_map);
- #endif
- // Sanity check upon leaving the loop.
- verify_current_wkr(w);
- // Make sure to unlock before we return!
- __cilkrts_frame_unlock(w, ff);
- return w;
- }
- } END_WITH_FRAME_LOCK(w, ff);
-
- // If we get here, we have a nontrivial reduction to execute.
- middle_map = repeated_merge_reducer_maps(&w,
- left_map,
- middle_map);
- verify_current_wkr(w);
- // Save any exceptions generated because of the reduction
- // process. These get merged the next time around the
- // loop.
- CILK_ASSERT(NULL == ff->pending_exception);
- ff->pending_exception = w->l->pending_exception;
- w->l->pending_exception = NULL;
- }
-
- // We should never break out of the loop above.
- CILK_ASSERT(0);
- return NULL;
- }
- /**
- * Execute reductions when ff stalls at a sync.
- *
- * Execution starts on w, but may finish on a different worker.
- * This method may acquire/release the lock on ff.
- *
- * @param w The currently executing worker.
- * @param ff The full frame of the spawned function at the sync
- * @param sf_at_sync The __cilkrts_stack_frame stalling at a sync
- * @return The worker returning from this method.
- */
- static __cilkrts_worker*
- execute_reductions_for_sync(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf_at_sync)
- {
- int finished_reductions;
- // Step B1 from reducer protocol above:
- // Restore runtime invariants.
- //
- // The following code for this step is almost equivalent to
- // the following sequence:
- // 1. disown(w, ff, sf_at_sync, "sync") (which itself
- // calls make_unrunnable(w, ff, sf_at_sync))
- // 2. make_runnable(w, ff, sf_at_sync).
- //
- // The "disown" will mark the frame "sf_at_sync"
- // as stolen and suspended, and save its place on the stack,
- // so it can be resumed after the sync.
- //
- // The difference is, that we don't want the disown to
- // break the following connections yet, since we are
- // about to immediately make sf/ff runnable again anyway.
- // sf_at_sync->worker == w
- // w->l->frame_ff == ff.
- //
- // These connections are needed for parallel reductions, since
- // we will use sf / ff as the stack frame / full frame for
- // executing any potential reductions.
- //
- // TBD: Can we refactor the disown / make_unrunnable code
- // to avoid the code duplication here?
- ff->call_stack = NULL;
- // Normally, "make_unrunnable" would add CILK_FRAME_STOLEN and
- // CILK_FRAME_SUSPENDED to sf_at_sync->flags and save the state of
- // the stack so that a worker can resume the frame in the correct
- // place.
- //
- // But on this path, CILK_FRAME_STOLEN should already be set.
- // Also, we technically don't want to suspend the frame until
- // the reduction finishes.
- // We do, however, need to save the stack before
- // we start any reductions, since the reductions might push more
- // data onto the stack.
- CILK_ASSERT(sf_at_sync->flags & CILK_FRAME_STOLEN);
- __cilkrts_put_stack(ff, sf_at_sync);
- __cilkrts_make_unrunnable_sysdep(w, ff, sf_at_sync, 1,
- "execute_reductions_for_sync");
- CILK_ASSERT(w->l->frame_ff == ff);
- // Step B2: Execute reductions on user stack.
- // Check if we have any "real" reductions to do.
- finished_reductions = fast_path_reductions_for_sync(w, ff);
-
- if (!finished_reductions) {
- // Still have some real reductions to execute.
- // Run them here.
- // This method may acquire/release the lock on ff.
- w = slow_path_reductions_for_sync(w, ff);
- // The previous call may return on a different worker
- // than the one we started on.
- verify_current_wkr(w);
- }
- #if REDPAR_DEBUG >= 0
- CILK_ASSERT(w->l->frame_ff == ff);
- CILK_ASSERT(ff->call_stack == NULL);
- #endif
- // Now we suspend the frame ff (since we've
- // finished the reductions). Roughly, we've split apart the
- // "make_unrunnable" call here --- we've already saved the
- // stack info earlier before the reductions execute.
- // All that remains is to restore the call stack back into the
- // full frame, and mark the frame as suspended.
- ff->call_stack = sf_at_sync;
- sf_at_sync->flags |= CILK_FRAME_SUSPENDED;
- // At a nontrivial sync, we should always free the current fiber,
- // because it can not be leftmost.
- w->l->fiber_to_free = ff->fiber_self;
- ff->fiber_self = NULL;
- return w;
- }
- /*
- Local Variables: **
- c-file-style:"bsd" **
- c-basic-offset:4 **
- indent-tabs-mode:nil **
- End: **
- */