deadline.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Deadline Scheduling Class (SCHED_DEADLINE)
  4. *
  5. * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
  6. *
  7. * Tasks that periodically execute their instances for less than their
  8. * runtime won't miss any of their deadlines.
  9. * Tasks that are not periodic or sporadic, or that try to execute more
  10. * than their reserved bandwidth, will be slowed down (and may potentially
  11. * miss some of their deadlines), but won't affect any other task.
  12. *
  13. * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
  14. * Juri Lelli <juri.lelli@gmail.com>,
  15. * Michael Trimarchi <michael@amarulasolutions.com>,
  16. * Fabio Checconi <fchecconi@gmail.com>
  17. */
  18. #include "sched.h"
  19. #include <linux/slab.h>
  20. #include <uapi/linux/sched/types.h>
  21. #include "walt.h"
  22. struct dl_bandwidth def_dl_bandwidth;
  23. static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
  24. {
  25. return container_of(dl_se, struct task_struct, dl);
  26. }
  27. static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
  28. {
  29. return container_of(dl_rq, struct rq, dl);
  30. }
  31. static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
  32. {
  33. struct task_struct *p = dl_task_of(dl_se);
  34. struct rq *rq = task_rq(p);
  35. return &rq->dl;
  36. }
  37. static inline int on_dl_rq(struct sched_dl_entity *dl_se)
  38. {
  39. return !RB_EMPTY_NODE(&dl_se->rb_node);
  40. }
  41. #ifdef CONFIG_SMP
  42. static inline struct dl_bw *dl_bw_of(int i)
  43. {
  44. RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
  45. "sched RCU must be held");
  46. return &cpu_rq(i)->rd->dl_bw;
  47. }
  48. static inline int dl_bw_cpus(int i)
  49. {
  50. struct root_domain *rd = cpu_rq(i)->rd;
  51. int cpus = 0;
  52. RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
  53. "sched RCU must be held");
  54. for_each_cpu_and(i, rd->span, cpu_active_mask)
  55. cpus++;
  56. return cpus;
  57. }
  58. #else
  59. static inline struct dl_bw *dl_bw_of(int i)
  60. {
  61. return &cpu_rq(i)->dl.dl_bw;
  62. }
  63. static inline int dl_bw_cpus(int i)
  64. {
  65. return 1;
  66. }
  67. #endif
  68. static inline
  69. void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
  70. {
  71. u64 old = dl_rq->running_bw;
  72. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  73. dl_rq->running_bw += dl_bw;
  74. SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
  75. SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
  76. }
  77. static inline
  78. void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
  79. {
  80. u64 old = dl_rq->running_bw;
  81. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  82. dl_rq->running_bw -= dl_bw;
  83. SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
  84. if (dl_rq->running_bw > old)
  85. dl_rq->running_bw = 0;
  86. }
  87. static inline
  88. void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
  89. {
  90. u64 old = dl_rq->this_bw;
  91. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  92. dl_rq->this_bw += dl_bw;
  93. SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
  94. }
  95. static inline
  96. void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
  97. {
  98. u64 old = dl_rq->this_bw;
  99. lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
  100. dl_rq->this_bw -= dl_bw;
  101. SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
  102. if (dl_rq->this_bw > old)
  103. dl_rq->this_bw = 0;
  104. SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
  105. }
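/*
 * Illustrative sketch, not part of the original file: dl_bw, running_bw and
 * this_bw above are utilizations stored as fixed-point fractions of a CPU,
 * scaled by 2^BW_SHIFT (BW_SHIFT is 20, see the GRUB comment further down).
 * The helper below is hypothetical and only shows how a reservation of
 * @runtime ns every @period ns maps to such a value; in the kernel this
 * scaling is done by to_ratio().
 */
static inline u64 dl_bw_from_params_example(u64 runtime, u64 period)
{
	/*
	 * e.g. runtime = 10ms, period = 100ms:
	 * (10 << 20) / 100 ~= 104857, i.e. roughly 0.1 of a CPU in fixed point.
	 */
	return div64_u64(runtime << BW_SHIFT, period);
}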
  106. void dl_change_utilization(struct task_struct *p, u64 new_bw)
  107. {
  108. struct rq *rq;
  109. if (task_on_rq_queued(p))
  110. return;
  111. rq = task_rq(p);
  112. if (p->dl.dl_non_contending) {
  113. sub_running_bw(p->dl.dl_bw, &rq->dl);
  114. p->dl.dl_non_contending = 0;
  115. /*
  116. * If the timer handler is currently running and the
  117. * timer cannot be cancelled, inactive_task_timer()
  118. * will see that dl_non_contending is not set, and
  119. * will not touch the rq's active utilization,
  120. * so we are still safe.
  121. */
  122. if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
  123. put_task_struct(p);
  124. }
  125. sub_rq_bw(p->dl.dl_bw, &rq->dl);
  126. add_rq_bw(new_bw, &rq->dl);
  127. }
  128. /*
  129. * The utilization of a task cannot be immediately removed from
  130. * the rq active utilization (running_bw) when the task blocks.
  131. * Instead, we have to wait for the so called "0-lag time".
  132. *
  133. * If a task blocks before the "0-lag time", a timer (the inactive
  134. * timer) is armed, and running_bw is decreased when the timer
  135. * fires.
  136. *
  137. * If the task wakes up again before the inactive timer fires,
  138. * the timer is cancelled, whereas if the task wakes up after the
  139. * inactive timer fired (and running_bw has been decreased) the
  140. * task's utilization has to be added to running_bw again.
  141. * A flag in the deadline scheduling entity (dl_non_contending)
  142. * is used to avoid race conditions between the inactive timer handler
  143. * and task wakeups.
  144. *
  145. * The following diagram shows how running_bw is updated. A task is
  146. * "ACTIVE" when its utilization contributes to running_bw; an
  147. * "ACTIVE contending" task is in the TASK_RUNNING state, while an
  148. * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
  149. * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
  150. * time already passed, which does not contribute to running_bw anymore.
  151. *                              +------------------+
  152. *             wakeup           |    ACTIVE        |
  153. *          +------------------>+   contending     |
  154. *          | add_running_bw    |                  |
  155. *          |                   +----+------+------+
  156. *          |                        |      ^
  157. *          |                dequeue |      |
  158. * +--------+-------+                |      |
  159. * |                |   t >= 0-lag   |      | wakeup
  160. * |    INACTIVE    |<---------------+      |
  161. * |                | sub_running_bw |      |
  162. * +--------+-------+                |      |
  163. *          ^                        |      |
  164. *          |              t < 0-lag |      |
  165. *          |                        |      |
  166. *          |                        V      |
  167. *          |                   +----+------+------+
  168. *          |  sub_running_bw   |    ACTIVE        |
  169. *          +-------------------+                  |
  170. *            inactive timer    |  non contending  |
  171. *               fired          +------------------+
  172. *
  173. * The task_non_contending() function is invoked when a task
  174. * blocks, and checks if the 0-lag time already passed or
  175. * not (in the first case, it directly updates running_bw;
  176. * in the second case, it arms the inactive timer).
  177. *
  178. * The task_contending() function is invoked when a task wakes
  179. * up, and checks if the task is still in the "ACTIVE non contending"
  180. * state or not (in the second case, it updates running_bw).
  181. */
  182. static void task_non_contending(struct task_struct *p)
  183. {
  184. struct sched_dl_entity *dl_se = &p->dl;
  185. struct hrtimer *timer = &dl_se->inactive_timer;
  186. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  187. struct rq *rq = rq_of_dl_rq(dl_rq);
  188. s64 zerolag_time;
  189. /*
  190. * If this is a non-deadline task that has been boosted,
  191. * do nothing
  192. */
  193. if (dl_se->dl_runtime == 0)
  194. return;
  195. WARN_ON(dl_se->dl_non_contending);
  196. zerolag_time = dl_se->deadline -
  197. div64_long((dl_se->runtime * dl_se->dl_period),
  198. dl_se->dl_runtime);
  199. /*
  200. * Using relative times instead of the absolute "0-lag time"
  201. * allows us to simplify the code.
  202. */
  203. zerolag_time -= rq_clock(rq);
  204. /*
  205. * If the "0-lag time" already passed, decrease the active
  206. * utilization now, instead of starting a timer
  207. */
  208. if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
  209. if (dl_task(p))
  210. sub_running_bw(dl_se->dl_bw, dl_rq);
  211. if (!dl_task(p) || p->state == TASK_DEAD) {
  212. struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
  213. if (p->state == TASK_DEAD)
  214. sub_rq_bw(p->dl.dl_bw, &rq->dl);
  215. raw_spin_lock(&dl_b->lock);
  216. __dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
  217. __dl_clear_params(p);
  218. raw_spin_unlock(&dl_b->lock);
  219. }
  220. return;
  221. }
  222. dl_se->dl_non_contending = 1;
  223. get_task_struct(p);
  224. hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL);
  225. }
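/*
 * Worked example for the 0-lag computation above (made-up numbers): a task
 * with dl_runtime = 10ms and dl_period = 100ms blocks with 4ms of runtime
 * left and its absolute deadline 30ms in the future. Then
 *
 *   zerolag = deadline - runtime * dl_period / dl_runtime
 *           = (now + 30ms) - 4ms * 100ms / 10ms = now - 10ms,
 *
 * i.e. the 0-lag time already passed, so running_bw is decreased right away
 * and no inactive timer is armed. Had only 2ms of runtime been left, the
 * 0-lag time would be now + 10ms and the inactive timer would fire then.
 */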
  226. static void task_contending(struct sched_dl_entity *dl_se, int flags)
  227. {
  228. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  229. /*
  230. * If this is a non-deadline task that has been boosted,
  231. * do nothing
  232. */
  233. if (dl_se->dl_runtime == 0)
  234. return;
  235. if (flags & ENQUEUE_MIGRATED)
  236. add_rq_bw(dl_se->dl_bw, dl_rq);
  237. if (dl_se->dl_non_contending) {
  238. dl_se->dl_non_contending = 0;
  239. /*
  240. * If the timer handler is currently running and the
  241. * timer cannot be cancelled, inactive_task_timer()
  242. * will see that dl_non_contending is not set, and
  243. * will not touch the rq's active utilization,
  244. * so we are still safe.
  245. */
  246. if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
  247. put_task_struct(dl_task_of(dl_se));
  248. } else {
  249. /*
  250. * Since "dl_non_contending" is not set, the
  251. * task's utilization has already been removed from
  252. * active utilization (either when the task blocked,
  253. * or when the "inactive timer" fired).
  254. * So, add it back.
  255. */
  256. add_running_bw(dl_se->dl_bw, dl_rq);
  257. }
  258. }
  259. static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
  260. {
  261. struct sched_dl_entity *dl_se = &p->dl;
  262. return dl_rq->root.rb_leftmost == &dl_se->rb_node;
  263. }
  264. void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
  265. {
  266. raw_spin_lock_init(&dl_b->dl_runtime_lock);
  267. dl_b->dl_period = period;
  268. dl_b->dl_runtime = runtime;
  269. }
  270. void init_dl_bw(struct dl_bw *dl_b)
  271. {
  272. raw_spin_lock_init(&dl_b->lock);
  273. raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
  274. if (global_rt_runtime() == RUNTIME_INF)
  275. dl_b->bw = -1;
  276. else
  277. dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
  278. raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
  279. dl_b->total_bw = 0;
  280. }
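/*
 * Example for the initialization above (using the default sysctl values,
 * which may differ on a given system): with sched_rt_period_us = 1000000
 * and sched_rt_runtime_us = 950000, to_ratio() yields
 *
 *   dl_b->bw = (950000 << BW_SHIFT) / 1000000 ~= 0.95 * BW_UNIT,
 *
 * i.e. -deadline tasks may be admitted up to 95% of the available
 * bandwidth; with sched_rt_runtime_us = -1 admission control is disabled
 * (dl_b->bw = -1 above).
 */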
  281. void init_dl_rq(struct dl_rq *dl_rq)
  282. {
  283. dl_rq->root = RB_ROOT_CACHED;
  284. #ifdef CONFIG_SMP
  285. /* zero means no -deadline tasks */
  286. dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
  287. dl_rq->dl_nr_migratory = 0;
  288. dl_rq->overloaded = 0;
  289. dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
  290. #else
  291. init_dl_bw(&dl_rq->dl_bw);
  292. #endif
  293. dl_rq->running_bw = 0;
  294. dl_rq->this_bw = 0;
  295. init_dl_rq_bw_ratio(dl_rq);
  296. }
  297. #ifdef CONFIG_SMP
  298. static inline int dl_overloaded(struct rq *rq)
  299. {
  300. return atomic_read(&rq->rd->dlo_count);
  301. }
  302. static inline void dl_set_overload(struct rq *rq)
  303. {
  304. if (!rq->online)
  305. return;
  306. cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
  307. /*
  308. * Must be visible before the overload count is
  309. * set (as in sched_rt.c).
  310. *
  311. * Matched by the barrier in pull_dl_task().
  312. */
  313. smp_wmb();
  314. atomic_inc(&rq->rd->dlo_count);
  315. }
  316. static inline void dl_clear_overload(struct rq *rq)
  317. {
  318. if (!rq->online)
  319. return;
  320. atomic_dec(&rq->rd->dlo_count);
  321. cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
  322. }
  323. static void update_dl_migration(struct dl_rq *dl_rq)
  324. {
  325. if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
  326. if (!dl_rq->overloaded) {
  327. dl_set_overload(rq_of_dl_rq(dl_rq));
  328. dl_rq->overloaded = 1;
  329. }
  330. } else if (dl_rq->overloaded) {
  331. dl_clear_overload(rq_of_dl_rq(dl_rq));
  332. dl_rq->overloaded = 0;
  333. }
  334. }
  335. static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  336. {
  337. struct task_struct *p = dl_task_of(dl_se);
  338. if (p->nr_cpus_allowed > 1)
  339. dl_rq->dl_nr_migratory++;
  340. update_dl_migration(dl_rq);
  341. }
  342. static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  343. {
  344. struct task_struct *p = dl_task_of(dl_se);
  345. if (p->nr_cpus_allowed > 1)
  346. dl_rq->dl_nr_migratory--;
  347. update_dl_migration(dl_rq);
  348. }
  349. /*
  350. * The list of pushable -deadline task is not a plist, like in
  351. * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
  352. */
  353. static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  354. {
  355. struct dl_rq *dl_rq = &rq->dl;
  356. struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_root.rb_node;
  357. struct rb_node *parent = NULL;
  358. struct task_struct *entry;
  359. bool leftmost = true;
  360. BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
  361. while (*link) {
  362. parent = *link;
  363. entry = rb_entry(parent, struct task_struct,
  364. pushable_dl_tasks);
  365. if (dl_entity_preempt(&p->dl, &entry->dl))
  366. link = &parent->rb_left;
  367. else {
  368. link = &parent->rb_right;
  369. leftmost = false;
  370. }
  371. }
  372. if (leftmost)
  373. dl_rq->earliest_dl.next = p->dl.deadline;
  374. rb_link_node(&p->pushable_dl_tasks, parent, link);
  375. rb_insert_color_cached(&p->pushable_dl_tasks,
  376. &dl_rq->pushable_dl_tasks_root, leftmost);
  377. }
  378. static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  379. {
  380. struct dl_rq *dl_rq = &rq->dl;
  381. if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
  382. return;
  383. if (dl_rq->pushable_dl_tasks_root.rb_leftmost == &p->pushable_dl_tasks) {
  384. struct rb_node *next_node;
  385. next_node = rb_next(&p->pushable_dl_tasks);
  386. if (next_node) {
  387. dl_rq->earliest_dl.next = rb_entry(next_node,
  388. struct task_struct, pushable_dl_tasks)->dl.deadline;
  389. }
  390. }
  391. rb_erase_cached(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
  392. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  393. }
  394. static inline int has_pushable_dl_tasks(struct rq *rq)
  395. {
  396. return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
  397. }
  398. static int push_dl_task(struct rq *rq);
  399. static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
  400. {
  401. return dl_task(prev);
  402. }
  403. static DEFINE_PER_CPU(struct callback_head, dl_push_head);
  404. static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
  405. static void push_dl_tasks(struct rq *);
  406. static void pull_dl_task(struct rq *);
  407. static inline void queue_push_tasks(struct rq *rq)
  408. {
  409. if (!has_pushable_dl_tasks(rq))
  410. return;
  411. queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
  412. }
  413. static inline void queue_pull_task(struct rq *rq)
  414. {
  415. queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
  416. }
  417. static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
  418. static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
  419. {
  420. struct rq *later_rq = NULL;
  421. later_rq = find_lock_later_rq(p, rq);
  422. if (!later_rq) {
  423. int cpu;
  424. /*
  425. * If we cannot preempt any rq, fall back to pick any
  426. * online cpu.
  427. */
  428. cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
  429. if (cpu >= nr_cpu_ids) {
  430. /*
  431. * Failed to find any suitable cpu.
  432. * The task will never come back!
  433. */
  434. BUG_ON(dl_bandwidth_enabled());
  435. /*
  436. * If admission control is disabled we
  437. * try a little harder to let the task
  438. * run.
  439. */
  440. cpu = cpumask_any(cpu_active_mask);
  441. }
  442. later_rq = cpu_rq(cpu);
  443. double_lock_balance(rq, later_rq);
  444. }
  445. set_task_cpu(p, later_rq->cpu);
  446. double_unlock_balance(later_rq, rq);
  447. return later_rq;
  448. }
  449. #else
  450. static inline
  451. void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  452. {
  453. }
  454. static inline
  455. void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
  456. {
  457. }
  458. static inline
  459. void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  460. {
  461. }
  462. static inline
  463. void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  464. {
  465. }
  466. static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
  467. {
  468. return false;
  469. }
  470. static inline void pull_dl_task(struct rq *rq)
  471. {
  472. }
  473. static inline void queue_push_tasks(struct rq *rq)
  474. {
  475. }
  476. static inline void queue_pull_task(struct rq *rq)
  477. {
  478. }
  479. #endif /* CONFIG_SMP */
  480. static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
  481. static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
  482. static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
  483. int flags);
  484. /*
  485. * We are being explicitly informed that a new instance is starting,
  486. * and this means that:
  487. * - the absolute deadline of the entity has to be placed at
  488. * current time + relative deadline;
  489. * - the runtime of the entity has to be set to the maximum value.
  490. *
  491. * The ability to specify such an event is useful whenever a -deadline
  492. * entity wants to (try to!) synchronize its behaviour with the scheduler's
  493. * and to (try to!) reconcile itself with its own scheduling
  494. * parameters.
  495. */
  496. static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
  497. {
  498. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  499. struct rq *rq = rq_of_dl_rq(dl_rq);
  500. WARN_ON(dl_se->dl_boosted);
  501. WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
  502. /*
  503. * We are racing with the deadline timer. So, do nothing because
  504. * the deadline timer handler will take care of properly recharging
  505. * the runtime and postponing the deadline
  506. */
  507. if (dl_se->dl_throttled)
  508. return;
  509. /*
  510. * We use the regular wall clock time to set deadlines in the
  511. * future; in fact, we must consider execution overheads (time
  512. * spent on hardirq context, etc.).
  513. */
  514. dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
  515. dl_se->runtime = dl_se->dl_runtime;
  516. }
  517. /*
  518. * Pure Earliest Deadline First (EDF) scheduling does not deal with the
  519. * possibility of an entity lasting more than what it declared, and thus
  520. * exhausting its runtime.
  521. *
  522. * Here we are interested in making runtime overrun possible, but we do
  523. * not want a misbehaving entity to affect the scheduling of all the
  524. * other entities.
  525. * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
  526. * is used, in order to confine each entity within its own bandwidth.
  527. *
  528. * This function deals exactly with that, and ensures that when the runtime
  529. * of an entity is replenished, its deadline is also postponed. That ensures
  530. * the overrunning entity can't interfere with other entities in the system and
  531. * can't make them miss their deadlines. Typical reasons why this kind of
  532. * overrun could happen are an entity voluntarily trying to exceed its
  533. * runtime, or having underestimated it during sched_setattr().
  534. */
  535. static void replenish_dl_entity(struct sched_dl_entity *dl_se,
  536. struct sched_dl_entity *pi_se)
  537. {
  538. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  539. struct rq *rq = rq_of_dl_rq(dl_rq);
  540. BUG_ON(pi_se->dl_runtime <= 0);
  541. /*
  542. * This could be the case for a !-dl task that is boosted.
  543. * Just go with full inherited parameters.
  544. */
  545. if (dl_se->dl_deadline == 0) {
  546. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  547. dl_se->runtime = pi_se->dl_runtime;
  548. }
  549. if (dl_se->dl_yielded && dl_se->runtime > 0)
  550. dl_se->runtime = 0;
  551. /*
  552. * We keep moving the deadline away until we get some
  553. * available runtime for the entity. This ensures correct
  554. * handling of situations where the runtime overrun is
  555. * arbitrarily large.
  556. */
  557. while (dl_se->runtime <= 0) {
  558. dl_se->deadline += pi_se->dl_period;
  559. dl_se->runtime += pi_se->dl_runtime;
  560. }
  561. /*
  562. * At this point, the deadline really should be "in
  563. * the future" with respect to rq->clock. If it's
  564. * not, we are, for some reason, lagging too much!
  565. * Anyway, after having warned userspace about that,
  566. * we still try to keep things running by
  567. * resetting the deadline and the budget of the
  568. * entity.
  569. */
  570. if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
  571. printk_deferred_once("sched: DL replenish lagged too much\n");
  572. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  573. dl_se->runtime = pi_se->dl_runtime;
  574. }
  575. if (dl_se->dl_yielded)
  576. dl_se->dl_yielded = 0;
  577. if (dl_se->dl_throttled)
  578. dl_se->dl_throttled = 0;
  579. }
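/*
 * Worked example for the replenishment loop above (made-up numbers): an
 * entity with dl_runtime = 10ms and dl_period = 100ms is replenished while
 * its runtime is -3ms (it overran by 3ms). One loop iteration gives
 *
 *   deadline += 100ms;   runtime = -3ms + 10ms = 7ms,
 *
 * so the entity resumes with only 7ms of budget and a deadline one full
 * period later: the 3ms it overran are paid back, and its long-term
 * bandwidth stays at 10ms/100ms.
 */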
  580. /*
  581. * Here we check if --at time t-- an entity (which is probably being
  582. * [re]activated or, in general, enqueued) can use its remaining runtime
  583. * and its current deadline _without_ exceeding the bandwidth it is
  584. * assigned (function returns true if it can't). We are in fact applying
  585. * one of the CBS rules: when a task wakes up, if the residual runtime
  586. * over residual deadline fits within the allocated bandwidth, then we
  587. * can keep the current (absolute) deadline and residual budget without
  588. * disrupting the schedulability of the system. Otherwise, we should
  589. * refill the runtime and set the deadline a period in the future,
  590. * because keeping the current (absolute) deadline of the task would
  591. * result in breaking guarantees promised to other tasks (refer to
  592. * Documentation/scheduler/sched-deadline.txt for more information).
  593. *
  594. * This function returns true if:
  595. *
  596. * runtime / (deadline - t) > dl_runtime / dl_deadline ,
  597. *
  598. * IOW we can't recycle current parameters.
  599. *
  600. * Notice that the bandwidth check is done against the deadline. For
  601. * tasks with deadline equal to period this is the same as using
  602. * dl_period instead of dl_deadline in the equation above.
  603. */
  604. static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
  605. struct sched_dl_entity *pi_se, u64 t)
  606. {
  607. u64 left, right;
  608. /*
  609. * left and right are the two sides of the equation above,
  610. * after a bit of shuffling to use multiplications instead
  611. * of divisions.
  612. *
  613. * Note that none of the time values involved in the two
  614. * multiplications are absolute: dl_deadline and dl_runtime
  615. * are the relative deadline and the maximum runtime of each
  616. * instance, runtime is the runtime left for the last instance
  617. * and (deadline - t), since t is rq->clock, is the time left
  618. * to the (absolute) deadline. Even if overflowing the u64 type
  619. * is very unlikely to occur in both cases, here we scale down
  620. * as we want to avoid that risk at all. Scaling down by 10
  621. * means that we reduce granularity to 1us. We are fine with it,
  622. * since this is only a true/false check and, anyway, thinking
  623. * of anything below microseconds resolution is actually fiction
  624. * (but still we want to give the user that illusion >;).
  625. */
  626. left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
  627. right = ((dl_se->deadline - t) >> DL_SCALE) *
  628. (pi_se->dl_runtime >> DL_SCALE);
  629. return dl_time_before(right, left);
  630. }
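/*
 * Worked example for the check above (made-up numbers, ignoring the
 * DL_SCALE shifts): a task with dl_runtime = 10ms and dl_deadline = 100ms
 * wakes up with 4ms of runtime left and 30ms to its old absolute deadline.
 * The cross-multiplied test compares
 *
 *   left  = dl_deadline * runtime       = 100ms * 4ms  = 400 (ms^2)
 *   right = (deadline - t) * dl_runtime =  30ms * 10ms = 300 (ms^2)
 *
 * Since right < left, the residual 4ms/30ms ~= 0.13 exceeds the allocated
 * 10ms/100ms = 0.1 bandwidth, the function returns true and the caller
 * must not reuse the current runtime/deadline pair.
 */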
  631. /*
  632. * Revised wakeup rule [1]: For self-suspending tasks, rather than
  633. * re-initializing the task's runtime and deadline, the revised wakeup
  634. * rule adjusts the task's runtime to avoid the task overrunning its
  635. * density.
  636. *
  637. * Reasoning: a task may overrun the density if:
  638. * runtime / (deadline - t) > dl_runtime / dl_deadline
  639. *
  640. * Therefore, runtime can be adjusted to:
  641. * runtime = (dl_runtime / dl_deadline) * (deadline - t)
  642. *
  643. * This way, runtime will be equal to the maximum density
  644. * the task can use without breaking any rule.
  645. *
  646. * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
  647. * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
  648. */
  649. static void
  650. update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
  651. {
  652. u64 laxity = dl_se->deadline - rq_clock(rq);
  653. /*
  654. * If the task has deadline < period, and the deadline is in the past,
  655. * it should already be throttled before this check.
  656. *
  657. * See update_dl_entity() comments for further details.
  658. */
  659. WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
  660. dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
  661. }
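/*
 * Continuing the dl_entity_overflow() example (made-up numbers): for a
 * constrained task with dl_runtime = 10ms and dl_deadline = 100ms,
 * dl_density is ~0.1 in fixed point (dl_runtime / dl_deadline scaled by
 * 2^BW_SHIFT). Waking up 30ms before its old deadline (laxity = 30ms), the
 * revised rule above trims the runtime to
 *
 *   runtime = (0.1 * BW_UNIT * 30ms) >> BW_SHIFT = 3ms,
 *
 * so running 3ms in the remaining 30ms keeps the density at 0.1 instead of
 * the 4ms/30ms that reusing the old parameters would have allowed.
 */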
  662. /*
  663. * Regarding the deadline, a task with implicit deadline has a relative
  664. * deadline == relative period. A task with constrained deadline has a
  665. * relative deadline <= relative period.
  666. *
  667. * We support constrained deadline tasks. However, there are some restrictions
  668. * applied only for tasks which do not have an implicit deadline. See
  669. * update_dl_entity() to know more about such restrictions.
  670. *
  671. * dl_is_implicit() returns true if the task has an implicit deadline.
  672. */
  673. static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
  674. {
  675. return dl_se->dl_deadline == dl_se->dl_period;
  676. }
  677. /*
  678. * When a deadline entity is placed in the runqueue, its runtime and deadline
  679. * might need to be updated. This is done by a CBS wake up rule. There are two
  680. * different rules: 1) the original CBS; and 2) the Revised CBS.
  681. *
  682. * When the task is starting a new period, the Original CBS is used. In this
  683. * case, the runtime is replenished and a new absolute deadline is set.
  684. *
  685. * When a task is queued before the beginning of the next period, using the
  686. * remaining runtime and deadline could make the entity overflow, see
  687. * dl_entity_overflow() to find more about runtime overflow. When such a case
  688. * is detected, the runtime and deadline need to be updated.
  689. *
  690. * If the task has an implicit deadline, i.e., deadline == period, the Original
  691. * CBS is applied. The runtime is replenished and a new absolute deadline is
  692. * set, as in the previous cases.
  693. *
  694. * However, the Original CBS does not work properly for tasks with
  695. * deadline < period, which are said to have a constrained deadline. By
  696. * applying the Original CBS, a constrained deadline task would be able to run
  697. * runtime/deadline in a period. With deadline < period, the task would
  698. * overrun the runtime/period allowed bandwidth, breaking the admission test.
  699. *
  700. * In order to prevent this misbehaviour, the Revised CBS is used for
  701. * constrained deadline tasks when a runtime overflow is detected. In the
  702. * Revised CBS, rather than replenishing & setting a new absolute deadline,
  703. * the remaining runtime of the task is reduced to avoid runtime overflow.
  704. * Please refer to the comments of the update_dl_revised_wakeup() function to
  705. * find more about the Revised CBS rule.
  706. */
  707. static void update_dl_entity(struct sched_dl_entity *dl_se,
  708. struct sched_dl_entity *pi_se)
  709. {
  710. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  711. struct rq *rq = rq_of_dl_rq(dl_rq);
  712. if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
  713. dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
  714. if (unlikely(!dl_is_implicit(dl_se) &&
  715. !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
  716. !dl_se->dl_boosted)){
  717. update_dl_revised_wakeup(dl_se, rq);
  718. return;
  719. }
  720. dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
  721. dl_se->runtime = pi_se->dl_runtime;
  722. }
  723. }
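/*
 * Recap of the wakeup decision implemented above (restating the code, not
 * adding new behaviour):
 *
 *   deadline already in the past                 -> replenish, new deadline
 *   deadline in the future, no overflow          -> keep runtime and deadline
 *   overflow, implicit deadline (or boosted)     -> replenish, new deadline
 *   overflow, constrained deadline, not boosted  -> revised wakeup:
 *                                                   trim runtime, keep deadline
 */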
  724. static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
  725. {
  726. return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
  727. }
  728. /*
  729. * If the entity depleted all its runtime, and if we want it to sleep
  730. * while waiting for some new execution time to become available, we
  731. * set the bandwidth replenishment timer to the replenishment instant
  732. * and try to activate it.
  733. *
  734. * Notice that it is important for the caller to know if the timer
  735. * actually started or not (i.e., the replenishment instant is in
  736. * the future or in the past).
  737. */
  738. static int start_dl_timer(struct task_struct *p)
  739. {
  740. struct sched_dl_entity *dl_se = &p->dl;
  741. struct hrtimer *timer = &dl_se->dl_timer;
  742. struct rq *rq = task_rq(p);
  743. ktime_t now, act;
  744. s64 delta;
  745. lockdep_assert_held(&rq->lock);
  746. /*
  747. * We want the timer to fire at the deadline, but we must take into
  748. * account that the deadline value actually comes from rq->clock and
  749. * not from hrtimer's time base reading.
  750. */
  751. act = ns_to_ktime(dl_next_period(dl_se));
  752. now = hrtimer_cb_get_time(timer);
  753. delta = ktime_to_ns(now) - rq_clock(rq);
  754. act = ktime_add_ns(act, delta);
  755. /*
  756. * If the expiry time already passed, e.g., because the value
  757. * chosen as the deadline is too small, don't even try to
  758. * start the timer in the past!
  759. */
  760. if (ktime_us_delta(act, now) < 0)
  761. return 0;
  762. /*
  763. * !enqueued will guarantee another callback; even if one is already in
  764. * progress. This ensures a balanced {get,put}_task_struct().
  765. *
  766. * The race against __run_timer() clearing the enqueued state is
  767. * harmless because we're holding task_rq()->lock, therefore the timer
  768. * expiring after we've done the check will wait on its task_rq_lock()
  769. * and observe our state.
  770. */
  771. if (!hrtimer_is_queued(timer)) {
  772. get_task_struct(p);
  773. hrtimer_start(timer, act, HRTIMER_MODE_ABS);
  774. }
  775. return 1;
  776. }
  777. /*
  778. * This is the bandwidth enforcement timer callback. If here, we know
  779. * a task is not on its dl_rq, since the fact that the timer was running
  780. * means the task is throttled and needs a runtime replenishment.
  781. *
  782. * However, what we actually do depends on whether the task is active
  783. * (it is on its rq) or has been removed from there by a call to
  784. * dequeue_task_dl(). In the former case we must issue the runtime
  785. * replenishment and add the task back to the dl_rq; in the latter, we just
  786. * do nothing but clearing dl_throttled, so that runtime and deadline
  787. * updating (and the queueing back to dl_rq) will be done by the
  788. * next call to enqueue_task_dl().
  789. */
  790. static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
  791. {
  792. struct sched_dl_entity *dl_se = container_of(timer,
  793. struct sched_dl_entity,
  794. dl_timer);
  795. struct task_struct *p = dl_task_of(dl_se);
  796. struct rq_flags rf;
  797. struct rq *rq;
  798. rq = task_rq_lock(p, &rf);
  799. /*
  800. * The task might have changed its scheduling policy to something
  801. * different than SCHED_DEADLINE (through switched_from_dl()).
  802. */
  803. if (!dl_task(p))
  804. goto unlock;
  805. /*
  806. * The task might have been boosted by someone else and might be in the
  807. * boosting/deboosting path; in that case, it's not throttled.
  808. */
  809. if (dl_se->dl_boosted)
  810. goto unlock;
  811. /*
  812. * Spurious timer due to start_dl_timer() race; or we already received
  813. * a replenishment from rt_mutex_setprio().
  814. */
  815. if (!dl_se->dl_throttled)
  816. goto unlock;
  817. sched_clock_tick();
  818. update_rq_clock(rq);
  819. /*
  820. * If the throttle happened during sched-out; like:
  821. *
  822. * schedule()
  823. * deactivate_task()
  824. * dequeue_task_dl()
  825. * update_curr_dl()
  826. * start_dl_timer()
  827. * __dequeue_task_dl()
  828. * prev->on_rq = 0;
  829. *
  830. * We can be both throttled and !queued. Replenish the counter
  831. * but do not enqueue -- wait for our wakeup to do that.
  832. */
  833. if (!task_on_rq_queued(p)) {
  834. replenish_dl_entity(dl_se, dl_se);
  835. goto unlock;
  836. }
  837. #ifdef CONFIG_SMP
  838. if (unlikely(!rq->online)) {
  839. /*
  840. * If the runqueue is no longer available, migrate the
  841. * task elsewhere. This necessarily changes rq.
  842. */
  843. lockdep_unpin_lock(&rq->lock, rf.cookie);
  844. rq = dl_task_offline_migration(rq, p);
  845. rf.cookie = lockdep_pin_lock(&rq->lock);
  846. update_rq_clock(rq);
  847. /*
  848. * Now that the task has been migrated to the new RQ and we
  849. * have that locked, proceed as normal and enqueue the task
  850. * there.
  851. */
  852. }
  853. #endif
  854. enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
  855. if (dl_task(rq->curr))
  856. check_preempt_curr_dl(rq, p, 0);
  857. else
  858. resched_curr(rq);
  859. #ifdef CONFIG_SMP
  860. /*
  861. * Queueing this task back might have overloaded rq, check if we need
  862. * to kick someone away.
  863. */
  864. if (has_pushable_dl_tasks(rq)) {
  865. /*
  866. * Nothing relies on rq->lock after this, so it's safe to drop
  867. * rq->lock.
  868. */
  869. rq_unpin_lock(rq, &rf);
  870. push_dl_task(rq);
  871. rq_repin_lock(rq, &rf);
  872. }
  873. #endif
  874. unlock:
  875. task_rq_unlock(rq, p, &rf);
  876. /*
  877. * This can free the task_struct, including this hrtimer, do not touch
  878. * anything related to that after this.
  879. */
  880. put_task_struct(p);
  881. return HRTIMER_NORESTART;
  882. }
  883. void init_dl_task_timer(struct sched_dl_entity *dl_se)
  884. {
  885. struct hrtimer *timer = &dl_se->dl_timer;
  886. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  887. timer->function = dl_task_timer;
  888. }
  889. /*
  890. * During the activation, CBS checks if it can reuse the current task's
  891. * runtime and period. If the deadline of the task is in the past, CBS
  892. * cannot use the runtime, and so it replenishes the task. This rule
  893. * works fine for implicit deadline tasks (deadline == period), and the
  894. * CBS was designed for implicit deadline tasks. However, a task with
  895. * constrained deadline (deadline < period) might be awakened after the
  896. * deadline, but before the next period. In this case, replenishing the
  897. * task would allow it to run for runtime / deadline. As in this case
  898. * deadline < period, CBS enables a task to run for more than the
  899. * runtime / period. In a very loaded system, this can cause a domino
  900. * effect, making other tasks miss their deadlines.
  901. *
  902. * To avoid this problem, in the activation of a constrained deadline
  903. * task after the deadline but before the next period, throttle the
  904. * task and set the replenishing timer to the beginning of the next period,
  905. * unless it is boosted.
  906. */
  907. static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
  908. {
  909. struct task_struct *p = dl_task_of(dl_se);
  910. struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
  911. if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
  912. dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
  913. if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
  914. return;
  915. dl_se->dl_throttled = 1;
  916. if (dl_se->runtime > 0)
  917. dl_se->runtime = 0;
  918. }
  919. }
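/*
 * Example of the situation handled above (made-up numbers): a constrained
 * task with dl_runtime = 5ms, dl_deadline = 50ms and dl_period = 100ms
 * wakes up 60ms into its current period, i.e. after its deadline but 40ms
 * before the next period starts. Replenishing it here would let it run
 * 5ms every 50ms (10% bandwidth) although only 5ms/100ms (5%) was admitted;
 * instead, the task is throttled with zero runtime and dl_timer is armed
 * for the start of the next period (dl_next_period()), where the normal
 * replenishment happens.
 */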
  920. static
  921. int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
  922. {
  923. return (dl_se->runtime <= 0);
  924. }
  925. extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
  926. /*
  927. * This function implements the GRUB accounting rule:
  928. * according to the GRUB reclaiming algorithm, the runtime is
  929. * not decreased as "dq = -dt", but as
  930. * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
  931. * where u is the utilization of the task, Umax is the maximum reclaimable
  932. * utilization, Uinact is the (per-runqueue) inactive utilization, computed
  933. * as the difference between the "total runqueue utilization" and the
  934. * runqueue active utilization, and Uextra is the (per runqueue) extra
  935. * reclaimable utilization.
  936. * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
  937. * multiplied by 2^BW_SHIFT, the result has to be shifted right by
  938. * BW_SHIFT.
  939. * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
  940. * dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
  941. * Since delta is a 64 bit variable, to have an overflow its value
  942. * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
  943. * So, overflow is not an issue here.
  944. */
  945. static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
  946. {
  947. u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
  948. u64 u_act;
  949. u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
  950. /*
  951. * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
  952. * we compare u_inact + rq->dl.extra_bw with
  953. * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
  954. * u_inact + rq->dl.extra_bw can be larger than
  955. * 1 (so, 1 - u_inact - rq->dl.extra_bw would be negative
  956. * leading to wrong results)
  957. */
  958. if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
  959. u_act = u_act_min;
  960. else
  961. u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
  962. return (delta * u_act) >> BW_SHIFT;
  963. }
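/*
 * Worked example for grub_reclaim() (made-up numbers, expressed as
 * fractions of BW_UNIT): on a runqueue with this_bw = 0.7, running_bw = 0.5
 * and extra_bw = 0.1, a task whose u_act_min is 0.2 gets
 *
 *   u_inact = 0.7 - 0.5 = 0.2
 *   u_inact + extra_bw = 0.3 <= 1 - u_act_min = 0.8, so
 *   u_act = 1 - 0.2 - 0.1 = 0.7
 *
 * and 1ms of execution consumes only 0.7ms of reserved runtime: the task
 * reclaims the bandwidth that is currently inactive or reserved as extra.
 */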
  964. /*
  965. * Update the current task's runtime statistics (provided it is still
  966. * a -deadline task and has not been removed from the dl_rq).
  967. */
  968. static void update_curr_dl(struct rq *rq)
  969. {
  970. struct task_struct *curr = rq->curr;
  971. struct sched_dl_entity *dl_se = &curr->dl;
  972. u64 delta_exec;
  973. if (!dl_task(curr) || !on_dl_rq(dl_se))
  974. return;
  975. /*
  976. * Consumed budget is computed considering the time as
  977. * observed by schedulable tasks (excluding time spent
  978. * in hardirq context, etc.). Deadlines are instead
  979. * computed using hard walltime. This seems to be the more
  980. * natural solution, but the full ramifications of this
  981. * approach need further study.
  982. */
  983. delta_exec = rq_clock_task(rq) - curr->se.exec_start;
  984. if (unlikely((s64)delta_exec <= 0)) {
  985. if (unlikely(dl_se->dl_yielded))
  986. goto throttle;
  987. return;
  988. }
  989. /* kick cpufreq (see the comment in kernel/sched/sched.h). */
  990. cpufreq_update_util(rq, SCHED_CPUFREQ_DL);
  991. schedstat_set(curr->se.statistics.exec_max,
  992. max(curr->se.statistics.exec_max, delta_exec));
  993. curr->se.sum_exec_runtime += delta_exec;
  994. account_group_exec_runtime(curr, delta_exec);
  995. curr->se.exec_start = rq_clock_task(rq);
  996. cpuacct_charge(curr, delta_exec);
  997. sched_rt_avg_update(rq, delta_exec);
  998. if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM))
  999. delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);
  1000. dl_se->runtime -= delta_exec;
  1001. throttle:
  1002. if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
  1003. dl_se->dl_throttled = 1;
  1004. __dequeue_task_dl(rq, curr, 0);
  1005. if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
  1006. enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
  1007. if (!is_leftmost(curr, &rq->dl))
  1008. resched_curr(rq);
  1009. }
  1010. /*
  1011. * Because -- for now -- we share the rt bandwidth, we need to
  1012. * account our runtime there too, otherwise actual rt tasks
  1013. * would be able to exceed the shared quota.
  1014. *
  1015. * Account to the root rt group for now.
  1016. *
  1017. * The solution we're working towards is having the RT groups scheduled
  1018. * using deadline servers -- however there's a few nasties to figure
  1019. * out before that can happen.
  1020. */
  1021. if (rt_bandwidth_enabled()) {
  1022. struct rt_rq *rt_rq = &rq->rt;
  1023. raw_spin_lock(&rt_rq->rt_runtime_lock);
  1024. /*
  1025. * We'll let actual RT tasks worry about the overflow here, we
  1026. * have our own CBS to keep us inline; only account when RT
  1027. * bandwidth is relevant.
  1028. */
  1029. if (sched_rt_bandwidth_account(rt_rq))
  1030. rt_rq->rt_time += delta_exec;
  1031. raw_spin_unlock(&rt_rq->rt_runtime_lock);
  1032. }
  1033. }
  1034. static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
  1035. {
  1036. struct sched_dl_entity *dl_se = container_of(timer,
  1037. struct sched_dl_entity,
  1038. inactive_timer);
  1039. struct task_struct *p = dl_task_of(dl_se);
  1040. struct rq_flags rf;
  1041. struct rq *rq;
  1042. rq = task_rq_lock(p, &rf);
  1043. if (!dl_task(p) || p->state == TASK_DEAD) {
  1044. struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
  1045. if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
  1046. sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
  1047. sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
  1048. dl_se->dl_non_contending = 0;
  1049. }
  1050. raw_spin_lock(&dl_b->lock);
  1051. __dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
  1052. raw_spin_unlock(&dl_b->lock);
  1053. __dl_clear_params(p);
  1054. goto unlock;
  1055. }
  1056. if (dl_se->dl_non_contending == 0)
  1057. goto unlock;
  1058. sched_clock_tick();
  1059. update_rq_clock(rq);
  1060. sub_running_bw(dl_se->dl_bw, &rq->dl);
  1061. dl_se->dl_non_contending = 0;
  1062. unlock:
  1063. task_rq_unlock(rq, p, &rf);
  1064. put_task_struct(p);
  1065. return HRTIMER_NORESTART;
  1066. }
  1067. void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
  1068. {
  1069. struct hrtimer *timer = &dl_se->inactive_timer;
  1070. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  1071. timer->function = inactive_task_timer;
  1072. }
  1073. #ifdef CONFIG_SMP
  1074. static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
  1075. {
  1076. struct rq *rq = rq_of_dl_rq(dl_rq);
  1077. if (dl_rq->earliest_dl.curr == 0 ||
  1078. dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
  1079. dl_rq->earliest_dl.curr = deadline;
  1080. cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
  1081. }
  1082. }
  1083. static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
  1084. {
  1085. struct rq *rq = rq_of_dl_rq(dl_rq);
  1086. /*
  1087. * Since we may have removed our earliest (and/or next earliest)
  1088. * task we must recompute them.
  1089. */
  1090. if (!dl_rq->dl_nr_running) {
  1091. dl_rq->earliest_dl.curr = 0;
  1092. dl_rq->earliest_dl.next = 0;
  1093. cpudl_clear(&rq->rd->cpudl, rq->cpu);
  1094. } else {
  1095. struct rb_node *leftmost = dl_rq->root.rb_leftmost;
  1096. struct sched_dl_entity *entry;
  1097. entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
  1098. dl_rq->earliest_dl.curr = entry->deadline;
  1099. cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
  1100. }
  1101. }
  1102. #else
  1103. static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
  1104. static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
  1105. #endif /* CONFIG_SMP */
  1106. static inline
  1107. void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  1108. {
  1109. int prio = dl_task_of(dl_se)->prio;
  1110. u64 deadline = dl_se->deadline;
  1111. WARN_ON(!dl_prio(prio));
  1112. dl_rq->dl_nr_running++;
  1113. add_nr_running(rq_of_dl_rq(dl_rq), 1);
  1114. walt_inc_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
  1115. inc_dl_deadline(dl_rq, deadline);
  1116. inc_dl_migration(dl_se, dl_rq);
  1117. }
  1118. static inline
  1119. void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
  1120. {
  1121. int prio = dl_task_of(dl_se)->prio;
  1122. WARN_ON(!dl_prio(prio));
  1123. WARN_ON(!dl_rq->dl_nr_running);
  1124. dl_rq->dl_nr_running--;
  1125. sub_nr_running(rq_of_dl_rq(dl_rq), 1);
  1126. walt_dec_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
  1127. dec_dl_deadline(dl_rq, dl_se->deadline);
  1128. dec_dl_migration(dl_se, dl_rq);
  1129. }
  1130. static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
  1131. {
  1132. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  1133. struct rb_node **link = &dl_rq->root.rb_root.rb_node;
  1134. struct rb_node *parent = NULL;
  1135. struct sched_dl_entity *entry;
  1136. int leftmost = 1;
  1137. BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
  1138. while (*link) {
  1139. parent = *link;
  1140. entry = rb_entry(parent, struct sched_dl_entity, rb_node);
  1141. if (dl_time_before(dl_se->deadline, entry->deadline))
  1142. link = &parent->rb_left;
  1143. else {
  1144. link = &parent->rb_right;
  1145. leftmost = 0;
  1146. }
  1147. }
  1148. rb_link_node(&dl_se->rb_node, parent, link);
  1149. rb_insert_color_cached(&dl_se->rb_node, &dl_rq->root, leftmost);
  1150. inc_dl_tasks(dl_se, dl_rq);
  1151. }
  1152. static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
  1153. {
  1154. struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
  1155. if (RB_EMPTY_NODE(&dl_se->rb_node))
  1156. return;
  1157. rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
  1158. RB_CLEAR_NODE(&dl_se->rb_node);
  1159. dec_dl_tasks(dl_se, dl_rq);
  1160. }
  1161. static void
  1162. enqueue_dl_entity(struct sched_dl_entity *dl_se,
  1163. struct sched_dl_entity *pi_se, int flags)
  1164. {
  1165. BUG_ON(on_dl_rq(dl_se));
  1166. /*
  1167. * If this is a wakeup or a new instance, the scheduling
  1168. * parameters of the task might need updating. Otherwise,
  1169. * we want a replenishment of its runtime.
  1170. */
  1171. if (flags & ENQUEUE_WAKEUP) {
  1172. task_contending(dl_se, flags);
  1173. update_dl_entity(dl_se, pi_se);
  1174. } else if (flags & ENQUEUE_REPLENISH) {
  1175. replenish_dl_entity(dl_se, pi_se);
  1176. } else if ((flags & ENQUEUE_RESTORE) &&
  1177. dl_time_before(dl_se->deadline,
  1178. rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
  1179. setup_new_dl_entity(dl_se);
  1180. }
  1181. __enqueue_dl_entity(dl_se);
  1182. }
  1183. static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
  1184. {
  1185. __dequeue_dl_entity(dl_se);
  1186. }

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
	struct task_struct *pi_task = rt_mutex_get_top_task(p);
	struct sched_dl_entity *pi_se = &p->dl;

	/*
	 * Use the scheduling parameters of the top pi-waiter task if:
	 * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
	 * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
	 *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
	 *   boosted due to a SCHED_DEADLINE pi-waiter).
	 * Otherwise we keep our runtime and deadline.
	 */
	if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
		pi_se = &pi_task->dl;
	} else if (!dl_prio(p->normal_prio)) {
		/*
		 * Special case in which we have a !SCHED_DEADLINE task
		 * that is going to be deboosted, but exceeds its
		 * runtime while doing so. No point in replenishing
		 * it, as it's going to return back to its original
		 * scheduling class after this.
		 */
		BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
		return;
	}

	/*
	 * Check if a constrained deadline task was activated
	 * after the deadline but before the next period.
	 * If that is the case, the task will be throttled and
	 * the replenishment timer will be set to the next period.
	 */
	if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
		dl_check_constrained_dl(&p->dl);

	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
		add_rq_bw(p->dl.dl_bw, &rq->dl);
		add_running_bw(p->dl.dl_bw, &rq->dl);
	}

	/*
	 * If p is throttled, we do not enqueue it. In fact, if it exhausted
	 * its budget it needs a replenishment and, since it now is on
	 * its rq, the bandwidth timer callback (which clearly has not
	 * run yet) will take care of this.
	 * However, the active utilization does not depend on the fact
	 * that the task is on the runqueue or not (but depends on the
	 * task's state - in GRUB parlance, "inactive" vs "active contending").
	 * In other words, even if a task is throttled its utilization must
	 * be counted in the active utilization; hence, we need to call
	 * add_running_bw().
	 */
	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
		if (flags & ENQUEUE_WAKEUP)
			task_contending(&p->dl, flags);

		return;
	}

	enqueue_dl_entity(&p->dl, pi_se, flags);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_dl_task(rq, p);
}

static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
	dequeue_dl_entity(&p->dl);
	dequeue_pushable_dl_task(rq, p);
}

static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
	update_curr_dl(rq);
	__dequeue_task_dl(rq, p, flags);

	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
		sub_running_bw(p->dl.dl_bw, &rq->dl);
		sub_rq_bw(p->dl.dl_bw, &rq->dl);
	}

	/*
	 * This check allows us to start the inactive timer (or to immediately
	 * decrease the active utilization, if needed) in two cases:
	 * when the task blocks and when it is terminating
	 * (p->state == TASK_DEAD). We can handle the two cases in the same
	 * way, because from GRUB's point of view the same thing is happening
	 * (the task moves from "active contending" to "active non contending"
	 * or "inactive").
	 */
	if (flags & DEQUEUE_SLEEP)
		task_non_contending(p);
}

/*
 * Yield task semantic for -deadline tasks is:
 *
 *   get off from the CPU until our next instance, with
 *   a new runtime. This is of little use now, since we
 *   don't have a bandwidth reclaiming mechanism. Anyway,
 *   bandwidth reclaiming is planned for the future, and
 *   yield_task_dl will indicate that some spare budget
 *   is available for other task instances to use.
 */
static void yield_task_dl(struct rq *rq)
{
	/*
	 * We make the task go to sleep until its current deadline by
	 * forcing its runtime to zero. This way, update_curr_dl() stops
	 * it and the bandwidth timer will wake it up and will give it
	 * new scheduling parameters (thanks to dl_yielded=1).
	 */
	rq->curr->dl.dl_yielded = 1;

	update_rq_clock(rq);
	update_curr_dl(rq);
	/*
	 * Tell update_rq_clock() that we've just updated,
	 * so we don't do microscopic update in schedule()
	 * and double the fastpath cost.
	 */
	rq_clock_skip_update(rq, true);
}

#ifdef CONFIG_SMP

static int find_later_rq(struct task_struct *task);

static int
select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags,
		  int sibling_count_hint)
{
	struct task_struct *curr;
	struct rq *rq;

	if (sd_flag != SD_BALANCE_WAKE)
		goto out;

	rq = cpu_rq(cpu);

	rcu_read_lock();
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If we are dealing with a -deadline task, we must
	 * decide where to wake it up.
	 * If it has a later deadline and the current task
	 * on this rq can't move (provided the waking task
	 * can!) we prefer to send it somewhere else. On the
	 * other hand, if it has a shorter deadline, we
	 * try to make it stay here, it might be important.
	 */
	if (unlikely(dl_task(curr)) &&
	    (curr->nr_cpus_allowed < 2 ||
	     !dl_entity_preempt(&p->dl, &curr->dl)) &&
	    (p->nr_cpus_allowed > 1)) {
		int target = find_later_rq(p);

		if (target != -1 &&
		    (dl_time_before(p->dl.deadline,
				    cpu_rq(target)->dl.earliest_dl.curr) ||
		     (cpu_rq(target)->dl.dl_nr_running == 0)))
			cpu = target;
	}
	rcu_read_unlock();

out:
	return cpu;
}

static void migrate_task_rq_dl(struct task_struct *p)
{
	struct rq *rq;

	if (p->state != TASK_WAKING)
		return;

	rq = task_rq(p);
	/*
	 * Since p->state == TASK_WAKING, set_task_cpu() has been called
	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but
	 * rq->lock is not... So, lock it.
	 */
	raw_spin_lock(&rq->lock);
	if (p->dl.dl_non_contending) {
		sub_running_bw(p->dl.dl_bw, &rq->dl);
		p->dl.dl_non_contending = 0;
		/*
		 * If the timer handler is currently running and the
		 * timer cannot be cancelled, inactive_task_timer()
		 * will see that dl_non_contending is not set, and
		 * will not touch the rq's active utilization,
		 * so we are still safe.
		 */
		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
			put_task_struct(p);
	}
	sub_rq_bw(p->dl.dl_bw, &rq->dl);
	raw_spin_unlock(&rq->lock);
}

static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
{
	/*
	 * Current can't be migrated, useless to reschedule,
	 * let's hope p can move out.
	 */
	if (rq->curr->nr_cpus_allowed == 1 ||
	    !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
		return;

	/*
	 * p is migratable, so let's not schedule it and
	 * see if it is pushed or pulled somewhere else.
	 */
	if (p->nr_cpus_allowed != 1 &&
	    cpudl_find(&rq->rd->cpudl, p, NULL))
		return;

	resched_curr(rq);
}

#endif /* CONFIG_SMP */

/*
 * Only called when both the current and waking task are -deadline
 * tasks.
 */
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
				  int flags)
{
	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
		resched_curr(rq);
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * In the unlikely case current and p have the same deadline
	 * let us try to decide what's the best thing to do...
	 */
	if ((p->dl.deadline == rq->curr->dl.deadline) &&
	    !test_tsk_need_resched(rq->curr))
		check_preempt_equal_dl(rq, p);
#endif /* CONFIG_SMP */
}

#ifdef CONFIG_SCHED_HRTICK
static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
{
	hrtick_start(rq, p->dl.runtime);
}
#else /* !CONFIG_SCHED_HRTICK */
static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
{
}
#endif
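
/*
 * Pick the leftmost entity of the deadline-ordered rb-tree, i.e. the one
 * with the earliest absolute deadline, or return NULL if the tree is empty.
 */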
static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
						   struct dl_rq *dl_rq)
{
	struct rb_node *left = rb_first_cached(&dl_rq->root);

	if (!left)
		return NULL;

	return rb_entry(left, struct sched_dl_entity, rb_node);
}

static struct task_struct *
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	struct sched_dl_entity *dl_se;
	struct task_struct *p;
	struct dl_rq *dl_rq;

	dl_rq = &rq->dl;

	if (need_pull_dl_task(rq, prev)) {
		/*
		 * This is OK, because current is on_cpu, which avoids it being
		 * picked for load-balance and preemption/IRQs are still
		 * disabled avoiding further scheduler activity on it and we're
		 * being very careful to re-start the picking loop.
		 */
		rq_unpin_lock(rq, rf);
		pull_dl_task(rq);
		rq_repin_lock(rq, rf);
		/*
		 * pull_dl_task() can drop (and re-acquire) rq->lock; this
		 * means a stop task can slip in, in which case we need to
		 * re-start task selection.
		 */
		if (rq->stop && task_on_rq_queued(rq->stop))
			return RETRY_TASK;
	}

	/*
	 * When prev is DL, we may throttle it in put_prev_task().
	 * So, we update time before we check for dl_nr_running.
	 */
	if (prev->sched_class == &dl_sched_class)
		update_curr_dl(rq);

	if (unlikely(!dl_rq->dl_nr_running))
		return NULL;

	put_prev_task(rq, prev);

	dl_se = pick_next_dl_entity(rq, dl_rq);
	BUG_ON(!dl_se);

	p = dl_task_of(dl_se);
	p->se.exec_start = rq_clock_task(rq);

	/* Running task will never be pushed. */
	dequeue_pushable_dl_task(rq, p);

	if (hrtick_enabled(rq))
		start_hrtick_dl(rq, p);

	queue_push_tasks(rq);

	return p;
}

static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
{
	update_curr_dl(rq);

	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
		enqueue_pushable_dl_task(rq, p);
}

static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
{
	update_curr_dl(rq);

	/*
	 * Even when we have runtime, update_curr_dl() might have resulted in us
	 * not being the leftmost task anymore. In that case NEED_RESCHED will
	 * be set and schedule() will start a new hrtick for the next task.
	 */
	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
	    is_leftmost(p, &rq->dl))
		start_hrtick_dl(rq, p);
}

static void task_fork_dl(struct task_struct *p)
{
	/*
	 * SCHED_DEADLINE tasks cannot fork; this is enforced in
	 * sched_fork().
	 */
}

static void set_curr_task_dl(struct rq *rq)
{
	struct task_struct *p = rq->curr;

	p->se.exec_start = rq_clock_task(rq);

	/* You can't push away the running task */
	dequeue_pushable_dl_task(rq, p);
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define DL_MAX_TRIES 3
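
/*
 * A task is a candidate for being moved off this rq only if it is not
 * currently running here and the destination CPU is in its affinity mask.
 */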
static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    cpumask_test_cpu(cpu, &p->cpus_allowed))
		return 1;
	return 0;
}

/*
 * Return the earliest pushable task on this rq that is suitable to be
 * executed on the given CPU, or NULL if there is none:
 */
static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
{
	struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost;
	struct task_struct *p = NULL;

	if (!has_pushable_dl_tasks(rq))
		return NULL;

next_node:
	if (next_node) {
		p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);

		if (pick_dl_task(rq, p, cpu))
			return p;

		next_node = rb_next(next_node);
		goto next_node;
	}

	return NULL;
}
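
/* Per-CPU scratch mask used by find_later_rq() below. */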
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);

static int find_later_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);

	/* Make sure the mask is initialized first */
	if (unlikely(!later_mask))
		return -1;

	if (task->nr_cpus_allowed == 1)
		return -1;

	/*
	 * We have to consider system topology and task affinity
	 * first, then we can look for a suitable cpu.
	 */
	if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
		return -1;

	/*
	 * If we are here, some targets have been found, including
	 * the most suitable which is, among the runqueues where the
	 * current tasks have later deadlines than the task's one, the
	 * rq with the latest possible one.
	 *
	 * Now we check how well this matches with task's
	 * affinity and system topology.
	 *
	 * The last cpu where the task ran is our first
	 * guess, since it is most likely cache-hot there.
	 */
	if (cpumask_test_cpu(cpu, later_mask))
		return cpu;
	/*
	 * Check if this_cpu is to be skipped (i.e., it is
	 * not in the mask) or not.
	 */
	if (!cpumask_test_cpu(this_cpu, later_mask))
		this_cpu = -1;

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * If possible, preempting this_cpu is
			 * cheaper than migrating.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_first_and(later_mask,
						     sched_domain_span(sd));
			/*
			 * Last chance: if a cpu that is in both later_mask
			 * and the current sd span is valid, that becomes our
			 * choice. Of course, the latest possible cpu is
			 * already under consideration through later_mask.
			 */
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * At this point, all our guesses failed, we just return
	 * 'something', and let the caller sort the things out.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(later_mask);
	if (cpu < nr_cpu_ids)
		return cpu;

	return -1;
}

/* Locks the rq it finds */
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *later_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
		cpu = find_later_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		later_rq = cpu_rq(cpu);

		if (later_rq->dl.dl_nr_running &&
		    !dl_time_before(task->dl.deadline,
				    later_rq->dl.earliest_dl.curr)) {
			/*
			 * Target rq has tasks of equal or earlier deadline,
			 * retrying does not release any lock and is unlikely
			 * to yield a different result.
			 */
			later_rq = NULL;
			break;
		}

		/* Retry if something changed. */
		if (double_lock_balance(rq, later_rq)) {
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
				     task_running(rq, task) ||
				     !dl_task(task) ||
				     !task_on_rq_queued(task))) {
				double_unlock_balance(rq, later_rq);
				later_rq = NULL;
				break;
			}
		}

		/*
		 * If the rq we found has no -deadline task, or
		 * its earliest one has a later deadline than our
		 * task, the rq is a good one.
		 */
		if (!later_rq->dl.dl_nr_running ||
		    dl_time_before(task->dl.deadline,
				   later_rq->dl.earliest_dl.curr))
			break;

		/* Otherwise we try again. */
		double_unlock_balance(rq, later_rq);
		later_rq = NULL;
	}

	return later_rq;
}
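
/*
 * Return the pushable task with the earliest deadline on this rq; the
 * BUG_ON()s spell out the invariants a pushable task must satisfy.
 */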
static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_dl_tasks(rq))
		return NULL;

	p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost,
		     struct task_struct, pushable_dl_tasks);

	BUG_ON(rq->cpu != task_cpu(p));
	BUG_ON(task_current(rq, p));
	BUG_ON(p->nr_cpus_allowed <= 1);

	BUG_ON(!task_on_rq_queued(p));
	BUG_ON(!dl_task(p));

	return p;
}

/*
 * See if the non-running -deadline tasks on this rq
 * can be sent to some other CPU where they can preempt
 * and start executing.
 */
static int push_dl_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *later_rq;
	int ret = 0;

	if (!rq->dl.overloaded)
		return 0;

	next_task = pick_next_pushable_dl_task(rq);
	if (!next_task)
		return 0;

retry:
	if (unlikely(next_task == rq->curr)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * If next_task preempts rq->curr, and rq->curr
	 * can move away, it makes sense to just reschedule
	 * without going further in pushing next_task.
	 */
	if (dl_task(rq->curr) &&
	    dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
	    rq->curr->nr_cpus_allowed > 1) {
		resched_curr(rq);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* Will lock the rq it'll find */
	later_rq = find_lock_later_rq(next_task, rq);
	if (!later_rq) {
		struct task_struct *task;

		/*
		 * We must check all this again, since
		 * find_lock_later_rq releases rq->lock and it is
		 * then possible that next_task has migrated.
		 */
		task = pick_next_pushable_dl_task(rq);
		if (task == next_task) {
			/*
			 * The task is still there. We don't try
			 * again, some other cpu will pull it when ready.
			 */
			goto out;
		}

		if (!task)
			/* No more tasks */
			goto out;

		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	deactivate_task(rq, next_task, 0);
	sub_running_bw(next_task->dl.dl_bw, &rq->dl);
	sub_rq_bw(next_task->dl.dl_bw, &rq->dl);
	next_task->on_rq = TASK_ON_RQ_MIGRATING;
	set_task_cpu(next_task, later_rq->cpu);
	next_task->on_rq = TASK_ON_RQ_QUEUED;
	add_rq_bw(next_task->dl.dl_bw, &later_rq->dl);
	add_running_bw(next_task->dl.dl_bw, &later_rq->dl);
	activate_task(later_rq, next_task, 0);
	ret = 1;

	resched_curr(later_rq);

	double_unlock_balance(rq, later_rq);

out:
	put_task_struct(next_task);

	return ret;
}

static void push_dl_tasks(struct rq *rq)
{
	/* push_dl_task() will return true if it moved a -deadline task */
	while (push_dl_task(rq))
		;
}
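
/*
 * Scan the runqueues marked as DL-overloaded and pull over any pushable
 * task whose deadline is earlier than what we are running here, moving its
 * bandwidth accounting along with it.
 */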
static void pull_dl_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, cpu;
	struct task_struct *p;
	bool resched = false;
	struct rq *src_rq;
	u64 dmin = LONG_MAX;

	if (likely(!dl_overloaded(this_rq)))
		return;

	/*
	 * Match the barrier from dl_set_overloaded; this guarantees that if we
	 * see overloaded we must also see the dlo_mask bit.
	 */
	smp_rmb();

	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

		/*
		 * It looks racy, and it is! However, as in sched_rt.c,
		 * we are fine with this.
		 */
		if (this_rq->dl.dl_nr_running &&
		    dl_time_before(this_rq->dl.earliest_dl.curr,
				   src_rq->dl.earliest_dl.next))
			continue;

		/* Might drop this_rq->lock */
		double_lock_balance(this_rq, src_rq);

		/*
		 * If there are no more pullable tasks on the
		 * rq, we're done with it.
		 */
		if (src_rq->dl.dl_nr_running <= 1)
			goto skip;

		p = pick_earliest_pushable_dl_task(src_rq, this_cpu);

		/*
		 * We found a task to be pulled if:
		 *  - it preempts our current (if there's one),
		 *  - it will preempt the last one we pulled (if any).
		 */
		if (p && dl_time_before(p->dl.deadline, dmin) &&
		    (!this_rq->dl.dl_nr_running ||
		     dl_time_before(p->dl.deadline,
				    this_rq->dl.earliest_dl.curr))) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!task_on_rq_queued(p));

			/*
			 * Then we pull iff p has actually an earlier
			 * deadline than the current task of its runqueue.
			 */
			if (dl_time_before(p->dl.deadline,
					   src_rq->curr->dl.deadline))
				goto skip;

			resched = true;

			deactivate_task(src_rq, p, 0);
			sub_running_bw(p->dl.dl_bw, &src_rq->dl);
			sub_rq_bw(p->dl.dl_bw, &src_rq->dl);
			p->on_rq = TASK_ON_RQ_MIGRATING;
			set_task_cpu(p, this_cpu);
			p->on_rq = TASK_ON_RQ_QUEUED;
			add_rq_bw(p->dl.dl_bw, &this_rq->dl);
			add_running_bw(p->dl.dl_bw, &this_rq->dl);
			activate_task(this_rq, p, 0);
			dmin = p->dl.deadline;

			/* Is there any other task even earlier? */
		}
skip:
		double_unlock_balance(this_rq, src_rq);
	}

	if (resched)
		resched_curr(this_rq);
}

/*
 * Since the task is not running and a reschedule is not going to happen
 * anytime soon on its runqueue, we try pushing it away now.
 */
static void task_woken_dl(struct rq *rq, struct task_struct *p)
{
	if (!task_running(rq, p) &&
	    !test_tsk_need_resched(rq->curr) &&
	    p->nr_cpus_allowed > 1 &&
	    dl_task(rq->curr) &&
	    (rq->curr->nr_cpus_allowed < 2 ||
	     !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
		push_dl_tasks(rq);
	}
}

static void set_cpus_allowed_dl(struct task_struct *p,
				const struct cpumask *new_mask)
{
	struct root_domain *src_rd;
	struct rq *rq;

	BUG_ON(!dl_task(p));

	rq = task_rq(p);
	src_rd = rq->rd;
	/*
	 * Migrating a SCHED_DEADLINE task between exclusive
	 * cpusets (different root_domains) entails a bandwidth
	 * update. We already made space for us in the destination
	 * domain (see cpuset_can_attach()).
	 */
	if (!cpumask_intersects(src_rd->span, new_mask)) {
		struct dl_bw *src_dl_b;

		src_dl_b = dl_bw_of(cpu_of(rq));
		/*
		 * We now free resources of the root_domain we are migrating
		 * off. In the worst case, sched_setattr() may temporarily fail
		 * until we complete the update.
		 */
		raw_spin_lock(&src_dl_b->lock);
		__dl_clear(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
		raw_spin_unlock(&src_dl_b->lock);
	}

	set_cpus_allowed_common(p, new_mask);
}

/* Assumes rq->lock is held */
static void rq_online_dl(struct rq *rq)
{
	if (rq->dl.overloaded)
		dl_set_overload(rq);

	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
	if (rq->dl.dl_nr_running > 0)
		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_dl(struct rq *rq)
{
	if (rq->dl.overloaded)
		dl_clear_overload(rq);

	cpudl_clear(&rq->rd->cpudl, rq->cpu);
	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
}
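
/* Allocate, at boot, the per-CPU masks used by find_later_rq(). */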
void __init init_sched_dl_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
					GFP_KERNEL, cpu_to_node(i));
}

#endif /* CONFIG_SMP */

static void switched_from_dl(struct rq *rq, struct task_struct *p)
{
	/*
	 * task_non_contending() can start the "inactive timer" (if the 0-lag
	 * time is in the future). If the task switches back to dl before
	 * the "inactive timer" fires, it can continue to consume its current
	 * runtime using its current deadline. If it stays outside of
	 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
	 * will reset the task parameters.
	 */
	if (task_on_rq_queued(p) && p->dl.dl_runtime)
		task_non_contending(p);

	if (!task_on_rq_queued(p))
		sub_rq_bw(p->dl.dl_bw, &rq->dl);

	/*
	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
	 * at the 0-lag time, because the task could have been migrated
	 * while SCHED_OTHER in the meanwhile.
	 */
	if (p->dl.dl_non_contending)
		p->dl.dl_non_contending = 0;

	/*
	 * Since this might be the only -deadline task on the rq,
	 * this is the right place to try to pull some other one
	 * from an overloaded cpu, if any.
	 */
	if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
		return;

	queue_pull_task(rq);
}

/*
 * When switching to -deadline, we may overload the rq, then
 * we try to push someone off, if possible.
 */
static void switched_to_dl(struct rq *rq, struct task_struct *p)
{
	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
		put_task_struct(p);

	/* If p is not queued we will update its parameters at next wakeup. */
	if (!task_on_rq_queued(p)) {
		add_rq_bw(p->dl.dl_bw, &rq->dl);

		return;
	}

	if (rq->curr != p) {
#ifdef CONFIG_SMP
		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
			queue_push_tasks(rq);
#endif
		if (dl_task(rq->curr))
			check_preempt_curr_dl(rq, p, 0);
		else
			resched_curr(rq);
	}
}

/*
 * If the scheduling parameters of a -deadline task changed,
 * a push or pull operation might be needed.
 */
static void prio_changed_dl(struct rq *rq, struct task_struct *p,
			    int oldprio)
{
	if (task_on_rq_queued(p) || rq->curr == p) {
#ifdef CONFIG_SMP
		/*
		 * This might be too much, but unfortunately
		 * we don't have the old deadline value, and
		 * we can't argue if the task is increasing
		 * or lowering its prio, so...
		 */
		if (!rq->dl.overloaded)
			queue_pull_task(rq);

		/*
		 * If we now have an earlier deadline task than p,
		 * then reschedule, provided p is still on this
		 * runqueue.
		 */
		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
			resched_curr(rq);
#else
		/*
		 * Again, we don't know if p has an earlier
		 * or later deadline, so let's blindly set a
		 * (maybe not needed) rescheduling point.
		 */
		resched_curr(rq);
#endif /* CONFIG_SMP */
	}
}

const struct sched_class dl_sched_class = {
	.next			= &rt_sched_class,
	.enqueue_task		= enqueue_task_dl,
	.dequeue_task		= dequeue_task_dl,
	.yield_task		= yield_task_dl,

	.check_preempt_curr	= check_preempt_curr_dl,

	.pick_next_task		= pick_next_task_dl,
	.put_prev_task		= put_prev_task_dl,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_dl,
	.migrate_task_rq	= migrate_task_rq_dl,
	.set_cpus_allowed	= set_cpus_allowed_dl,
	.rq_online		= rq_online_dl,
	.rq_offline		= rq_offline_dl,
	.task_woken		= task_woken_dl,
#endif

	.set_curr_task		= set_curr_task_dl,
	.task_tick		= task_tick_dl,
	.task_fork		= task_fork_dl,

	.prio_changed		= prio_changed_dl,
	.switched_from		= switched_from_dl,
	.switched_to		= switched_to_dl,

	.update_curr		= update_curr_dl,
#ifdef CONFIG_SCHED_WALT
	.fixup_cumulative_runnable_avg = walt_fixup_cumulative_runnable_avg,
#endif
};
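
/*
 * Called when the global RT bandwidth limits change: verify that the new
 * runtime/period pair still leaves room for the SCHED_DEADLINE bandwidth
 * already allocated in every root_domain.
 */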
int sched_dl_global_validate(void)
{
	u64 runtime = global_rt_runtime();
	u64 period = global_rt_period();
	u64 new_bw = to_ratio(period, runtime);
	struct dl_bw *dl_b;
	int cpu, cpus, ret = 0;
	unsigned long flags;

	/*
	 * Here we want to check that the bandwidth is not being set to a
	 * value smaller than the currently allocated bandwidth in
	 * any of the root_domains.
	 *
	 * FIXME: Cycling on all the CPUs is overkill, but simpler than
	 * cycling on root_domains... Discussion on different/better
	 * solutions is welcome!
	 */
	for_each_possible_cpu(cpu) {
		rcu_read_lock_sched();
		dl_b = dl_bw_of(cpu);
		cpus = dl_bw_cpus(cpu);

		raw_spin_lock_irqsave(&dl_b->lock, flags);
		if (new_bw * cpus < dl_b->total_bw)
			ret = -EBUSY;
		raw_spin_unlock_irqrestore(&dl_b->lock, flags);

		rcu_read_unlock_sched();

		if (ret)
			break;
	}

	return ret;
}
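
/*
 * Cache in the dl_rq the ratios derived from the global RT bandwidth;
 * these are the values the GRUB runtime-reclaiming code scales consumed
 * runtime with.
 */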
void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
{
	if (global_rt_runtime() == RUNTIME_INF) {
		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
		dl_rq->extra_bw = 1 << BW_SHIFT;
	} else {
		dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
			  global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
		dl_rq->extra_bw = to_ratio(global_rt_period(),
						    global_rt_runtime());
	}
}

void sched_dl_do_global(void)
{
	u64 new_bw = -1;
	struct dl_bw *dl_b;
	int cpu;
	unsigned long flags;

	def_dl_bandwidth.dl_period = global_rt_period();
	def_dl_bandwidth.dl_runtime = global_rt_runtime();

	if (global_rt_runtime() != RUNTIME_INF)
		new_bw = to_ratio(global_rt_period(), global_rt_runtime());

	/*
	 * FIXME: As above...
	 */
	for_each_possible_cpu(cpu) {
		rcu_read_lock_sched();
		dl_b = dl_bw_of(cpu);

		raw_spin_lock_irqsave(&dl_b->lock, flags);
		dl_b->bw = new_bw;
		raw_spin_unlock_irqrestore(&dl_b->lock, flags);

		rcu_read_unlock_sched();
		init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
	}
}

/*
 * We must be sure that accepting a new task (or allowing changing the
 * parameters of an existing one) is consistent with the bandwidth
 * constraints. If yes, this function also accordingly updates the currently
 * allocated bandwidth to reflect the new situation.
 *
 * This function is called while holding p's rq->lock.
 */
int sched_dl_overflow(struct task_struct *p, int policy,
		      const struct sched_attr *attr)
{
	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
	u64 period = attr->sched_period ?: attr->sched_deadline;
	u64 runtime = attr->sched_runtime;
	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
	int cpus, err = -1;

	/* !deadline task may carry old deadline bandwidth */
	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
		return 0;

	/*
	 * Whether a task enters, leaves, or stays -deadline but changes
	 * its parameters, we may need to update the total allocated
	 * bandwidth of the container accordingly.
	 */
	raw_spin_lock(&dl_b->lock);
	cpus = dl_bw_cpus(task_cpu(p));
	if (dl_policy(policy) && !task_has_dl_policy(p) &&
	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
		if (hrtimer_active(&p->dl.inactive_timer))
			__dl_clear(dl_b, p->dl.dl_bw, cpus);
		__dl_add(dl_b, new_bw, cpus);
		err = 0;
	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
		/*
		 * XXX this is slightly incorrect: when the task
		 * utilization decreases, we should delay the total
		 * utilization change until the task's 0-lag point.
		 * But this would require setting the task's "inactive
		 * timer" when the task is not inactive.
		 */
		__dl_clear(dl_b, p->dl.dl_bw, cpus);
		__dl_add(dl_b, new_bw, cpus);
		dl_change_utilization(p, new_bw);
		err = 0;
	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
		/*
		 * Do not decrease the total deadline utilization here,
		 * switched_from_dl() will take care to do it at the correct
		 * (0-lag) time.
		 */
		err = 0;
	}
	raw_spin_unlock(&dl_b->lock);

	return err;
}

/*
 * This function initializes the sched_dl_entity of a newly becoming
 * SCHED_DEADLINE task.
 *
 * Only the static values are considered here, the actual runtime and the
 * absolute deadline will be properly calculated when the task is enqueued
 * for the first time with its new policy.
 */
void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
{
	struct sched_dl_entity *dl_se = &p->dl;

	dl_se->dl_runtime = attr->sched_runtime;
	dl_se->dl_deadline = attr->sched_deadline;
	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
	dl_se->flags = attr->sched_flags;
	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
}
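
/* Mirror of __setparam_dl(): copy the static parameters back into *attr. */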
void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
{
	struct sched_dl_entity *dl_se = &p->dl;

	attr->sched_priority = p->rt_priority;
	attr->sched_runtime = dl_se->dl_runtime;
	attr->sched_deadline = dl_se->dl_deadline;
	attr->sched_period = dl_se->dl_period;
	attr->sched_flags = dl_se->flags;
}

/*
 * This function validates the new parameters of a -deadline task.
 * We require the deadline to be non-zero and no smaller than the runtime,
 * and the period to be either zero or no smaller than the deadline.
 * Furthermore, we have to be sure that user parameters are above the
 * internal resolution of 1us (we check sched_runtime only since it is
 * always the smaller one) and below 2^63 ns (we have to check both
 * sched_deadline and sched_period, as the latter can be zero).
 */
bool __checkparam_dl(const struct sched_attr *attr)
{
	/* deadline != 0 */
	if (attr->sched_deadline == 0)
		return false;

	/*
	 * Since we truncate DL_SCALE bits, make sure we're at least
	 * that big.
	 */
	if (attr->sched_runtime < (1ULL << DL_SCALE))
		return false;

	/*
	 * Since we use the MSB for wrap-around and sign issues, make
	 * sure it's not set (mind that period can be equal to zero).
	 */
	if (attr->sched_deadline & (1ULL << 63) ||
	    attr->sched_period & (1ULL << 63))
		return false;

	/* runtime <= deadline <= period (if period != 0) */
	if ((attr->sched_period != 0 &&
	     attr->sched_period < attr->sched_deadline) ||
	    attr->sched_deadline < attr->sched_runtime)
		return false;

	return true;
}

/*
 * This function clears the sched_dl_entity static params.
 */
void __dl_clear_params(struct task_struct *p)
{
	struct sched_dl_entity *dl_se = &p->dl;

	dl_se->dl_runtime = 0;
	dl_se->dl_deadline = 0;
	dl_se->dl_period = 0;
	dl_se->flags = 0;
	dl_se->dl_bw = 0;
	dl_se->dl_density = 0;

	dl_se->dl_throttled = 0;
	dl_se->dl_yielded = 0;
	dl_se->dl_non_contending = 0;
}
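
/* Report whether attr differs from the task's current static DL parameters. */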
bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
{
	struct sched_dl_entity *dl_se = &p->dl;

	if (dl_se->dl_runtime != attr->sched_runtime ||
	    dl_se->dl_deadline != attr->sched_deadline ||
	    dl_se->dl_period != attr->sched_period ||
	    dl_se->flags != attr->sched_flags)
		return true;

	return false;
}

#ifdef CONFIG_SMP
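/*
 * Admission check for moving a -deadline task into a different root_domain:
 * reserve its bandwidth in the destination right away, so that the later
 * migration cannot fail for lack of space.
 */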
int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
{
	unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
							cs_cpus_allowed);
	struct dl_bw *dl_b;
	bool overflow;
	int cpus, ret;
	unsigned long flags;

	rcu_read_lock_sched();
	dl_b = dl_bw_of(dest_cpu);
	raw_spin_lock_irqsave(&dl_b->lock, flags);
	cpus = dl_bw_cpus(dest_cpu);
	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
	if (overflow)
		ret = -EBUSY;
	else {
		/*
		 * We reserve space for this task in the destination
		 * root_domain, as we can't fail after this point.
		 * We will free resources in the source root_domain
		 * later on (see set_cpus_allowed_dl()).
		 */
		__dl_add(dl_b, p->dl.dl_bw, cpus);
		ret = 0;
	}
	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
	rcu_read_unlock_sched();

	return ret;
}
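
/*
 * When a cpuset shrinks, make sure the smaller trial mask still provides
 * enough CPUs to carry the DL bandwidth currently allocated on the old one.
 */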
int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
				 const struct cpumask *trial)
{
	int ret = 1, trial_cpus;
	struct dl_bw *cur_dl_b;
	unsigned long flags;

	rcu_read_lock_sched();
	cur_dl_b = dl_bw_of(cpumask_any(cur));
	trial_cpus = cpumask_weight(trial);

	raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
	if (cur_dl_b->bw != -1 &&
	    cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
		ret = 0;
	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
	rcu_read_unlock_sched();

	return ret;
}
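
/*
 * Report whether the CPU's root_domain has any DL bandwidth allocated;
 * __dl_overflow() with a zero request acts as a plain "anything reserved?"
 * check here (used, e.g., when deciding whether a CPU may go offline).
 */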
bool dl_cpu_busy(unsigned int cpu)
{
	unsigned long flags;
	struct dl_bw *dl_b;
	bool overflow;
	int cpus;

	rcu_read_lock_sched();
	dl_b = dl_bw_of(cpu);
	raw_spin_lock_irqsave(&dl_b->lock, flags);
	cpus = dl_bw_cpus(cpu);
	overflow = __dl_overflow(dl_b, cpus, 0, 0);
	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
	rcu_read_unlock_sched();

	return overflow;
}

#endif

#ifdef CONFIG_SCHED_DEBUG
void print_dl_stats(struct seq_file *m, int cpu)
{
	print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
}
#endif /* CONFIG_SCHED_DEBUG */