sched.h
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #include <linux/sched.h>
  3. #include <linux/sched/autogroup.h>
  4. #include <linux/sched/sysctl.h>
  5. #include <linux/sched/topology.h>
  6. #include <linux/sched/rt.h>
  7. #include <linux/sched/deadline.h>
  8. #include <linux/sched/clock.h>
  9. #include <linux/sched/wake_q.h>
  10. #include <linux/sched/signal.h>
  11. #include <linux/sched/numa_balancing.h>
  12. #include <linux/sched/mm.h>
  13. #include <linux/sched/cpufreq.h>
  14. #include <linux/sched/stat.h>
  15. #include <linux/sched/nohz.h>
  16. #include <linux/sched/debug.h>
  17. #include <linux/sched/hotplug.h>
  18. #include <linux/sched/task.h>
  19. #include <linux/sched/task_stack.h>
  20. #include <linux/sched/cputime.h>
  21. #include <linux/sched/init.h>
  22. #include <linux/sched/smt.h>
  23. #include <linux/u64_stats_sync.h>
  24. #include <linux/kernel_stat.h>
  25. #include <linux/binfmts.h>
  26. #include <linux/mutex.h>
  27. #include <linux/psi.h>
  28. #include <linux/spinlock.h>
  29. #include <linux/stop_machine.h>
  30. #include <linux/irq_work.h>
  31. #include <linux/tick.h>
  32. #include <linux/slab.h>
  33. #ifdef CONFIG_PARAVIRT
  34. #include <asm/paravirt.h>
  35. #endif
  36. #include "cpupri.h"
  37. #include "cpudeadline.h"
  38. #include "cpuacct.h"
  39. #ifdef CONFIG_SCHED_DEBUG
  40. # define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
  41. #else
  42. # define SCHED_WARN_ON(x) ({ (void)(x), 0; })
  43. #endif
  44. struct rq;
  45. struct cpuidle_state;
  46. /* task_struct::on_rq states: */
  47. #define TASK_ON_RQ_QUEUED 1
  48. #define TASK_ON_RQ_MIGRATING 2
  49. extern __read_mostly int scheduler_running;
  50. extern unsigned long calc_load_update;
  51. extern atomic_long_t calc_load_tasks;
  52. extern void calc_global_load_tick(struct rq *this_rq);
  53. extern long calc_load_fold_active(struct rq *this_rq, long adjust);
  54. #ifdef CONFIG_SMP
  55. extern void cpu_load_update_active(struct rq *this_rq);
  56. #else
  57. static inline void cpu_load_update_active(struct rq *this_rq) { }
  58. #endif
  59. /*
  60. * Helpers for converting nanosecond timing to jiffy resolution
  61. */
  62. #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
  63. /*
  64. * Increase resolution of nice-level calculations for 64-bit architectures.
  65. * The extra resolution improves shares distribution and load balancing of
  66. * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
  67. * hierarchies, especially on larger systems. This is not a user-visible change
  68. * and does not change the user-interface for setting shares/weights.
  69. *
  70. * We increase resolution only if we have enough bits to allow this increased
  71. * resolution (i.e. 64bit). The costs for increasing resolution when 32bit are
  72. * pretty high and the returns do not justify the increased costs.
  73. *
  74. * Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to
  75. * increase coverage and consistency always enable it on 64bit platforms.
  76. */
  77. #ifdef CONFIG_64BIT
  78. # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
  79. # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
  80. # define scale_load_down(w) \
  81. ({ \
  82. unsigned long __w = (w); \
  83. if (__w) \
  84. __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
  85. __w; \
  86. })
  87. #else
  88. # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
  89. # define scale_load(w) (w)
  90. # define scale_load_down(w) (w)
  91. #endif
  92. /*
  93. * Task weight (visible to users) and its load (invisible to users) have
  94. * independent resolution, but they should be well calibrated. We use
  95. * scale_load() and scale_load_down() to convert between them. The
  96. * following must be true:
  97. *
  98. * scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
  99. *
  100. */
  101. #define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
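/*
 * Worked example (editor's sketch, not part of the original header):
 * with SCHED_FIXEDPOINT_SHIFT == 10, a nice-0 task has the user-visible
 * weight 1024 (sched_prio_to_weight[20]), and on 64-bit:
 *
 *	NICE_0_LOAD_SHIFT            == 20
 *	scale_load(1024)             == 1024 << 10 == 1048576 == NICE_0_LOAD
 *	scale_load_down(NICE_0_LOAD) == max(2, 1048576 >> 10) == 1024
 *
 * i.e. the high-resolution load is the weight shifted up by one extra
 * fixed-point step, and scale_load_down() recovers the weight while
 * clamping to 2 so a non-zero load never rounds down to 0.
 */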
  102. /*
  103. * Single value that decides SCHED_DEADLINE internal math precision.
  104. * 10 -> just above 1us
  105. * 9 -> just above 0.5us
  106. */
  107. #define DL_SCALE (10)
  108. /*
  109. * These are the 'tuning knobs' of the scheduler:
  110. */
  111. /*
  112. * single value that denotes runtime == period, i.e. unlimited time.
  113. */
  114. #define RUNTIME_INF ((u64)~0ULL)
  115. static inline int idle_policy(int policy)
  116. {
  117. return policy == SCHED_IDLE;
  118. }
  119. static inline int fair_policy(int policy)
  120. {
  121. return policy == SCHED_NORMAL || policy == SCHED_BATCH;
  122. }
  123. static inline int rt_policy(int policy)
  124. {
  125. return policy == SCHED_FIFO || policy == SCHED_RR;
  126. }
  127. static inline int dl_policy(int policy)
  128. {
  129. return policy == SCHED_DEADLINE;
  130. }
  131. static inline bool valid_policy(int policy)
  132. {
  133. return idle_policy(policy) || fair_policy(policy) ||
  134. rt_policy(policy) || dl_policy(policy);
  135. }
  136. static inline int task_has_rt_policy(struct task_struct *p)
  137. {
  138. return rt_policy(p->policy);
  139. }
  140. static inline int task_has_dl_policy(struct task_struct *p)
  141. {
  142. return dl_policy(p->policy);
  143. }
  144. /*
  145. * Tells if entity @a should preempt entity @b.
  146. */
  147. static inline bool
  148. dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
  149. {
  150. return dl_time_before(a->deadline, b->deadline);
  151. }
  152. /*
  153. * This is the priority-queue data structure of the RT scheduling class:
  154. */
  155. struct rt_prio_array {
  156. DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
  157. struct list_head queue[MAX_RT_PRIO];
  158. };
  159. struct rt_bandwidth {
  160. /* nests inside the rq lock: */
  161. raw_spinlock_t rt_runtime_lock;
  162. ktime_t rt_period;
  163. u64 rt_runtime;
  164. struct hrtimer rt_period_timer;
  165. unsigned int rt_period_active;
  166. };
  167. void __dl_clear_params(struct task_struct *p);
  168. struct dl_bandwidth {
  169. raw_spinlock_t dl_runtime_lock;
  170. u64 dl_runtime;
  171. u64 dl_period;
  172. };
  173. static inline int dl_bandwidth_enabled(void)
  174. {
  175. return sysctl_sched_rt_runtime >= 0;
  176. }
  177. /*
  178. * To keep the bandwidth of -deadline tasks under control
  179. * we need some place where we can:
  180. * - store the maximum -deadline bandwidth of each cpu;
  181. * - cache the fraction of bandwidth that is currently allocated in
  182. * each root domain;
  183. *
  184. * This is all done in the data structure below. It is similar to the
  185. * one used for RT-throttling (rt_bandwidth), with the main difference
  186. * that, since here we are only interested in admission control, we
  187. * do not decrease any runtime while the group "executes", nor do we
  188. * need a timer to replenish it.
  189. *
  190. * With respect to SMP, bandwidth is given on a per root domain basis,
  191. * meaning that:
  192. * - bw (< 100%) is the deadline bandwidth of each CPU;
  193. * - total_bw is the currently allocated bandwidth in each root domain;
  194. */
  195. struct dl_bw {
  196. raw_spinlock_t lock;
  197. u64 bw, total_bw;
  198. };
  199. static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
  200. static inline
  201. void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
  202. {
  203. dl_b->total_bw -= tsk_bw;
  204. __dl_update(dl_b, (s32)tsk_bw / cpus);
  205. }
  206. static inline
  207. void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
  208. {
  209. dl_b->total_bw += tsk_bw;
  210. __dl_update(dl_b, -((s32)tsk_bw / cpus));
  211. }
  212. static inline
  213. bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
  214. {
  215. return dl_b->bw != -1 &&
  216. dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
  217. }
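/*
 * Worked example (editor's note, not part of the original header): the
 * admission test built on __dl_overflow().  A new or changed -deadline
 * task with bandwidth @new_bw (replacing its previous @old_bw, 0 for a
 * brand new task) fits on a root domain spanning @cpus CPUs only if:
 *
 *	total_bw - old_bw + new_bw <= bw * cpus
 *
 * which is the negation of __dl_overflow().  With the default 95%
 * per-CPU limit on a 4-CPU root domain, the admitted -deadline
 * bandwidth may therefore total at most 3.8 CPUs worth of runtime.
 */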
  218. void dl_change_utilization(struct task_struct *p, u64 new_bw);
  219. extern void init_dl_bw(struct dl_bw *dl_b);
  220. extern int sched_dl_global_validate(void);
  221. extern void sched_dl_do_global(void);
  222. extern int sched_dl_overflow(struct task_struct *p, int policy,
  223. const struct sched_attr *attr);
  224. extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
  225. extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
  226. extern bool __checkparam_dl(const struct sched_attr *attr);
  227. extern void __dl_clear_params(struct task_struct *p);
  228. extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
  229. extern int dl_task_can_attach(struct task_struct *p,
  230. const struct cpumask *cs_cpus_allowed);
  231. extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
  232. const struct cpumask *trial);
  233. extern bool dl_cpu_busy(unsigned int cpu);
  234. #ifdef CONFIG_CGROUP_SCHED
  235. #include <linux/cgroup.h>
  236. #include <linux/psi.h>
  237. struct cfs_rq;
  238. struct rt_rq;
  239. extern struct list_head task_groups;
  240. struct cfs_bandwidth {
  241. #ifdef CONFIG_CFS_BANDWIDTH
  242. raw_spinlock_t lock;
  243. ktime_t period;
  244. u64 quota, runtime;
  245. s64 hierarchical_quota;
  246. short idle, period_active;
  247. struct hrtimer period_timer, slack_timer;
  248. struct list_head throttled_cfs_rq;
  249. /* statistics */
  250. int nr_periods, nr_throttled;
  251. u64 throttled_time;
  252. bool distribute_running;
  253. #endif
  254. };
  255. /* task group related information */
  256. struct task_group {
  257. struct cgroup_subsys_state css;
  258. #ifdef CONFIG_FAIR_GROUP_SCHED
  259. /* schedulable entities of this group on each cpu */
  260. struct sched_entity **se;
  261. /* runqueue "owned" by this group on each cpu */
  262. struct cfs_rq **cfs_rq;
  263. unsigned long shares;
  264. #ifdef CONFIG_SMP
  265. /*
  266. * load_avg can be heavily contended at clock tick time, so put
  267. * it in its own cacheline separated from the fields above which
  268. * will also be accessed at each tick.
  269. */
  270. atomic_long_t load_avg ____cacheline_aligned;
  271. #endif
  272. #endif
  273. #ifdef CONFIG_RT_GROUP_SCHED
  274. struct sched_rt_entity **rt_se;
  275. struct rt_rq **rt_rq;
  276. struct rt_bandwidth rt_bandwidth;
  277. #endif
  278. struct rcu_head rcu;
  279. struct list_head list;
  280. struct task_group *parent;
  281. struct list_head siblings;
  282. struct list_head children;
  283. #ifdef CONFIG_SCHED_AUTOGROUP
  284. struct autogroup *autogroup;
  285. #endif
  286. struct cfs_bandwidth cfs_bandwidth;
  287. #ifdef CONFIG_UCLAMP_TASK_GROUP
  288. struct uclamp_se uclamp[UCLAMP_CNT];
  289. #endif
  290. };
  291. #ifdef CONFIG_FAIR_GROUP_SCHED
  292. #define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
  293. /*
  294. * A weight of 0 or 1 can cause arithmetic problems.
  295. * The weight of a cfs_rq is the sum of the weights of the entities
  296. * queued on that cfs_rq, so the weight of an entity should not be
  297. * too large, and neither should the shares value of a task group.
  298. * (The default weight is 1024 - so there's no practical
  299. * limitation from this.)
  300. */
  301. #define MIN_SHARES (1UL << 1)
  302. #define MAX_SHARES (1UL << 18)
  303. #endif
  304. typedef int (*tg_visitor)(struct task_group *, void *);
  305. extern int walk_tg_tree_from(struct task_group *from,
  306. tg_visitor down, tg_visitor up, void *data);
  307. /*
  308. * Iterate the full tree, calling @down when first entering a node and @up when
  309. * leaving it for the final time.
  310. *
  311. * Caller must hold rcu_lock or sufficient equivalent.
  312. */
  313. static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
  314. {
  315. return walk_tg_tree_from(&root_task_group, down, up, data);
  316. }
  317. extern int tg_nop(struct task_group *tg, void *data);
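/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): a hypothetical visitor counting task groups.  A tg_visitor
 * returns 0 to continue the walk; any non-zero value aborts it and is
 * propagated back to the caller of walk_tg_tree().
 */
static int __maybe_unused tg_count_one(struct task_group *tg, void *data)
{
	(*(int *)data)++;	/* visit every group exactly once on the way down */
	return 0;
}

static inline int count_task_groups(void)
{
	int nr = 0;

	rcu_read_lock();	/* walk_tg_tree() requires RCU (or equivalent) */
	walk_tg_tree(tg_count_one, tg_nop, &nr);
	rcu_read_unlock();

	return nr;
}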
  318. extern void free_fair_sched_group(struct task_group *tg);
  319. extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
  320. extern void online_fair_sched_group(struct task_group *tg);
  321. extern void unregister_fair_sched_group(struct task_group *tg);
  322. extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
  323. struct sched_entity *se, int cpu,
  324. struct sched_entity *parent);
  325. extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
  326. extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
  327. extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
  328. extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
  329. extern void free_rt_sched_group(struct task_group *tg);
  330. extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
  331. extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
  332. struct sched_rt_entity *rt_se, int cpu,
  333. struct sched_rt_entity *parent);
  334. extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
  335. extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
  336. extern long sched_group_rt_runtime(struct task_group *tg);
  337. extern long sched_group_rt_period(struct task_group *tg);
  338. extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
  339. extern struct task_group *sched_create_group(struct task_group *parent);
  340. extern void sched_online_group(struct task_group *tg,
  341. struct task_group *parent);
  342. extern void sched_destroy_group(struct task_group *tg);
  343. extern void sched_offline_group(struct task_group *tg);
  344. extern void sched_move_task(struct task_struct *tsk);
  345. #ifdef CONFIG_FAIR_GROUP_SCHED
  346. extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
  347. #ifdef CONFIG_SMP
  348. extern void set_task_rq_fair(struct sched_entity *se,
  349. struct cfs_rq *prev, struct cfs_rq *next);
  350. #else /* !CONFIG_SMP */
  351. static inline void set_task_rq_fair(struct sched_entity *se,
  352. struct cfs_rq *prev, struct cfs_rq *next) { }
  353. #endif /* CONFIG_SMP */
  354. #endif /* CONFIG_FAIR_GROUP_SCHED */
  355. #else /* CONFIG_CGROUP_SCHED */
  356. struct cfs_bandwidth { };
  357. #endif /* CONFIG_CGROUP_SCHED */
  358. /* CFS-related fields in a runqueue */
  359. struct cfs_rq {
  360. struct load_weight load;
  361. unsigned int nr_running, h_nr_running;
  362. u64 exec_clock;
  363. u64 min_vruntime;
  364. #ifndef CONFIG_64BIT
  365. u64 min_vruntime_copy;
  366. #endif
  367. struct rb_root_cached tasks_timeline;
  368. /*
  369. * 'curr' points to currently running entity on this cfs_rq.
  370. * It is set to NULL otherwise (i.e when none are currently running).
  371. */
  372. struct sched_entity *curr, *next, *last, *skip;
  373. #ifdef CONFIG_SCHED_DEBUG
  374. unsigned int nr_spread_over;
  375. #endif
  376. #ifdef CONFIG_SMP
  377. /*
  378. * CFS load tracking
  379. */
  380. struct sched_avg avg;
  381. u64 runnable_load_sum;
  382. unsigned long runnable_load_avg;
  383. #ifdef CONFIG_FAIR_GROUP_SCHED
  384. unsigned long tg_load_avg_contrib;
  385. unsigned long propagate_avg;
  386. #endif
  387. atomic_long_t removed_load_avg, removed_util_avg;
  388. #ifndef CONFIG_64BIT
  389. u64 load_last_update_time_copy;
  390. #endif
  391. #ifdef CONFIG_FAIR_GROUP_SCHED
  392. /*
  393. * h_load = weight * f(tg)
  394. *
  395. * Where f(tg) is the recursive weight fraction assigned to
  396. * this group.
  397. */
  398. unsigned long h_load;
  399. u64 last_h_load_update;
  400. struct sched_entity *h_load_next;
  401. #endif /* CONFIG_FAIR_GROUP_SCHED */
  402. #endif /* CONFIG_SMP */
  403. #ifdef CONFIG_FAIR_GROUP_SCHED
  404. struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
  405. /*
  406. * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
  407. * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
  408. * (like users, containers etc.)
  409. *
  410. * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
  411. * list is used during load balance.
  412. */
  413. int on_list;
  414. struct list_head leaf_cfs_rq_list;
  415. struct task_group *tg; /* group that "owns" this runqueue */
  416. #ifdef CONFIG_CFS_BANDWIDTH
  417. int runtime_enabled;
  418. s64 runtime_remaining;
  419. u64 throttled_clock, throttled_clock_task;
  420. u64 throttled_clock_task_time;
  421. int throttled, throttle_count;
  422. struct list_head throttled_list;
  423. #ifdef CONFIG_SCHED_WALT
  424. u64 cumulative_runnable_avg;
  425. #endif /* CONFIG_SCHED_WALT */
  426. #endif /* CONFIG_CFS_BANDWIDTH */
  427. #endif /* CONFIG_FAIR_GROUP_SCHED */
  428. };
  429. static inline int rt_bandwidth_enabled(void)
  430. {
  431. return sysctl_sched_rt_runtime >= 0;
  432. }
  433. /* RT IPI pull logic requires IRQ_WORK */
  434. #if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
  435. # define HAVE_RT_PUSH_IPI
  436. #endif
  437. /* Real-Time classes' related field in a runqueue: */
  438. struct rt_rq {
  439. struct rt_prio_array active;
  440. unsigned int rt_nr_running;
  441. unsigned int rr_nr_running;
  442. #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
  443. struct {
  444. int curr; /* highest queued rt task prio */
  445. #ifdef CONFIG_SMP
  446. int next; /* next highest */
  447. #endif
  448. } highest_prio;
  449. #endif
  450. #ifdef CONFIG_SMP
  451. unsigned long rt_nr_migratory;
  452. unsigned long rt_nr_total;
  453. int overloaded;
  454. struct plist_head pushable_tasks;
  455. struct sched_avg avg;
  456. #endif /* CONFIG_SMP */
  457. int rt_queued;
  458. int rt_throttled;
  459. u64 rt_time;
  460. u64 rt_runtime;
  461. /* Nests inside the rq lock: */
  462. raw_spinlock_t rt_runtime_lock;
  463. #ifdef CONFIG_RT_GROUP_SCHED
  464. unsigned long rt_nr_boosted;
  465. struct rq *rq;
  466. struct task_group *tg;
  467. #endif
  468. };
  469. /* Deadline class' related fields in a runqueue */
  470. struct dl_rq {
  471. /* runqueue is an rbtree, ordered by deadline */
  472. struct rb_root_cached root;
  473. unsigned long dl_nr_running;
  474. #ifdef CONFIG_SMP
  475. /*
  476. * Deadline values of the currently executing and the
  477. * earliest ready task on this rq. Caching these facilitates
  478. * the decision whether or not a ready but not running task
  479. * should migrate somewhere else.
  480. */
  481. struct {
  482. u64 curr;
  483. u64 next;
  484. } earliest_dl;
  485. unsigned long dl_nr_migratory;
  486. int overloaded;
  487. /*
  488. * Tasks on this rq that can be pushed away. They are kept in
  489. * an rb-tree, ordered by tasks' deadlines, with caching
  490. * of the leftmost (earliest deadline) element.
  491. */
  492. struct rb_root_cached pushable_dl_tasks_root;
  493. #else
  494. struct dl_bw dl_bw;
  495. #endif
  496. /*
  497. * "Active utilization" for this runqueue: increased when a
  498. * task wakes up (becomes TASK_RUNNING) and decreased when a
  499. * task blocks
  500. */
  501. u64 running_bw;
  502. /*
  503. * Utilization of the tasks "assigned" to this runqueue (including
  504. * the tasks that are in runqueue and the tasks that executed on this
  505. * CPU and blocked). Increased when a task moves to this runqueue, and
  506. * decreased when the task moves away (migrates, changes scheduling
  507. * policy, or terminates).
  508. * This is needed to compute the "inactive utilization" for the
  509. * runqueue (inactive utilization = this_bw - running_bw).
  510. */
  511. u64 this_bw;
  512. u64 extra_bw;
  513. /*
  514. * Inverse of the fraction of CPU utilization that can be reclaimed
  515. * by the GRUB algorithm.
  516. */
  517. u64 bw_ratio;
  518. };
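/*
 * Worked example (editor's note, not part of the original header): for
 * any dl_rq the fields above satisfy running_bw <= this_bw, and the
 * "inactive utilization" used by the GRUB reclaiming algorithm is:
 *
 *	inactive_bw = this_bw - running_bw
 *
 * i.e. the bandwidth of -deadline tasks assigned to this CPU that are
 * currently blocked.  All three values use the same fixed-point unit as
 * the per-task dl_se->dl_bw.
 */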
  519. #ifdef CONFIG_SMP
  520. static inline bool sched_asym_prefer(int a, int b)
  521. {
  522. return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
  523. }
  524. struct max_cpu_capacity {
  525. raw_spinlock_t lock;
  526. unsigned long val;
  527. int cpu;
  528. };
  529. /*
  530. * We add the notion of a root-domain which will be used to define per-domain
  531. * variables. Each exclusive cpuset essentially defines an island domain by
  532. * fully partitioning the member cpus from any other cpuset. Whenever a new
  533. * exclusive cpuset is created, we also create and attach a new root-domain
  534. * object.
  535. *
  536. */
  537. struct root_domain {
  538. atomic_t refcount;
  539. atomic_t rto_count;
  540. struct rcu_head rcu;
  541. cpumask_var_t span;
  542. cpumask_var_t online;
  543. /*
  544. * Indicate pullable load on at least one CPU, e.g:
  545. * - More than one runnable task
  546. * - Running task is misfit
  547. */
  548. int overload;
  549. /*
  550. * The bit corresponding to a CPU gets set here if such CPU has more
  551. * than one runnable -deadline task (as it is below for RT tasks).
  552. */
  553. cpumask_var_t dlo_mask;
  554. atomic_t dlo_count;
  555. struct dl_bw dl_bw;
  556. struct cpudl cpudl;
  557. #ifdef HAVE_RT_PUSH_IPI
  558. /*
  559. * For IPI pull requests, loop across the rto_mask.
  560. */
  561. struct irq_work rto_push_work;
  562. raw_spinlock_t rto_lock;
  563. /* These are only updated and read within rto_lock */
  564. int rto_loop;
  565. int rto_cpu;
  566. /* These atomics are updated outside of a lock */
  567. atomic_t rto_loop_next;
  568. atomic_t rto_loop_start;
  569. #endif
  570. /*
  571. * The "RT overload" flag: it gets set if a CPU has more than
  572. * one runnable RT task.
  573. */
  574. cpumask_var_t rto_mask;
  575. struct cpupri cpupri;
  576. /* Maximum cpu capacity in the system. */
  577. struct max_cpu_capacity max_cpu_capacity;
  578. /* First cpu with maximum and minimum original capacity */
  579. int max_cap_orig_cpu, min_cap_orig_cpu;
  580. };
  581. extern struct root_domain def_root_domain;
  582. extern struct mutex sched_domains_mutex;
  583. extern void init_defrootdomain(void);
  584. extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
  585. extern int sched_init_domains(const struct cpumask *cpu_map);
  586. extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
  587. extern void sched_get_rd(struct root_domain *rd);
  588. extern void sched_put_rd(struct root_domain *rd);
  589. #ifdef HAVE_RT_PUSH_IPI
  590. extern void rto_push_irq_work_func(struct irq_work *work);
  591. #endif
  592. #endif /* CONFIG_SMP */
  593. #ifdef CONFIG_UCLAMP_TASK
  594. /**
  595. * struct uclamp_group - Utilization clamp Group
  596. * @value: utilization clamp value for tasks on this clamp group
  597. * @tasks: number of RUNNABLE tasks on this clamp group
  598. *
  599. * Keep track of how many tasks are RUNNABLE for a given utilization
  600. * clamp value.
  601. */
  602. struct uclamp_group {
  603. unsigned long value : SCHED_CAPACITY_SHIFT + 1;
  604. unsigned long tasks : BITS_PER_LONG - SCHED_CAPACITY_SHIFT - 1;
  605. };
  606. /**
  607. * struct uclamp_cpu - CPU's utilization clamp
  608. * @value: currently active clamp values for a CPU
  609. * @group: utilization clamp groups affecting a CPU
  610. *
  611. * Keep track of RUNNABLE tasks on a CPU to aggregate their clamp values.
  612. * A clamp value is affecting a CPU where there is at least one task RUNNABLE
  613. * (or actually running) with that value.
  614. *
  615. * We have up to UCLAMP_CNT possible different clamp values, which are
  616. * currently only two: minimum utilization and maximum utilization.
  617. *
  618. * All utilization clamping values are MAX aggregated, since:
  619. * - for util_min: we want to run the CPU at least at the max of the minimum
  620. * utilization required by its currently RUNNABLE tasks.
  621. * - for util_max: we want to allow the CPU to run up to the max of the
  622. * maximum utilization allowed by its currently RUNNABLE tasks.
  623. *
  624. * Since on each system we expect only a limited number of different
  625. * utilization clamp values (CONFIG_UCLAMP_GROUPS_COUNT), we use a simple
  626. * array to track the metrics required to compute all the per-CPU utilization
  627. * clamp values. The additional slot is used to track the default clamp
  628. * values, i.e. no min/max clamping at all.
  629. */
  630. struct uclamp_cpu {
  631. struct uclamp_group group[UCLAMP_CNT][UCLAMP_GROUPS];
  632. int value[UCLAMP_CNT];
  633. /*
  634. * Idle clamp holding
  635. * Whenever a CPU is idle, we enforce the util_max clamp value of the last
  636. * task running on that CPU. This bit is used to flag a clamp holding
  637. * currently active for a CPU. This flag is:
  638. * - set when we update the clamp value of a CPU at the time of dequeuing the
  639. * last task before entering idle
  640. * - reset when we enqueue the first task after a CPU wakeup from IDLE
  641. */
  642. #define UCLAMP_FLAG_IDLE 0x01
  643. int flags;
  644. };
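/*
 * Worked example (editor's note, not part of the original header): with
 * two RUNNABLE tasks on a CPU, one requesting util_min = 200 and the
 * other util_min = 512, MAX aggregation gives:
 *
 *	uclamp_cpu::value[UCLAMP_MIN] == 512
 *
 * so the CPU is asked to run at least fast enough for the most demanding
 * task.  Symmetrically, value[UCLAMP_MAX] is the maximum of the per-task
 * util_max clamps, so no RUNNABLE task ends up capped below the limit it
 * asked for.
 */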
  645. #endif /* CONFIG_UCLAMP_TASK */
  646. /*
  647. * This is the main, per-CPU runqueue data structure.
  648. *
  649. * Locking rule: those places that want to lock multiple runqueues
  650. * (such as the load balancing or the thread migration code) must
  651. * acquire the locks in ascending &runqueue (address) order.
  652. */
  653. struct rq {
  654. /* runqueue lock: */
  655. raw_spinlock_t lock;
  656. /*
  657. * nr_running and cpu_load should be in the same cacheline because
  658. * remote CPUs use both these fields when doing load calculation.
  659. */
  660. unsigned int nr_running;
  661. #ifdef CONFIG_NUMA_BALANCING
  662. unsigned int nr_numa_running;
  663. unsigned int nr_preferred_running;
  664. #endif
  665. #define CPU_LOAD_IDX_MAX 5
  666. unsigned long cpu_load[CPU_LOAD_IDX_MAX];
  667. #ifdef CONFIG_NO_HZ_COMMON
  668. #ifdef CONFIG_SMP
  669. unsigned long last_load_update_tick;
  670. unsigned long last_blocked_load_update_tick;
  671. #endif /* CONFIG_SMP */
  672. unsigned long nohz_flags;
  673. #endif /* CONFIG_NO_HZ_COMMON */
  674. #ifdef CONFIG_NO_HZ_FULL
  675. unsigned long last_sched_tick;
  676. #endif
  677. /* capture load from *all* tasks on this cpu: */
  678. struct load_weight load;
  679. unsigned long nr_load_updates;
  680. u64 nr_switches;
  681. #ifdef CONFIG_UCLAMP_TASK
  682. /* Utilization clamp values based on CPU's RUNNABLE tasks */
  683. struct uclamp_cpu uclamp ____cacheline_aligned;
  684. #endif
  685. struct cfs_rq cfs;
  686. struct rt_rq rt;
  687. struct dl_rq dl;
  688. #ifdef CONFIG_FAIR_GROUP_SCHED
  689. /* list of leaf cfs_rq on this cpu: */
  690. struct list_head leaf_cfs_rq_list;
  691. struct list_head *tmp_alone_branch;
  692. #endif /* CONFIG_FAIR_GROUP_SCHED */
  693. /*
  694. * This is part of a global counter where only the total sum
  695. * over all CPUs matters. A task can increase this counter on
  696. * one CPU and if it got migrated afterwards it may decrease
  697. * it on another CPU. Always updated under the runqueue lock:
  698. */
  699. unsigned long nr_uninterruptible;
  700. struct task_struct *curr, *idle, *stop;
  701. unsigned long next_balance;
  702. struct mm_struct *prev_mm;
  703. unsigned int clock_update_flags;
  704. u64 clock;
  705. u64 clock_task;
  706. atomic_t nr_iowait;
  707. #ifdef CONFIG_SMP
  708. struct root_domain *rd;
  709. struct sched_domain *sd;
  710. unsigned long cpu_capacity;
  711. unsigned long cpu_capacity_orig;
  712. struct callback_head *balance_callback;
  713. unsigned char idle_balance;
  714. unsigned long misfit_task_load;
  715. /* For active balancing */
  716. int active_balance;
  717. int push_cpu;
  718. struct cpu_stop_work active_balance_work;
  719. #if defined(CONFIG_SCHED_HMP) || defined(CONFIG_MTK_IDLE_BALANCE_ENHANCEMENT)
  720. struct task_struct *migrate_task;
  721. #endif
  722. /* cpu of this runqueue: */
  723. int cpu;
  724. int online;
  725. struct list_head cfs_tasks;
  726. u64 rt_avg;
  727. u64 age_stamp;
  728. u64 idle_stamp;
  729. u64 avg_idle;
  730. /* This is used to determine avg_idle's max value */
  731. u64 max_idle_balance_cost;
  732. #endif
  733. #ifdef CONFIG_SCHED_WALT
  734. u64 cumulative_runnable_avg;
  735. u64 window_start;
  736. u64 curr_runnable_sum;
  737. u64 prev_runnable_sum;
  738. u64 nt_curr_runnable_sum;
  739. u64 nt_prev_runnable_sum;
  740. u64 cur_irqload;
  741. u64 avg_irqload;
  742. u64 irqload_ts;
  743. u64 cum_window_demand;
  744. #endif /* CONFIG_SCHED_WALT */
  745. #ifdef CONFIG_IRQ_TIME_ACCOUNTING
  746. u64 prev_irq_time;
  747. #endif
  748. #ifdef CONFIG_PARAVIRT
  749. u64 prev_steal_time;
  750. #endif
  751. #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
  752. u64 prev_steal_time_rq;
  753. #endif
  754. /* calc_load related fields */
  755. unsigned long calc_load_update;
  756. long calc_load_active;
  757. #ifdef CONFIG_SCHED_HRTICK
  758. #ifdef CONFIG_SMP
  759. int hrtick_csd_pending;
  760. call_single_data_t hrtick_csd;
  761. #endif
  762. struct hrtimer hrtick_timer;
  763. #endif
  764. #ifdef CONFIG_SCHEDSTATS
  765. /* latency stats */
  766. struct sched_info rq_sched_info;
  767. unsigned long long rq_cpu_time;
  768. /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
  769. /* sys_sched_yield() stats */
  770. unsigned int yld_count;
  771. /* schedule() stats */
  772. unsigned int sched_count;
  773. unsigned int sched_goidle;
  774. /* try_to_wake_up() stats */
  775. unsigned int ttwu_count;
  776. unsigned int ttwu_local;
  777. #endif
  778. #ifdef CONFIG_SMP
  779. struct llist_head wake_list;
  780. #endif
  781. #ifdef CONFIG_CPU_IDLE
  782. /* Must be inspected within an RCU read-side section */
  783. struct cpuidle_state *idle_state;
  784. int idle_state_idx;
  785. #endif
  786. };
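/*
 * Illustrative sketch (editor's addition, not part of the original
 * header) of the locking rule stated above struct rq: when two runqueues
 * must be held at once, take the locks in ascending address order so
 * concurrent lockers cannot deadlock.  The helper name is hypothetical;
 * the caller is assumed to have IRQs disabled.
 */
static inline void example_lock_two_rqs(struct rq *rq1, struct rq *rq2)
{
	if (rq1 == rq2) {
		raw_spin_lock(&rq1->lock);
	} else if (rq1 < rq2) {
		raw_spin_lock(&rq1->lock);
		raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
	} else {
		raw_spin_lock(&rq2->lock);
		raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
	}
}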
  787. static inline int cpu_of(struct rq *rq)
  788. {
  789. #ifdef CONFIG_SMP
  790. return rq->cpu;
  791. #else
  792. return 0;
  793. #endif
  794. }
  795. #ifdef CONFIG_SCHED_SMT
  796. extern void __update_idle_core(struct rq *rq);
  797. static inline void update_idle_core(struct rq *rq)
  798. {
  799. if (static_branch_unlikely(&sched_smt_present))
  800. __update_idle_core(rq);
  801. }
  802. #else
  803. static inline void update_idle_core(struct rq *rq) { }
  804. #endif
  805. DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  806. #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
  807. #define this_rq() this_cpu_ptr(&runqueues)
  808. #define task_rq(p) cpu_rq(task_cpu(p))
  809. #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
  810. #define raw_rq() raw_cpu_ptr(&runqueues)
  811. extern void update_rq_clock(struct rq *rq);
  812. static inline u64 __rq_clock_broken(struct rq *rq)
  813. {
  814. return READ_ONCE(rq->clock);
  815. }
  816. /*
  817. * rq::clock_update_flags bits
  818. *
  819. * %RQCF_REQ_SKIP - will request skipping of clock update on the next
  820. * call to __schedule(). This is an optimisation to avoid
  821. * neighbouring rq clock updates.
  822. *
  823. * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
  824. * in effect and calls to update_rq_clock() are being ignored.
  825. *
  826. * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
  827. * made to update_rq_clock() since the last time rq::lock was pinned.
  828. *
  829. * If inside of __schedule(), clock_update_flags will have been
  830. * shifted left (a left shift is a cheap operation for the fast path
  831. * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
  832. *
  833. * if (rq->clock_update_flags >= RQCF_UPDATED)
  834. *
  835. * to check if %RQCF_UPDATED is set. It'll never be shifted more than
  836. * one position though, because the next rq_unpin_lock() will shift it
  837. * back.
  838. */
  839. #define RQCF_REQ_SKIP 0x01
  840. #define RQCF_ACT_SKIP 0x02
  841. #define RQCF_UPDATED 0x04
  842. static inline void assert_clock_updated(struct rq *rq)
  843. {
  844. /*
  845. * The only reason for not seeing a clock update since the
  846. * last rq_pin_lock() is if we're currently skipping updates.
  847. */
  848. SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
  849. }
  850. static inline u64 rq_clock(struct rq *rq)
  851. {
  852. lockdep_assert_held(&rq->lock);
  853. assert_clock_updated(rq);
  854. return rq->clock;
  855. }
  856. static inline u64 rq_clock_task(struct rq *rq)
  857. {
  858. lockdep_assert_held(&rq->lock);
  859. assert_clock_updated(rq);
  860. return rq->clock_task;
  861. }
  862. static inline void rq_clock_skip_update(struct rq *rq, bool skip)
  863. {
  864. lockdep_assert_held(&rq->lock);
  865. if (skip)
  866. rq->clock_update_flags |= RQCF_REQ_SKIP;
  867. else
  868. rq->clock_update_flags &= ~RQCF_REQ_SKIP;
  869. }
  870. struct rq_flags {
  871. unsigned long flags;
  872. struct pin_cookie cookie;
  873. #ifdef CONFIG_SCHED_DEBUG
  874. /*
  875. * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
  876. * current pin context is stashed here in case it needs to be
  877. * restored in rq_repin_lock().
  878. */
  879. unsigned int clock_update_flags;
  880. #endif
  881. };
  882. static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
  883. {
  884. rf->cookie = lockdep_pin_lock(&rq->lock);
  885. #ifdef CONFIG_SCHED_DEBUG
  886. rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
  887. rf->clock_update_flags = 0;
  888. #endif
  889. }
  890. static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
  891. {
  892. #ifdef CONFIG_SCHED_DEBUG
  893. if (rq->clock_update_flags > RQCF_ACT_SKIP)
  894. rf->clock_update_flags = RQCF_UPDATED;
  895. #endif
  896. lockdep_unpin_lock(&rq->lock, rf->cookie);
  897. }
  898. static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
  899. {
  900. lockdep_repin_lock(&rq->lock, rf->cookie);
  901. #ifdef CONFIG_SCHED_DEBUG
  902. /*
  903. * Restore the value we stashed in @rf for this pin context.
  904. */
  905. rq->clock_update_flags |= rf->clock_update_flags;
  906. #endif
  907. }
  908. struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
  909. __acquires(rq->lock);
  910. struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
  911. __acquires(p->pi_lock)
  912. __acquires(rq->lock);
  913. static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
  914. __releases(rq->lock)
  915. {
  916. rq_unpin_lock(rq, rf);
  917. raw_spin_unlock(&rq->lock);
  918. }
  919. static inline void
  920. task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
  921. __releases(rq->lock)
  922. __releases(p->pi_lock)
  923. {
  924. rq_unpin_lock(rq, rf);
  925. raw_spin_unlock(&rq->lock);
  926. raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
  927. }
  928. static inline void
  929. rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
  930. __acquires(rq->lock)
  931. {
  932. raw_spin_lock_irqsave(&rq->lock, rf->flags);
  933. rq_pin_lock(rq, rf);
  934. }
  935. static inline void
  936. rq_lock_irq(struct rq *rq, struct rq_flags *rf)
  937. __acquires(rq->lock)
  938. {
  939. raw_spin_lock_irq(&rq->lock);
  940. rq_pin_lock(rq, rf);
  941. }
  942. static inline void
  943. rq_lock(struct rq *rq, struct rq_flags *rf)
  944. __acquires(rq->lock)
  945. {
  946. raw_spin_lock(&rq->lock);
  947. rq_pin_lock(rq, rf);
  948. }
  949. static inline void
  950. rq_relock(struct rq *rq, struct rq_flags *rf)
  951. __acquires(rq->lock)
  952. {
  953. raw_spin_lock(&rq->lock);
  954. rq_repin_lock(rq, rf);
  955. }
  956. static inline void
  957. rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
  958. __releases(rq->lock)
  959. {
  960. rq_unpin_lock(rq, rf);
  961. raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
  962. }
  963. static inline void
  964. rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
  965. __releases(rq->lock)
  966. {
  967. rq_unpin_lock(rq, rf);
  968. raw_spin_unlock_irq(&rq->lock);
  969. }
  970. static inline void
  971. rq_unlock(struct rq *rq, struct rq_flags *rf)
  972. __releases(rq->lock)
  973. {
  974. rq_unpin_lock(rq, rf);
  975. raw_spin_unlock(&rq->lock);
  976. }
  977. static inline struct rq *
  978. this_rq_lock_irq(struct rq_flags *rf)
  979. __acquires(rq->lock)
  980. {
  981. struct rq *rq;
  982. local_irq_disable();
  983. rq = this_rq();
  984. rq_lock(rq, rf);
  985. return rq;
  986. }
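/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): the canonical way the pin/unpin helpers above are used.
 * task_rq_lock() takes both p->pi_lock and rq->lock and pins the rq
 * lock; struct rq_flags carries the saved IRQ flags and the lockdep pin
 * cookie between the lock and unlock calls.  The function name is
 * hypothetical.
 */
static inline void example_inspect_task(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(p, &rf);
	update_rq_clock(rq);
	/* ... @p cannot change runqueue or be woken concurrently here ... */
	task_rq_unlock(rq, p, &rf);
}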
  987. #ifdef CONFIG_NUMA
  988. enum numa_topology_type {
  989. NUMA_DIRECT,
  990. NUMA_GLUELESS_MESH,
  991. NUMA_BACKPLANE,
  992. };
  993. extern enum numa_topology_type sched_numa_topology_type;
  994. extern int sched_max_numa_distance;
  995. extern bool find_numa_distance(int distance);
  996. #endif
  997. #ifdef CONFIG_NUMA
  998. extern void sched_init_numa(void);
  999. extern void sched_domains_numa_masks_set(unsigned int cpu);
  1000. extern void sched_domains_numa_masks_clear(unsigned int cpu);
  1001. #else
  1002. static inline void sched_init_numa(void) { }
  1003. static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
  1004. static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
  1005. #endif
  1006. #ifdef CONFIG_NUMA_BALANCING
  1007. /* The regions in numa_faults array from task_struct */
  1008. enum numa_faults_stats {
  1009. NUMA_MEM = 0,
  1010. NUMA_CPU,
  1011. NUMA_MEMBUF,
  1012. NUMA_CPUBUF
  1013. };
  1014. extern void sched_setnuma(struct task_struct *p, int node);
  1015. extern int migrate_task_to(struct task_struct *p, int cpu);
  1016. #endif /* CONFIG_NUMA_BALANCING */
  1017. extern int migrate_swap(struct task_struct *src, struct task_struct *dst);
  1018. #ifdef CONFIG_SMP
  1019. static inline void
  1020. queue_balance_callback(struct rq *rq,
  1021. struct callback_head *head,
  1022. void (*func)(struct rq *rq))
  1023. {
  1024. lockdep_assert_held(&rq->lock);
  1025. if (unlikely(head->next))
  1026. return;
  1027. head->func = (void (*)(struct callback_head *))func;
  1028. head->next = rq->balance_callback;
  1029. rq->balance_callback = head;
  1030. }
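/*
 * Illustrative sketch (editor's addition, not part of the original
 * header): how a scheduling class typically uses the helper above.  The
 * names are hypothetical; the RT class follows this pattern with its
 * per-CPU push/pull callback heads (see rt.c).
 */
static DEFINE_PER_CPU(struct callback_head, example_balance_head);

static void example_push_tasks(struct rq *rq)
{
	/*
	 * Runs later from the scheduler core while it processes
	 * rq->balance_callback, e.g. to push surplus tasks away.
	 */
}

static inline void example_queue_push(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(example_balance_head, cpu_of(rq)),
			       example_push_tasks);
}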
  1031. extern void sched_ttwu_pending(void);
  1032. #define rcu_dereference_check_sched_domain(p) \
  1033. rcu_dereference_check((p), \
  1034. lockdep_is_held(&sched_domains_mutex))
  1035. /*
  1036. * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  1037. * See detach_destroy_domains: synchronize_sched for details.
  1038. *
  1039. * The domain tree of any CPU may only be accessed from within
  1040. * preempt-disabled sections.
  1041. */
  1042. #define for_each_domain(cpu, __sd) \
  1043. for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
  1044. __sd; __sd = __sd->parent)
  1045. #define for_each_lower_domain(sd) for (; sd; sd = sd->child)
  1046. /**
  1047. * highest_flag_domain - Return highest sched_domain containing flag.
  1048. * @cpu: The cpu whose highest level of sched domain is to
  1049. * be returned.
  1050. * @flag: The flag to check for the highest sched_domain
  1051. * for the given cpu.
  1052. *
  1053. * Returns the highest sched_domain of a cpu which contains the given flag.
  1054. */
  1055. static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
  1056. {
  1057. struct sched_domain *sd, *hsd = NULL;
  1058. for_each_domain(cpu, sd) {
  1059. if (!(sd->flags & flag))
  1060. break;
  1061. hsd = sd;
  1062. }
  1063. return hsd;
  1064. }
  1065. static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
  1066. {
  1067. struct sched_domain *sd;
  1068. for_each_domain(cpu, sd) {
  1069. if (sd->flags & flag)
  1070. break;
  1071. }
  1072. return sd;
  1073. }
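/*
 * Editor's note (not part of the original header): the per-CPU sd_llc
 * pointers declared below are derived with the helper above at domain
 * (re)build time; roughly, in update_top_cache_domain():
 *
 *	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
 *	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 *
 * i.e. the "last level cache" domain is the highest domain whose CPUs
 * still share package resources.
 */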
  1074. DECLARE_PER_CPU(struct sched_domain *, sd_llc);
  1075. DECLARE_PER_CPU(int, sd_llc_size);
  1076. DECLARE_PER_CPU(int, sd_llc_id);
  1077. DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
  1078. DECLARE_PER_CPU(struct sched_domain *, sd_numa);
  1079. DECLARE_PER_CPU(struct sched_domain *, sd_asym);
  1080. DECLARE_PER_CPU(struct sched_domain *, sd_ea);
  1081. DECLARE_PER_CPU(struct sched_domain *, sd_scs);
  1082. extern struct static_key_false sched_asym_cpucapacity;
  1083. struct sched_group_capacity {
  1084. atomic_t ref;
  1085. /*
  1086. * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
  1087. * for a single CPU.
  1088. */
  1089. unsigned long capacity;
  1090. unsigned long min_capacity; /* Min per-CPU capacity in group */
  1091. unsigned long max_capacity; /* Max per-CPU capacity in group */
  1092. unsigned long next_update;
  1093. int imbalance; /* XXX unrelated to capacity but shared group state */
  1094. #ifdef CONFIG_SCHED_DEBUG
  1095. int id;
  1096. #endif
  1097. unsigned long cpumask[0]; /* balance mask */
  1098. };
  1099. struct sched_group {
  1100. struct sched_group *next; /* Must be a circular list */
  1101. atomic_t ref;
  1102. unsigned int group_weight;
  1103. struct sched_group_capacity *sgc;
  1104. int asym_prefer_cpu; /* cpu of highest priority in group */
  1105. const struct sched_group_energy *sge;
  1106. /*
  1107. * The CPUs this group covers.
  1108. *
  1109. * NOTE: this field is variable length. (Allocated dynamically
  1110. * by attaching extra space to the end of the structure,
  1111. * depending on how many CPUs the kernel has booted up with)
  1112. */
  1113. unsigned long cpumask[0];
  1114. };
  1115. static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
  1116. {
  1117. return to_cpumask(sg->cpumask);
  1118. }
  1119. static inline struct cpumask *sched_group_mask(struct sched_group *sg)
  1120. {
  1121. return to_cpumask(sg->sgc->cpumask);
  1122. }
  1123. static inline struct cpumask *sched_group_span(struct sched_group *sg)
  1124. {
  1125. return to_cpumask(sg->cpumask);
  1126. }
  1127. /*
  1128. * See build_balance_mask().
  1129. */
  1130. static inline struct cpumask *group_balance_mask(struct sched_group *sg)
  1131. {
  1132. return to_cpumask(sg->sgc->cpumask);
  1133. }
  1134. /**
  1135. * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
  1136. * @group: The group whose first cpu is to be returned.
  1137. */
  1138. static inline unsigned int group_first_cpu(struct sched_group *group)
  1139. {
  1140. return cpumask_first(sched_group_span(group));
  1141. }
  1142. extern int group_balance_cpu(struct sched_group *sg);
  1143. #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
  1144. void register_sched_domain_sysctl(void);
  1145. void dirty_sched_domain_sysctl(int cpu);
  1146. void unregister_sched_domain_sysctl(void);
  1147. #else
  1148. static inline void register_sched_domain_sysctl(void)
  1149. {
  1150. }
  1151. static inline void dirty_sched_domain_sysctl(int cpu)
  1152. {
  1153. }
  1154. static inline void unregister_sched_domain_sysctl(void)
  1155. {
  1156. }
  1157. #endif
  1158. #else
  1159. static inline void sched_ttwu_pending(void) { }
  1160. #endif /* CONFIG_SMP */
  1161. #include "stats.h"
  1162. #include "autogroup.h"
  1163. #ifdef CONFIG_CGROUP_SCHED
  1164. /*
  1165. * Return the group to which this task belongs.
  1166. *
  1167. * We cannot use task_css() and friends because the cgroup subsystem
  1168. * changes that value before the cgroup_subsys::attach() method is called,
  1169. * therefore we cannot pin it and might observe the wrong value.
  1170. *
  1171. * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
  1172. * core changes this before calling sched_move_task().
  1173. *
  1174. * Instead we use a 'copy' which is updated from sched_move_task() while
  1175. * holding both task_struct::pi_lock and rq::lock.
  1176. */
  1177. static inline struct task_group *task_group(struct task_struct *p)
  1178. {
  1179. return p->sched_task_group;
  1180. }
  1181. /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
  1182. static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
  1183. {
  1184. #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
  1185. struct task_group *tg = task_group(p);
  1186. #endif
  1187. #ifdef CONFIG_FAIR_GROUP_SCHED
  1188. set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
  1189. p->se.cfs_rq = tg->cfs_rq[cpu];
  1190. p->se.parent = tg->se[cpu];
  1191. #endif
  1192. #ifdef CONFIG_RT_GROUP_SCHED
  1193. p->rt.rt_rq = tg->rt_rq[cpu];
  1194. p->rt.parent = tg->rt_se[cpu];
  1195. #endif
  1196. }
  1197. #else /* CONFIG_CGROUP_SCHED */
  1198. static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
  1199. static inline struct task_group *task_group(struct task_struct *p)
  1200. {
  1201. return NULL;
  1202. }
  1203. #endif /* CONFIG_CGROUP_SCHED */
  1204. static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
  1205. {
  1206. set_task_rq(p, cpu);
  1207. #ifdef CONFIG_SMP
  1208. /*
  1209. * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
  1210. * successfully executed on another CPU. We must ensure that updates of
  1211. * per-task data have been completed by this moment.
  1212. */
  1213. smp_wmb();
  1214. #ifdef CONFIG_THREAD_INFO_IN_TASK
  1215. p->cpu = cpu;
  1216. #else
  1217. task_thread_info(p)->cpu = cpu;
  1218. #endif
  1219. p->wake_cpu = cpu;
  1220. #endif
  1221. }
  1222. /*
  1223. * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  1224. */
  1225. #ifdef CONFIG_SCHED_DEBUG
  1226. # include <linux/static_key.h>
  1227. # define const_debug __read_mostly
  1228. #else
  1229. # define const_debug const
  1230. #endif
  1231. extern const_debug unsigned int sysctl_sched_features;
  1232. #define SCHED_FEAT(name, enabled) \
  1233. __SCHED_FEAT_##name ,
  1234. enum {
  1235. #include "features.h"
  1236. __SCHED_FEAT_NR,
  1237. };
  1238. #undef SCHED_FEAT
  1239. #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
  1240. #define SCHED_FEAT(name, enabled) \
  1241. static __always_inline bool static_branch_##name(struct static_key *key) \
  1242. { \
  1243. return static_key_##enabled(key); \
  1244. }
  1245. #include "features.h"
  1246. #undef SCHED_FEAT
  1247. extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
  1248. #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
  1249. #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
  1250. #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
  1251. #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
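/*
 * Worked example (editor's note, not part of the original header): a
 * hypothetical entry in features.h such as
 *
 *	SCHED_FEAT(EXAMPLE_FEAT, true)
 *
 * expands to the enum constant __SCHED_FEAT_EXAMPLE_FEAT and, with
 * SCHED_DEBUG and jump labels, to a static_branch_EXAMPLE_FEAT()
 * helper, so callers simply write:
 *
 *	if (sched_feat(EXAMPLE_FEAT))
 *		do_something();
 *
 * Without jump labels this degrades to a bit test against
 * sysctl_sched_features.
 */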
  1252. extern struct static_key_false sched_numa_balancing;
  1253. extern struct static_key_false sched_schedstats;
  1254. static inline u64 global_rt_period(void)
  1255. {
  1256. return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
  1257. }
  1258. static inline u64 global_rt_runtime(void)
  1259. {
  1260. if (sysctl_sched_rt_runtime < 0)
  1261. return RUNTIME_INF;
  1262. return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
  1263. }
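/*
 * Worked example (editor's note, not part of the original header): with
 * the default sysctl values (sched_rt_period_us = 1000000,
 * sched_rt_runtime_us = 950000) the helpers above return:
 *
 *	global_rt_period()  == 1000000000 ns	(1 s)
 *	global_rt_runtime() ==  950000000 ns	(0.95 s)
 *
 * i.e. RT tasks (and, via admission control, -deadline tasks) may use at
 * most 95% of each CPU.  Writing -1 to sched_rt_runtime_us makes
 * global_rt_runtime() return RUNTIME_INF, disabling the throttling.
 */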
  1264. static inline int task_current(struct rq *rq, struct task_struct *p)
  1265. {
  1266. return rq->curr == p;
  1267. }
  1268. static inline int task_running(struct rq *rq, struct task_struct *p)
  1269. {
  1270. #ifdef CONFIG_SMP
  1271. return p->on_cpu;
  1272. #else
  1273. return task_current(rq, p);
  1274. #endif
  1275. }
  1276. static inline int task_on_rq_queued(struct task_struct *p)
  1277. {
  1278. return p->on_rq == TASK_ON_RQ_QUEUED;
  1279. }
  1280. static inline int task_on_rq_migrating(struct task_struct *p)
  1281. {
  1282. return p->on_rq == TASK_ON_RQ_MIGRATING;
  1283. }
  1284. #ifndef prepare_arch_switch
  1285. # define prepare_arch_switch(next) do { } while (0)
  1286. #endif
  1287. #ifndef finish_arch_post_lock_switch
  1288. # define finish_arch_post_lock_switch() do { } while (0)
  1289. #endif
  1290. static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
  1291. {
  1292. #ifdef CONFIG_SMP
  1293. /*
  1294. * We can optimise this out completely for !SMP, because the
  1295. * SMP rebalancing from interrupt is the only thing that cares
  1296. * here.
  1297. */
  1298. next->on_cpu = 1;
  1299. #endif
  1300. }
  1301. static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
  1302. {
  1303. #ifdef CONFIG_SMP
  1304. /*
  1305. * After ->on_cpu is cleared, the task can be moved to a different CPU.
  1306. * We must ensure this doesn't happen until the switch is completely
  1307. * finished.
  1308. *
  1309. * In particular, the load of prev->state in finish_task_switch() must
  1310. * happen before this.
  1311. *
  1312. * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
  1313. */
  1314. smp_store_release(&prev->on_cpu, 0);
  1315. #endif
  1316. #ifdef CONFIG_DEBUG_SPINLOCK
  1317. /* this is a valid case when another task releases the spinlock */
  1318. rq->lock.owner = current;
  1319. #endif
  1320. /*
  1321. * If we are tracking spinlock dependencies then we have to
  1322. * fix up the runqueue lock - which gets 'carried over' from
  1323. * prev into current:
  1324. */
  1325. spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
  1326. raw_spin_unlock_irq(&rq->lock);
  1327. }
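/*
 * Sketch of the waker side this release pairs with (simplified from
 * try_to_wake_up()):
 *
 *	smp_cond_load_acquire(&p->on_cpu, !VAL);
 *	...
 *	set_task_cpu(p, select_task_rq(...));
 *
 * The waker spins until it observes ->on_cpu == 0 with ACQUIRE semantics,
 * so everything the outgoing CPU did before the smp_store_release() above
 * is visible before the task can be migrated and run elsewhere.
 */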
  1328. /*
  1329. * wake flags
  1330. */
  1331. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  1332. #define WF_FORK 0x02 /* child wakeup after fork */
1333. #define WF_MIGRATED 0x04 /* internal use, task got migrated */
  1334. /*
  1335. * To aid in avoiding the subversion of "niceness" due to uneven distribution
  1336. * of tasks with abnormal "nice" values across CPUs the contribution that
  1337. * each task makes to its run queue's load is weighted according to its
  1338. * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
  1339. * scaled version of the new time slice allocation that they receive on time
  1340. * slice expiry etc.
  1341. */
  1342. #define WEIGHT_IDLEPRIO 3
  1343. #define WMULT_IDLEPRIO 1431655765
  1344. extern const int sched_prio_to_weight[40];
  1345. extern const u32 sched_prio_to_wmult[40];
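/*
 * Both tables are indexed by (nice + 20): nice 0 maps to a weight of 1024
 * and every nice step scales the weight by roughly 1.25, e.g. nice -1 is
 * 1277 and nice +1 is 820. sched_prio_to_wmult[] caches the precomputed
 * inverse 2^32 / weight used by the fair class' fixed-point divisions;
 * likewise WMULT_IDLEPRIO == 2^32 / WEIGHT_IDLEPRIO (4294967296 / 3).
 */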
  1346. /*
  1347. * {de,en}queue flags:
  1348. *
  1349. * DEQUEUE_SLEEP - task is no longer runnable
  1350. * ENQUEUE_WAKEUP - task just became runnable
  1351. *
  1352. * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
  1353. * are in a known state which allows modification. Such pairs
  1354. * should preserve as much state as possible.
  1355. *
  1356. * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
  1357. * in the runqueue.
  1358. *
  1359. * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
  1360. * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
  1361. * ENQUEUE_MIGRATED - the task was migrated during wakeup
  1362. *
  1363. */
  1364. #define DEQUEUE_SLEEP 0x01
  1365. #define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
  1366. #define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
  1367. #define DEQUEUE_NOCLOCK 0x08 /* matches ENQUEUE_NOCLOCK */
  1368. #define ENQUEUE_WAKEUP 0x01
  1369. #define ENQUEUE_RESTORE 0x02
  1370. #define ENQUEUE_MOVE 0x04
  1371. #define ENQUEUE_NOCLOCK 0x08
  1372. #define ENQUEUE_HEAD 0x10
  1373. #define ENQUEUE_REPLENISH 0x20
  1374. #ifdef CONFIG_SMP
  1375. #define ENQUEUE_MIGRATED 0x40
  1376. #else
  1377. #define ENQUEUE_MIGRATED 0x00
  1378. #endif
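/*
 * Typical SAVE/RESTORE usage (a simplified sketch of the attribute-change
 * paths such as __sched_setscheduler()): the task is dequeued, modified
 * and re-enqueued with as much state preserved as possible:
 *
 *	queued = task_on_rq_queued(p);
 *	if (queued)
 *		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
 *	... change policy / priority / group ...
 *	if (queued)
 *		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
 */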
  1379. #define RETRY_TASK ((void *)-1UL)
  1380. struct sched_class {
  1381. const struct sched_class *next;
  1382. #ifdef CONFIG_UCLAMP_TASK
  1383. int uclamp_enabled;
  1384. #endif
  1385. void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
  1386. void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
  1387. void (*yield_task) (struct rq *rq);
  1388. bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
  1389. void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
  1390. /*
  1391. * It is the responsibility of the pick_next_task() method that will
  1392. * return the next task to call put_prev_task() on the @prev task or
  1393. * something equivalent.
  1394. *
  1395. * May return RETRY_TASK when it finds a higher prio class has runnable
  1396. * tasks.
  1397. */
  1398. struct task_struct * (*pick_next_task) (struct rq *rq,
  1399. struct task_struct *prev,
  1400. struct rq_flags *rf);
  1401. void (*put_prev_task) (struct rq *rq, struct task_struct *p);
  1402. #ifdef CONFIG_SMP
  1403. int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags,
1404. int sibling_count_hint);
  1405. void (*migrate_task_rq)(struct task_struct *p);
  1406. void (*task_woken) (struct rq *this_rq, struct task_struct *task);
  1407. void (*set_cpus_allowed)(struct task_struct *p,
  1408. const struct cpumask *newmask);
  1409. void (*rq_online)(struct rq *rq);
  1410. void (*rq_offline)(struct rq *rq);
  1411. #endif
  1412. void (*set_curr_task) (struct rq *rq);
  1413. void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
  1414. void (*task_fork) (struct task_struct *p);
  1415. void (*task_dead) (struct task_struct *p);
  1416. /*
  1417. * The switched_from() call is allowed to drop rq->lock, therefore we
1418. * cannot assume the switched_from/switched_to pair is serialized by
  1419. * rq->lock. They are however serialized by p->pi_lock.
  1420. */
  1421. void (*switched_from) (struct rq *this_rq, struct task_struct *task);
  1422. void (*switched_to) (struct rq *this_rq, struct task_struct *task);
  1423. void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
  1424. int oldprio);
  1425. unsigned int (*get_rr_interval) (struct rq *rq,
  1426. struct task_struct *task);
  1427. void (*update_curr) (struct rq *rq);
  1428. #define TASK_SET_GROUP 0
  1429. #define TASK_MOVE_GROUP 1
  1430. #ifdef CONFIG_FAIR_GROUP_SCHED
  1431. void (*task_change_group) (struct task_struct *p, int type);
  1432. #endif
  1433. #ifdef CONFIG_SCHED_WALT
  1434. void (*fixup_cumulative_runnable_avg)(struct rq *rq,
  1435. struct task_struct *task,
  1436. u64 new_task_load);
  1437. #endif
  1438. };
  1439. static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
  1440. {
  1441. prev->sched_class->put_prev_task(rq, prev);
  1442. }
  1443. static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
  1444. {
  1445. curr->sched_class->set_curr_task(rq);
  1446. }
  1447. #ifdef CONFIG_SMP
  1448. #define sched_class_highest (&stop_sched_class)
  1449. #else
  1450. #define sched_class_highest (&dl_sched_class)
  1451. #endif
  1452. #define for_each_class(class) \
  1453. for (class = sched_class_highest; class; class = class->next)
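/*
 * Illustrative walk of the class list (simplified from the pick_next_task()
 * loop in core.c): classes are tried from sched_class_highest downwards
 * until one of them returns a runnable task:
 *
 *	for_each_class(class) {
 *		p = class->pick_next_task(rq, prev, rf);
 *		if (p) {
 *			if (unlikely(p == RETRY_TASK))
 *				goto again;
 *			return p;
 *		}
 *	}
 */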
  1454. extern const struct sched_class stop_sched_class;
  1455. extern const struct sched_class dl_sched_class;
  1456. extern const struct sched_class rt_sched_class;
  1457. extern const struct sched_class fair_sched_class;
  1458. extern const struct sched_class idle_sched_class;
  1459. #ifdef CONFIG_SMP
  1460. extern void update_group_capacity(struct sched_domain *sd, int cpu);
  1461. extern void trigger_load_balance(struct rq *rq);
  1462. extern void nohz_balance_clear_nohz_mask(int cpu);
  1463. extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
  1464. #endif
  1465. #ifdef CONFIG_CPU_IDLE
  1466. static inline void idle_set_state(struct rq *rq,
  1467. struct cpuidle_state *idle_state)
  1468. {
  1469. rq->idle_state = idle_state;
  1470. }
  1471. static inline struct cpuidle_state *idle_get_state(struct rq *rq)
  1472. {
  1473. SCHED_WARN_ON(!rcu_read_lock_held());
  1474. return rq->idle_state;
  1475. }
  1476. static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
  1477. {
  1478. rq->idle_state_idx = idle_state_idx;
  1479. }
  1480. static inline int idle_get_state_idx(struct rq *rq)
  1481. {
  1482. WARN_ON(!rcu_read_lock_held());
  1483. return rq->idle_state_idx;
  1484. }
  1485. #else
  1486. static inline void idle_set_state(struct rq *rq,
  1487. struct cpuidle_state *idle_state)
  1488. {
  1489. }
  1490. static inline struct cpuidle_state *idle_get_state(struct rq *rq)
  1491. {
  1492. return NULL;
  1493. }
  1494. static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
  1495. {
  1496. }
  1497. static inline int idle_get_state_idx(struct rq *rq)
  1498. {
  1499. return -1;
  1500. }
  1501. #endif
  1502. extern void schedule_idle(void);
  1503. extern void sysrq_sched_debug_show(void);
  1504. extern void sched_init_granularity(void);
  1505. extern void update_max_interval(void);
  1506. extern void init_sched_dl_class(void);
  1507. extern void init_sched_rt_class(void);
  1508. extern void init_sched_fair_class(void);
  1509. extern void resched_curr(struct rq *rq);
  1510. extern void resched_cpu(int cpu);
  1511. extern struct rt_bandwidth def_rt_bandwidth;
  1512. extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
  1513. extern struct dl_bandwidth def_dl_bandwidth;
  1514. extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
  1515. extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
  1516. extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
  1517. extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
  1518. #define BW_SHIFT 20
  1519. #define BW_UNIT (1 << BW_SHIFT)
  1520. #define RATIO_SHIFT 8
  1521. unsigned long to_ratio(u64 period, u64 runtime);
  1522. extern void init_entity_runnable_average(struct sched_entity *se);
  1523. extern void post_init_entity_util_avg(struct sched_entity *se);
  1524. #ifdef CONFIG_NO_HZ_FULL
  1525. extern bool sched_can_stop_tick(struct rq *rq);
  1526. /*
  1527. * Tick may be needed by tasks in the runqueue depending on their policy and
1528. * requirements. If the tick is needed, let's send the target an IPI to kick it out of
  1529. * nohz mode if necessary.
  1530. */
  1531. static inline void sched_update_tick_dependency(struct rq *rq)
  1532. {
  1533. int cpu;
  1534. if (!tick_nohz_full_enabled())
  1535. return;
  1536. cpu = cpu_of(rq);
  1537. if (!tick_nohz_full_cpu(cpu))
  1538. return;
  1539. if (sched_can_stop_tick(rq))
  1540. tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
  1541. else
  1542. tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
  1543. }
  1544. #else
  1545. static inline void sched_update_tick_dependency(struct rq *rq) { }
  1546. #endif
  1547. #ifdef CONFIG_MTK_SCHED_RQAVG_KS
  1548. extern void sched_update_nr_prod(int cpu, unsigned long nr_running, int inc);
  1549. extern void sched_max_util_task(int *cpu, int *pid, int *util, int *boost);
  1550. extern void sched_max_util_task_tracking(void);
  1551. #endif
  1552. #ifdef CONFIG_MTK_SCHED_RQAVG_US
  1553. extern int
  1554. inc_nr_heavy_running(int invoker, struct task_struct *p, int inc, bool ack_cap);
  1555. #ifdef CONFIG_MTK_SCHED_CPULOAD
  1556. extern void cal_cpu_load(int cpu);
  1557. #endif
  1558. #endif
  1559. static inline void add_nr_running(struct rq *rq, unsigned count)
  1560. {
  1561. unsigned prev_nr = rq->nr_running;
  1562. #ifdef CONFIG_MTK_SCHED_RQAVG_KS
  1563. sched_update_nr_prod(cpu_of(rq), rq->nr_running, count);
  1564. #endif
  1565. rq->nr_running = prev_nr + count;
  1566. if (prev_nr < 2 && rq->nr_running >= 2) {
  1567. #ifdef CONFIG_SMP
  1568. if (!READ_ONCE(rq->rd->overload))
  1569. WRITE_ONCE(rq->rd->overload, 1);
  1570. #endif
  1571. }
  1572. sched_update_tick_dependency(rq);
  1573. }
  1574. static inline void sub_nr_running(struct rq *rq, unsigned count)
  1575. {
  1576. #ifdef CONFIG_MTK_SCHED_RQAVG_KS
  1577. sched_update_nr_prod(cpu_of(rq), rq->nr_running, -count);
  1578. #endif
  1579. rq->nr_running -= count;
  1580. /* Check if we still need preemption */
  1581. sched_update_tick_dependency(rq);
  1582. }
  1583. static inline void rq_last_tick_reset(struct rq *rq)
  1584. {
  1585. #ifdef CONFIG_NO_HZ_FULL
  1586. rq->last_sched_tick = jiffies;
  1587. #endif
  1588. }
  1589. extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
  1590. extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
  1591. extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
  1592. extern const_debug unsigned int sysctl_sched_time_avg;
  1593. extern const_debug unsigned int sysctl_sched_nr_migrate;
  1594. extern const_debug unsigned int sysctl_sched_migration_cost;
  1595. static inline u64 sched_avg_period(void)
  1596. {
  1597. return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
  1598. }
  1599. #ifdef CONFIG_SCHED_HRTICK
  1600. /*
  1601. * Use hrtick when:
  1602. * - enabled by features
  1603. * - hrtimer is actually high res
  1604. */
  1605. static inline int hrtick_enabled(struct rq *rq)
  1606. {
  1607. if (!sched_feat(HRTICK))
  1608. return 0;
  1609. if (!cpu_active(cpu_of(rq)))
  1610. return 0;
  1611. return hrtimer_is_hres_active(&rq->hrtick_timer);
  1612. }
  1613. void hrtick_start(struct rq *rq, u64 delay);
  1614. #else
  1615. static inline int hrtick_enabled(struct rq *rq)
  1616. {
  1617. return 0;
  1618. }
  1619. #endif /* CONFIG_SCHED_HRTICK */
  1620. #ifdef CONFIG_SMP
  1621. extern void sched_avg_update(struct rq *rq);
  1622. extern unsigned long sched_get_rt_rq_util(int cpu);
  1623. #ifndef arch_scale_freq_capacity
  1624. static __always_inline
  1625. unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
  1626. {
  1627. return SCHED_CAPACITY_SCALE;
  1628. }
  1629. #endif
  1630. #ifndef arch_scale_max_freq_capacity
  1631. static __always_inline
  1632. unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
  1633. {
  1634. return SCHED_CAPACITY_SCALE;
  1635. }
  1636. #endif
  1637. #ifndef arch_scale_cpu_capacity
  1638. static __always_inline
  1639. unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
  1640. {
  1641. if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
  1642. return sd->smt_gain / sd->span_weight;
  1643. return SCHED_CAPACITY_SCALE;
  1644. }
  1645. #endif
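/*
 * Example: on an SMT domain (SD_SHARE_CPUCAPACITY) with two hardware
 * threads and the default smt_gain of 1178 (~15% above a single thread),
 * the fallback above reports 1178 / 2 = 589 per thread, i.e. a bit more
 * than half of SCHED_CAPACITY_SCALE (1024). Architectures with real
 * frequency/capacity information override these hooks instead.
 */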
  1646. #ifdef CONFIG_SMP
  1647. static inline unsigned long capacity_of(int cpu)
  1648. {
  1649. return cpu_rq(cpu)->cpu_capacity;
  1650. }
  1651. static inline unsigned long capacity_orig_of(int cpu)
  1652. {
  1653. return cpu_rq(cpu)->cpu_capacity_orig;
  1654. }
  1655. extern unsigned int sysctl_sched_use_walt_cpu_util;
  1656. extern unsigned int walt_ravg_window;
  1657. extern bool walt_disabled;
  1658. #endif /* CONFIG_SMP */
  1659. #ifdef CONFIG_MEDIATEK_SOLUTION
  1660. extern void update_sched_hint(int sys_util, int sys_cap);
  1661. extern void sched_hint_check(u64 wallclock);
  1662. #else
  1663. #define update_sched_hint(sys_util, sys_cap) {}
  1664. #define sched_hint_check(wallclock) {}
  1665. #endif
  1666. static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
  1667. {
  1668. rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
  1669. sched_avg_update(rq);
  1670. }
  1671. #else
  1672. static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
  1673. static inline void sched_avg_update(struct rq *rq) { }
  1674. #endif
  1675. #ifdef CONFIG_SMP
  1676. #ifdef CONFIG_PREEMPT
  1677. static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
  1678. /*
  1679. * fair double_lock_balance: Safely acquires both rq->locks in a fair
  1680. * way at the expense of forcing extra atomic operations in all
  1681. * invocations. This assures that the double_lock is acquired using the
  1682. * same underlying policy as the spinlock_t on this architecture, which
  1683. * reduces latency compared to the unfair variant below. However, it
  1684. * also adds more overhead and therefore may reduce throughput.
  1685. */
  1686. static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
  1687. __releases(this_rq->lock)
  1688. __acquires(busiest->lock)
  1689. __acquires(this_rq->lock)
  1690. {
  1691. raw_spin_unlock(&this_rq->lock);
  1692. double_rq_lock(this_rq, busiest);
  1693. return 1;
  1694. }
  1695. #else
  1696. /*
  1697. * Unfair double_lock_balance: Optimizes throughput at the expense of
  1698. * latency by eliminating extra atomic operations when the locks are
  1699. * already in proper order on entry. This favors lower cpu-ids and will
  1700. * grant the double lock to lower cpus over higher ids under contention,
  1701. * regardless of entry order into the function.
  1702. */
  1703. static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
  1704. __releases(this_rq->lock)
  1705. __acquires(busiest->lock)
  1706. __acquires(this_rq->lock)
  1707. {
  1708. int ret = 0;
  1709. if (unlikely(!raw_spin_trylock(&busiest->lock))) {
  1710. if (busiest < this_rq) {
  1711. raw_spin_unlock(&this_rq->lock);
  1712. raw_spin_lock(&busiest->lock);
  1713. raw_spin_lock_nested(&this_rq->lock,
  1714. SINGLE_DEPTH_NESTING);
  1715. ret = 1;
  1716. } else
  1717. raw_spin_lock_nested(&busiest->lock,
  1718. SINGLE_DEPTH_NESTING);
  1719. }
  1720. return ret;
  1721. }
  1722. #endif /* CONFIG_PREEMPT */
  1723. /*
  1724. * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
  1725. */
  1726. static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
  1727. {
  1728. if (unlikely(!irqs_disabled())) {
1729. /* printk() doesn't work well under rq->lock */
  1730. raw_spin_unlock(&this_rq->lock);
  1731. BUG_ON(1);
  1732. }
  1733. return _double_lock_balance(this_rq, busiest);
  1734. }
  1735. static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
  1736. __releases(busiest->lock)
  1737. {
  1738. raw_spin_unlock(&busiest->lock);
  1739. lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
  1740. }
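/*
 * Typical balancing pattern (sketch, as in the RT/DL push-pull paths):
 * this_rq->lock is already held and the target rq is taken on top of it.
 * A non-zero return from double_lock_balance() means this_rq->lock was
 * dropped and re-acquired, so cached state must be revalidated:
 *
 *	if (double_lock_balance(this_rq, busiest))
 *		... recheck this_rq state ...
 *	... move tasks between the two runqueues ...
 *	double_unlock_balance(this_rq, busiest);
 */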
  1741. static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
  1742. {
  1743. if (l1 > l2)
  1744. swap(l1, l2);
  1745. spin_lock(l1);
  1746. spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
  1747. }
  1748. static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
  1749. {
  1750. if (l1 > l2)
  1751. swap(l1, l2);
  1752. spin_lock_irq(l1);
  1753. spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
  1754. }
  1755. static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
  1756. {
  1757. if (l1 > l2)
  1758. swap(l1, l2);
  1759. raw_spin_lock(l1);
  1760. raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
  1761. }
  1762. /*
  1763. * double_rq_lock - safely lock two runqueues
  1764. *
  1765. * Note this does not disable interrupts like task_rq_lock,
  1766. * you need to do so manually before calling.
  1767. */
  1768. static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
  1769. __acquires(rq1->lock)
  1770. __acquires(rq2->lock)
  1771. {
  1772. BUG_ON(!irqs_disabled());
  1773. if (rq1 == rq2) {
  1774. raw_spin_lock(&rq1->lock);
  1775. __acquire(rq2->lock); /* Fake it out ;) */
  1776. } else {
  1777. if (rq1 < rq2) {
  1778. raw_spin_lock(&rq1->lock);
  1779. raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
  1780. } else {
  1781. raw_spin_lock(&rq2->lock);
  1782. raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
  1783. }
  1784. }
  1785. }
  1786. /*
  1787. * double_rq_unlock - safely unlock two runqueues
  1788. *
  1789. * Note this does not restore interrupts like task_rq_unlock,
  1790. * you need to do so manually after calling.
  1791. */
  1792. static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
  1793. __releases(rq1->lock)
  1794. __releases(rq2->lock)
  1795. {
  1796. raw_spin_unlock(&rq1->lock);
  1797. if (rq1 != rq2)
  1798. raw_spin_unlock(&rq2->lock);
  1799. else
  1800. __release(rq2->lock);
  1801. }
  1802. extern void set_rq_online (struct rq *rq);
  1803. extern void set_rq_offline(struct rq *rq);
  1804. extern bool sched_smp_initialized;
  1805. #else /* CONFIG_SMP */
  1806. /*
  1807. * double_rq_lock - safely lock two runqueues
  1808. *
  1809. * Note this does not disable interrupts like task_rq_lock,
  1810. * you need to do so manually before calling.
  1811. */
  1812. static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
  1813. __acquires(rq1->lock)
  1814. __acquires(rq2->lock)
  1815. {
  1816. BUG_ON(!irqs_disabled());
  1817. BUG_ON(rq1 != rq2);
  1818. raw_spin_lock(&rq1->lock);
  1819. __acquire(rq2->lock); /* Fake it out ;) */
  1820. }
  1821. /*
  1822. * double_rq_unlock - safely unlock two runqueues
  1823. *
  1824. * Note this does not restore interrupts like task_rq_unlock,
  1825. * you need to do so manually after calling.
  1826. */
  1827. static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
  1828. __releases(rq1->lock)
  1829. __releases(rq2->lock)
  1830. {
  1831. BUG_ON(rq1 != rq2);
  1832. raw_spin_unlock(&rq1->lock);
  1833. __release(rq2->lock);
  1834. }
  1835. #endif
  1836. extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
  1837. extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
  1838. #ifdef CONFIG_SCHED_DEBUG
  1839. extern bool sched_debug_enabled;
  1840. extern void print_cfs_stats(struct seq_file *m, int cpu);
  1841. extern void print_rt_stats(struct seq_file *m, int cpu);
  1842. extern void print_dl_stats(struct seq_file *m, int cpu);
  1843. extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
  1844. extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
  1845. extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
  1846. #ifdef CONFIG_NUMA_BALANCING
  1847. extern void
  1848. show_numa_stats(struct task_struct *p, struct seq_file *m);
  1849. extern void
  1850. print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
  1851. unsigned long tpf, unsigned long gsf, unsigned long gpf);
  1852. #endif /* CONFIG_NUMA_BALANCING */
  1853. #endif /* CONFIG_SCHED_DEBUG */
  1854. extern void init_cfs_rq(struct cfs_rq *cfs_rq);
  1855. extern void init_rt_rq(struct rt_rq *rt_rq);
  1856. extern void init_dl_rq(struct dl_rq *dl_rq);
  1857. extern void cfs_bandwidth_usage_inc(void);
  1858. extern void cfs_bandwidth_usage_dec(void);
  1859. #ifdef CONFIG_NO_HZ_COMMON
  1860. enum rq_nohz_flag_bits {
  1861. NOHZ_TICK_STOPPED,
  1862. NOHZ_BALANCE_KICK,
  1863. NOHZ_STATS_KICK
  1864. };
  1865. #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
  1866. extern void nohz_balance_exit_idle(unsigned int cpu);
  1867. #else
  1868. static inline void nohz_balance_exit_idle(unsigned int cpu) { }
  1869. #endif
  1870. #ifdef CONFIG_SMP
  1871. extern void init_energy_aware_data(int cpu);
  1872. static inline
  1873. void __dl_update(struct dl_bw *dl_b, s64 bw)
  1874. {
  1875. struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
  1876. int i;
  1877. RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
  1878. "sched RCU must be held");
  1879. for_each_cpu_and(i, rd->span, cpu_active_mask) {
  1880. struct rq *rq = cpu_rq(i);
  1881. rq->dl.extra_bw += bw;
  1882. }
  1883. }
  1884. #else
  1885. static inline
  1886. void __dl_update(struct dl_bw *dl_b, s64 bw)
  1887. {
  1888. struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
  1889. dl->extra_bw += bw;
  1890. }
  1891. #endif
  1892. #ifdef CONFIG_IRQ_TIME_ACCOUNTING
  1893. struct irqtime {
  1894. u64 total;
  1895. u64 tick_delta;
  1896. u64 irq_start_time;
  1897. struct u64_stats_sync sync;
  1898. };
  1899. DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
  1900. /*
  1901. * Returns the irqtime minus the softirq time computed by ksoftirqd.
1902. * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime subtracted
1903. * and would never move forward.
  1904. */
  1905. static inline u64 irq_time_read(int cpu)
  1906. {
  1907. struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
  1908. unsigned int seq;
  1909. u64 total;
  1910. do {
  1911. seq = __u64_stats_fetch_begin(&irqtime->sync);
  1912. total = irqtime->total;
  1913. } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
  1914. return total;
  1915. }
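/*
 * The matching writer (see kernel/sched/cputime.c) brackets its updates
 * with the same u64_stats seqcount, roughly:
 *
 *	u64_stats_update_begin(&irqtime->sync);
 *	irqtime->total += delta;
 *	irqtime->tick_delta += delta;
 *	u64_stats_update_end(&irqtime->sync);
 *
 * which is what lets irq_time_read() retry instead of taking a lock and
 * keeps the 64-bit counters consistent on 32-bit architectures.
 */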
  1916. #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
  1917. /* sched: add for print aee log */
  1918. #ifdef CONFIG_SMP
  1919. static inline int rq_cpu(const struct rq *rq) { return rq->cpu; }
  1920. #else
  1921. static inline int rq_cpu(const struct rq *rq) { return 0; }
  1922. #endif
  1923. #ifdef CONFIG_CPU_FREQ
  1924. DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
  1925. /**
  1926. * cpufreq_update_util - Take a note about CPU utilization changes.
  1927. * @rq: Runqueue to carry out the update for.
  1928. * @flags: Update reason flags.
  1929. *
  1930. * This function is called by the scheduler on the CPU whose utilization is
  1931. * being updated.
  1932. *
  1933. * It can only be called from RCU-sched read-side critical sections.
  1934. *
  1935. * The way cpufreq is currently arranged requires it to evaluate the CPU
  1936. * performance state (frequency/voltage) on a regular basis to prevent it from
  1937. * being stuck in a completely inadequate performance level for too long.
  1938. * That is not guaranteed to happen if the updates are only triggered from CFS,
  1939. * though, because they may not be coming in if RT or deadline tasks are active
  1940. * all the time (or there are RT and DL tasks only).
  1941. *
  1942. * As a workaround for that issue, this function is called by the RT and DL
  1943. * sched classes to trigger extra cpufreq updates to prevent it from stalling,
  1944. * but that really is a band-aid. Going forward it should be replaced with
  1945. * solutions targeted more specifically at RT and DL tasks.
  1946. */
  1947. static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
  1948. {
  1949. struct update_util_data *data;
  1950. data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
  1951. cpu_of(rq)));
  1952. if (data)
  1953. data->func(data, rq_clock(rq), flags);
  1954. }
  1955. #else
  1956. static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
  1957. #endif /* CONFIG_CPU_FREQ */
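/*
 * Typical call sites pass a hint describing why utilization changed; the
 * flag names below come from <linux/sched/cpufreq.h> (sketch):
 *
 *	cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);	(CFS iowait boost)
 *	cpufreq_update_util(rq, SCHED_CPUFREQ_RT);	(an RT task is runnable)
 *
 * The registered governor callback (data->func) then decides whether to
 * re-evaluate the CPU's frequency. Holding rq->lock (IRQs disabled) already
 * satisfies the RCU-sched read-side requirement documented above.
 */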
  1958. extern unsigned int uclamp_task_effective_util(struct task_struct *p,
  1959. unsigned int clamp_id);
  1960. extern unsigned int uclamp_task_util(struct task_struct *p,
  1961. unsigned int clamp_id);
  1962. #if defined(CONFIG_UCLAMP_TASK_GROUP) && defined(CONFIG_SCHED_TUNE)
  1963. extern void schedtune_init_uclamp(void);
  1964. extern struct uclamp_se *task_schedtune_uclamp(struct task_struct *tsk,
  1965. int clamp_id);
  1966. #endif
  1967. /**
  1968. * uclamp_none: default value for a clamp
  1969. *
  1970. * This returns the default value for each clamp
  1971. * - 0 for a min utilization clamp
  1972. * - SCHED_CAPACITY_SCALE for a max utilization clamp
  1973. *
  1974. * Return: the default value for a given utilization clamp
  1975. */
  1976. static inline unsigned int uclamp_none(int clamp_id)
  1977. {
  1978. if (clamp_id == UCLAMP_MIN)
  1979. return 0;
  1980. return SCHED_CAPACITY_SCALE;
  1981. }
  1982. #ifdef CONFIG_UCLAMP_TASK
  1983. static inline unsigned int uclamp_value(unsigned int cpu, int clamp_id)
  1984. {
  1985. return cpu_rq(cpu)->uclamp.value[clamp_id];
  1986. }
  1987. /**
  1988. * uclamp_util: clamp a utilization value for a specified CPU
  1989. * @rq: the CPU's RQ to get the clamp values from
  1990. * @util: the utilization signal to clamp
  1991. *
  1992. * Each CPU tracks util_{min,max} clamp values depending on the set of its
1993. * currently RUNNABLE tasks. Given a utilization signal, i.e. a signal in
  1994. * the [0..SCHED_CAPACITY_SCALE] range, this function returns a clamped
  1995. * utilization signal considering the current clamp values for the
  1996. * specified CPU.
  1997. *
  1998. * Return: a clamped utilization signal for a given CPU.
  1999. */
  2000. static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
  2001. {
  2002. unsigned int min_util = rq->uclamp.value[UCLAMP_MIN];
  2003. unsigned int max_util = rq->uclamp.value[UCLAMP_MAX];
  2004. return clamp(util, min_util, max_util);
  2005. }
  2006. #else /* CONFIG_UCLAMP_TASK */
  2007. static inline unsigned int uclamp_value(unsigned int cpu, int clamp_id)
  2008. {
  2009. return uclamp_none(clamp_id);
  2010. }
  2011. static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
  2012. {
  2013. return util;
  2014. }
  2015. #endif /* CONFIG_UCLAMP_TASK */
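/*
 * Worked example: if the RUNNABLE tasks on a CPU currently impose
 * util_min = 256 and util_max = 768, then uclamp_util() maps
 *	util = 100 -> 256	(boosted up to the min clamp)
 *	util = 500 -> 500	(already inside the clamp range)
 *	util = 900 -> 768	(capped at the max clamp)
 * With CONFIG_UCLAMP_TASK disabled the signal is passed through unchanged.
 */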
  2016. #ifdef CONFIG_SCHED_WALT
  2017. static inline bool
  2018. walt_task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
  2019. {
  2020. return cpu_of(rq) == task_cpu(p) &&
  2021. (p->on_rq || p->last_sleep_ts >= rq->window_start);
  2022. }
  2023. #endif /* CONFIG_SCHED_WALT */
  2024. #ifdef arch_scale_freq_capacity
  2025. #ifndef arch_scale_freq_invariant
  2026. #define arch_scale_freq_invariant() (true)
  2027. #endif
  2028. #else /* arch_scale_freq_capacity */
  2029. #define arch_scale_freq_invariant() (false)
  2030. #endif
  2031. #include "sched_plus.h"