cpufreq_schedutil.c

/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpufreq.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/slab.h>
#include <trace/events/power.h>
#include <trace/events/sched.h>

#include "sched.h"
#include "tune.h"
#include "cpufreq_schedutil.h"

static struct cpufreq_governor schedutil_gov;

unsigned long boosted_cpu_util(int cpu);

void (*cpufreq_notifier_fp)(int cluster_id, unsigned long freq);
EXPORT_SYMBOL(cpufreq_notifier_fp);

#define SUGOV_KTHREAD_PRIORITY	50

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int up_rate_limit_us;
	unsigned int down_rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;  /* For shared policies */
	u64 last_freq_update_time;
	s64 min_rate_limit_ns;
	s64 up_rate_delay_ns;
	s64 down_rate_delay_ns;
	unsigned int next_freq;
	unsigned int cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used. */
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
	bool work_in_progress;

	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;
	unsigned int cpu;

	bool iowait_boost_pending;
	unsigned int iowait_boost;
	unsigned int iowait_boost_max;
	u64 last_update;

	/* The fields below are only needed when sharing a policy. */
	unsigned long util;
	unsigned long max;
	unsigned int flags;
	unsigned long min_boost;

	/* The field below is for single-CPU policies only. */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;
	struct cpufreq_policy *policy = sg_policy->policy;

	if (policy->governor != &schedutil_gov ||
	    !policy->governor_data)
		return false;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-cpu data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-cpu
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * For the slow switching platforms, the kthread is always scheduled on
	 * the right set of CPUs and any CPU can find the next frequency and
	 * schedule the kthread.
	 */
	if (sg_policy->policy->fast_switch_enabled &&
	    !cpufreq_can_do_remote_dvfs(sg_policy->policy))
		return false;

	if (sg_policy->work_in_progress)
		return false;

	if (unlikely(sg_policy->need_freq_update)) {
		sg_policy->need_freq_update = false;
		/*
		 * This happens when limits change, so forget the previous
		 * next_freq value and force an update.
		 */
		sg_policy->next_freq = UINT_MAX;
		return true;
	}

	/*
	 * No need to recalculate next freq for min_rate_limit_us
	 * at least. However we might still decide to further rate
	 * limit once frequency change direction is decided, according
	 * to the separate rate limits.
	 */
	delta_ns = time - sg_policy->last_freq_update_time;
	return delta_ns >= sg_policy->min_rate_limit_ns;
}

static bool sugov_up_down_rate_limit(struct sugov_policy *sg_policy, u64 time,
				     unsigned int next_freq)
{
	s64 delta_ns;

	delta_ns = time - sg_policy->last_freq_update_time;

	if (next_freq > sg_policy->next_freq &&
	    delta_ns < sg_policy->up_rate_delay_ns)
		return true;

	if (next_freq < sg_policy->next_freq &&
	    delta_ns < sg_policy->down_rate_delay_ns)
		return true;

	return false;
}
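
/*
 * Illustrative example (values are hypothetical, not defaults): with
 * up_rate_limit_us = 500 and down_rate_limit_us = 20000, a request to *raise*
 * the frequency is dropped if it arrives less than 500 us after the last
 * committed change, while a request to *lower* it is dropped for 20000 us.
 * min_rate_limit_ns used in sugov_should_update_freq() is the smaller of the
 * two (here 500 us), so recalculation is skipped entirely only inside that
 * shorter window.
 */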
static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	int cid = arch_get_cluster_id(policy->cpu);

	if (sg_policy->next_freq == next_freq)
		return;

	if (sugov_up_down_rate_limit(sg_policy, time, next_freq))
		return;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	if (cpufreq_notifier_fp)
		cpufreq_notifier_fp(cid, next_freq);

#ifdef CONFIG_MTK_TINYSYS_SSPM_SUPPORT
	mt_cpufreq_set_by_wfi_load_cluster(cid, next_freq);
	policy->cur = next_freq;
	trace_sched_util(cid, next_freq, time);
#else
	if (policy->fast_switch_enabled) {
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (!next_freq)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
#endif
}
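
/*
 * Commit paths at a glance: with CONFIG_MTK_TINYSYS_SSPM_SUPPORT the request
 * is handed to the platform code via mt_cpufreq_set_by_wfi_load_cluster() and
 * policy->cur is updated here. Otherwise, fast-switch capable drivers are
 * programmed directly from this (scheduler) context, while slow-switch
 * drivers defer the change to the "sugov" kthread: irq_work_queue() ->
 * sugov_irq_work() -> sugov_work(), which finally calls
 * __cpufreq_driver_target() under work_lock.
 */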
#ifdef CONFIG_NONLINEAR_FREQ_CTL
#include "cpufreq_schedutil_plus.c"
#else
/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal or greater than the raw
 * next_freq (as calculated above) is returned, subject to policy min/max and
 * cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

	freq = freq * util / max;
	freq = freq / SCHED_CAPACITY_SCALE * capacity_margin;

	sg_policy->cached_raw_freq = freq;

#ifdef CONFIG_MTK_TINYSYS_SSPM_SUPPORT
	return freq;
#else
	return cpufreq_driver_resolve_freq(policy, freq);
#endif
}
#endif
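
/*
 * Worked example (illustrative numbers): on a frequency-invariant system with
 * cpuinfo.max_freq = 2000000 kHz, util = 512 and max = 1024, the code above
 * yields freq = 2000000 * 512 / 1024 = 1000000 kHz, then applies the margin as
 * 1000000 / 1024 * 1280 = 1249280 kHz, assuming capacity_margin holds the
 * common EAS value of 1280 (1.25 in SCHED_CAPACITY_SCALE units). The non-SSPM
 * build then lets cpufreq_driver_resolve_freq() round this up to the nearest
 * supported OPP. Note the divide-before-multiply order introduces a small
 * truncation error (well under 0.1% at MHz-scale kHz values).
 */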
static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
{
	unsigned long max_cap;

	max_cap = arch_scale_cpu_capacity(NULL, cpu);

	*util = boosted_cpu_util(cpu);
	if (idle_cpu(cpu))
		*util = 0;

	*util = min(*util, max_cap);
	*max = max_cap;
}

static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
				   unsigned int flags)
{
	unsigned int max_boost;

	if (flags & SCHED_CPUFREQ_IOWAIT) {
		if (sg_cpu->iowait_boost_pending)
			return;

		sg_cpu->iowait_boost_pending = true;

		/*
		 * Boost FAIR tasks only up to the CPU clamped utilization.
		 *
		 * Since DL tasks have a much more advanced bandwidth control,
		 * it's safe to assume that IO boost does not apply to
		 * those tasks.
		 * Instead, since RT tasks are currently not utilization clamped,
		 * we don't want to apply clamping on IO boost while there is
		 * blocked RT utilization.
		 */
		max_boost = sg_cpu->iowait_boost_max;
		max_boost = uclamp_util(cpu_rq(sg_cpu->cpu), max_boost);

		if (sg_cpu->iowait_boost) {
			sg_cpu->iowait_boost <<= 1;
			if (sg_cpu->iowait_boost > max_boost)
				sg_cpu->iowait_boost = max_boost;
		} else {
			sg_cpu->iowait_boost = sg_cpu->min_boost;
		}
	} else if (sg_cpu->iowait_boost) {
		s64 delta_ns = time - sg_cpu->last_update;

		/* Clear iowait_boost if the CPU appears to have been idle. */
		if (delta_ns > TICK_NSEC) {
			sg_cpu->iowait_boost = 0;
			sg_cpu->iowait_boost_pending = false;
		}
	}
}
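
/*
 * Boost ramp example (illustrative): the first SCHED_CPUFREQ_IOWAIT wakeup
 * seeds iowait_boost with min_boost; each further iowait wakeup in a later
 * update doubles it (min_boost, 2*min_boost, 4*min_boost, ...) until it is
 * clamped to the uclamp-limited iowait_boost_max. In sugov_iowait_boost()
 * below, every update that consumes the boost without a pending refresh
 * halves it again, and the boost is dropped once it falls below min_boost or
 * the CPU stays idle for more than a tick.
 */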
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
			       unsigned long *max)
{
	unsigned int boost_util, boost_max;

	if (!sg_cpu->iowait_boost)
		return;

	if (sg_cpu->iowait_boost_pending) {
		sg_cpu->iowait_boost_pending = false;
	} else {
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < sg_cpu->min_boost) {
			sg_cpu->iowait_boost = 0;
			return;
		}
	}

	boost_util = sg_cpu->iowait_boost;
	boost_max = sg_cpu->iowait_boost_max;

	if (*util * boost_max < *max * boost_util) {
		*util = boost_util;
		*max = boost_max;
	}
}

#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */
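
/*
 * Busy detection note: sugov_cpu_is_busy() samples the nohz idle-entry counter
 * and reports "busy" when it has not advanced since the previous evaluation,
 * i.e. the CPU never went idle in between. sugov_update_single() uses this to
 * refuse frequency reductions on such CPUs, since the utilization dip is
 * likely to be transient. Without CONFIG_NO_HZ_COMMON the check is compiled
 * out and always reports "not busy".
 */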
static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util, max;
	unsigned int next_f;
	bool busy;
#ifdef CONFIG_MTK_TINYSYS_SSPM_SUPPORT
	int cid;
#endif

	sugov_set_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	busy = sugov_cpu_is_busy(sg_cpu);

	if (flags & SCHED_CPUFREQ_DL) {
		next_f = policy->cpuinfo.max_freq;
	} else {
		sugov_get_util(&util, &max, sg_cpu->cpu);
		util = uclamp_util(cpu_rq(sg_cpu->cpu), util);
		sugov_iowait_boost(sg_cpu, &util, &max);
		next_f = get_next_freq(sg_policy, util, max);
#ifdef CONFIG_MTK_TINYSYS_SSPM_SUPPORT
		next_f = clamp_val(next_f, policy->min, policy->max);
		cid = arch_get_cluster_id(sg_policy->policy->cpu);
		next_f = mt_cpufreq_find_close_freq(cid, next_f);
#endif
		/*
		 * Do not reduce the frequency if the CPU has not been idle
		 * recently, as the reduction is likely to be premature then.
		 */
		if (busy && next_f < sg_policy->next_freq &&
		    sg_policy->next_freq != UINT_MAX) {
			next_f = sg_policy->next_freq;

			/* Reset cached freq as next_freq has changed */
			sg_policy->cached_raw_freq = 0;
		}
	}

	sugov_update_commit(sg_policy, time, next_f);
}

static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max = 1;
	unsigned int j;
	unsigned int next_f;
#ifdef CONFIG_MTK_TINYSYS_SSPM_SUPPORT
	int cid;
#endif

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long j_util, j_max;
		s64 delta_ns;

		/*
		 * If the CPU utilization was last updated before the previous
		 * frequency update and the time elapsed between the last update
		 * of the CPU utilization and the last frequency update is long
		 * enough, don't take the CPU into account as it probably is
		 * idle now (and clear iowait_boost for it).
		 */
		delta_ns = time - j_sg_cpu->last_update;
		if (delta_ns > TICK_NSEC) {
			j_sg_cpu->iowait_boost = 0;
			j_sg_cpu->iowait_boost_pending = false;
			if (idle_cpu(j))
				continue;
		}
		if (j_sg_cpu->flags & SCHED_CPUFREQ_DL)
			return policy->cpuinfo.max_freq;

		j_util = j_sg_cpu->util;
		j_max = j_sg_cpu->max;
#ifdef CONFIG_UCLAMP_TASK
		trace_schedutil_uclamp_util(j, j_util);
#endif
		j_util = uclamp_util(cpu_rq(j), j_util);
		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}

		sugov_iowait_boost(j_sg_cpu, &util, &max);
	}

	next_f = get_next_freq(sg_policy, util, max);
#ifdef CONFIG_MTK_TINYSYS_SSPM_SUPPORT
	next_f = clamp_val(next_f, policy->min, policy->max);
	cid = arch_get_cluster_id(sg_policy->policy->cpu);
	next_f = mt_cpufreq_find_close_freq(cid, next_f);
#endif
	return next_f;
}
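
/*
 * Aggregation note: both sugov_iowait_boost() and the loop above compare
 * utilization ratios without division. "j_util * max > j_max * util" is the
 * cross-multiplied form of j_util / j_max > util / max, so the shared policy
 * ends up tracking the CPU with the highest relative utilization; e.g.
 * util = 300 on a capacity-512 CPU (0.59) wins over util = 400 on a
 * capacity-1024 CPU (0.39). A single CPU running DL work short-circuits all
 * of this and pins the shared policy at cpuinfo.max_freq.
 */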
static void sugov_update_shared(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned long util, max;
	unsigned int next_f;

	sugov_get_util(&util, &max, sg_cpu->cpu);

	raw_spin_lock(&sg_policy->update_lock);

	sg_cpu->util = util;
	sg_cpu->max = max;
	sg_cpu->flags = flags;

	sugov_set_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	if (sugov_should_update_freq(sg_policy, time)) {
		if (flags & SCHED_CPUFREQ_DL)
			next_f = sg_policy->policy->cpuinfo.max_freq;
		else
			next_f = sugov_next_freq_shared(sg_cpu, time);

		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
				CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);

	sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	/*
	 * For RT and deadline tasks, the schedutil governor shoots the
	 * frequency to maximum. Special care must be taken to ensure that this
	 * kthread doesn't result in the same behavior.
	 *
	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
	 * updated only at the end of the sugov_work() function and before that
	 * the schedutil governor rejects all other frequency scaling requests.
	 *
	 * There is a very rare case though, where the RT thread yields right
	 * after the work_in_progress flag is cleared. The effects of that are
	 * neglected for now.
	 */
	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static DEFINE_MUTEX(min_rate_lock);

static void update_min_rate_limit_ns(struct sugov_policy *sg_policy)
{
	mutex_lock(&min_rate_lock);
	sg_policy->min_rate_limit_ns = min(sg_policy->up_rate_delay_ns,
					   sg_policy->down_rate_delay_ns);
	mutex_unlock(&min_rate_lock);
}

static ssize_t up_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->up_rate_limit_us);
}

static ssize_t down_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->down_rate_limit_us);
}

static ssize_t up_rate_limit_us_store(struct gov_attr_set *attr_set,
				      const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->up_rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
		sg_policy->up_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
		update_min_rate_limit_ns(sg_policy);
	}

	return count;
}

static ssize_t down_rate_limit_us_store(struct gov_attr_set *attr_set,
					const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->down_rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
		sg_policy->down_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
		update_min_rate_limit_ns(sg_policy);
	}

	return count;
}

int schedutil_set_down_rate_limit_us(int cpu, unsigned int rate_limit_us)
{
	struct cpufreq_policy *policy;
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	struct gov_attr_set *attr_set;

	policy = cpufreq_cpu_get(cpu);
	if (!policy)
		return -EINVAL;

	if (policy->governor != &schedutil_gov) {
		/* Drop the reference taken by cpufreq_cpu_get(). */
		cpufreq_cpu_put(policy);
		return -ENOENT;
	}

	mutex_lock(&global_tunables_lock);

	sg_policy = policy->governor_data;
	if (!sg_policy) {
		mutex_unlock(&global_tunables_lock);
		cpufreq_cpu_put(policy);
		return -EINVAL;
	}

	tunables = sg_policy->tunables;
	tunables->down_rate_limit_us = rate_limit_us;

	attr_set = &tunables->attr_set;
	mutex_lock(&attr_set->update_lock);
	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
		sg_policy->down_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
		update_min_rate_limit_ns(sg_policy);
	}
	mutex_unlock(&attr_set->update_lock);

	mutex_unlock(&global_tunables_lock);

	if (policy)
		cpufreq_cpu_put(policy);

	return 0;
}
EXPORT_SYMBOL(schedutil_set_down_rate_limit_us);

int schedutil_set_up_rate_limit_us(int cpu, unsigned int rate_limit_us)
{
	struct cpufreq_policy *policy;
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	struct gov_attr_set *attr_set;

	policy = cpufreq_cpu_get(cpu);
	if (!policy)
		return -EINVAL;

	if (policy->governor != &schedutil_gov) {
		/* Drop the reference taken by cpufreq_cpu_get(). */
		cpufreq_cpu_put(policy);
		return -ENOENT;
	}

	mutex_lock(&global_tunables_lock);

	sg_policy = policy->governor_data;
	if (!sg_policy) {
		mutex_unlock(&global_tunables_lock);
		cpufreq_cpu_put(policy);
		return -EINVAL;
	}

	tunables = sg_policy->tunables;
	tunables->up_rate_limit_us = rate_limit_us;

	attr_set = &tunables->attr_set;
	mutex_lock(&attr_set->update_lock);
	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
		sg_policy->up_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
		update_min_rate_limit_ns(sg_policy);
	}
	mutex_unlock(&attr_set->update_lock);

	mutex_unlock(&global_tunables_lock);

	if (policy)
		cpufreq_cpu_put(policy);

	return 0;
}
EXPORT_SYMBOL(schedutil_set_up_rate_limit_us);

static struct governor_attr up_rate_limit_us = __ATTR_RW(up_rate_limit_us);
static struct governor_attr down_rate_limit_us = __ATTR_RW(down_rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&up_rate_limit_us.attr,
	&down_rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};
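
/*
 * Tunables usage (illustrative; the exact path depends on whether the
 * platform exposes per-policy governor tunables):
 *
 *   echo 2000  > /sys/devices/system/cpu/cpufreq/policy0/schedutil/up_rate_limit_us
 *   echo 20000 > /sys/devices/system/cpu/cpufreq/policy0/schedutil/down_rate_limit_us
 *
 * Writes go through the *_store() callbacks above, which update the tunables,
 * convert the values to nanoseconds for every attached sg_policy and refresh
 * min_rate_limit_ns. Kernel code can do the same through the exported
 * schedutil_set_up_rate_limit_us()/schedutil_set_down_rate_limit_us() helpers.
 */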
/********************** cpufreq governor interface *********************/

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;

	/* Kthread is bound to all CPUs by default */
	if (!policy->dvfs_possible_from_any_cpu)
		kthread_bind_mask(thread, policy->related_cpus);

	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->up_rate_limit_us = cpufreq_policy_transition_delay_us(policy);
	tunables->down_rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	kobject_put(&tunables->attr_set.kobj);
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

stop_kthread:
	sugov_kthread_stop(sg_policy);
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->up_rate_delay_ns =
		sg_policy->tunables->up_rate_limit_us * NSEC_PER_USEC;
	sg_policy->down_rate_delay_ns =
		sg_policy->tunables->down_rate_limit_us * NSEC_PER_USEC;
	update_min_rate_limit_ns(sg_policy);
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = UINT_MAX;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;
	sg_policy->cached_raw_freq = 0;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
		sg_cpu->flags = SCHED_CPUFREQ_DL;
		sg_cpu->iowait_boost_max = capacity_orig_of(cpu);
		sg_cpu->min_boost =
			(SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
			policy->cpuinfo.max_freq;
	}

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
					     policy_is_shared(policy) ?
							sugov_update_shared :
							sugov_update_single);
	}
	return 0;
}
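
/*
 * min_boost example (illustrative numbers): with cpuinfo.min_freq = 500000 kHz
 * and cpuinfo.max_freq = 2000000 kHz, min_boost = 1024 * 500000 / 2000000 =
 * 256, i.e. the smallest iowait boost already corresponds to the lowest OPP in
 * capacity units, so the first boost step never requests less than that.
 */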
static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_sched();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
}

static struct cpufreq_governor schedutil_gov = {
	.name			= "schedutil",
	.owner			= THIS_MODULE,
	.dynamic_switching	= true,
	.init			= sugov_init,
	.exit			= sugov_exit,
	.start			= sugov_start,
	.stop			= sugov_stop,
	.limits			= sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

static int __init sugov_register(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);