cpu_cooling.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085
  1. /*
  2. * linux/drivers/thermal/cpu_cooling.c
  3. *
  4. * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
  5. * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
  6. *
  7. * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
  8. *
  9. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; version 2 of the License.
  13. *
  14. * This program is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write to the Free Software Foundation, Inc.,
  21. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  22. *
  23. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  24. */
  25. #include <linux/module.h>
  26. #include <linux/thermal.h>
  27. #include <linux/cpufreq.h>
  28. #include <linux/err.h>
  29. #include <linux/pm_opp.h>
  30. #include <linux/slab.h>
  31. #include <linux/cpu.h>
  32. #include <linux/cpu_cooling.h>
  33. #include <trace/events/thermal.h>
  34. /*
  35. * Cooling state <-> CPUFreq frequency
  36. *
  37. * Cooling states are translated to frequencies throughout this driver and this
  38. * is the relation between them.
  39. *
  40. * Highest cooling state corresponds to lowest possible frequency.
  41. *
  42. * i.e.
  43. * level 0 --> 1st Max Freq
  44. * level 1 --> 2nd Max Freq
  45. * ...
  46. */
  47. /**
  48. * struct power_table - frequency to power conversion
  49. * @frequency: frequency in KHz
  50. * @power: power in mW
  51. *
  52. * This structure is built when the cooling device registers and helps
  53. * in translating frequency to power and viceversa.
  54. */
  55. struct power_table {
  56. u32 frequency;
  57. u32 power;
  58. };
  59. /**
  60. * struct cpufreq_cooling_device - data for cooling device with cpufreq
  61. * @id: unique integer value corresponding to each cpufreq_cooling_device
  62. * registered.
  63. * @cool_dev: thermal_cooling_device pointer to keep track of the
  64. * registered cooling device.
  65. * @cpufreq_state: integer value representing the current state of cpufreq
  66. * cooling devices.
  67. * @clipped_freq: integer value representing the absolute value of the clipped
  68. * frequency.
  69. * @max_level: maximum cooling level. One less than total number of valid
  70. * cpufreq frequencies.
  71. * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
  72. * @node: list_head to link all cpufreq_cooling_device together.
  73. * @last_load: load measured by the latest call to cpufreq_get_requested_power()
  74. * @time_in_idle: previous reading of the absolute time that this cpu was idle
  75. * @time_in_idle_timestamp: wall time of the last invocation of
  76. * get_cpu_idle_time_us()
  77. * @dyn_power_table: array of struct power_table for frequency to power
  78. * conversion, sorted in ascending order.
  79. * @dyn_power_table_entries: number of entries in the @dyn_power_table array
  80. * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
  81. * @plat_get_static_power: callback to calculate the static power
  82. *
  83. * This structure is required for keeping information of each registered
  84. * cpufreq_cooling_device.
  85. */
  86. struct cpufreq_cooling_device {
  87. int id;
  88. struct thermal_cooling_device *cool_dev;
  89. unsigned int cpufreq_state;
  90. unsigned int clipped_freq;
  91. unsigned int max_level;
  92. unsigned int *freq_table; /* In descending order */
  93. struct cpumask allowed_cpus;
  94. struct list_head node;
  95. u32 last_load;
  96. u64 *time_in_idle;
  97. u64 *time_in_idle_timestamp;
  98. struct power_table *dyn_power_table;
  99. int dyn_power_table_entries;
  100. struct device *cpu_dev;
  101. get_static_t plat_get_static_power;
  102. };
  /* Id allocator for the cooling devices; get_idr()/release_idr() take the lock */
  static DEFINE_IDR(cpufreq_idr);
  static DEFINE_MUTEX(cooling_cpufreq_lock);

  /* All cooling devices, linked through their ->node; walked under the list lock */
  static DEFINE_MUTEX(cooling_list_lock);
  static LIST_HEAD(cpufreq_dev_list);

  /* NOTE(review): presumably the number of registered devices - users not visible here */
  static unsigned int cpufreq_dev_count;
  108. /**
  109. * get_idr - function to get a unique id.
  110. * @idr: struct idr * handle used to create a id.
  111. * @id: int * value generated by this function.
  112. *
  113. * This function will populate @id with an unique
  114. * id, using the idr API.
  115. *
  116. * Return: 0 on success, an error code on failure.
  117. */
  118. static int get_idr(struct idr *idr, int *id)
  119. {
  120. int ret;
  121. mutex_lock(&cooling_cpufreq_lock);
  122. ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL);
  123. mutex_unlock(&cooling_cpufreq_lock);
  124. if (unlikely(ret < 0))
  125. return ret;
  126. *id = ret;
  127. return 0;
  128. }
  129. /**
  130. * release_idr - function to free the unique id.
  131. * @idr: struct idr * handle used for creating the id.
  132. * @id: int value representing the unique id.
  133. */
  134. static void release_idr(struct idr *idr, int id)
  135. {
  136. mutex_lock(&cooling_cpufreq_lock);
  137. idr_remove(idr, id);
  138. mutex_unlock(&cooling_cpufreq_lock);
  139. }
  140. /* Below code defines functions to be used for cpufreq as cooling device */
  141. /**
  142. * get_level: Find the level for a particular frequency
  143. * @cpufreq_dev: cpufreq_dev for which the property is required
  144. * @freq: Frequency
  145. *
  146. * Return: level on success, THERMAL_CSTATE_INVALID on error.
  147. */
  148. static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
  149. unsigned int freq)
  150. {
  151. unsigned long level;
  152. for (level = 0; level <= cpufreq_dev->max_level; level++) {
  153. if (freq == cpufreq_dev->freq_table[level])
  154. return level;
  155. if (freq > cpufreq_dev->freq_table[level])
  156. break;
  157. }
  158. return THERMAL_CSTATE_INVALID;
  159. }
  160. /**
  161. * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
  162. * @cpu: cpu for which the level is required
  163. * @freq: the frequency of interest
  164. *
  165. * This function will match the cooling level corresponding to the
  166. * requested @freq and return it.
  167. *
  168. * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
  169. * otherwise.
  170. */
  171. unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
  172. {
  173. struct cpufreq_cooling_device *cpufreq_dev;
  174. mutex_lock(&cooling_list_lock);
  175. list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
  176. if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
  177. unsigned long level = get_level(cpufreq_dev, freq);
  178. mutex_unlock(&cooling_list_lock);
  179. return level;
  180. }
  181. }
  182. mutex_unlock(&cooling_list_lock);
  183. pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
  184. return THERMAL_CSTATE_INVALID;
  185. }
  186. EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
  187. /**
  188. * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  189. * @nb: struct notifier_block * with callback info.
  190. * @event: value showing cpufreq event for which this function invoked.
  191. * @data: callback-specific data
  192. *
  193. * Callback to hijack the notification on cpufreq policy transition.
  194. * Every time there is a change in policy, we will intercept and
  195. * update the cpufreq policy with thermal constraints.
  196. *
  197. * Return: 0 (success)
  198. */
  199. static int cpufreq_thermal_notifier(struct notifier_block *nb,
  200. unsigned long event, void *data)
  201. {
  202. struct cpufreq_policy *policy = data;
  203. unsigned long clipped_freq;
  204. struct cpufreq_cooling_device *cpufreq_dev;
  205. if (event != CPUFREQ_ADJUST)
  206. return NOTIFY_DONE;
  207. mutex_lock(&cooling_list_lock);
  208. list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
  209. if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus))
  210. continue;
  211. /*
  212. * policy->max is the maximum allowed frequency defined by user
  213. * and clipped_freq is the maximum that thermal constraints
  214. * allow.
  215. *
  216. * If clipped_freq is lower than policy->max, then we need to
  217. * readjust policy->max.
  218. *
  219. * But, if clipped_freq is greater than policy->max, we don't
  220. * need to do anything.
  221. */
  222. clipped_freq = cpufreq_dev->clipped_freq;
  223. if (policy->max > clipped_freq)
  224. cpufreq_verify_within_limits(policy, 0, clipped_freq);
  225. break;
  226. }
  227. mutex_unlock(&cooling_list_lock);
  228. return NOTIFY_OK;
  229. }
  230. /**
  231. * build_dyn_power_table() - create a dynamic power to frequency table
  232. * @cpufreq_device: the cpufreq cooling device in which to store the table
  233. * @capacitance: dynamic power coefficient for these cpus
  234. *
  235. * Build a dynamic power to frequency table for this cpu and store it
  236. * in @cpufreq_device. This table will be used in cpu_power_to_freq() and
  237. * cpu_freq_to_power() to convert between power and frequency
  238. * efficiently. Power is stored in mW, frequency in KHz. The
  239. * resulting table is in ascending order.
  240. *
  241. * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
  242. * -ENOMEM if we run out of memory or -EAGAIN if an OPP was
  243. * added/enabled while the function was executing.
  244. */
  245. static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
  246. u32 capacitance)
  247. {
  248. struct power_table *power_table;
  249. struct dev_pm_opp *opp;
  250. struct device *dev = NULL;
  251. int num_opps = 0, cpu, i, ret = 0;
  252. unsigned long freq;
  253. for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
  254. dev = get_cpu_device(cpu);
  255. if (!dev) {
  256. dev_warn(&cpufreq_device->cool_dev->device,
  257. "No cpu device for cpu %d\n", cpu);
  258. continue;
  259. }
  260. num_opps = dev_pm_opp_get_opp_count(dev);
  261. if (num_opps > 0)
  262. break;
  263. else if (num_opps < 0)
  264. return num_opps;
  265. }
  266. if (num_opps == 0)
  267. return -EINVAL;
  268. power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
  269. if (!power_table)
  270. return -ENOMEM;
  271. rcu_read_lock();
  272. for (freq = 0, i = 0;
  273. opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
  274. freq++, i++) {
  275. u32 freq_mhz, voltage_mv;
  276. u64 power;
  277. if (i >= num_opps) {
  278. rcu_read_unlock();
  279. ret = -EAGAIN;
  280. goto free_power_table;
  281. }
  282. freq_mhz = freq / 1000000;
  283. voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
  284. /*
  285. * Do the multiplication with MHz and millivolt so as
  286. * to not overflow.
  287. */
  288. power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
  289. do_div(power, 1000000000);
  290. /* frequency is stored in power_table in KHz */
  291. power_table[i].frequency = freq / 1000;
  292. /* power is stored in mW */
  293. power_table[i].power = power;
  294. }
  295. rcu_read_unlock();
  296. if (i != num_opps) {
  297. ret = PTR_ERR(opp);
  298. goto free_power_table;
  299. }
  300. cpufreq_device->cpu_dev = dev;
  301. cpufreq_device->dyn_power_table = power_table;
  302. cpufreq_device->dyn_power_table_entries = i;
  303. return 0;
  304. free_power_table:
  305. kfree(power_table);
  306. return ret;
  307. }
  308. static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
  309. u32 freq)
  310. {
  311. int i;
  312. struct power_table *pt = cpufreq_device->dyn_power_table;
  313. for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
  314. if (freq < pt[i].frequency)
  315. break;
  316. return pt[i - 1].power;
  317. }
  318. static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
  319. u32 power)
  320. {
  321. int i;
  322. struct power_table *pt = cpufreq_device->dyn_power_table;
  323. for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
  324. if (power < pt[i].power)
  325. break;
  326. return pt[i - 1].frequency;
  327. }
  328. /**
  329. * get_load() - get load for a cpu since last updated
  330. * @cpufreq_device: &struct cpufreq_cooling_device for this cpu
  331. * @cpu: cpu number
  332. * @cpu_idx: index of the cpu in cpufreq_device->allowed_cpus
  333. *
  334. * Return: The average load of cpu @cpu in percentage since this
  335. * function was last called.
  336. */
  337. static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
  338. int cpu_idx)
  339. {
  340. u32 load;
  341. u64 now, now_idle, delta_time, delta_idle;
  342. now_idle = get_cpu_idle_time(cpu, &now, 0);
  343. delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx];
  344. delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx];
  345. if (delta_time <= delta_idle)
  346. load = 0;
  347. else
  348. load = div64_u64(100 * (delta_time - delta_idle), delta_time);
  349. cpufreq_device->time_in_idle[cpu_idx] = now_idle;
  350. cpufreq_device->time_in_idle_timestamp[cpu_idx] = now;
  351. return load;
  352. }
  353. /**
  354. * get_static_power() - calculate the static power consumed by the cpus
  355. * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev
  356. * @tz: thermal zone device in which we're operating
  357. * @freq: frequency in KHz
  358. * @power: pointer in which to store the calculated static power
  359. *
  360. * Calculate the static power consumed by the cpus described by
  361. * @cpu_actor running at frequency @freq. This function relies on a
  362. * platform specific function that should have been provided when the
  363. * actor was registered. If it wasn't, the static power is assumed to
  364. * be negligible. The calculated static power is stored in @power.
  365. *
  366. * Return: 0 on success, -E* on failure.
  367. */
  368. static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
  369. struct thermal_zone_device *tz, unsigned long freq,
  370. u32 *power)
  371. {
  372. struct dev_pm_opp *opp;
  373. unsigned long voltage;
  374. struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
  375. unsigned long freq_hz = freq * 1000;
  376. if (!cpufreq_device->plat_get_static_power ||
  377. !cpufreq_device->cpu_dev) {
  378. *power = 0;
  379. return 0;
  380. }
  381. rcu_read_lock();
  382. opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
  383. true);
  384. voltage = dev_pm_opp_get_voltage(opp);
  385. rcu_read_unlock();
  386. if (voltage == 0) {
  387. dev_warn_ratelimited(cpufreq_device->cpu_dev,
  388. "Failed to get voltage for frequency %lu: %ld\n",
  389. freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
  390. return -EINVAL;
  391. }
  392. return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
  393. voltage, power);
  394. }
  395. /**
  396. * get_dynamic_power() - calculate the dynamic power
  397. * @cpufreq_device: &cpufreq_cooling_device for this cdev
  398. * @freq: current frequency
  399. *
  400. * Return: the dynamic power consumed by the cpus described by
  401. * @cpufreq_device.
  402. */
  403. static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
  404. unsigned long freq)
  405. {
  406. u32 raw_cpu_power;
  407. raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
  408. return (raw_cpu_power * cpufreq_device->last_load) / 100;
  409. }
  410. /* cpufreq cooling device callback functions are defined below */
  411. /**
  412. * cpufreq_get_max_state - callback function to get the max cooling state.
  413. * @cdev: thermal cooling device pointer.
  414. * @state: fill this variable with the max cooling state.
  415. *
  416. * Callback for the thermal cooling device to return the cpufreq
  417. * max cooling state.
  418. *
  419. * Return: 0 on success, an error code otherwise.
  420. */
  421. static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
  422. unsigned long *state)
  423. {
  424. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  425. *state = cpufreq_device->max_level;
  426. return 0;
  427. }
  428. /**
  429. * cpufreq_get_cur_state - callback function to get the current cooling state.
  430. * @cdev: thermal cooling device pointer.
  431. * @state: fill this variable with the current cooling state.
  432. *
  433. * Callback for the thermal cooling device to return the cpufreq
  434. * current cooling state.
  435. *
  436. * Return: 0 on success, an error code otherwise.
  437. */
  438. static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
  439. unsigned long *state)
  440. {
  441. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  442. *state = cpufreq_device->cpufreq_state;
  443. return 0;
  444. }
  445. /**
  446. * cpufreq_set_cur_state - callback function to set the current cooling state.
  447. * @cdev: thermal cooling device pointer.
  448. * @state: set this variable to the current cooling state.
  449. *
  450. * Callback for the thermal cooling device to change the cpufreq
  451. * current cooling state.
  452. *
  453. * Return: 0 on success, an error code otherwise.
  454. */
  455. static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
  456. unsigned long state)
  457. {
  458. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  459. unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
  460. unsigned int clip_freq;
  461. /* Request state should be less than max_level */
  462. if (WARN_ON(state > cpufreq_device->max_level))
  463. return -EINVAL;
  464. /* Check if the old cooling action is same as new cooling action */
  465. if (cpufreq_device->cpufreq_state == state)
  466. return 0;
  467. clip_freq = cpufreq_device->freq_table[state];
  468. cpufreq_device->cpufreq_state = state;
  469. cpufreq_device->clipped_freq = clip_freq;
  470. cpufreq_update_policy(cpu);
  471. return 0;
  472. }
  473. /**
  474. * cpufreq_get_requested_power() - get the current power
  475. * @cdev: &thermal_cooling_device pointer
  476. * @tz: a valid thermal zone device pointer
  477. * @power: pointer in which to store the resulting power
  478. *
  479. * Calculate the current power consumption of the cpus in milliwatts
  480. * and store it in @power. This function should actually calculate
  481. * the requested power, but it's hard to get the frequency that
  482. * cpufreq would have assigned if there were no thermal limits.
  483. * Instead, we calculate the current power on the assumption that the
  484. * immediate future will look like the immediate past.
  485. *
  486. * We use the current frequency and the average load since this
  487. * function was last called. In reality, there could have been
  488. * multiple opps since this function was last called and that affects
  489. * the load calculation. While it's not perfectly accurate, this
  490. * simplification is good enough and works. REVISIT this, as more
  491. * complex code may be needed if experiments show that it's not
  492. * accurate enough.
  493. *
  494. * Return: 0 on success, -E* if getting the static power failed.
  495. */
  496. static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
  497. struct thermal_zone_device *tz,
  498. u32 *power)
  499. {
  500. unsigned long freq;
  501. int i = 0, cpu, ret;
  502. u32 static_power, dynamic_power, total_load = 0;
  503. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  504. u32 *load_cpu = NULL;
  505. cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
  506. /*
  507. * All the CPUs are offline, thus the requested power by
  508. * the cdev is 0
  509. */
  510. if (cpu >= nr_cpu_ids) {
  511. *power = 0;
  512. return 0;
  513. }
  514. freq = cpufreq_quick_get(cpu);
  515. if (trace_thermal_power_cpu_get_power_enabled()) {
  516. u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
  517. load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
  518. }
  519. for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
  520. u32 load;
  521. if (cpu_online(cpu))
  522. load = get_load(cpufreq_device, cpu, i);
  523. else
  524. load = 0;
  525. total_load += load;
  526. if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
  527. load_cpu[i] = load;
  528. i++;
  529. }
  530. cpufreq_device->last_load = total_load;
  531. dynamic_power = get_dynamic_power(cpufreq_device, freq);
  532. ret = get_static_power(cpufreq_device, tz, freq, &static_power);
  533. if (ret) {
  534. kfree(load_cpu);
  535. return ret;
  536. }
  537. if (load_cpu) {
  538. trace_thermal_power_cpu_get_power(
  539. &cpufreq_device->allowed_cpus,
  540. freq, load_cpu, i, dynamic_power, static_power);
  541. kfree(load_cpu);
  542. }
  543. *power = static_power + dynamic_power;
  544. return 0;
  545. }
  546. /**
  547. * cpufreq_state2power() - convert a cpu cdev state to power consumed
  548. * @cdev: &thermal_cooling_device pointer
  549. * @tz: a valid thermal zone device pointer
  550. * @state: cooling device state to be converted
  551. * @power: pointer in which to store the resulting power
  552. *
  553. * Convert cooling device state @state into power consumption in
  554. * milliwatts assuming 100% load. Store the calculated power in
  555. * @power.
  556. *
  557. * Return: 0 on success, -EINVAL if the cooling device state could not
  558. * be converted into a frequency or other -E* if there was an error
  559. * when calculating the static power.
  560. */
  561. static int cpufreq_state2power(struct thermal_cooling_device *cdev,
  562. struct thermal_zone_device *tz,
  563. unsigned long state, u32 *power)
  564. {
  565. unsigned int freq, num_cpus;
  566. cpumask_t cpumask;
  567. u32 static_power, dynamic_power;
  568. int ret;
  569. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  570. cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
  571. num_cpus = cpumask_weight(&cpumask);
  572. /* None of our cpus are online, so no power */
  573. if (num_cpus == 0) {
  574. *power = 0;
  575. return 0;
  576. }
  577. freq = cpufreq_device->freq_table[state];
  578. if (!freq)
  579. return -EINVAL;
  580. dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
  581. ret = get_static_power(cpufreq_device, tz, freq, &static_power);
  582. if (ret)
  583. return ret;
  584. *power = static_power + dynamic_power;
  585. return 0;
  586. }
  587. /**
  588. * cpufreq_power2state() - convert power to a cooling device state
  589. * @cdev: &thermal_cooling_device pointer
  590. * @tz: a valid thermal zone device pointer
  591. * @power: power in milliwatts to be converted
  592. * @state: pointer in which to store the resulting state
  593. *
  594. * Calculate a cooling device state for the cpus described by @cdev
  595. * that would allow them to consume at most @power mW and store it in
  596. * @state. Note that this calculation depends on external factors
  597. * such as the cpu load or the current static power. Calling this
  598. * function with the same power as input can yield different cooling
  599. * device states depending on those external factors.
  600. *
  601. * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
  602. * the calculated frequency could not be converted to a valid state.
  603. * The latter should not happen unless the frequencies available to
  604. * cpufreq have changed since the initialization of the cpu cooling
  605. * device.
  606. */
  607. static int cpufreq_power2state(struct thermal_cooling_device *cdev,
  608. struct thermal_zone_device *tz, u32 power,
  609. unsigned long *state)
  610. {
  611. unsigned int cpu, cur_freq, target_freq;
  612. int ret;
  613. s32 dyn_power;
  614. u32 last_load, normalised_power, static_power;
  615. struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
  616. cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
  617. /* None of our cpus are online */
  618. if (cpu >= nr_cpu_ids)
  619. return -ENODEV;
  620. cur_freq = cpufreq_quick_get(cpu);
  621. ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
  622. if (ret)
  623. return ret;
  624. dyn_power = power - static_power;
  625. dyn_power = dyn_power > 0 ? dyn_power : 0;
  626. last_load = cpufreq_device->last_load ?: 1;
  627. normalised_power = (dyn_power * 100) / last_load;
  628. target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
  629. *state = cpufreq_cooling_get_level(cpu, target_freq);
  630. if (*state == THERMAL_CSTATE_INVALID) {
  631. dev_warn_ratelimited(&cdev->device,
  632. "Failed to convert %dKHz for cpu %d into a cdev state\n",
  633. target_freq, cpu);
  634. return -EINVAL;
  635. }
  636. trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
  637. target_freq, *state, power);
  638. return 0;
  639. }
  640. /* Bind cpufreq callbacks to thermal cooling device ops */
  641. static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
  642. .get_max_state = cpufreq_get_max_state,
  643. .get_cur_state = cpufreq_get_cur_state,
  644. .set_cur_state = cpufreq_set_cur_state,
  645. };
  646. static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
  647. .get_max_state = cpufreq_get_max_state,
  648. .get_cur_state = cpufreq_get_cur_state,
  649. .set_cur_state = cpufreq_set_cur_state,
  650. .get_requested_power = cpufreq_get_requested_power,
  651. .state2power = cpufreq_state2power,
  652. .power2state = cpufreq_power2state,
  653. };
  654. /* Notifier for cpufreq policy change */
  655. static struct notifier_block thermal_cpufreq_notifier_block = {
  656. .notifier_call = cpufreq_thermal_notifier,
  657. };
  658. static unsigned int find_next_max(struct cpufreq_frequency_table *table,
  659. unsigned int prev_max)
  660. {
  661. struct cpufreq_frequency_table *pos;
  662. unsigned int max = 0;
  663. cpufreq_for_each_valid_entry(pos, table) {
  664. if (pos->frequency > max && pos->frequency < prev_max)
  665. max = pos->frequency;
  666. }
  667. return max;
  668. }
  669. /**
  670. * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  671. * @np: a valid struct device_node to the cooling device device tree node
  672. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  673. * Normally this should be same as cpufreq policy->related_cpus.
  674. * @capacitance: dynamic power coefficient for these cpus
  675. * @plat_static_func: function to calculate the static power consumed by these
  676. * cpus (optional)
  677. *
  678. * This interface function registers the cpufreq cooling device with the name
  679. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  680. * cooling devices. It also gives the opportunity to link the cooling device
  681. * with a device tree node, in order to bind it via the thermal DT code.
  682. *
  683. * Return: a valid struct thermal_cooling_device pointer on success,
  684. * on failure, it returns a corresponding ERR_PTR().
  685. */
  686. static struct thermal_cooling_device *
  687. __cpufreq_cooling_register(struct device_node *np,
  688. const struct cpumask *clip_cpus, u32 capacitance,
  689. get_static_t plat_static_func)
  690. {
  691. struct cpufreq_policy *policy;
  692. struct thermal_cooling_device *cool_dev;
  693. struct cpufreq_cooling_device *cpufreq_dev;
  694. char dev_name[THERMAL_NAME_LENGTH];
  695. struct cpufreq_frequency_table *pos, *table;
  696. struct cpumask temp_mask;
  697. unsigned int freq, i, num_cpus;
  698. int ret;
  699. struct thermal_cooling_device_ops *cooling_ops;
  700. cpumask_and(&temp_mask, clip_cpus, cpu_online_mask);
  701. policy = cpufreq_cpu_get(cpumask_first(&temp_mask));
  702. if (!policy) {
  703. pr_debug("%s: CPUFreq policy not found\n", __func__);
  704. return ERR_PTR(-EPROBE_DEFER);
  705. }
  706. table = policy->freq_table;
  707. if (!table) {
  708. pr_debug("%s: CPUFreq table not found\n", __func__);
  709. cool_dev = ERR_PTR(-ENODEV);
  710. goto put_policy;
  711. }
  712. cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
  713. if (!cpufreq_dev) {
  714. cool_dev = ERR_PTR(-ENOMEM);
  715. goto put_policy;
  716. }
  717. num_cpus = cpumask_weight(clip_cpus);
  718. cpufreq_dev->time_in_idle = kcalloc(num_cpus,
  719. sizeof(*cpufreq_dev->time_in_idle),
  720. GFP_KERNEL);
  721. if (!cpufreq_dev->time_in_idle) {
  722. cool_dev = ERR_PTR(-ENOMEM);
  723. goto free_cdev;
  724. }
  725. cpufreq_dev->time_in_idle_timestamp =
  726. kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
  727. GFP_KERNEL);
  728. if (!cpufreq_dev->time_in_idle_timestamp) {
  729. cool_dev = ERR_PTR(-ENOMEM);
  730. goto free_time_in_idle;
  731. }
  732. /* Find max levels */
  733. cpufreq_for_each_valid_entry(pos, table)
  734. cpufreq_dev->max_level++;
  735. cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
  736. cpufreq_dev->max_level, GFP_KERNEL);
  737. if (!cpufreq_dev->freq_table) {
  738. cool_dev = ERR_PTR(-ENOMEM);
  739. goto free_time_in_idle_timestamp;
  740. }
  741. /* max_level is an index, not a counter */
  742. cpufreq_dev->max_level--;
  743. cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
  744. if (capacitance) {
  745. cpufreq_dev->plat_get_static_power = plat_static_func;
  746. ret = build_dyn_power_table(cpufreq_dev, capacitance);
  747. if (ret) {
  748. cool_dev = ERR_PTR(ret);
  749. goto free_table;
  750. }
  751. cooling_ops = &cpufreq_power_cooling_ops;
  752. } else {
  753. cooling_ops = &cpufreq_cooling_ops;
  754. }
  755. ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
  756. if (ret) {
  757. cool_dev = ERR_PTR(ret);
  758. goto free_power_table;
  759. }
  760. /* Fill freq-table in descending order of frequencies */
  761. for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
  762. freq = find_next_max(table, freq);
  763. cpufreq_dev->freq_table[i] = freq;
  764. /* Warn for duplicate entries */
  765. if (!freq)
  766. pr_warn("%s: table has duplicate entries\n", __func__);
  767. else
  768. pr_debug("%s: freq:%u KHz\n", __func__, freq);
  769. }
  770. snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
  771. cpufreq_dev->id);
  772. cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
  773. cooling_ops);
  774. if (IS_ERR(cool_dev))
  775. goto remove_idr;
  776. cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
  777. cpufreq_dev->cool_dev = cool_dev;
  778. mutex_lock(&cooling_cpufreq_lock);
  779. mutex_lock(&cooling_list_lock);
  780. list_add(&cpufreq_dev->node, &cpufreq_dev_list);
  781. mutex_unlock(&cooling_list_lock);
  782. /* Register the notifier for first cpufreq cooling device */
  783. if (!cpufreq_dev_count++)
  784. cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
  785. CPUFREQ_POLICY_NOTIFIER);
  786. mutex_unlock(&cooling_cpufreq_lock);
  787. goto put_policy;
  788. remove_idr:
  789. release_idr(&cpufreq_idr, cpufreq_dev->id);
  790. free_power_table:
  791. kfree(cpufreq_dev->dyn_power_table);
  792. free_table:
  793. kfree(cpufreq_dev->freq_table);
  794. free_time_in_idle_timestamp:
  795. kfree(cpufreq_dev->time_in_idle_timestamp);
  796. free_time_in_idle:
  797. kfree(cpufreq_dev->time_in_idle);
  798. free_cdev:
  799. kfree(cpufreq_dev);
  800. put_policy:
  801. cpufreq_cpu_put(policy);
  802. return cool_dev;
  803. }
  804. /**
  805. * cpufreq_cooling_register - function to create cpufreq cooling device.
  806. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  807. *
  808. * This interface function registers the cpufreq cooling device with the name
  809. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  810. * cooling devices.
  811. *
  812. * Return: a valid struct thermal_cooling_device pointer on success,
  813. * on failure, it returns a corresponding ERR_PTR().
  814. */
  815. struct thermal_cooling_device *
  816. cpufreq_cooling_register(const struct cpumask *clip_cpus)
  817. {
  818. return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
  819. }
  820. EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  821. /**
  822. * of_cpufreq_cooling_register - function to create cpufreq cooling device.
  823. * @np: a valid struct device_node to the cooling device device tree node
  824. * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
  825. *
  826. * This interface function registers the cpufreq cooling device with the name
  827. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  828. * cooling devices. Using this API, the cpufreq cooling device will be
  829. * linked to the device tree node provided.
  830. *
  831. * Return: a valid struct thermal_cooling_device pointer on success,
  832. * on failure, it returns a corresponding ERR_PTR().
  833. */
  834. struct thermal_cooling_device *
  835. of_cpufreq_cooling_register(struct device_node *np,
  836. const struct cpumask *clip_cpus)
  837. {
  838. if (!np)
  839. return ERR_PTR(-EINVAL);
  840. return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
  841. }
  842. EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  843. /**
  844. * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  845. * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  846. * @capacitance: dynamic power coefficient for these cpus
  847. * @plat_static_func: function to calculate the static power consumed by these
  848. * cpus (optional)
  849. *
  850. * This interface function registers the cpufreq cooling device with
  851. * the name "thermal-cpufreq-%x". This api can support multiple
  852. * instances of cpufreq cooling devices. Using this function, the
  853. * cooling device will implement the power extensions by using a
  854. * simple cpu power model. The cpus must have registered their OPPs
  855. * using the OPP library.
  856. *
  857. * An optional @plat_static_func may be provided to calculate the
  858. * static power consumed by these cpus. If the platform's static
  859. * power consumption is unknown or negligible, make it NULL.
  860. *
  861. * Return: a valid struct thermal_cooling_device pointer on success,
  862. * on failure, it returns a corresponding ERR_PTR().
  863. */
  864. struct thermal_cooling_device *
  865. cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
  866. get_static_t plat_static_func)
  867. {
  868. return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
  869. plat_static_func);
  870. }
  871. EXPORT_SYMBOL(cpufreq_power_cooling_register);
  872. /**
  873. * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  874. * @np: a valid struct device_node to the cooling device device tree node
  875. * @clip_cpus: cpumask of cpus where the frequency constraints will happen
  876. * @capacitance: dynamic power coefficient for these cpus
  877. * @plat_static_func: function to calculate the static power consumed by these
  878. * cpus (optional)
  879. *
  880. * This interface function registers the cpufreq cooling device with
  881. * the name "thermal-cpufreq-%x". This api can support multiple
  882. * instances of cpufreq cooling devices. Using this API, the cpufreq
  883. * cooling device will be linked to the device tree node provided.
  884. * Using this function, the cooling device will implement the power
  885. * extensions by using a simple cpu power model. The cpus must have
  886. * registered their OPPs using the OPP library.
  887. *
  888. * An optional @plat_static_func may be provided to calculate the
  889. * static power consumed by these cpus. If the platform's static
  890. * power consumption is unknown or negligible, make it NULL.
  891. *
  892. * Return: a valid struct thermal_cooling_device pointer on success,
  893. * on failure, it returns a corresponding ERR_PTR().
  894. */
  895. struct thermal_cooling_device *
  896. of_cpufreq_power_cooling_register(struct device_node *np,
  897. const struct cpumask *clip_cpus,
  898. u32 capacitance,
  899. get_static_t plat_static_func)
  900. {
  901. if (!np)
  902. return ERR_PTR(-EINVAL);
  903. return __cpufreq_cooling_register(np, clip_cpus, capacitance,
  904. plat_static_func);
  905. }
  906. EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
  907. /**
  908. * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
  909. * @cdev: thermal cooling device pointer.
  910. *
  911. * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
  912. */
  913. void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
  914. {
  915. struct cpufreq_cooling_device *cpufreq_dev;
  916. if (!cdev)
  917. return;
  918. cpufreq_dev = cdev->devdata;
  919. /* Unregister the notifier for the last cpufreq cooling device */
  920. mutex_lock(&cooling_cpufreq_lock);
  921. if (!--cpufreq_dev_count)
  922. cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
  923. CPUFREQ_POLICY_NOTIFIER);
  924. mutex_lock(&cooling_list_lock);
  925. list_del(&cpufreq_dev->node);
  926. mutex_unlock(&cooling_list_lock);
  927. mutex_unlock(&cooling_cpufreq_lock);
  928. thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
  929. release_idr(&cpufreq_idr, cpufreq_dev->id);
  930. kfree(cpufreq_dev->dyn_power_table);
  931. kfree(cpufreq_dev->time_in_idle_timestamp);
  932. kfree(cpufreq_dev->time_in_idle);
  933. kfree(cpufreq_dev->freq_table);
  934. kfree(cpufreq_dev);
  935. }
  936. EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);