watchdog.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "watchdog: " fmt

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <uapi/linux/sched/types.h>
#include <linux/tick.h>
#include <linux/workqueue.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/kthread.h>

static DEFINE_MUTEX(watchdog_mutex);

#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
# define WATCHDOG_DEFAULT       (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
# define NMI_WATCHDOG_DEFAULT   1
#else
# define WATCHDOG_DEFAULT       (SOFT_WATCHDOG_ENABLED)
# define NMI_WATCHDOG_DEFAULT   0
#endif

unsigned long __read_mostly watchdog_enabled;
int __read_mostly watchdog_user_enabled = 1;
int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
int __read_mostly nmi_watchdog_available;

struct cpumask watchdog_allowed_mask __read_mostly;

struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);

#ifdef CONFIG_HARDLOCKUP_DETECTOR
/*
 * Should we panic when a soft-lockup or hard-lockup occurs:
 */
unsigned int __read_mostly hardlockup_panic =
                        CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
/*
 * We may not want to enable hard lockup detection by default in all cases,
 * for example when running the kernel as a guest on a hypervisor. In these
 * cases this function can be called to disable hard lockup detection. This
 * function should only be executed once by the boot processor before the
 * kernel command line parameters are parsed, because otherwise it is not
 * possible to override this in hardlockup_panic_setup().
 */
void __init hardlockup_detector_disable(void)
{
        nmi_watchdog_user_enabled = 0;
}

static int __init hardlockup_panic_setup(char *str)
{
        if (!strncmp(str, "panic", 5))
                hardlockup_panic = 1;
        else if (!strncmp(str, "nopanic", 7))
                hardlockup_panic = 0;
        else if (!strncmp(str, "0", 1))
                nmi_watchdog_user_enabled = 0;
        else if (!strncmp(str, "1", 1))
                nmi_watchdog_user_enabled = 1;
        return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
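
/*
 * For reference, the parser above accepts the following forms on the kernel
 * command line (anything else falls through without effect):
 *
 *   nmi_watchdog=panic    - panic when a hard lockup is detected
 *   nmi_watchdog=nopanic  - warn only, do not panic
 *   nmi_watchdog=0        - disable the NMI/hard lockup watchdog
 *   nmi_watchdog=1        - enable the NMI/hard lockup watchdog
 */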

# ifdef CONFIG_SMP
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;

static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
        sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
        return 1;
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
# endif /* CONFIG_SMP */
#endif /* CONFIG_HARDLOCKUP_DETECTOR */

/*
 * These functions can be overridden if an architecture implements its
 * own hardlockup detector.
 *
 * watchdog_nmi_enable/disable can be implemented to start and stop when
 * softlockup watchdog threads start and stop. The arch must select the
 * SOFTLOCKUP_DETECTOR Kconfig.
 */
int __weak watchdog_nmi_enable(unsigned int cpu)
{
        hardlockup_detector_perf_enable();
        return 0;
}

void __weak watchdog_nmi_disable(unsigned int cpu)
{
        hardlockup_detector_perf_disable();
}

/* Return 0, if a NMI watchdog is available. Error code otherwise */
int __weak __init watchdog_nmi_probe(void)
{
        return hardlockup_detector_perf_init();
}
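
/*
 * Illustrative sketch (hypothetical, not taken from any in-tree architecture):
 * an arch with its own hard lockup detector overrides the __weak stubs above
 * with strong definitions along the lines of
 *
 *      int watchdog_nmi_enable(unsigned int cpu)
 *      {
 *              return my_arch_arm_nmi(cpu);       hypothetical arch helper
 *      }
 *
 *      void watchdog_nmi_disable(unsigned int cpu)
 *      {
 *              my_arch_quiesce_nmi(cpu);          hypothetical arch helper
 *      }
 *
 * The names and per-cpu arguments match the prototypes above; the bodies are
 * placeholders only.
 */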

/**
 * watchdog_nmi_stop - Stop the watchdog for reconfiguration
 *
 * The reconfiguration steps are:
 * watchdog_nmi_stop();
 * update_variables();
 * watchdog_nmi_start();
 */
void __weak watchdog_nmi_stop(void) { }

/**
 * watchdog_nmi_start - Start the watchdog after reconfiguration
 *
 * Counterpart to watchdog_nmi_stop().
 *
 * The following variables have been updated in update_variables() and
 * contain the currently valid configuration:
 * - watchdog_enabled
 * - watchdog_thresh
 * - watchdog_cpumask
 */
void __weak watchdog_nmi_start(void) { }

/**
 * lockup_detector_update_enable - Update the sysctl enable bit
 *
 * Caller needs to make sure that the NMI/perf watchdogs are off, so this
 * can't race with watchdog_nmi_disable().
 */
static void lockup_detector_update_enable(void)
{
        watchdog_enabled = 0;
        if (!watchdog_user_enabled)
                return;
        if (nmi_watchdog_available && nmi_watchdog_user_enabled)
                watchdog_enabled |= NMI_WATCHDOG_ENABLED;
        if (soft_watchdog_user_enabled)
                watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
}
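
/*
 * Example outcome of the function above: if the watchdog has not been
 * disabled globally, both detectors are requested and an NMI watchdog was
 * probed, watchdog_enabled ends up as
 * (NMI_WATCHDOG_ENABLED | SOFT_WATCHDOG_ENABLED); without an available NMI
 * watchdog only SOFT_WATCHDOG_ENABLED is set.
 */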

#ifdef CONFIG_SOFTLOCKUP_DETECTOR

#define SOFTLOCKUP_RESET        ULONG_MAX

/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
                        CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

static bool softlockup_threads_initialized __read_mostly;
static u64 __read_mostly sample_period;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;

static int __init softlockup_panic_setup(char *str)
{
        softlockup_panic = simple_strtoul(str, NULL, 0);
        return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
        watchdog_user_enabled = 0;
        return 1;
}
__setup("nowatchdog", nowatchdog_setup);

static int __init nosoftlockup_setup(char *str)
{
        soft_watchdog_user_enabled = 0;
        return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;

static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
        sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
        return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
#endif

static void __lockup_detector_cleanup(void);

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions. So we generally
 * want a higher threshold for soft lockups than for hard lockups. So we couple
 * the thresholds with a factor: we make the soft threshold twice the amount of
 * time the hard threshold is.
 */
static int get_softlockup_thresh(void)
{
        return watchdog_thresh * 2;
}

/*
 * Returns seconds, approximately.  We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
        return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}

static void set_sample_period(void)
{
        /*
         * convert watchdog_thresh from seconds to ns
         * the divide by 5 is to give hrtimer several chances (two
         * or three with the current relation between the soft
         * and hard thresholds) to increment before the
         * hardlockup detector generates a warning
         */
        sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
        watchdog_update_hrtimer_threshold(sample_period);
}
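
/*
 * Worked example with the defaults in this file: watchdog_thresh = 10s gives
 * a softlockup threshold of 20s, so sample_period = 20 * NSEC_PER_SEC / 5 =
 * 4 * NSEC_PER_SEC. The per-cpu hrtimer below therefore fires roughly every
 * four seconds and gets about five chances to run before the soft threshold
 * expires.
 */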

/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
        __this_cpu_write(watchdog_touch_ts, get_timestamp());
}

/**
 * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
 *
 * Call when the scheduler may have stalled for legitimate reasons
 * preventing the watchdog task from executing - e.g. the scheduler
 * entering idle state.  This should only be used for scheduler events.
 * Use touch_softlockup_watchdog() for everything else.
 */
notrace void touch_softlockup_watchdog_sched(void)
{
        /*
         * Preemption can be enabled.  It doesn't matter which CPU's timestamp
         * gets zeroed here, so use the raw_ operation.
         */
        raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
}

notrace void touch_softlockup_watchdog(void)
{
        touch_softlockup_watchdog_sched();
        wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

void touch_all_softlockup_watchdogs(void)
{
        int cpu;

        /*
         * watchdog_mutex cannot be taken here, as this might be called
         * from (soft)interrupt context, so the access to
         * watchdog_allowed_cpumask might race with a concurrent update.
         *
         * The watchdog time stamp can race against a concurrent real
         * update as well, the only side effect might be a cycle delay for
         * the softlockup check.
         */
        for_each_cpu(cpu, &watchdog_allowed_mask)
                per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET;
        wq_watchdog_touch(-1);
}

void touch_softlockup_watchdog_sync(void)
{
        __this_cpu_write(softlockup_touch_sync, true);
        __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
}

static int is_softlockup(unsigned long touch_ts)
{
        unsigned long now = get_timestamp();

        if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
                /* Warn about unreasonable delays. */
                if (time_after(now, touch_ts + get_softlockup_thresh()))
                        return now - touch_ts;
        }
        return 0;
}
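
/*
 * Example with the defaults: a CPU whose watchdog_touch_ts was last updated
 * more than 2 * watchdog_thresh = 20 seconds ago makes is_softlockup() return
 * the stall duration in seconds, which watchdog_timer_fn() below then reports.
 */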

/* watchdog detector functions */
bool is_hardlockup(void)
{
        unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

        if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
                return true;

        __this_cpu_write(hrtimer_interrupts_saved, hrint);
        return false;
}
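
/*
 * Note on the check above: the perf/NMI based hard lockup detector (when
 * built in) calls is_hardlockup() from NMI context and compares the current
 * hrtimer_interrupts count with the value saved on the previous NMI. If the
 * hrtimer in watchdog_timer_fn() has not fired in between, interrupts have
 * been blocked for the whole period and a hard lockup is assumed.
 */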

static void watchdog_interrupt_count(void)
{
        __this_cpu_inc(hrtimer_interrupts);
}

/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
        unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
        struct pt_regs *regs = get_irq_regs();
        int duration;
        int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

        if (!watchdog_enabled)
                return HRTIMER_NORESTART;

        /* kick the hardlockup detector */
        watchdog_interrupt_count();

        /* kick the softlockup detector */
        wake_up_process(__this_cpu_read(softlockup_watchdog));

        /* .. and repeat */
        hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

        if (touch_ts == SOFTLOCKUP_RESET) {
                if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
                        /*
                         * If the time stamp was touched atomically
                         * make sure the scheduler tick is up to date.
                         */
                        __this_cpu_write(softlockup_touch_sync, false);
                        sched_clock_tick();
                }

                /* Clear the guest paused flag on watchdog reset */
                kvm_check_and_clear_guest_paused();
                __touch_watchdog();
                return HRTIMER_RESTART;
        }

        /* check for a softlockup
         * This is done by making sure a high priority task is
         * being scheduled.  The task touches the watchdog to
         * indicate it is getting cpu time.  If it hasn't then
         * this is a good indication some task is hogging the cpu
         */
        duration = is_softlockup(touch_ts);
        if (unlikely(duration)) {
                /*
                 * If a virtual machine is stopped by the host it can look to
                 * the watchdog like a soft lockup, check to see if the host
                 * stopped the vm before we issue the warning
                 */
                if (kvm_check_and_clear_guest_paused())
                        return HRTIMER_RESTART;

                /* only warn once */
                if (__this_cpu_read(soft_watchdog_warn) == true) {
                        /*
                         * When multiple processes are causing softlockups the
                         * softlockup detector only warns on the first one
                         * because the code relies on a full quiet cycle to
                         * re-arm.  The second process prevents the quiet cycle
                         * and never gets reported.  Use task pointers to detect
                         * this.
                         */
                        if (__this_cpu_read(softlockup_task_ptr_saved) !=
                            current) {
                                __this_cpu_write(soft_watchdog_warn, false);
                                __touch_watchdog();
                        }
                        return HRTIMER_RESTART;
                }

                if (softlockup_all_cpu_backtrace) {
                        /* Prevent multiple soft-lockup reports if one cpu is already
                         * engaged in dumping cpu back traces
                         */
                        if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
                                /* Someone else will report us. Let's give up */
                                __this_cpu_write(soft_watchdog_warn, true);
                                return HRTIMER_RESTART;
                        }
                }

                pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
                        smp_processor_id(), duration,
                        current->comm, task_pid_nr(current));
                __this_cpu_write(softlockup_task_ptr_saved, current);
                print_modules();
                print_irqtrace_events(current);
                if (regs)
                        show_regs(regs);
                else
                        dump_stack();

                if (softlockup_all_cpu_backtrace) {
                        /* Avoid generating two back traces for current
                         * given that one is already made above
                         */
                        trigger_allbutself_cpu_backtrace();

                        clear_bit(0, &soft_lockup_nmi_warn);
                        /* Barrier to sync with other cpus */
                        smp_mb__after_atomic();
                }

                add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
                if (softlockup_panic)
                        panic("softlockup: hung tasks");
                __this_cpu_write(soft_watchdog_warn, true);
        } else
                __this_cpu_write(soft_watchdog_warn, false);

        return HRTIMER_RESTART;
}

static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
        struct sched_param param = { .sched_priority = prio };

        sched_setscheduler(current, policy, &param);
}

static void watchdog_enable(unsigned int cpu)
{
        struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

        /*
         * Start the timer first to prevent the NMI watchdog triggering
         * before the timer has a chance to fire.
         */
        hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hrtimer->function = watchdog_timer_fn;
        hrtimer_start(hrtimer, ns_to_ktime(sample_period),
                      HRTIMER_MODE_REL_PINNED);

        /* Initialize timestamp */
        __touch_watchdog();

        /* Enable the perf event */
        if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
                watchdog_nmi_enable(cpu);

        watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
}

static void watchdog_disable(unsigned int cpu)
{
        struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

        watchdog_set_prio(SCHED_NORMAL, 0);
        /*
         * Disable the perf event first. That prevents that a large delay
         * between disabling the timer and disabling the perf event causes
         * the perf NMI to detect a false positive.
         */
        watchdog_nmi_disable(cpu);
        hrtimer_cancel(hrtimer);
}

static void watchdog_cleanup(unsigned int cpu, bool online)
{
        watchdog_disable(cpu);
}

static int watchdog_should_run(unsigned int cpu)
{
        return __this_cpu_read(hrtimer_interrupts) !=
                __this_cpu_read(soft_lockup_hrtimer_cnt);
}

/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once every sample_period seconds (4 seconds by
 * default) to reset the softlockup timestamp.  If this gets delayed
 * for more than 2*watchdog_thresh seconds then the debug-printout
 * triggers in watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
        __this_cpu_write(soft_lockup_hrtimer_cnt,
                         __this_cpu_read(hrtimer_interrupts));
        __touch_watchdog();
}

static struct smp_hotplug_thread watchdog_threads = {
        .store                  = &softlockup_watchdog,
        .thread_should_run      = watchdog_should_run,
        .thread_fn              = watchdog,
        .thread_comm            = "watchdog/%u",
        .setup                  = watchdog_enable,
        .cleanup                = watchdog_cleanup,
        .park                   = watchdog_disable,
        .unpark                 = watchdog_enable,
};

static void softlockup_update_smpboot_threads(void)
{
        lockdep_assert_held(&watchdog_mutex);

        if (!softlockup_threads_initialized)
                return;

        smpboot_update_cpumask_percpu_thread(&watchdog_threads,
                                             &watchdog_allowed_mask);
}

/* Temporarily park all watchdog threads */
static void softlockup_park_all_threads(void)
{
        cpumask_clear(&watchdog_allowed_mask);
        softlockup_update_smpboot_threads();
}

/* Unpark enabled threads */
static void softlockup_unpark_threads(void)
{
        cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
        softlockup_update_smpboot_threads();
}
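
/*
 * To summarize the mechanism above: the softlockup detector is switched on
 * and off purely by adjusting watchdog_allowed_mask. An empty mask parks all
 * watchdog/%u threads (stopping their hrtimers and perf events via the .park
 * hook), while copying watchdog_cpumask into it unparks the threads on the
 * CPUs the user selected.
 */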

static void lockup_detector_reconfigure(void)
{
        cpus_read_lock();
        watchdog_nmi_stop();
        softlockup_park_all_threads();
        set_sample_period();
        lockup_detector_update_enable();
        if (watchdog_enabled && watchdog_thresh)
                softlockup_unpark_threads();
        watchdog_nmi_start();
        cpus_read_unlock();
        /*
         * Must be called outside the cpus locked section to prevent
         * recursive locking in the perf code.
         */
        __lockup_detector_cleanup();
}

/*
 * Create the watchdog thread infrastructure and configure the detector(s).
 *
 * The threads are not unparked as watchdog_allowed_mask is empty.  When
 * the threads are successfully initialized, take the proper locks and
 * unpark the threads in the watchdog_cpumask if the watchdog is enabled.
 */
static __init void lockup_detector_setup(void)
{
        int ret;

        /*
         * If sysctl is off and watchdog got disabled on the command line,
         * nothing to do here.
         */
        lockup_detector_update_enable();

        if (!IS_ENABLED(CONFIG_SYSCTL) &&
            !(watchdog_enabled && watchdog_thresh))
                return;

        ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
                                                     &watchdog_allowed_mask);
        if (ret) {
                pr_err("Failed to initialize soft lockup detector threads\n");
                return;
        }

        mutex_lock(&watchdog_mutex);
        softlockup_threads_initialized = true;
        lockup_detector_reconfigure();
        mutex_unlock(&watchdog_mutex);
}

#else /* CONFIG_SOFTLOCKUP_DETECTOR */
static inline int watchdog_park_threads(void) { return 0; }
static inline void watchdog_unpark_threads(void) { }
static inline int watchdog_enable_all_cpus(void) { return 0; }
static inline void watchdog_disable_all_cpus(void) { }
static void lockup_detector_reconfigure(void)
{
        cpus_read_lock();
        watchdog_nmi_stop();
        lockup_detector_update_enable();
        watchdog_nmi_start();
        cpus_read_unlock();
}
static inline void lockup_detector_setup(void)
{
        lockup_detector_reconfigure();
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */

static void __lockup_detector_cleanup(void)
{
        lockdep_assert_held(&watchdog_mutex);
        hardlockup_detector_perf_cleanup();
}

/**
 * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
 *
 * Caller must not hold the cpu hotplug rwsem.
 */
void lockup_detector_cleanup(void)
{
        mutex_lock(&watchdog_mutex);
        __lockup_detector_cleanup();
        mutex_unlock(&watchdog_mutex);
}

/**
 * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
 *
 * Special interface for parisc. It prevents lockup detector warnings from
 * the default pm_poweroff() function which busy loops forever.
 */
void lockup_detector_soft_poweroff(void)
{
        watchdog_enabled = 0;
}

#ifdef CONFIG_SYSCTL

/* Propagate any changes to the watchdog threads */
static void proc_watchdog_update(void)
{
        /* Remove impossible cpus to keep sysctl output clean. */
        cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
        lockup_detector_reconfigure();
}

/*
 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
 *
 * caller             | table->data points to      | 'which'
 * -------------------|----------------------------|--------------------------
 * proc_watchdog      | watchdog_user_enabled      | NMI_WATCHDOG_ENABLED |
 *                    |                            | SOFT_WATCHDOG_ENABLED
 * -------------------|----------------------------|--------------------------
 * proc_nmi_watchdog  | nmi_watchdog_user_enabled  | NMI_WATCHDOG_ENABLED
 * -------------------|----------------------------|--------------------------
 * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int err, old, *param = table->data;

        mutex_lock(&watchdog_mutex);

        if (!write) {
                /*
                 * On read synchronize the userspace interface. This is a
                 * racy snapshot.
                 */
                *param = (watchdog_enabled & which) != 0;
                err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        } else {
                old = READ_ONCE(*param);
                err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
                if (!err && old != READ_ONCE(*param))
                        proc_watchdog_update();
        }
        mutex_unlock(&watchdog_mutex);
        return err;
}

/*
 * /proc/sys/kernel/watchdog
 */
int proc_watchdog(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
                                    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
int proc_nmi_watchdog(struct ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
{
        if (!nmi_watchdog_available && write)
                return -ENOTSUPP;
        return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
                                    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/soft_watchdog
 */
int proc_soft_watchdog(struct ctl_table *table, int write,
                       void __user *buffer, size_t *lenp, loff_t *ppos)
{
        return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
                                    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/watchdog_thresh
 */
int proc_watchdog_thresh(struct ctl_table *table, int write,
                         void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int err, old;

        mutex_lock(&watchdog_mutex);

        old = READ_ONCE(watchdog_thresh);
        err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

        if (!err && write && old != READ_ONCE(watchdog_thresh))
                proc_watchdog_update();

        mutex_unlock(&watchdog_mutex);
        return err;
}
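
/*
 * Usage example for the handlers above (values are illustrative):
 *
 *   echo 0  > /proc/sys/kernel/nmi_watchdog    turn off the hard lockup detector
 *   echo 20 > /proc/sys/kernel/watchdog_thresh hard threshold 20s, soft 40s
 *
 * Every write that actually changes a value ends up in proc_watchdog_update()
 * and therefore in a full lockup_detector_reconfigure() cycle.
 */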

/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on.  This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
int proc_watchdog_cpumask(struct ctl_table *table, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int err;

        mutex_lock(&watchdog_mutex);

        err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
        if (!err && write)
                proc_watchdog_update();

        mutex_unlock(&watchdog_mutex);
        return err;
}
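
/*
 * Illustrative example (assuming a machine with CPUs 0-7): writing a range
 * list such as
 *
 *   echo 0-3 > /proc/sys/kernel/watchdog_cpumask
 *
 * restricts the watchdog to CPUs 0-3; proc_do_large_bitmap() parses the range
 * list and proc_watchdog_update() reconfigures the threads accordingly.
 */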

#endif /* CONFIG_SYSCTL */

void __init lockup_detector_init(void)
{
#ifdef CONFIG_NO_HZ_FULL
        if (tick_nohz_full_enabled()) {
                pr_info("Disabling watchdog on nohz_full cores by default\n");
                cpumask_copy(&watchdog_cpumask, housekeeping_mask);
        } else
                cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#else
        cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#endif

        if (!watchdog_nmi_probe())
                nmi_watchdog_available = true;

        lockup_detector_setup();
}