tree_exp.h

/*
 * RCU expedited grace periods
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright IBM Corporation, 2016
 *
 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */

/*
 * Record the start of an expedited grace period.
 */
static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
{
        rcu_seq_start(&rsp->expedited_sequence);
}

/*
 * Record the end of an expedited grace period.
 */
static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
{
        rcu_seq_end(&rsp->expedited_sequence);
        smp_mb(); /* Ensure that consecutive grace periods serialize. */
}

/*
 * Take a snapshot of the expedited-grace-period counter.
 */
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
        unsigned long s;

        smp_mb(); /* Caller's modifications seen first by other CPUs. */
        s = rcu_seq_snap(&rsp->expedited_sequence);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
        return s;
}

/*
 * Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
 * if a full expedited grace period has elapsed since that snapshot
 * was taken.
 */
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
{
        return rcu_seq_done(&rsp->expedited_sequence, s);
}

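/*
 * Taken together, the two functions above implement the usual pattern
 * for this file: take a snapshot with rcu_exp_gp_seq_snap() before doing
 * any work, then test that snapshot with rcu_exp_gp_seq_done() (usually
 * via sync_exp_work_done()).  Once the test returns true, a full
 * expedited grace period has elapsed since the snapshot was taken, so
 * the caller can piggy-back on someone else's grace period instead of
 * starting its own; see exp_funnel_lock() and
 * _synchronize_rcu_expedited() below.
 */
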
/*
 * Reset the ->expmaskinit values in the rcu_node tree to reflect any
 * recent CPU-online activity.  Note that these masks are not cleared
 * when CPUs go offline, so they reflect the union of all CPUs that have
 * ever been online.  This means that this function normally takes its
 * no-work-to-do fastpath.
 */
static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
{
        bool done;
        unsigned long flags;
        unsigned long mask;
        unsigned long oldmask;
        int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */
        struct rcu_node *rnp;
        struct rcu_node *rnp_up;

        /* If no new CPUs onlined since last time, nothing to do. */
        if (likely(ncpus == rsp->ncpus_snap))
                return;
        rsp->ncpus_snap = ncpus;

        /*
         * Each pass through the following loop propagates newly onlined
         * CPUs for the current rcu_node structure up the rcu_node tree.
         */
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                if (rnp->expmaskinit == rnp->expmaskinitnext) {
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        continue;  /* No new CPUs, nothing to do. */
                }

                /* Update this node's mask, track old value for propagation. */
                oldmask = rnp->expmaskinit;
                rnp->expmaskinit = rnp->expmaskinitnext;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

                /* If was already nonzero, nothing to propagate. */
                if (oldmask)
                        continue;

                /* Propagate the new CPU up the tree. */
                mask = rnp->grpmask;
                rnp_up = rnp->parent;
                done = false;
                while (rnp_up) {
                        raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
                        if (rnp_up->expmaskinit)
                                done = true;
                        rnp_up->expmaskinit |= mask;
                        raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
                        if (done)
                                break;
                        mask = rnp_up->grpmask;
                        rnp_up = rnp_up->parent;
                }
        }
}

/*
 * Reset the ->expmask values in the rcu_node tree in preparation for
 * a new expedited grace period.
 */
static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
{
        unsigned long flags;
        struct rcu_node *rnp;

        sync_exp_reset_tree_hotplug(rsp);
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                WARN_ON_ONCE(rnp->expmask);
                rnp->expmask = rnp->expmaskinit;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
}

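/*
 * On return from sync_exp_reset_tree(), each rcu_node's ->expmask holds
 * the set of CPUs that the upcoming expedited grace period must wait
 * for (the WARN_ON_ONCE() above checks that the previous grace period
 * cleared all of its bits).  These bits are then cleared again, either
 * by sync_rcu_exp_select_cpus() for CPUs found to be idle or offline,
 * or by the quiescent-state reporting functions below.
 */
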
/*
 * Return true if there is no RCU expedited grace period in progress
 * for the specified rcu_node structure, in other words, if all CPUs and
 * tasks covered by the specified rcu_node structure have done their bit
 * for the current expedited grace period.  Works only for preemptible
 * RCU -- other RCU implementations use other means.
 *
 * Caller must hold the rcu_state's exp_mutex.
 */
static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
        return rnp->exp_tasks == NULL &&
               READ_ONCE(rnp->expmask) == 0;
}

/*
 * Report the exit from RCU read-side critical section for the last task
 * that queued itself during or before the current expedited preemptible-RCU
 * grace period.  This event is reported either to the rcu_node structure on
 * which the task was queued or to one of that rcu_node structure's ancestors,
 * recursively up the tree.  (Calm down, calm down, we do the recursion
 * iteratively!)
 *
 * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
 * structure's ->lock.
 */
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
                                 bool wake, unsigned long flags)
        __releases(rnp->lock)
{
        unsigned long mask;

        for (;;) {
                if (!sync_rcu_preempt_exp_done(rnp)) {
                        if (!rnp->expmask)
                                rcu_initiate_boost(rnp, flags);
                        else
                                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        break;
                }
                if (rnp->parent == NULL) {
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        if (wake) {
                                smp_mb(); /* EGP done before wake_up(). */
                                swake_up(&rsp->expedited_wq);
                        }
                        break;
                }
                mask = rnp->grpmask;
                raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
                rnp = rnp->parent;
                raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
                WARN_ON_ONCE(!(rnp->expmask & mask));
                rnp->expmask &= ~mask;
        }
}

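/*
 * Each pass through the loop above moves up one level only after the
 * current rcu_node has fully quiesced, clearing that node's bit in its
 * parent's ->expmask.  If the node is still blocking the grace period,
 * the loop stops there, initiating RCU priority boosting if only
 * blocked tasks remain.  Once the root rcu_node has quiesced, the task
 * sleeping in synchronize_sched_expedited_wait() is awakened via
 * ->expedited_wq.
 */
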
/*
 * Report expedited quiescent state for specified node.  This is a
 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
 *
 * Caller must hold the rcu_state's exp_mutex.
 */
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
                                              struct rcu_node *rnp, bool wake)
{
        unsigned long flags;

        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        __rcu_report_exp_rnp(rsp, rnp, wake, flags);
}

/*
 * Report expedited quiescent state for multiple CPUs, all covered by the
 * specified leaf rcu_node structure.  Caller must hold the rcu_state's
 * exp_mutex.
 */
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
                                    unsigned long mask, bool wake)
{
        unsigned long flags;

        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        if (!(rnp->expmask & mask)) {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                return;
        }
        rnp->expmask &= ~mask;
        __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
}

/*
 * Report expedited quiescent state for specified rcu_data (CPU).
 */
static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
                               bool wake)
{
        rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
}

/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
                               unsigned long s)
{
        if (rcu_exp_gp_seq_done(rsp, s)) {
                trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
                /* Ensure test happens before caller kfree(). */
                smp_mb__before_atomic(); /* ^^^ */
                atomic_long_inc(stat);
                return true;
        }
        return false;
}

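/*
 * Note that the @stat argument (one of the rcu_data structure's
 * ->exp_workdone counters, as passed by the callers below) only counts
 * how often the needed grace period had already completed by the time
 * the check was made.  Callers act solely on the return value.
 */
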
/*
 * Funnel-lock acquisition for expedited grace periods.  Returns true
 * if some other task completed an expedited grace period that this task
 * can piggy-back on, and with no mutex held.  Otherwise, returns false
 * with the mutex held, indicating that the caller must actually do the
 * expedited grace period.
 */
static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
        struct rcu_node *rnp = rdp->mynode;
        struct rcu_node *rnp_root = rcu_get_root(rsp);

        /* Low-contention fastpath. */
        if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
            (rnp == rnp_root ||
             ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
            mutex_trylock(&rsp->exp_mutex))
                goto fastpath;

        /*
         * Each pass through the following loop works its way up
         * the rcu_node tree, returning if others have done the work or
         * otherwise falling through to acquire rsp->exp_mutex.  The
         * mapping from CPU to rcu_node structure can be inexact, as it
         * is just promoting locality and is not strictly needed for
         * correctness.
         */
        for (; rnp != NULL; rnp = rnp->parent) {
                if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
                        return true;

                /* Work not done, either wait here or go up. */
                spin_lock(&rnp->exp_lock);
                if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
                        /* Someone else doing GP, so wait for them. */
                        spin_unlock(&rnp->exp_lock);
                        trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
                                                  rnp->grplo, rnp->grphi,
                                                  TPS("wait"));
                        wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
                                   sync_exp_work_done(rsp,
                                                      &rdp->exp_workdone2, s));
                        return true;
                }
                rnp->exp_seq_rq = s; /* Followers can wait on us. */
                spin_unlock(&rnp->exp_lock);
                trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
                                          rnp->grphi, TPS("nxtlvl"));
        }
        mutex_lock(&rsp->exp_mutex);
fastpath:
        if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
                mutex_unlock(&rsp->exp_mutex);
                return true;
        }
        rcu_exp_gp_seq_start(rsp);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
        return false;
}

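/*
 * To summarize the funnel lock: a task wanting snapshot s first tries
 * the low-contention fastpath, which grabs ->exp_mutex directly when
 * nobody has yet requested a grace period covering s.  Failing that,
 * the task walks from its leaf rcu_node toward the root.  At each level
 * it either discovers that the needed grace period has already
 * completed, finds another task already working on behalf of s or a
 * later request (in which case it sleeps on one of the four per-node
 * ->exp_wq[] wait queues, selected by rcu_seq_ctr(s) & 0x3), or records
 * s in ->exp_seq_rq so that later arrivals wait on it, and then moves
 * up one level.  A task that reaches the root without being satisfied
 * acquires ->exp_mutex, makes one final work-done check, and otherwise
 * must run the expedited grace period itself.
 */
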
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void sync_sched_exp_handler(void *data)
{
        struct rcu_data *rdp;
        struct rcu_node *rnp;
        struct rcu_state *rsp = data;

        rdp = this_cpu_ptr(rsp->rda);
        rnp = rdp->mynode;
        if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
            __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
                return;
        if (rcu_is_cpu_rrupt_from_idle()) {
                rcu_report_exp_rdp(&rcu_sched_state,
                                   this_cpu_ptr(&rcu_sched_data), true);
                return;
        }
        __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
        /* Store .exp before .rcu_urgent_qs. */
        smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
        resched_cpu(smp_processor_id());
}

/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
        struct rcu_data *rdp;
        int ret;
        struct rcu_node *rnp;
        struct rcu_state *rsp = &rcu_sched_state;

        rdp = per_cpu_ptr(rsp->rda, cpu);
        rnp = rdp->mynode;
        if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
                return;
        ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
        WARN_ON_ONCE(ret);
}

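/*
 * In other words, if the incoming CPU is still blocking the current
 * expedited grace period (its bit remains set in its leaf rcu_node's
 * ->expmask), re-send it sync_sched_exp_handler() so that it reports
 * its quiescent state; otherwise there is nothing to do.
 */
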
/*
 * Select the nodes that the upcoming expedited grace period needs
 * to wait for.
 */
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
                                     smp_call_func_t func)
{
        int cpu;
        unsigned long flags;
        unsigned long mask_ofl_test;
        unsigned long mask_ofl_ipi;
        int ret;
        struct rcu_node *rnp;

        sync_exp_reset_tree(rsp);
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);

                /* Each pass checks a CPU for identity, offline, and idle. */
                mask_ofl_test = 0;
                for_each_leaf_node_possible_cpu(rnp, cpu) {
                        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);

                        rdp->exp_dynticks_snap =
                                rcu_dynticks_snap(rdp->dynticks);
                        if (raw_smp_processor_id() == cpu ||
                            rcu_dynticks_in_eqs(rdp->exp_dynticks_snap) ||
                            !(rnp->qsmaskinitnext & rdp->grpmask))
                                mask_ofl_test |= rdp->grpmask;
                }
                mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;

                /*
                 * Need to wait for any blocked tasks as well.  Note that
                 * additional blocking tasks will also block the expedited
                 * GP until such time as the ->expmask bits are cleared.
                 */
                if (rcu_preempt_has_tasks(rnp))
                        rnp->exp_tasks = rnp->blkd_tasks.next;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

                /* IPI the remaining CPUs for expedited quiescent state. */
                for_each_leaf_node_possible_cpu(rnp, cpu) {
                        unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
                        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);

                        if (!(mask_ofl_ipi & mask))
                                continue;
retry_ipi:
                        if (rcu_dynticks_in_eqs_since(rdp->dynticks,
                                                      rdp->exp_dynticks_snap)) {
                                mask_ofl_test |= mask;
                                continue;
                        }
                        ret = smp_call_function_single(cpu, func, rsp, 0);
                        if (!ret) {
                                mask_ofl_ipi &= ~mask;
                                continue;
                        }
                        /* Failed, raced with CPU hotplug operation. */
                        raw_spin_lock_irqsave_rcu_node(rnp, flags);
                        if ((rnp->qsmaskinitnext & mask) &&
                            (rnp->expmask & mask)) {
                                /* Online, so delay for a bit and try again. */
                                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                                schedule_timeout_uninterruptible(1);
                                goto retry_ipi;
                        }
                        /* CPU really is offline, so we can ignore it. */
                        if (!(rnp->expmask & mask))
                                mask_ofl_ipi &= ~mask;
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                }

                /* Report quiescent states for those that went offline. */
                mask_ofl_test |= mask_ofl_ipi;
                if (mask_ofl_test)
                        rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
        }
}

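/*
 * Wait for the expedited grace period to elapse, issuing an
 * expedited-stall warning (and dumping the stacks of holdout CPUs) if
 * it takes too long.  In the warning, each still-blocking CPU is shown
 * as "<cpu>-XYZ", where X is 'O' if the CPU is currently offline, Y is
 * 'o' if the CPU's bit is clear in its leaf rcu_node's ->expmaskinit,
 * and Z is 'N' if its bit is clear in ->expmaskinitnext; a '.' in any
 * position means that the corresponding condition does not hold.  The
 * trailing "/T" on the root entry indicates blocked tasks.
 */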
static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
{
        int cpu;
        unsigned long jiffies_stall;
        unsigned long jiffies_start;
        unsigned long mask;
        int ndetected;
        struct rcu_node *rnp;
        struct rcu_node *rnp_root = rcu_get_root(rsp);
        int ret;

        jiffies_stall = rcu_jiffies_till_stall_check();
        jiffies_start = jiffies;

        for (;;) {
                ret = swait_event_timeout(
                                rsp->expedited_wq,
                                sync_rcu_preempt_exp_done(rnp_root),
                                jiffies_stall);
                if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
                        return;
                WARN_ON(ret < 0); /* workqueues should not be signaled. */
                if (rcu_cpu_stall_suppress)
                        continue;
                panic_on_rcu_stall();
                pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
                       rsp->name);
                ndetected = 0;
                rcu_for_each_leaf_node(rsp, rnp) {
                        ndetected += rcu_print_task_exp_stall(rnp);
                        for_each_leaf_node_possible_cpu(rnp, cpu) {
                                struct rcu_data *rdp;

                                mask = leaf_node_cpu_bit(rnp, cpu);
                                if (!(rnp->expmask & mask))
                                        continue;
                                ndetected++;
                                rdp = per_cpu_ptr(rsp->rda, cpu);
                                pr_cont(" %d-%c%c%c", cpu,
                                        "O."[!!cpu_online(cpu)],
                                        "o."[!!(rdp->grpmask & rnp->expmaskinit)],
                                        "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
                        }
                }
                pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
                        jiffies - jiffies_start, rsp->expedited_sequence,
                        rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
                if (ndetected) {
                        pr_err("blocking rcu_node structures:");
                        rcu_for_each_node_breadth_first(rsp, rnp) {
                                if (rnp == rnp_root)
                                        continue; /* printed unconditionally */
                                if (sync_rcu_preempt_exp_done(rnp))
                                        continue;
                                pr_cont(" l=%u:%d-%d:%#lx/%c",
                                        rnp->level, rnp->grplo, rnp->grphi,
                                        rnp->expmask,
                                        ".T"[!!rnp->exp_tasks]);
                        }
                        pr_cont("\n");
                }
                rcu_for_each_leaf_node(rsp, rnp) {
                        for_each_leaf_node_possible_cpu(rnp, cpu) {
                                mask = leaf_node_cpu_bit(rnp, cpu);
                                if (!(rnp->expmask & mask))
                                        continue;
                                dump_cpu_task(cpu);
                        }
                }
                jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
        }
}

/*
 * Wait for the current expedited grace period to complete, and then
 * wake up everyone who piggybacked on the just-completed expedited
 * grace period.  Also update all the ->exp_seq_rq counters as needed
 * in order to avoid counter-wrap problems.
 */
static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
{
        struct rcu_node *rnp;

        synchronize_sched_expedited_wait(rsp);

        /*
         * Switch over to wakeup mode, allowing the next GP to proceed.
         * End the previous grace period only after acquiring the mutex
         * to ensure that only one GP runs concurrently with wakeups.
         */
        mutex_lock(&rsp->exp_wake_mutex);
        rcu_exp_gp_seq_end(rsp);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));

        rcu_for_each_node_breadth_first(rsp, rnp) {
                if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
                        spin_lock(&rnp->exp_lock);
                        /* Recheck, avoid hang in case someone just arrived. */
                        if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
                                rnp->exp_seq_rq = s;
                        spin_unlock(&rnp->exp_lock);
                }
                smp_mb(); /* All above changes before wakeup. */
                wake_up_all(&rnp->exp_wq[rcu_seq_ctr(s) & 0x3]);
        }
        trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
        mutex_unlock(&rsp->exp_wake_mutex);
}

/* Let the workqueue handler know what it is supposed to do. */
struct rcu_exp_work {
        smp_call_func_t rew_func;
        struct rcu_state *rew_rsp;
        unsigned long rew_s;
        struct work_struct rew_work;
};

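/*
 * The structure above is filled in on the stack by
 * _synchronize_rcu_expedited(), which uses INIT_WORK_ONSTACK() to hand
 * the flavor-specific IPI handler, the rcu_state pointer, and the
 * sequence snapshot to wait_rcu_exp_gp(), so that the grace-period
 * machinery runs in workqueue context rather than in the caller.
 */
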
/*
 * Common code to drive an expedited grace period forward, used by
 * workqueues and mid-boot-time tasks.
 */
static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
                                  smp_call_func_t func, unsigned long s)
{
        /* Initialize the rcu_node tree in preparation for the wait. */
        sync_rcu_exp_select_cpus(rsp, func);

        /* Wait and clean up, including waking everyone. */
        rcu_exp_wait_wake(rsp, s);
}

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct work_struct *wp)
{
        struct rcu_exp_work *rewp;

        rewp = container_of(wp, struct rcu_exp_work, rew_work);
        rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
}

/*
 * Given an rcu_state pointer and a smp_call_function() handler, kick
 * off the specified flavor of expedited grace period.
 */
static void _synchronize_rcu_expedited(struct rcu_state *rsp,
                                       smp_call_func_t func)
{
        struct rcu_data *rdp;
        struct rcu_exp_work rew;
        struct rcu_node *rnp;
        unsigned long s;

        /* If expedited grace periods are prohibited, fall back to normal. */
        if (rcu_gp_is_normal()) {
                wait_rcu_gp(rsp->call);
                return;
        }

        /* Take a snapshot of the sequence number. */
        s = rcu_exp_gp_seq_snap(rsp);
        if (exp_funnel_lock(rsp, s))
                return;  /* Someone else did our work for us. */

        /* Ensure that load happens before action based on it. */
        if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
                /* Direct call during scheduler init and early_initcalls(). */
                rcu_exp_sel_wait_wake(rsp, func, s);
        } else {
                /* Marshal arguments & schedule the expedited grace period. */
                rew.rew_func = func;
                rew.rew_rsp = rsp;
                rew.rew_s = s;
                INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
                schedule_work(&rew.rew_work);
        }

        /* Wait for expedited grace period to complete. */
        rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
        rnp = rcu_get_root(rsp);
        wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
                   sync_exp_work_done(rsp, &rdp->exp_workdone0, s));
        smp_mb(); /* Workqueue actions happen before return. */

        /* Let the next expedited grace period start. */
        mutex_unlock(&rsp->exp_mutex);
}

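/*
 * Putting the pieces together, _synchronize_rcu_expedited() proceeds as
 * follows: snapshot the expedited sequence counter, use the funnel lock
 * to either piggy-back on a concurrent expedited grace period or become
 * the one task holding ->exp_mutex, then run the grace-period machinery
 * (directly during early boot, otherwise from a workqueue), sleep on
 * the root rcu_node's ->exp_wq[] until the grace period covering the
 * snapshot has ended, and finally release ->exp_mutex so that the next
 * expedited grace period can start.
 */
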
/**
 * synchronize_sched_expedited - Brute-force RCU-sched grace period
 *
 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
 * approach to force the grace period to end quickly.  This consumes
 * significant time on all CPUs and is unfriendly to real-time workloads,
 * and is thus not recommended for any sort of common-case code.  In fact,
 * if you are using synchronize_sched_expedited() in a loop, please
 * restructure your code to batch your updates, and then use a single
 * synchronize_sched() instead.
 *
 * This implementation can be thought of as an application of sequence
 * locking to expedited grace periods, but using the sequence counter to
 * determine when someone else has already done the work instead of for
 * retrying readers.
 */
void synchronize_sched_expedited(void)
{
        struct rcu_state *rsp = &rcu_sched_state;

        RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
                         lock_is_held(&rcu_lock_map) ||
                         lock_is_held(&rcu_sched_lock_map),
                         "Illegal synchronize_sched_expedited() in RCU read-side critical section");

        /* If only one CPU, this is automatically a grace period. */
        if (rcu_blocking_is_gp())
                return;

        _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);

#ifdef CONFIG_PREEMPT_RCU

/*
 * Remote handler for smp_call_function_single().  If there is an
 * RCU read-side critical section in effect, request that the
 * next rcu_read_unlock() record the quiescent state up the
 * ->expmask fields in the rcu_node tree.  Otherwise, immediately
 * report the quiescent state.
 */
static void sync_rcu_exp_handler(void *info)
{
        struct rcu_data *rdp;
        struct rcu_state *rsp = info;
        struct task_struct *t = current;

        /*
         * Within an RCU read-side critical section, request that the next
         * rcu_read_unlock() report.  Unless this RCU read-side critical
         * section has already blocked, in which case it is already set
         * up for the expedited grace period to wait on it.
         */
        if (t->rcu_read_lock_nesting > 0 &&
            !t->rcu_read_unlock_special.b.blocked) {
                t->rcu_read_unlock_special.b.exp_need_qs = true;
                return;
        }

        /*
         * We are either exiting an RCU read-side critical section (negative
         * values of t->rcu_read_lock_nesting) or are not in one at all
         * (zero value of t->rcu_read_lock_nesting).  Or we are in an RCU
         * read-side critical section that blocked before this expedited
         * grace period started.  Either way, we can immediately report
         * the quiescent state.
         */
        rdp = this_cpu_ptr(rsp->rda);
        rcu_report_exp_rdp(rsp, rdp, true);
}

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU-preempt grace period, but expedite it.  The basic
 * idea is to IPI all non-idle non-nohz online CPUs.  The IPI handler
 * checks whether the CPU is in an RCU-preempt critical section, and
 * if so, it sets a flag that causes the outermost rcu_read_unlock()
 * to report the quiescent state.  On the other hand, if the CPU is
 * not in an RCU read-side critical section, the IPI handler reports
 * the quiescent state immediately.
 *
 * Although this is a great improvement over previous expedited
 * implementations, it is still unfriendly to real-time workloads, and
 * is thus not recommended for any sort of common-case code.  In fact,
 * if you are using synchronize_rcu_expedited() in a loop, please
 * restructure your code to batch your updates, and then use a single
 * synchronize_rcu() instead.
 */
void synchronize_rcu_expedited(void)
{
        struct rcu_state *rsp = rcu_state_p;

        RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
                         lock_is_held(&rcu_lock_map) ||
                         lock_is_held(&rcu_sched_lock_map),
                         "Illegal synchronize_rcu_expedited() in RCU read-side critical section");

        if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
                return;
        _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#else /* #ifdef CONFIG_PREEMPT_RCU */

/*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
 * Because preemptible RCU does not exist, map to rcu-sched.
 */
void synchronize_rcu_expedited(void)
{
        synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */