/*
 * RCU expedited grace periods
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright IBM Corporation, 2016
 *
 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */
/* Wrapper functions for expedited grace periods. */
static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
{
        rcu_seq_start(&rsp->expedited_sequence);
}

static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
{
        rcu_seq_end(&rsp->expedited_sequence);
        smp_mb(); /* Ensure that consecutive grace periods serialize. */
}

static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
        unsigned long s;

        smp_mb(); /* Caller's modifications seen first by other CPUs. */
        s = rcu_seq_snap(&rsp->expedited_sequence);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
        return s;
}

static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
{
        return rcu_seq_done(&rsp->expedited_sequence, s);
}
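
/*
 * A note on the sequence counter (informal summary of the rcu_seq_*()
 * helpers used above): the low-order bit of ->expedited_sequence is set
 * while an expedited grace period is in progress.  rcu_exp_gp_seq_snap()
 * therefore returns the counter value at which all of the caller's
 * pre-existing readers are known to have finished, and
 * rcu_exp_gp_seq_done() checks whether that value has been reached.
 * This is also why wait-queue indexing below uses "(s >> 1) & 0x3":
 * s >> 1 is in effect the expedited grace-period number.
 */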

/*
 * Reset the ->expmaskinit values in the rcu_node tree to reflect any
 * recent CPU-online activity.  Note that these masks are not cleared
 * when CPUs go offline, so they reflect the union of all CPUs that have
 * ever been online.  This means that this function normally takes its
 * no-work-to-do fastpath.
 */
static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
{
        bool done;
        unsigned long flags;
        unsigned long mask;
        unsigned long oldmask;
        int ncpus = READ_ONCE(rsp->ncpus);
        struct rcu_node *rnp;
        struct rcu_node *rnp_up;

        /* If no new CPUs onlined since last time, nothing to do. */
        if (likely(ncpus == rsp->ncpus_snap))
                return;
        rsp->ncpus_snap = ncpus;

        /*
         * Each pass through the following loop propagates newly onlined
         * CPUs for the current rcu_node structure up the rcu_node tree.
         */
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                if (rnp->expmaskinit == rnp->expmaskinitnext) {
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        continue;  /* No new CPUs, nothing to do. */
                }

                /* Update this node's mask, track old value for propagation. */
                oldmask = rnp->expmaskinit;
                rnp->expmaskinit = rnp->expmaskinitnext;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

                /* If was already nonzero, nothing to propagate. */
                if (oldmask)
                        continue;

                /* Propagate the new CPU up the tree. */
                mask = rnp->grpmask;
                rnp_up = rnp->parent;
                done = false;
                while (rnp_up) {
                        raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
                        if (rnp_up->expmaskinit)
                                done = true;
                        rnp_up->expmaskinit |= mask;
                        raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
                        if (done)
                                break;
                        mask = rnp_up->grpmask;
                        rnp_up = rnp_up->parent;
                }
        }
}

/*
 * Reset the ->expmask values in the rcu_node tree in preparation for
 * a new expedited grace period.
 */
static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
{
        unsigned long flags;
        struct rcu_node *rnp;

        sync_exp_reset_tree_hotplug(rsp);
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
                WARN_ON_ONCE(rnp->expmask);
                rnp->expmask = rnp->expmaskinit;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
}

/*
 * Return non-zero if there is no RCU expedited grace period in progress
 * for the specified rcu_node structure, in other words, if all CPUs and
 * tasks covered by the specified rcu_node structure have done their bit
 * for the current expedited grace period.  Works only for preemptible
 * RCU -- other RCU implementations use other means.
 *
 * Caller must hold the rcu_state's exp_mutex.
 */
static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
        return rnp->exp_tasks == NULL &&
               READ_ONCE(rnp->expmask) == 0;
}

/*
 * Report the exit from RCU read-side critical section for the last task
 * that queued itself during or before the current expedited preemptible-RCU
 * grace period.  This event is reported either to the rcu_node structure on
 * which the task was queued or to one of that rcu_node structure's ancestors,
 * recursively up the tree.  (Calm down, calm down, we do the recursion
 * iteratively!)
 *
 * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
 * structure's ->lock.
 */
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
                                 bool wake, unsigned long flags)
        __releases(rnp->lock)
{
        unsigned long mask;

        for (;;) {
                if (!sync_rcu_preempt_exp_done(rnp)) {
                        if (!rnp->expmask)
                                rcu_initiate_boost(rnp, flags);
                        else
                                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        break;
                }
                if (rnp->parent == NULL) {
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                        if (wake) {
                                smp_mb(); /* EGP done before wake_up(). */
                                swake_up(&rsp->expedited_wq);
                        }
                        break;
                }
                mask = rnp->grpmask;
                raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
                rnp = rnp->parent;
                raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
                WARN_ON_ONCE(!(rnp->expmask & mask));
                rnp->expmask &= ~mask;
        }
}
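
/*
 * Informal walk-through of the loop above: once a leaf rcu_node becomes
 * fully quiescent (no queued exp_tasks and an empty ->expmask), its bit
 * is cleared in its parent's ->expmask and the same check is repeated one
 * level up.  The walk stops early at the first ancestor still waiting on
 * other children, and reaches the root -- and hence the swake_up() -- only
 * when the entire tree has gone quiescent.
 */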

/*
 * Report expedited quiescent state for specified node.  This is a
 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
 *
 * Caller must hold the rcu_state's exp_mutex.
 */
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
                                              struct rcu_node *rnp, bool wake)
{
        unsigned long flags;

        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        __rcu_report_exp_rnp(rsp, rnp, wake, flags);
}

/*
 * Report expedited quiescent state for multiple CPUs, all covered by the
 * specified leaf rcu_node structure.  Caller must hold the rcu_state's
 * exp_mutex.
 */
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
                                    unsigned long mask, bool wake)
{
        unsigned long flags;

        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        if (!(rnp->expmask & mask)) {
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                return;
        }
        rnp->expmask &= ~mask;
        __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
}

/*
 * Report expedited quiescent state for specified rcu_data (CPU).
 */
static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
                               bool wake)
{
        rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
}

/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
                               unsigned long s)
{
        if (rcu_exp_gp_seq_done(rsp, s)) {
                trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
                /* Ensure test happens before caller kfree(). */
                smp_mb__before_atomic(); /* ^^^ */
                atomic_long_inc(stat);
                return true;
        }
        return false;
}

/*
 * Funnel-lock acquisition for expedited grace periods.  Returns true
 * if some other task completed an expedited grace period that this task
 * can piggy-back on, and with no mutex held.  Otherwise, returns false
 * with the mutex held, indicating that the caller must actually do the
 * expedited grace period.
 */
static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
        struct rcu_node *rnp = rdp->mynode;
        struct rcu_node *rnp_root = rcu_get_root(rsp);

        /* Low-contention fastpath. */
        if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
            (rnp == rnp_root ||
             ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
            mutex_trylock(&rsp->exp_mutex))
                goto fastpath;

        /*
         * Each pass through the following loop works its way up
         * the rcu_node tree, returning if others have done the work or
         * otherwise falls through to acquire rsp->exp_mutex.  The mapping
         * from CPU to rcu_node structure can be inexact, as it is just
         * promoting locality and is not strictly needed for correctness.
         */
        for (; rnp != NULL; rnp = rnp->parent) {
                if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
                        return true;

                /* Work not done, either wait here or go up. */
                spin_lock(&rnp->exp_lock);
                if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {

                        /* Someone else doing GP, so wait for them. */
                        spin_unlock(&rnp->exp_lock);
                        trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
                                                  rnp->grplo, rnp->grphi,
                                                  TPS("wait"));
                        wait_event(rnp->exp_wq[(s >> 1) & 0x3],
                                   sync_exp_work_done(rsp,
                                                      &rdp->exp_workdone2, s));
                        return true;
                }
                rnp->exp_seq_rq = s; /* Followers can wait on us. */
                spin_unlock(&rnp->exp_lock);
                trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
                                          rnp->grphi, TPS("nxtlvl"));
        }
        mutex_lock(&rsp->exp_mutex);
fastpath:
        if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
                mutex_unlock(&rsp->exp_mutex);
                return true;
        }
        rcu_exp_gp_seq_start(rsp);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
        return false;
}
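
/*
 * Informal note on the funnel: a task that finds ->exp_seq_rq on some
 * rcu_node already at or beyond its snapshot "s" knows that another task
 * has promised a grace period covering it, so it simply sleeps on one of
 * that node's wait queues.  The index "(s >> 1) & 0x3" is the expedited
 * grace-period number modulo 4, which keeps waiters for consecutive grace
 * periods on distinct queues; rcu_exp_wait_wake() below wakes the matching
 * queue when that grace period completes.
 */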

/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void sync_sched_exp_handler(void *data)
{
        struct rcu_data *rdp;
        struct rcu_node *rnp;
        struct rcu_state *rsp = data;

        rdp = this_cpu_ptr(rsp->rda);
        rnp = rdp->mynode;
        if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
            __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
                return;
        if (rcu_is_cpu_rrupt_from_idle()) {
                rcu_report_exp_rdp(&rcu_sched_state,
                                   this_cpu_ptr(&rcu_sched_data), true);
                return;
        }
        __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
        resched_cpu(smp_processor_id());
}
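
/*
 * Informal note on the handler above: if the IPI interrupts the idle loop,
 * the CPU is already in a quiescent state and is reported immediately.
 * Otherwise the handler merely records that an expedited quiescent state
 * is needed and uses resched_cpu() to force a context switch, which is
 * itself a quiescent state for RCU-sched and is reported from the
 * scheduler path.
 */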

/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
        struct rcu_data *rdp;
        int ret;
        struct rcu_node *rnp;
        struct rcu_state *rsp = &rcu_sched_state;

        rdp = per_cpu_ptr(rsp->rda, cpu);
        rnp = rdp->mynode;
        if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
                return;
        ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
        WARN_ON_ONCE(ret);
}

/*
 * Select the nodes that the upcoming expedited grace period needs
 * to wait for.
 */
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
                                     smp_call_func_t func)
{
        int cpu;
        unsigned long flags;
        unsigned long mask_ofl_test;
        unsigned long mask_ofl_ipi;
        int ret;
        struct rcu_node *rnp;

        sync_exp_reset_tree(rsp);
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);

                /* Each pass checks a CPU for identity, offline, and idle. */
                mask_ofl_test = 0;
                for_each_leaf_node_possible_cpu(rnp, cpu) {
                        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
                        struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);

                        if (raw_smp_processor_id() == cpu ||
                            !(atomic_add_return(0, &rdtp->dynticks) & 0x1) ||
                            !(rnp->qsmaskinitnext & rdp->grpmask))
                                mask_ofl_test |= rdp->grpmask;
                }
                mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;

                /*
                 * Need to wait for any blocked tasks as well.  Note that
                 * additional blocking tasks will also block the expedited
                 * GP until such time as the ->expmask bits are cleared.
                 */
                if (rcu_preempt_has_tasks(rnp))
                        rnp->exp_tasks = rnp->blkd_tasks.next;
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

                /* IPI the remaining CPUs for expedited quiescent state. */
                for_each_leaf_node_possible_cpu(rnp, cpu) {
                        unsigned long mask = leaf_node_cpu_bit(rnp, cpu);

                        if (!(mask_ofl_ipi & mask))
                                continue;
retry_ipi:
                        ret = smp_call_function_single(cpu, func, rsp, 0);
                        if (!ret) {
                                mask_ofl_ipi &= ~mask;
                                continue;
                        }
                        /* Failed, raced with CPU hotplug operation. */
                        raw_spin_lock_irqsave_rcu_node(rnp, flags);
                        if ((rnp->qsmaskinitnext & mask) &&
                            (rnp->expmask & mask)) {
                                /* Online, so delay for a bit and try again. */
                                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                                schedule_timeout_uninterruptible(1);
                                goto retry_ipi;
                        }
                        /* CPU really is offline, so we can ignore it. */
                        if (!(rnp->expmask & mask))
                                mask_ofl_ipi &= ~mask;
                        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
                }

                /* Report quiescent states for those that went offline. */
                mask_ofl_test |= mask_ofl_ipi;
                if (mask_ofl_test)
                        rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
        }
}
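
/*
 * Informal summary of the selection above: a CPU is excused from the IPI
 * (and its quiescent state reported on its behalf) if it is the CPU
 * running this code, if its ->dynticks counter has an even value (idle
 * from RCU's point of view), or if its bit is clear in ->qsmaskinitnext
 * (offline).  Everything else gets an IPI, with a retry loop to sort out
 * races against CPU-hotplug operations.
 */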

static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
{
        int cpu;
        unsigned long jiffies_stall;
        unsigned long jiffies_start;
        unsigned long mask;
        int ndetected;
        struct rcu_node *rnp;
        struct rcu_node *rnp_root = rcu_get_root(rsp);
        int ret;

        jiffies_stall = rcu_jiffies_till_stall_check();
        jiffies_start = jiffies;

        for (;;) {
                ret = swait_event_timeout(
                                rsp->expedited_wq,
                                sync_rcu_preempt_exp_done(rnp_root),
                                jiffies_stall);
                if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
                        return;
                WARN_ON(ret < 0);  /* workqueues should not be signaled. */
                if (rcu_cpu_stall_suppress)
                        continue;
                panic_on_rcu_stall();
                pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
                       rsp->name);
                ndetected = 0;
                rcu_for_each_leaf_node(rsp, rnp) {
                        ndetected += rcu_print_task_exp_stall(rnp);
                        for_each_leaf_node_possible_cpu(rnp, cpu) {
                                struct rcu_data *rdp;

                                mask = leaf_node_cpu_bit(rnp, cpu);
                                if (!(rnp->expmask & mask))
                                        continue;
                                ndetected++;
                                rdp = per_cpu_ptr(rsp->rda, cpu);
                                pr_cont(" %d-%c%c%c", cpu,
                                        "O."[!!cpu_online(cpu)],
                                        "o."[!!(rdp->grpmask & rnp->expmaskinit)],
                                        "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
                        }
                }
                pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
                        jiffies - jiffies_start, rsp->expedited_sequence,
                        rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
                if (ndetected) {
                        pr_err("blocking rcu_node structures:");
                        rcu_for_each_node_breadth_first(rsp, rnp) {
                                if (rnp == rnp_root)
                                        continue; /* printed unconditionally */
                                if (sync_rcu_preempt_exp_done(rnp))
                                        continue;
                                pr_cont(" l=%u:%d-%d:%#lx/%c",
                                        rnp->level, rnp->grplo, rnp->grphi,
                                        rnp->expmask,
                                        ".T"[!!rnp->exp_tasks]);
                        }
                        pr_cont("\n");
                }
                rcu_for_each_leaf_node(rsp, rnp) {
                        for_each_leaf_node_possible_cpu(rnp, cpu) {
                                mask = leaf_node_cpu_bit(rnp, cpu);
                                if (!(rnp->expmask & mask))
                                        continue;
                                dump_cpu_task(cpu);
                        }
                }
                jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
        }
}

/*
 * Wait for the current expedited grace period to complete, and then
 * wake up everyone who piggybacked on the just-completed expedited
 * grace period.  Also update all the ->exp_seq_rq counters as needed
 * in order to avoid counter-wrap problems.
 */
static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
{
        struct rcu_node *rnp;

        synchronize_sched_expedited_wait(rsp);
        rcu_exp_gp_seq_end(rsp);
        trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));

        /*
         * Switch over to wakeup mode, allowing the next GP, but -only- the
         * next GP, to proceed.
         */
        mutex_lock(&rsp->exp_wake_mutex);

        rcu_for_each_node_breadth_first(rsp, rnp) {
                if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
                        spin_lock(&rnp->exp_lock);
                        /* Recheck, avoid hang in case someone just arrived. */
                        if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
                                rnp->exp_seq_rq = s;
                        spin_unlock(&rnp->exp_lock);
                }
                wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
        }
        trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
        mutex_unlock(&rsp->exp_wake_mutex);
}
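
/*
 * Informal note: the wake_up_all() index above is derived from the
 * just-completed value of ->expedited_sequence, so it selects the same
 * exp_wq[] entry that waiters for this grace period chose via
 * "(s >> 1) & 0x3" in exp_funnel_lock() and _synchronize_rcu_expedited().
 * Advancing ->exp_seq_rq to "s" on each rcu_node keeps later comparisons
 * against these counters sane as the sequence number wraps.
 */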

/* Let the workqueue handler know what it is supposed to do. */
struct rcu_exp_work {
        smp_call_func_t rew_func;
        struct rcu_state *rew_rsp;
        unsigned long rew_s;
        struct work_struct rew_work;
};

/*
 * Common code to drive an expedited grace period forward, used by
 * workqueues and mid-boot-time tasks.
 */
static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
                                  smp_call_func_t func, unsigned long s)
{
        /* Initialize the rcu_node tree in preparation for the wait. */
        sync_rcu_exp_select_cpus(rsp, func);

        /* Wait and clean up, including waking everyone. */
        rcu_exp_wait_wake(rsp, s);
}

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct work_struct *wp)
{
        struct rcu_exp_work *rewp;

        rewp = container_of(wp, struct rcu_exp_work, rew_work);
        rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
}

/*
 * Given an rcu_state pointer and a smp_call_function() handler, kick
 * off the specified flavor of expedited grace period.
 */
static void _synchronize_rcu_expedited(struct rcu_state *rsp,
                                       smp_call_func_t func)
{
        struct rcu_data *rdp;
        struct rcu_exp_work rew;
        struct rcu_node *rnp;
        unsigned long s;

        /* If expedited grace periods are prohibited, fall back to normal. */
        if (rcu_gp_is_normal()) {
                wait_rcu_gp(rsp->call);
                return;
        }

        /* Take a snapshot of the sequence number. */
        s = rcu_exp_gp_seq_snap(rsp);
        if (exp_funnel_lock(rsp, s))
                return;  /* Someone else did our work for us. */

        /* Ensure that load happens before action based on it. */
        if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
                /* Direct call during scheduler init and early_initcalls(). */
                rcu_exp_sel_wait_wake(rsp, func, s);
        } else {
                /* Marshall arguments & schedule the expedited grace period. */
                rew.rew_func = func;
                rew.rew_rsp = rsp;
                rew.rew_s = s;
                INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
                schedule_work(&rew.rew_work);
        }

        /* Wait for expedited grace period to complete. */
        rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
        rnp = rcu_get_root(rsp);
        wait_event(rnp->exp_wq[(s >> 1) & 0x3],
                   sync_exp_work_done(rsp,
                                      &rdp->exp_workdone0, s));

        /* Let the next expedited grace period start. */
        mutex_unlock(&rsp->exp_mutex);
}
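
/*
 * Informal summary of the overall expedited flow driven above:
 *
 *      s = rcu_exp_gp_seq_snap()       -- sequence value to wait for
 *      exp_funnel_lock()               -- piggy-back or claim exp_mutex
 *      sync_rcu_exp_select_cpus()      -- build ->expmask, IPI the CPUs
 *      rcu_exp_wait_wake()             -- wait for QSes, then wake waiters
 *
 * The real grace-period work normally runs from a workqueue, so the
 * caller's context only has to sleep on the root rcu_node's wait queue.
 */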

/**
 * synchronize_sched_expedited - Brute-force RCU-sched grace period
 *
 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
 * approach to force the grace period to end quickly.  This consumes
 * significant time on all CPUs and is unfriendly to real-time workloads,
 * so is thus not recommended for any sort of common-case code.  In fact,
 * if you are using synchronize_sched_expedited() in a loop, please
 * restructure your code to batch your updates, and then use a single
 * synchronize_sched() instead.
 *
 * This implementation can be thought of as an application of sequence
 * locking to expedited grace periods, but using the sequence counter to
 * determine when someone else has already done the work instead of for
 * retrying readers.
 */
void synchronize_sched_expedited(void)
{
        struct rcu_state *rsp = &rcu_sched_state;

        /* If only one CPU, this is automatically a grace period. */
        if (rcu_blocking_is_gp())
                return;

        _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
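
/*
 * Illustrative usage sketch (hypothetical updater, not taken from this
 * file): an updater removes an element that RCU-sched readers (code in
 * preempt-disabled or interrupt-disabled regions) might still reference,
 * then waits for an expedited grace period before freeing it:
 *
 *      spin_lock(&my_lock);            // hypothetical lock and list
 *      list_del_rcu(&p->node);
 *      spin_unlock(&my_lock);
 *      synchronize_sched_expedited();  // all prior readers have finished
 *      kfree(p);
 */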

#ifdef CONFIG_PREEMPT_RCU

/*
 * Remote handler for smp_call_function_single().  If there is an
 * RCU read-side critical section in effect, request that the
 * next rcu_read_unlock() record the quiescent state up the
 * ->expmask fields in the rcu_node tree.  Otherwise, immediately
 * report the quiescent state.
 */
static void sync_rcu_exp_handler(void *info)
{
        struct rcu_data *rdp;
        struct rcu_state *rsp = info;
        struct task_struct *t = current;

        /*
         * Within an RCU read-side critical section, request that the next
         * rcu_read_unlock() report.  Unless this RCU read-side critical
         * section has already blocked, in which case it is already set
         * up for the expedited grace period to wait on it.
         */
        if (t->rcu_read_lock_nesting > 0 &&
            !t->rcu_read_unlock_special.b.blocked) {
                t->rcu_read_unlock_special.b.exp_need_qs = true;
                return;
        }

        /*
         * We are either exiting an RCU read-side critical section (negative
         * values of t->rcu_read_lock_nesting) or are not in one at all
         * (zero value of t->rcu_read_lock_nesting).  Or we are in an RCU
         * read-side critical section that blocked before this expedited
         * grace period started.  Either way, we can immediately report
         * the quiescent state.
         */
        rdp = this_cpu_ptr(rsp->rda);
        rcu_report_exp_rdp(rsp, rdp, true);
}

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU-preempt grace period, but expedite it.  The basic
 * idea is to IPI all non-idle non-nohz online CPUs.  The IPI handler
 * checks whether the CPU is in an RCU-preempt critical section, and
 * if so, it sets a flag that causes the outermost rcu_read_unlock()
 * to report the quiescent state.  On the other hand, if the CPU is
 * not in an RCU read-side critical section, the IPI handler reports
 * the quiescent state immediately.
 *
 * Although this is a great improvement over previous expedited
 * implementations, it is still unfriendly to real-time workloads, so is
 * thus not recommended for any sort of common-case code.  In fact, if
 * you are using synchronize_rcu_expedited() in a loop, please restructure
 * your code to batch your updates, and then use a single synchronize_rcu()
 * instead.
 */
void synchronize_rcu_expedited(void)
{
        struct rcu_state *rsp = rcu_state_p;

        if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
                return;
        _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
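
/*
 * Illustrative reader/updater pairing (hypothetical code, not from this
 * file): synchronize_rcu_expedited() waits for readers that use
 * rcu_read_lock()/rcu_read_unlock(), so a matching reader might look like:
 *
 *      rcu_read_lock();
 *      p = rcu_dereference(my_global_ptr);     // hypothetical pointer
 *      if (p)
 *              do_something_with(p);           // hypothetical helper
 *      rcu_read_unlock();
 *
 * Any such reader that began before the expedited grace period is
 * guaranteed to have completed before synchronize_rcu_expedited() returns.
 */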

#else /* #ifdef CONFIG_PREEMPT_RCU */

/*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
 * But because preemptible RCU does not exist, map to rcu-sched.
 */
void synchronize_rcu_expedited(void)
{
        synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/*
 * Switch to run-time mode once Tree RCU has fully initialized.
 */
static int __init rcu_exp_runtime_mode(void)
{
        rcu_test_sync_prims();
        rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
        rcu_test_sync_prims();
        return 0;
}
core_initcall(rcu_exp_runtime_mode);