percpu-rwsem.c

#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>

int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
                        const char *name, struct lock_class_key *rwsem_key)
{
        sem->read_count = alloc_percpu(int);
        if (unlikely(!sem->read_count))
                return -ENOMEM;

        /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
        rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
        __init_rwsem(&sem->rw_sem, name, rwsem_key);
        init_waitqueue_head(&sem->writer);
        sem->readers_block = 0;
        return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);

void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
        /*
         * XXX: temporary kludge. The error path in alloc_super()
         * assumes that percpu_free_rwsem() is safe after kzalloc().
         */
        if (!sem->read_count)
                return;

        rcu_sync_dtor(&sem->rss);
        free_percpu(sem->read_count);
        sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
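
/*
 * Illustrative usage sketch (not part of the original file, compiled out):
 * callers normally initialize a dynamically allocated semaphore through the
 * percpu_init_rwsem() wrapper from <linux/percpu-rwsem.h>, which supplies the
 * lock_class_key, and release it with percpu_free_rwsem(). "example_sem",
 * "example_setup" and "example_teardown" are made-up names.
 */
#if 0
static struct percpu_rw_semaphore example_sem;

static int example_setup(void)
{
        /* Allocates the per-CPU read counter; returns -ENOMEM on failure. */
        return percpu_init_rwsem(&example_sem);
}

static void example_teardown(void)
{
        /* Safe even if the counter was never allocated (see the XXX above). */
        percpu_free_rwsem(&example_sem);
}
#endif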

int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
{
        /*
         * Due to having preemption disabled the decrement happens on
         * the same CPU as the increment, avoiding the
         * increment-on-one-CPU-and-decrement-on-another problem.
         *
         * If the reader misses the writer's assignment of readers_block, then
         * the writer is guaranteed to see the reader's increment.
         *
         * Conversely, any readers that increment their sem->read_count after
         * the writer looks are guaranteed to see the readers_block value,
         * which in turn means that they are guaranteed to immediately
         * decrement their sem->read_count, so that it doesn't matter that the
         * writer missed them.
         */

        smp_mb(); /* A matches D */

        /*
         * If !readers_block the critical section starts here, matched by the
         * release in percpu_up_write().
         */
        if (likely(!smp_load_acquire(&sem->readers_block)))
                return 1;

        /*
         * Per the above comment; we still have preemption disabled and
         * will thus decrement on the same CPU as we incremented.
         */
        __percpu_up_read(sem);

        if (try)
                return 0;

        /*
         * We either call schedule() in the wait, or we'll fall through
         * and reschedule on the preempt_enable() in percpu_down_read().
         */
        preempt_enable_no_resched();

        /*
         * Avoid lockdep for the down/up_read(); we already have them.
         */
        __down_read(&sem->rw_sem);
        this_cpu_inc(*sem->read_count);
        __up_read(&sem->rw_sem);

        preempt_disable();
        return 1;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
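
/*
 * For context, a simplified sketch (not from this file, compiled out) of how
 * the percpu_down_read() fast path in <linux/percpu-rwsem.h> reaches the slow
 * path above: it bumps the per-CPU counter with preemption disabled and only
 * calls __percpu_down_read() while the rcu_sync state says a writer may be
 * around. Lockdep annotations and compiler barriers are omitted, so treat
 * this as an approximation of that header, not a copy of it.
 */
#if 0
static inline void example_percpu_down_read(struct percpu_rw_semaphore *sem)
{
        might_sleep();

        preempt_disable();
        __this_cpu_inc(*sem->read_count);
        if (unlikely(!rcu_sync_is_idle(&sem->rss)))
                __percpu_down_read(sem, false); /* unconditional memory barrier */
        preempt_enable();
}
#endif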

void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
        smp_mb(); /* B matches C */
        /*
         * In other words, if they see our decrement (presumably to aggregate
         * zero, as that is the only time it matters) they will also see our
         * critical section.
         */
        __this_cpu_dec(*sem->read_count);

        /* Prod writer to recheck readers_active */
        wake_up(&sem->writer);
}
EXPORT_SYMBOL_GPL(__percpu_up_read);
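
/*
 * The matching simplified sketch (again not from this file, compiled out) of
 * the percpu_up_read() fast path in <linux/percpu-rwsem.h>: while no writer
 * is pending it simply decrements the per-CPU counter; otherwise it falls
 * back to __percpu_up_read() above, which also wakes the writer. Lockdep and
 * compiler barriers are omitted, so this is an approximation.
 */
#if 0
static inline void example_percpu_up_read(struct percpu_rw_semaphore *sem)
{
        preempt_disable();
        if (likely(rcu_sync_is_idle(&sem->rss)))
                __this_cpu_dec(*sem->read_count);
        else
                __percpu_up_read(sem); /* unconditional memory barrier */
        preempt_enable();
}
#endif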

#define per_cpu_sum(var)                                        \
({                                                              \
        typeof(var) __sum = 0;                                  \
        int cpu;                                                \
        compiletime_assert_atomic_type(__sum);                  \
        for_each_possible_cpu(cpu)                              \
                __sum += per_cpu(var, cpu);                     \
        __sum;                                                  \
})
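
/*
 * Illustrative sketch (not part of the original file, compiled out):
 * per_cpu_sum() adds up every possible CPU's instance of a per-CPU variable.
 * Individual instances of sem->read_count may well be negative; only the sum
 * across CPUs matters, which is why readers_active_check() below speaks of a
 * modular sum. "example_count" and "example_total" are made-up names.
 */
#if 0
static DEFINE_PER_CPU(int, example_count);

static int example_total(void)
{
        /* Unsynchronized snapshot; other CPUs may still be updating. */
        return per_cpu_sum(example_count);
}
#endif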

/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero.  If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
        if (per_cpu_sum(*sem->read_count) != 0)
                return false;

        /*
         * If we observed the decrement; ensure we see the entire critical
         * section.
         */
        smp_mb(); /* C matches B */

        return true;
}

void percpu_down_write(struct percpu_rw_semaphore *sem)
{
        /* Notify readers to take the slow path. */
        rcu_sync_enter(&sem->rss);

        down_write(&sem->rw_sem);

        /*
         * Notify new readers to block; up until now, and thus throughout the
         * longish rcu_sync_enter() above, new readers could still come in.
         */
        WRITE_ONCE(sem->readers_block, 1);

        smp_mb(); /* D matches A */

        /*
         * If they don't see our write of readers_block, then we are
         * guaranteed to see their sem->read_count increment, and therefore
         * will wait for them.
         */

        /* Wait for all now active readers to complete. */
        wait_event(sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);

void percpu_up_write(struct percpu_rw_semaphore *sem)
{
        /*
         * Signal the writer is done, no fast path yet.
         *
         * One reason that we cannot just immediately flip to readers_fast is
         * that new readers might fail to see the results of this writer's
         * critical section.
         *
         * Therefore we force it through the slow path which guarantees an
         * acquire and thereby guarantees the critical section's consistency.
         */
        smp_store_release(&sem->readers_block, 0);

        /*
         * Release the write lock, this will allow readers back in the game.
         */
        up_write(&sem->rw_sem);

        /*
         * Once this completes (at least one RCU-sched grace period hence) the
         * reader fast path will be available again.  Safe to use outside the
         * exclusive write lock because it's counting.
         */
        rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
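
/*
 * Illustrative end-to-end sketch (not part of the original file, compiled
 * out): a typical caller pairs percpu_down_read()/percpu_up_read() around
 * read-mostly work and percpu_down_write()/percpu_up_write() around rare
 * updates. "example_sem", "example_reader" and "example_writer" are made-up
 * names.
 */
#if 0
static struct percpu_rw_semaphore example_sem;

static void example_reader(void)
{
        percpu_down_read(&example_sem);         /* cheap per-CPU fast path */
        /* ... read-side critical section ... */
        percpu_up_read(&example_sem);
}

static void example_writer(void)
{
        percpu_down_write(&example_sem);        /* waits out every active reader */
        /* ... exclusive critical section ... */
        percpu_up_write(&example_sem);          /* fast path returns after a grace period */
}
#endif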