percpu-rwsem.c

#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>

int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
                        const char *name, struct lock_class_key *rwsem_key)
{
        sem->read_count = alloc_percpu(int);
        if (unlikely(!sem->read_count))
                return -ENOMEM;

        /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
        rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
        __init_rwsem(&sem->rw_sem, name, rwsem_key);
        rcuwait_init(&sem->writer);
        sem->readers_block = 0;
        return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);

void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
        /*
         * XXX: temporary kludge. The error path in alloc_super()
         * assumes that percpu_free_rwsem() is safe after kzalloc().
         */
        if (!sem->read_count)
                return;

        rcu_sync_dtor(&sem->rss);
        free_percpu(sem->read_count);
        sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
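
/*
 * Illustrative usage sketch, not part of the original file: dynamic setup and
 * teardown as a hypothetical caller would do it, using the percpu_init_rwsem()
 * wrapper from <linux/percpu-rwsem.h>. Compiled out with #if 0; the struct and
 * function names below are made up for the example.
 */
#if 0
struct example_ctx {
        struct percpu_rw_semaphore sem;
};

static int example_ctx_init(struct example_ctx *ctx)
{
        /* Allocates ctx->sem.read_count; can fail with -ENOMEM. */
        return percpu_init_rwsem(&ctx->sem);
}

static void example_ctx_destroy(struct example_ctx *ctx)
{
        /* Safe on a zeroed, failed-init sem, per the kludge comment above. */
        percpu_free_rwsem(&ctx->sem);
}
#endif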
int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
{
        /*
         * Due to having preemption disabled, the decrement happens on
         * the same CPU as the increment, avoiding the
         * increment-on-one-CPU-and-decrement-on-another problem.
         *
         * If the reader misses the writer's assignment of readers_block, then
         * the writer is guaranteed to see the reader's increment.
         *
         * Conversely, any readers that increment their sem->read_count after
         * the writer looks are guaranteed to see the readers_block value,
         * which in turn means that they are guaranteed to immediately
         * decrement their sem->read_count, so that it doesn't matter that the
         * writer missed them.
         */
        smp_mb(); /* A matches D */

        /*
         * If !readers_block the critical section starts here, matched by the
         * release in percpu_up_write().
         */
        if (likely(!smp_load_acquire(&sem->readers_block)))
                return 1;

        /*
         * Per the above comment, we still have preemption disabled and
         * will thus decrement on the same CPU as we incremented.
         */
        __percpu_up_read(sem);

        if (try)
                return 0;

        /*
         * We either call schedule() in the wait, or we'll fall through
         * and reschedule on the preempt_enable() in percpu_down_read().
         */
        preempt_enable_no_resched();

        /*
         * Avoid lockdep for the down/up_read(); we already have them.
         */
        __down_read(&sem->rw_sem);
        this_cpu_inc(*sem->read_count);
        __up_read(&sem->rw_sem);

        preempt_disable();
        return 1;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
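
/*
 * For context, a simplified sketch (an approximation, not a verbatim copy) of
 * the reader fast path defined in <linux/percpu-rwsem.h>: the per-CPU
 * increment is attempted first, and __percpu_down_read() above is only entered
 * while the rcu_sync state says a writer may be around. Lockdep annotations
 * and the compiler barrier of the real inline are omitted here.
 */
#if 0
static inline void percpu_down_read_sketch(struct percpu_rw_semaphore *sem)
{
        preempt_disable();
        __this_cpu_inc(*sem->read_count);
        if (unlikely(!rcu_sync_is_idle(&sem->rss)))
                __percpu_down_read(sem, false); /* unconditional full barrier */
        preempt_enable();
}
#endif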
void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
        smp_mb(); /* B matches C */
        /*
         * In other words, if they see our decrement (presumably to aggregate
         * zero, as that is the only time it matters) they will also see our
         * critical section.
         */
        __this_cpu_dec(*sem->read_count);

        /* Prod writer to recheck readers_active */
        rcuwait_wake_up(&sem->writer);
}
EXPORT_SYMBOL_GPL(__percpu_up_read);
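
/*
 * Matching sketch (again an approximation of the <linux/percpu-rwsem.h>
 * inline, not part of this file) of the reader-unlock fast path: the per-CPU
 * decrement stays local unless a writer may be waiting, in which case
 * __percpu_up_read() above supplies the full barrier and the wakeup.
 */
#if 0
static inline void percpu_up_read_sketch(struct percpu_rw_semaphore *sem)
{
        preempt_disable();
        if (likely(rcu_sync_is_idle(&sem->rss)))
                __this_cpu_dec(*sem->read_count);
        else
                __percpu_up_read(sem);
        preempt_enable();
}
#endif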
#define per_cpu_sum(var)                                        \
({                                                              \
        typeof(var) __sum = 0;                                  \
        int cpu;                                                \
        compiletime_assert_atomic_type(__sum);                  \
        for_each_possible_cpu(cpu)                              \
                __sum += per_cpu(var, cpu);                     \
        __sum;                                                  \
})
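
/*
 * Usage sketch for per_cpu_sum() (illustrative only): taking a snapshot of the
 * aggregate reader count. The compiletime_assert_atomic_type() check above
 * rejects types whose loads could tear. The helper name below is made up for
 * the example; the in-file consumer is readers_active_check() further down.
 */
#if 0
static int example_total_readers(struct percpu_rw_semaphore *sem)
{
        return per_cpu_sum(*sem->read_count);   /* snapshot; may be stale */
}
#endif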
/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero. If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
        if (per_cpu_sum(*sem->read_count) != 0)
                return false;

        /*
         * If we observed the decrement, ensure we see the entire critical
         * section.
         */
        smp_mb(); /* C matches B */

        return true;
}
void percpu_down_write(struct percpu_rw_semaphore *sem)
{
        /* Notify readers to take the slow path. */
        rcu_sync_enter(&sem->rss);

        down_write(&sem->rw_sem);

        /*
         * Notify new readers to block; up until now, and thus throughout the
         * longish rcu_sync_enter() above, new readers could still come in.
         */
        WRITE_ONCE(sem->readers_block, 1);

        smp_mb(); /* D matches A */

        /*
         * If they don't see our write of readers_block, then we are
         * guaranteed to see their sem->read_count increment, and therefore
         * will wait for them.
         */

        /* Wait for all now active readers to complete. */
        rcuwait_wait_event(&sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);
void percpu_up_write(struct percpu_rw_semaphore *sem)
{
        /*
         * Signal that the writer is done, no fast path yet.
         *
         * One reason that we cannot just immediately flip to readers_fast is
         * that new readers might fail to see the results of this writer's
         * critical section.
         *
         * Therefore we force it through the slow path, which guarantees an
         * acquire and thereby guarantees the critical section's consistency.
         */
        smp_store_release(&sem->readers_block, 0);

        /*
         * Release the write lock; this will allow readers back in the game.
         */
        up_write(&sem->rw_sem);

        /*
         * Once this completes (at least one RCU-sched grace period hence) the
         * reader fast path will be available again. Safe to use outside the
         * exclusive write lock because it's counting.
         */
        rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
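
/*
 * End-to-end usage sketch (illustrative only, not part of the original file):
 * a hypothetical subsystem guarding rarely-changing state with a
 * percpu_rw_semaphore. DEFINE_STATIC_PERCPU_RWSEM() and the
 * percpu_down/up_read/write() entry points come from <linux/percpu-rwsem.h>;
 * the lock and function names here are made up.
 */
#if 0
static DEFINE_STATIC_PERCPU_RWSEM(example_rwsem);

static void example_read_side(void)
{
        percpu_down_read(&example_rwsem);       /* usually just a per-CPU inc */
        /* ... read the protected state ... */
        percpu_up_read(&example_rwsem);         /* usually just a per-CPU dec */
}

static void example_write_side(void)
{
        percpu_down_write(&example_rwsem);      /* waits out all active readers */
        /* ... update the protected state ... */
        percpu_up_write(&example_rwsem);        /* readers return to the fast path */
}
#endif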