rwsem.c 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. /* rwsem.c: R/W semaphores: contention handling functions
  2. *
  3. * Written by David Howells (dhowells@redhat.com).
  4. * Derived from arch/i386/kernel/semaphore.c
  5. */
  6. #include <linux/rwsem.h>
  7. #include <linux/sched.h>
  8. #include <linux/init.h>
  9. #include <linux/export.h>
  10. /*
  11. * Initialize an rwsem:
  12. */
  13. void __init_rwsem(struct rw_semaphore *sem, const char *name,
  14. struct lock_class_key *key)
  15. {
  16. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  17. /*
  18. * Make sure we are not reinitializing a held semaphore:
  19. */
  20. debug_check_no_locks_freed((void *)sem, sizeof(*sem));
  21. lockdep_init_map(&sem->dep_map, name, key, 0);
  22. #endif
  23. sem->count = RWSEM_UNLOCKED_VALUE;
  24. raw_spin_lock_init(&sem->wait_lock);
  25. INIT_LIST_HEAD(&sem->wait_list);
  26. }
  27. EXPORT_SYMBOL(__init_rwsem);
  28. struct rwsem_waiter {
  29. struct list_head list;
  30. struct task_struct *task;
  31. unsigned int flags;
  32. #define RWSEM_WAITING_FOR_READ 0x00000001
  33. #define RWSEM_WAITING_FOR_WRITE 0x00000002
  34. };
  35. /* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
  36. * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
  37. * since the rwsem value was observed.
  38. */
  39. #define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */
  40. #define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */
  41. #define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
  42. /*
  43. * handle the lock release when processes blocked on it that can now run
  44. * - if we come here from up_xxxx(), then:
  45. * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
  46. * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
  47. * - there must be someone on the queue
  48. * - the spinlock must be held by the caller
  49. * - woken process blocks are discarded from the list after having task zeroed
  50. * - writers are only woken if downgrading is false
  51. */
  52. static struct rw_semaphore *
  53. __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
  54. {
  55. struct rwsem_waiter *waiter;
  56. struct task_struct *tsk;
  57. struct list_head *next;
  58. signed long oldcount, woken, loop, adjustment;
  59. waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
  60. if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
  61. goto readers_only;
  62. if (wake_type == RWSEM_WAKE_READ_OWNED)
  63. /* Another active reader was observed, so wakeup is not
  64. * likely to succeed. Save the atomic op.
  65. */
  66. goto out;
  67. /* There's a writer at the front of the queue - try to grant it the
  68. * write lock. However, we only wake this writer if we can transition
  69. * the active part of the count from 0 -> 1
  70. */
  71. adjustment = RWSEM_ACTIVE_WRITE_BIAS;
  72. if (waiter->list.next == &sem->wait_list)
  73. adjustment -= RWSEM_WAITING_BIAS;
  74. try_again_write:
  75. oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
  76. if (oldcount & RWSEM_ACTIVE_MASK)
  77. /* Someone grabbed the sem already */
  78. goto undo_write;
  79. /* We must be careful not to touch 'waiter' after we set ->task = NULL.
  80. * It is an allocated on the waiter's stack and may become invalid at
  81. * any time after that point (due to a wakeup from another source).
  82. */
  83. list_del(&waiter->list);
  84. tsk = waiter->task;
  85. smp_mb();
  86. waiter->task = NULL;
  87. wake_up_process(tsk);
  88. put_task_struct(tsk);
  89. goto out;
  90. readers_only:
  91. /* If we come here from up_xxxx(), another thread might have reached
  92. * rwsem_down_failed_common() before we acquired the spinlock and
  93. * woken up a waiter, making it now active. We prefer to check for
  94. * this first in order to not spend too much time with the spinlock
  95. * held if we're not going to be able to wake up readers in the end.
  96. *
  97. * Note that we do not need to update the rwsem count: any writer
  98. * trying to acquire rwsem will run rwsem_down_write_failed() due
  99. * to the waiting threads and block trying to acquire the spinlock.
  100. *
  101. * We use a dummy atomic update in order to acquire the cache line
  102. * exclusively since we expect to succeed and run the final rwsem
  103. * count adjustment pretty soon.
  104. */
  105. if (wake_type == RWSEM_WAKE_ANY &&
  106. rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
  107. /* Someone grabbed the sem for write already */
  108. goto out;
  109. /* Grant an infinite number of read locks to the readers at the front
  110. * of the queue. Note we increment the 'active part' of the count by
  111. * the number of readers before waking any processes up.
  112. */
  113. woken = 0;
  114. do {
  115. woken++;
  116. if (waiter->list.next == &sem->wait_list)
  117. break;
  118. waiter = list_entry(waiter->list.next,
  119. struct rwsem_waiter, list);
  120. } while (waiter->flags & RWSEM_WAITING_FOR_READ);
  121. adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
  122. if (waiter->flags & RWSEM_WAITING_FOR_READ)
  123. /* hit end of list above */
  124. adjustment -= RWSEM_WAITING_BIAS;
  125. rwsem_atomic_add(adjustment, sem);
  126. next = sem->wait_list.next;
  127. for (loop = woken; loop > 0; loop--) {
  128. waiter = list_entry(next, struct rwsem_waiter, list);
  129. next = waiter->list.next;
  130. tsk = waiter->task;
  131. smp_mb();
  132. waiter->task = NULL;
  133. wake_up_process(tsk);
  134. put_task_struct(tsk);
  135. }
  136. sem->wait_list.next = next;
  137. next->prev = &sem->wait_list;
  138. out:
  139. return sem;
  140. /* undo the change to the active count, but check for a transition
  141. * 1->0 */
  142. undo_write:
  143. if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
  144. goto out;
  145. goto try_again_write;
  146. }
  147. /*
  148. * wait for a lock to be granted
  149. */
  150. static struct rw_semaphore __sched *
  151. rwsem_down_failed_common(struct rw_semaphore *sem,
  152. unsigned int flags, signed long adjustment)
  153. {
  154. struct rwsem_waiter waiter;
  155. struct task_struct *tsk = current;
  156. signed long count;
  157. set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  158. /* set up my own style of waitqueue */
  159. raw_spin_lock_irq(&sem->wait_lock);
  160. waiter.task = tsk;
  161. waiter.flags = flags;
  162. get_task_struct(tsk);
  163. if (list_empty(&sem->wait_list))
  164. adjustment += RWSEM_WAITING_BIAS;
  165. list_add_tail(&waiter.list, &sem->wait_list);
  166. /* we're now waiting on the lock, but no longer actively locking */
  167. count = rwsem_atomic_update(adjustment, sem);
  168. /* If there are no active locks, wake the front queued process(es) up.
  169. *
  170. * Alternatively, if we're called from a failed down_write(), there
  171. * were already threads queued before us and there are no active
  172. * writers, the lock must be read owned; so we try to wake any read
  173. * locks that were queued ahead of us. */
  174. if (count == RWSEM_WAITING_BIAS)
  175. sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
  176. else if (count > RWSEM_WAITING_BIAS &&
  177. adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
  178. sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
  179. raw_spin_unlock_irq(&sem->wait_lock);
  180. /* wait to be given the lock */
  181. for (;;) {
  182. if (!waiter.task)
  183. break;
  184. schedule();
  185. set_task_state(tsk, TASK_UNINTERRUPTIBLE);
  186. }
  187. tsk->state = TASK_RUNNING;
  188. return sem;
  189. }
  190. /*
  191. * wait for the read lock to be granted
  192. */
  193. struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
  194. {
  195. return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
  196. -RWSEM_ACTIVE_READ_BIAS);
  197. }
  198. /*
  199. * wait for the write lock to be granted
  200. */
  201. struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
  202. {
  203. return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
  204. -RWSEM_ACTIVE_WRITE_BIAS);
  205. }
  206. /*
  207. * handle waking up a waiter on the semaphore
  208. * - up_read/up_write has decremented the active part of count if we come here
  209. */
  210. struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
  211. {
  212. unsigned long flags;
  213. raw_spin_lock_irqsave(&sem->wait_lock, flags);
  214. /* do nothing if list empty */
  215. if (!list_empty(&sem->wait_list))
  216. sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
  217. raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
  218. return sem;
  219. }
  220. /*
  221. * downgrade a write lock into a read lock
  222. * - caller incremented waiting part of count and discovered it still negative
  223. * - just wake up any readers at the front of the queue
  224. */
  225. struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
  226. {
  227. unsigned long flags;
  228. raw_spin_lock_irqsave(&sem->wait_lock, flags);
  229. /* do nothing if list empty */
  230. if (!list_empty(&sem->wait_list))
  231. sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
  232. raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
  233. return sem;
  234. }
  235. EXPORT_SYMBOL(rwsem_down_read_failed);
  236. EXPORT_SYMBOL(rwsem_down_write_failed);
  237. EXPORT_SYMBOL(rwsem_wake);
  238. EXPORT_SYMBOL(rwsem_downgrade_wake);