page_counter.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->count);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->count);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}
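
/*
 * Illustrative sketch, not part of the original file: force-charging a
 * counter that has ancestors.  The example_force_charge() name is an
 * assumption for illustration; the point is that the charge is applied
 * to @counter and to every counter reachable through ->parent,
 * ignoring any limits, and that page_counter_uncharge() walks the same
 * chain back down.
 */
static void __maybe_unused example_force_charge(struct page_counter *counter,
						unsigned long nr_pages)
{
	/* Bumps counter->count and each ancestor's count, limits ignored. */
	page_counter_charge(counter, nr_pages);

	/* ... the pages are now accounted to the whole hierarchy ... */

	/* Gives them back all the way up the chain. */
	page_counter_uncharge(counter, nr_pages);
}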

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_limit(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->count);
		if (new > c->limit) {
			atomic_long_sub(nr_pages, &c->count);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}

	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}
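
/*
 * Illustrative sketch, not part of the original file: how a caller
 * might use page_counter_try_charge().  The example_try_charge() name
 * and the -ENOMEM policy are assumptions; on failure @fail points at
 * the counter that hit its limit and no counter in the hierarchy is
 * left charged, so the caller can reclaim against @fail and retry, or
 * simply give up as done here.
 */
static int __maybe_unused example_try_charge(struct page_counter *counter,
					     unsigned long nr_pages)
{
	struct page_counter *fail;

	if (!page_counter_try_charge(counter, nr_pages, &fail))
		return -ENOMEM;	/* @fail is the counter at its limit */

	/* ... use the pages ... */

	page_counter_uncharge(counter, nr_pages);
	return 0;
}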

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_limit - limit the number of pages allowed
 * @counter: counter
 * @limit: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_limit(struct page_counter *counter, unsigned long limit)
{
	for (;;) {
		unsigned long old;
		long count;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		count = atomic_long_read(&counter->count);

		if (count > limit)
			return -EBUSY;

		old = xchg(&counter->limit, limit);

		if (atomic_long_read(&counter->count) <= count)
			return 0;

		counter->limit = old;
		cond_resched();
	}
}
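
/*
 * Illustrative sketch, not part of the original file: shrinking a
 * counter's limit.  The example_shrink_limit() name is hypothetical;
 * the caller is expected to serialize page_counter_limit() calls on
 * the same counter (e.g. under a mutex it owns, not shown here).
 */
static int __maybe_unused example_shrink_limit(struct page_counter *counter,
					       unsigned long limit)
{
	int ret;

	ret = page_counter_limit(counter, limit);
	if (ret == -EBUSY) {
		/*
		 * Usage is already above the requested limit; a real
		 * caller would reclaim pages and retry, which is
		 * outside the scope of this sketch.
		 */
		return ret;
	}

	return ret;
}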

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
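
/*
 * Illustrative sketch, not part of the original file: parsing a
 * user-supplied limit string such as "512M" or "max" and applying it.
 * The example_write_limit() name and the choice of "max" as the
 * maximum keyword are assumptions; "max" here maps to
 * PAGE_COUNTER_MAX, i.e. effectively no limit.
 */
static int __maybe_unused example_write_limit(struct page_counter *counter,
					      const char *buf)
{
	unsigned long nr_pages;
	int ret;

	ret = page_counter_memparse(buf, "max", &nr_pages);
	if (ret)
		return ret;

	return page_counter_limit(counter, nr_pages);
}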