pagewalk.c

#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                          struct mm_walk *walk)
{
        pte_t *pte;
        int err = 0;

        pte = pte_offset_map(pmd, addr);
        for (;;) {
                err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
                if (err)
                        break;
                if (addr >= end - PAGE_SIZE)
                        break;
                addr += PAGE_SIZE;
                pte++;
        }

        pte_unmap(pte);
        return err;
}

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
                          struct mm_walk *walk)
{
        pmd_t *pmd;
        unsigned long next;
        int err = 0;

        pmd = pmd_offset(pud, addr);
        do {
again:
                next = pmd_addr_end(addr, end);
                if (pmd_none(*pmd)) {
                        if (walk->pte_hole)
                                err = walk->pte_hole(addr, next, walk);
                        if (err)
                                break;
                        continue;
                }
                /*
                 * This implies that each ->pmd_entry() handler
                 * needs to know about pmd_trans_huge() pmds
                 */
                if (walk->pmd_entry)
                        err = walk->pmd_entry(pmd, addr, next, walk);
                if (err)
                        break;

                /*
                 * Check this here so we only break down trans_huge
                 * pages when we _need_ to
                 */
                if (!walk->pte_entry)
                        continue;

                split_huge_page_pmd(walk->mm, pmd);
                if (pmd_none_or_trans_huge_or_clear_bad(pmd))
                        goto again;
                err = walk_pte_range(pmd, addr, next, walk);
                if (err)
                        break;
        } while (pmd++, addr = next, addr != end);

        return err;
}

static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
                          struct mm_walk *walk)
{
        pud_t *pud;
        unsigned long next;
        int err = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud)) {
                        if (walk->pte_hole)
                                err = walk->pte_hole(addr, next, walk);
                        if (err)
                                break;
                        continue;
                }
                if (walk->pud_entry)
                        err = walk->pud_entry(pud, addr, next, walk);
                if (!err && (walk->pmd_entry || walk->pte_entry))
                        err = walk_pmd_range(pud, addr, next, walk);
                if (err)
                        break;
        } while (pud++, addr = next, addr != end);

        return err;
}

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
                                       unsigned long end)
{
        unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
        return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(struct vm_area_struct *vma,
                              unsigned long addr, unsigned long end,
                              struct mm_walk *walk)
{
        struct hstate *h = hstate_vma(vma);
        unsigned long next;
        unsigned long hmask = huge_page_mask(h);
        pte_t *pte;
        int err = 0;

        do {
                next = hugetlb_entry_end(h, addr, end);
                pte = huge_pte_offset(walk->mm, addr & hmask);
                if (pte && walk->hugetlb_entry)
                        err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
                if (err)
                        return err;
        } while (addr = next, addr != end);

        return 0;
}

#else /* CONFIG_HUGETLB_PAGE */
static int walk_hugetlb_range(struct vm_area_struct *vma,
                              unsigned long addr, unsigned long end,
                              struct mm_walk *walk)
{
        return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */
/**
 * walk_page_range - walk a memory map's page tables with a callback
 * @addr: starting address
 * @end: ending address
 * @walk: set of callbacks to invoke for each level of the tree
 *
 * Recursively walk the page table for the memory area in a VMA,
 * calling supplied callbacks. Callbacks are called in-order (first
 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
 * etc.). If lower-level callbacks are omitted, walking depth is reduced.
 *
 * Each callback receives an entry pointer and the start and end of the
 * associated range, and a copy of the original mm_walk for access to
 * the ->private or ->mm fields.
 *
 * Usually no locks are taken, but splitting a transparent huge page may
 * take the page table lock. The bottom-level iterator will map PTE
 * directories from highmem if necessary.
 *
 * If any callback returns a non-zero value, the walk is aborted and
 * the return value is propagated back to the caller. Otherwise 0 is returned.
 *
 * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
 * is !NULL.
 */
int walk_page_range(unsigned long addr, unsigned long end,
                    struct mm_walk *walk)
{
        pgd_t *pgd;
        unsigned long next;
        int err = 0;

        if (addr >= end)
                return err;

        if (!walk->mm)
                return -EINVAL;

        VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));

        pgd = pgd_offset(walk->mm, addr);
        do {
                struct vm_area_struct *vma = NULL;

                next = pgd_addr_end(addr, end);

                /*
                 * This function was not intended to be vma based.
                 * But there are vma special cases to be handled:
                 * - hugetlb vma's
                 * - VM_PFNMAP vma's
                 */
                vma = find_vma(walk->mm, addr);
                if (vma) {
                        /*
                         * There are no page structures backing a VM_PFNMAP
                         * range, so do not allow split_huge_page_pmd().
                         */
                        if ((vma->vm_start <= addr) &&
                            (vma->vm_flags & VM_PFNMAP)) {
                                next = vma->vm_end;
                                pgd = pgd_offset(walk->mm, next);
                                continue;
                        }
                        /*
                         * Handle hugetlb vma individually because pagetable
                         * walk for the hugetlb page is dependent on the
                         * architecture and we can't handle it in the same
                         * manner as non-huge pages.
                         */
                        if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
                            is_vm_hugetlb_page(vma)) {
                                if (vma->vm_end < next)
                                        next = vma->vm_end;
                                /*
                                 * Hugepage is very tightly coupled with vma,
                                 * so walk through hugetlb entries within a
                                 * given vma.
                                 */
                                err = walk_hugetlb_range(vma, addr, next, walk);
                                if (err)
                                        break;
                                pgd = pgd_offset(walk->mm, next);
                                continue;
                        }
                }

                if (pgd_none_or_clear_bad(pgd)) {
                        if (walk->pte_hole)
                                err = walk->pte_hole(addr, next, walk);
                        if (err)
                                break;
                        pgd++;
                        continue;
                }
                if (walk->pgd_entry)
                        err = walk->pgd_entry(pgd, addr, next, walk);
                if (!err &&
                    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
                        err = walk_pud_range(pgd, addr, next, walk);
                if (err)
                        break;
                pgd++;
        } while (addr = next, addr != end);

        return err;
}
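
As a usage illustration (not part of pagewalk.c), the sketch below shows how a caller of this era's API might count the present PTEs in a range: it fills in a struct mm_walk with only a ->pte_entry callback and a ->private pointer, takes mmap_sem for read as the kerneldoc above requires, and hands the range to walk_page_range(). The function and variable names (count_pte, count_present_ptes) are hypothetical, chosen only for the example.

/*
 * Hypothetical usage sketch: count the PTEs that are present in
 * [start, end) of an address space.  Only ->pte_entry is supplied, so
 * the walker descends all the way to individual PTEs; pages without
 * page tables are simply skipped (no ->pte_hole callback).
 */
static int count_pte(pte_t *pte, unsigned long addr, unsigned long next,
                     struct mm_walk *walk)
{
        unsigned long *count = walk->private;

        if (pte_present(*pte))
                (*count)++;
        return 0;                       /* non-zero would abort the walk */
}

static unsigned long count_present_ptes(struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
{
        unsigned long count = 0;
        struct mm_walk walk = {
                .pte_entry = count_pte,
                .mm        = mm,
                .private   = &count,
        };

        down_read(&mm->mmap_sem);       /* walk_page_range() asserts this */
        walk_page_range(start, end, &walk);
        up_read(&mm->mmap_sem);

        return count;
}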