/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#include <linux/pfn.h>

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE. However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
		unsigned long address, pte_t *ptep,
		pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp,
		pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
		unsigned long address, pud_t *pudp,
		pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp,
		pmd_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
static inline int pudp_set_access_flags(struct vm_area_struct *vma,
		unsigned long address, pud_t *pudp,
		pud_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
		unsigned long address, pte_t *ptep)
{
	pte_t pte = *ptep;
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif
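
/*
 * Example (illustrative sketch only, not used by this header): an aging
 * scan in the style of rmap-based reclaim clears the accessed bit under
 * the pte lock and remembers whether the page had been referenced since
 * the last scan. scan_one_pte() is a hypothetical caller, not a kernel
 * function.
 *
 *	static bool scan_one_pte(struct vm_area_struct *vma,
 *			unsigned long addr, pte_t *ptep)
 *	{
 *		return ptep_test_and_clear_young(vma, addr, ptep) != 0;
 *	}
 *
 * Architectures with hardware-managed accessed bits typically override
 * this with an atomic test-and-clear; the generic version relies on the
 * pte lock being held.
 */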
#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
		unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp);
#else
/*
 * Despite being relevant only to THP, this API is called from generic rmap
 * code under PageTransHuge(), hence it needs a dummy implementation for !THP.
 */
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
		unsigned long address, pte_t *ptep)
{
	pte_t pte = *ptep;
	pte_clear(mm, address, ptep);
	return pte;
}
#endif
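
/*
 * Example (sketch only): an unmap-style path reads and clears the pte in
 * one step so that state carried in the old value, such as the dirty bit,
 * is not lost between the read and the clear. handle_removed_pte() is a
 * hypothetical helper, not something defined by this header.
 *
 *	pte_t pte = ptep_get_and_clear(mm, addr, ptep);
 *	if (pte_dirty(pte))
 *		handle_removed_pte(pte);
 *
 * The generic version is a plain read followed by pte_clear(); architectures
 * that must be atomic against hardware accessed/dirty updates override it.
 */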
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
		unsigned long address, pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	pmd_clear(pmdp);
	return pmd;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
		unsigned long address, pud_t *pudp)
{
	pud_t pud = *pudp;
	pud_clear(pudp);
	return pud;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
		unsigned long address, pmd_t *pmdp, int full)
{
	return pmdp_huge_get_and_clear(mm, address, pmdp);
}
#endif

#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
		unsigned long address, pud_t *pudp, int full)
{
	return pudp_huge_get_and_clear(mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
		unsigned long address, pte_t *ptep, int full)
{
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	return pte;
}
#endif

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or in the process of address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
		unsigned long address, pte_t *ptep, int full)
{
	pte_clear(mm, address, ptep);
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
		unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp);
extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
		unsigned long address, pud_t *pudp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif
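
/*
 * Example (sketch only): fork-time COW setup write-protects the parent pte
 * before copying it into the child, roughly as the pte-copy loop in
 * mm/memory.c does for private writable mappings:
 *
 *	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
 *		ptep_set_wrprotect(src_mm, addr, src_pte);
 *		pte = pte_wrprotect(pte);
 *	}
 *
 * Architectures with hardware dirty bits typically override this with an
 * atomic clear of the write bit so a racing dirty-bit update is not lost.
 */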
#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif

#ifndef pte_mk_savedwrite
#define pte_mk_savedwrite pte_mkwrite
#endif

#ifndef pte_clear_savedwrite
#define pte_clear_savedwrite pte_wrprotect
#endif

#ifndef pmd_savedwrite
#define pmd_savedwrite pmd_write
#endif

#ifndef pmd_mk_savedwrite
#define pmd_mk_savedwrite pmd_mkwrite
#endif

#ifndef pmd_clear_savedwrite
#define pmd_clear_savedwrite pmd_wrprotect
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
		unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
		unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline void pudp_set_wrprotect(struct mm_struct *mm,
		unsigned long address, pud_t *pudp)
{
	pud_t old_pud = *pudp;
	set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
}
#else
static inline void pudp_set_wrprotect(struct mm_struct *mm,
		unsigned long address, pud_t *pudp)
{
	BUILD_BUG();
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif

#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp);
#else
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
	return *pmdp;
}
#define pmdp_collapse_flush pmdp_collapse_flush
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
		pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * This is an implementation of pmdp_establish() that is only suitable for an
 * architecture that doesn't have hardware dirty/accessed bits. In this case we
 * can't race with the CPU setting those bits, so a non-atomic approach is fine.
 */
static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
	return old_pmd;
}
#endif
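
/*
 * An architecture without hardware dirty/accessed bits can simply alias its
 * pmdp_establish() to the generic helper above from its asm/pgtable.h:
 *
 *	#define pmdp_establish generic_pmdp_establish
 */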
#ifndef __HAVE_ARCH_PMDP_INVALIDATE
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp)
{
}
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PTE_UNUSED
/*
 * Some architectures provide facilities to virtualization guests
 * so that they can flag allocated pages as unused. This allows the
 * host to transparently reclaim unused pages. This function returns
 * whether the pte's page is unused.
 */
static inline int pte_unused(pte_t pte)
{
	return 0;
}
#endif

#ifndef pte_access_permitted
#define pte_access_permitted(pte, write) \
	(pte_present(pte) && (!(write) || pte_write(pte)))
#endif

#ifndef pmd_access_permitted
#define pmd_access_permitted(pmd, write) \
	(pmd_present(pmd) && (!(write) || pmd_write(pmd)))
#endif

#ifndef pud_access_permitted
#define pud_access_permitted(pud, write) \
	(pud_present(pud) && (!(write) || pud_write(pud)))
#endif

#ifndef p4d_access_permitted
#define p4d_access_permitted(p4d, write) \
	(p4d_present(p4d) && (!(write) || p4d_write(p4d)))
#endif

#ifndef pgd_access_permitted
#define pgd_access_permitted(pgd, write) \
	(pgd_present(pgd) && (!(write) || pgd_write(pgd)))
#endif
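
/*
 * Example (sketch only): a lockless fast-path walk, in the spirit of
 * get_user_pages_fast(), uses the *_access_permitted() helpers to decide
 * whether a mapping may be used for the requested access before touching
 * the page:
 *
 *	pte_t pte = READ_ONCE(*ptep);
 *	if (!pte_access_permitted(pte, write))
 *		return 0;
 *
 * Architectures can override these to enforce extra conditions; x86, for
 * instance, additionally consults protection keys here.
 */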
#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return pmd_val(pmd_a) == pmd_val(pmd_b);
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
	return pud_val(pud_a) == pud_val(pud_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	BUILD_BUG();
	return 0;
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef pte_accessible
# define pte_accessible(mm, pte)	((void)(pte), 1)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_writethrough
#define pgprot_writethrough pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
		newprot = pgprot_noncached(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
		newprot = pgprot_writecombine(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
		newprot = pgprot_device(newprot);
	return newprot;
}
#endif
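
/*
 * Example (sketch only): when a caller rebuilds a protection value, for
 * instance during an mprotect-style change, pgprot_modify() carries any
 * special cacheability encoding of the old value over to the new one:
 *
 *	pgprot_t newprot = vm_get_page_prot(newflags);
 *	newprot = pgprot_modify(oldprot, newprot);
 *
 * Without this, a write-combining or uncached mapping could silently become
 * cached after a protection change.
 */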
/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier. Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */
#define pgd_addr_end(addr, end) \
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
	(__boundary - 1 < (end) - 1)? __boundary: (end); \
})

#ifndef p4d_addr_end
#define p4d_addr_end(addr, end) \
({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \
	(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
#endif

#ifndef pud_addr_end
#define pud_addr_end(addr, end) \
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
	(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end) \
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
	(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
#endif
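
/*
 * Worked example (illustrative only, assuming a 2MB PMD as on x86-64 with
 * 4KB pages, i.e. PMD_SIZE == 0x200000 and PMD_MASK == ~0x1fffffUL):
 *
 *	addr = 0x01234000, end = 0x02000000
 *	__boundary = (0x01234000 + 0x200000) & ~0x1fffff = 0x01400000
 *	0x01400000 - 1 < 0x02000000 - 1, so pmd_addr_end() returns
 *	0x01400000, the next PMD boundary, rather than the caller's end.
 *
 * The "- 1" in the comparison makes a boundary or end of 0 behave as "top
 * of the address space" instead of comparing as the smallest value.
 */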
/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void p4d_clear_bad(p4d_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int p4d_none_or_clear_bad(p4d_t *p4d)
{
	if (p4d_none(*p4d))
		return 1;
	if (unlikely(p4d_bad(*p4d))) {
		p4d_clear_bad(p4d);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}
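
/*
 * Typical use (sketch only, not part of this header): a range walker at the
 * PMD level combines pmd_addr_end() with pmd_none_or_clear_bad() so that
 * empty and corrupted entries are skipped. walk_pte_range() below is a
 * hypothetical per-PTE helper, not a function defined by this file.
 *
 *	static void walk_pmd_range(pud_t *pud, unsigned long addr,
 *			unsigned long end)
 *	{
 *		pmd_t *pmd = pmd_offset(pud, addr);
 *		unsigned long next;
 *
 *		do {
 *			next = pmd_addr_end(addr, end);
 *			if (pmd_none_or_clear_bad(pmd))
 *				continue;
 *			walk_pte_range(pmd, addr, next);
 *		} while (pmd++, addr = next, addr != end);
 *	}
 */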
static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
		unsigned long addr, pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
		unsigned long addr, pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time. The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
		unsigned long addr, pte_t *ptep)
{
	return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
		unsigned long addr, pte_t *ptep, pte_t pte)
{
	__ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
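
/*
 * Usage sketch (illustrative only; mirrors what a protection-change loop
 * such as the one in mm/mprotect.c does, with the pte lock already held):
 *
 *	oldpte = ptep_modify_prot_start(mm, addr, ptep);
 *	newpte = pte_modify(oldpte, newprot);
 *	ptep_modify_prot_commit(mm, addr, ptep, newpte);
 *
 * Any dirty/accessed bits the hardware set before _start() are carried in
 * oldpte and therefore survive into the committed value.
 */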
#endif /* CONFIG_MMU */

/*
 * No-op macros that just return the current protection value. Defined here
 * because these macros can be used even if CONFIG_MMU is not defined.
 */
#ifndef pgprot_encrypted
#define pgprot_encrypted(prot)	(prot)
#endif

#ifndef pgprot_decrypted
#define pgprot_decrypted(prot)	(prot)
#endif

/*
 * A facility to provide lazy MMU batching. This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued. Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window. Note that using this
 * interface requires that read hazards be removed from the code. A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date. This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified. In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif
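
/*
 * Example (sketch only, assuming the pte lock for the range is held): batch
 * a run of write-protect updates so a paravirt backend can flush them in one
 * go when lazy mode is left.
 *
 *	arch_enter_lazy_mmu_mode();
 *	do {
 *		ptep_set_wrprotect(mm, addr, ptep);
 *	} while (ptep++, addr += PAGE_SIZE, addr != end);
 *	arch_leave_lazy_mmu_mode();
 *
 * Note the read hazard described above: after a pte has been "modified" in
 * the loop, reading *ptep directly may still return the old value until lazy
 * mode is left or arch_flush_lazy_mmu_mode() is called.
 */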
/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests. By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entry and exit should always be
 * paired. This is for sanity of maintaining and reasoning about the
 * kernel code. In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif
#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
static inline int pte_soft_dirty(pte_t pte)
{
	return 0;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif
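
/*
 * Example (sketch only): a clear_refs-style scan, as done for
 * /proc/<pid>/clear_refs, resets the soft-dirty state of a present pte so
 * later writes can be detected again. With !CONFIG_HAVE_ARCH_SOFT_DIRTY the
 * helpers above make this a no-op apart from the write protection.
 *
 *	pte_t pte = ptep_modify_prot_start(mm, addr, ptep);
 *	pte = pte_wrprotect(pte);
 *	pte = pte_clear_soft_dirty(pte);
 *	ptep_modify_prot_commit(mm, addr, ptep, pte);
 */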
#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of
 * the memory type of pfn mappings specified by remap_pfn_range() and
 * vm_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
		unsigned long pfn, unsigned long addr,
		unsigned long size)
{
	return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vm_insert_pfn().
 */
static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
		pfn_t pfn)
{
}

/*
 * track_pfn_copy is called when a vma that is covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case pfn and size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
		unsigned long pfn, unsigned long size)
{
}

/*
 * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
 */
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
		unsigned long pfn, unsigned long addr,
		unsigned long size);
extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
		pfn_t pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
		unsigned long size);
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif
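
/*
 * Callers do not invoke the track_pfn_* hooks directly; they are driven
 * from the generic pfnmap paths. Sketch of a driver mmap handler whose
 * remap_pfn_range() call ends up in track_pfn_remap() on architectures that
 * define __HAVE_PFNMAP_TRACKING (phys_base here is a hypothetical device
 * address, not something defined by this header):
 *
 *	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		unsigned long size = vma->vm_end - vma->vm_start;
 *
 *		return remap_pfn_range(vma, vma->vm_start,
 *				phys_base >> PAGE_SHIFT, size,
 *				vma->vm_page_prot);
 *	}
 */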
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	extern unsigned long zero_pfn;
	return zero_pfn;
}
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pud_write
static inline int pud_write(pud_t pud)
{
	BUG();
	return 0;
}
#endif /* pud_write */

#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
	(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
	 !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
static inline int pud_trans_huge(pud_t pud)
{
	return 0;
}
#endif

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	/*
	 * Depend on the compiler for an atomic pmd read. NOTE: this is
	 * only going to work if the pmdval_t isn't larger than
	 * an unsigned long.
	 */
	return *pmdp;
}
#endif
#ifndef arch_needs_pgtable_deposit
#define arch_needs_pgtable_deposit() (false)
#endif

/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails). While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd. When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd was none is safe (because it
 * can return none anyway). The compiler level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above is also needed when THP is disabled because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
	pmd_t pmdval = pmd_read_atomic(pmd);
	/*
	 * The barrier will stabilize the pmdval in a register or on
	 * the stack so that it will stop changing under the code.
	 *
	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
	 * pmd_read_atomic is allowed to return a not atomic pmdval
	 * (for example pointing to a hugepage that has never been
	 * mapped in the pmd). The below checks will only care about
	 * the low part of the pmd with 32bit PAE x86 anyway, with the
	 * exception of pmd_none(). So the important thing is that if
	 * the low part of the pmd is found null, the high part will
	 * also be null or the pmd_none() check below would be
	 * confused.
	 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	barrier();
#endif
	/*
	 * !pmd_present() checks for pmd migration entries.
	 *
	 * The complete check uses is_pmd_migration_entry() in linux/swapops.h.
	 * But using that would require moving this function and
	 * pmd_trans_unstable() to linux/swapops.h to resolve the dependency,
	 * which is too much code movement.
	 *
	 * !pmd_present() is equivalent to is_pmd_migration_entry() currently,
	 * because !pmd_present() pages can only be under migration, not
	 * swapped out.
	 *
	 * pmd_none() is preserved for future condition checks on pmd migration
	 * entries and to avoid confusion with this function's name, although
	 * it is redundant with !pmd_present().
	 */
	if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
	    (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
		return 1;
	if (unlikely(pmd_bad(pmdval))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

/*
 * This is a noop if Transparent Hugepage Support is not built into
 * the kernel. Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and they want to
 * walk ptes while holding the mmap sem in read mode (write mode doesn't
 * need this). If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run a pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run when the pmd
 * became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
	return 0;
#endif
}
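
/*
 * Usage sketch (illustrative only; mirrors pte-walking callers such as the
 * madvise and mempolicy walkers, with mmap_sem held for read and ptl being
 * the pte lock returned by pte_offset_map_lock()):
 *
 *	pmd = pmd_offset(pud, addr);
 *	if (pmd_trans_unstable(pmd))
 *		return 0;
 *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *
 * Returning early treats the pmd as none; per the comment above, the caller
 * must be prepared for that outcome.
 */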
#ifndef CONFIG_NUMA_BALANCING
/*
 * Technically a PTE can be PROTNONE even when not doing NUMA balancing, but
 * the only case the kernel cares about is NUMA balancing, and the bit is only
 * ever set when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not
 * marked _PAGE_PROTNONE, so by default implement the helper as "always no".
 * It is the responsibility of the caller to distinguish between PROT_NONE
 * protections and NUMA hinting fault protections.
 */
static inline int pte_protnone(pte_t pte)
{
	return 0;
}

static inline int pmd_protnone(pmd_t pmd)
{
	return 0;
}
#endif /* CONFIG_NUMA_BALANCING */

#endif /* CONFIG_MMU */

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP

#ifndef __PAGETABLE_P4D_FOLDED
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
int p4d_clear_huge(p4d_t *p4d);
#else
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
#endif /* !__PAGETABLE_P4D_FOLDED */

int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
int pud_free_pmd_page(pud_t *pud, unsigned long addr);
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
static inline int pud_clear_huge(pud_t *pud)
{
	return 0;
}
static inline int pmd_clear_huge(pmd_t *pmd)
{
	return 0;
}
static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	return 0;
}
static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	return 0;
}
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
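
/*
 * Example (sketch only): an ioremap-style mapper can try a single PMD-sized
 * entry before falling back to individual ptes. map_with_ptes() is a
 * hypothetical fallback, not something provided here; the real logic lives
 * in the vmap/ioremap code.
 *
 *	if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(phys_addr, PMD_SIZE) &&
 *	    end - addr >= PMD_SIZE && pmd_set_huge(pmd, phys_addr, prot))
 *		return 0;
 *	return map_with_ptes(pmd, addr, end, phys_addr, prot);
 */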
#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * ARCHes with special requirements for evicting THP backing TLB entries can
 * implement this. Otherwise it can also help optimize the normal TLB flush in
 * the THP regime: a stock flush_tlb_range() typically has an optimization to
 * nuke the entire TLB if the flush span is greater than a threshold, which
 * will likely be true for a single huge page. Thus a single THP flush would
 * invalidate the entire TLB, which is not desirable.
 * e.g. see arch/arc: flush_pmd_tlb_range
 */
#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#else
#define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
#define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
#endif
#endif

struct file;
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
		unsigned long size, pgprot_t *vma_prot);

#ifndef CONFIG_X86_ESPFIX64
static inline void init_espfix_bsp(void) { }
#endif

#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
{
	return true;
}

static inline bool arch_has_pfn_modify_check(void)
{
	return false;
}
#endif /* !__HAVE_ARCH_PFN_MODIFY_ALLOWED */

#endif /* !__ASSEMBLY__ */

#ifndef has_transparent_hugepage
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define has_transparent_hugepage() 1
#else
#define has_transparent_hugepage() 0
#endif
#endif

/*
 * On some architectures it depends on the mm if the p4d/pud or pmd
 * layer of the page table hierarchy is folded or not.
 */
#ifndef mm_p4d_folded
#define mm_p4d_folded(mm)	__is_defined(__PAGETABLE_P4D_FOLDED)
#endif

#ifndef mm_pud_folded
#define mm_pud_folded(mm)	__is_defined(__PAGETABLE_PUD_FOLDED)
#endif

#ifndef mm_pmd_folded
#define mm_pmd_folded(mm)	__is_defined(__PAGETABLE_PMD_FOLDED)
#endif

#endif /* _ASM_GENERIC_PGTABLE_H */