pgtable.c
/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
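
/*
 * A pte can be invalidated "directly", flushing the TLB entry at once
 * with IPTE, or "lazily": when the mm is attached to the local CPU
 * alone, the lazy variant merely marks the entry invalid and records a
 * pending flush in mm->context.flush_mm, deferring the hardware flush.
 */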
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        pte_t old;

        old = *ptep;
        if (unlikely(pte_val(old) & _PAGE_INVALID))
                return old;
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __ptep_ipte(addr, ptep, IPTE_LOCAL);
        else
                __ptep_ipte(addr, ptep, IPTE_GLOBAL);
        atomic_dec(&mm->context.flush_count);
        return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
                                    unsigned long addr, pte_t *ptep)
{
        pte_t old;

        old = *ptep;
        if (unlikely(pte_val(old) & _PAGE_INVALID))
                return old;
        atomic_inc(&mm->context.flush_count);
        if (cpumask_equal(&mm->context.cpu_attach_mask,
                          cpumask_of(smp_processor_id()))) {
                pte_val(*ptep) |= _PAGE_INVALID;
                mm->context.flush_mm = 1;
        } else
                __ptep_ipte(addr, ptep, IPTE_GLOBAL);
        atomic_dec(&mm->context.flush_count);
        return old;
}
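
/*
 * A page table page with PGSTEs carries a shadow array of page status
 * entries PTRS_PER_PTE slots behind the ptes, hence the accesses to
 * ptep[PTRS_PER_PTE] below.  pgste_get_lock() takes the per-entry lock
 * by spinning on a compare-and-swap (csg) that sets the PCL bit;
 * pgste_set_unlock() stores the new value with the PCL bit cleared.
 */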
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
        unsigned long new = 0;
#ifdef CONFIG_PGSTE
        unsigned long old;

        asm(
                "       lg      %0,%2\n"
                "0:     lgr     %1,%0\n"
                "       nihh    %0,0xff7f\n"    /* clear PCL bit in old */
                "       oihh    %1,0x0080\n"    /* set PCL bit in new */
                "       csg     %0,%1,%2\n"
                "       jl      0b\n"
                : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
                : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
        return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        asm(
                "       nihh    %1,0xff7f\n"    /* clear PCL bit */
                "       stg     %1,%0\n"
                : "=Q" (ptep[PTRS_PER_PTE])
                : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
                : "cc", "memory");
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
        unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
        pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
        return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
                                       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
        unsigned long address, bits, skey;

        if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
                return pgste;
        address = pte_val(pte) & PAGE_MASK;
        skey = (unsigned long) page_get_storage_key(address);
        bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
        /* Transfer page changed & referenced bit to guest bits in pgste */
        pgste_val(pgste) |= bits << 48;         /* GR bit & GC bit */
        /* Copy page access key and fetch protection bit to pgste */
        pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
        pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
        return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
                                 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
        unsigned long address;
        unsigned long nkey;

        if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
                return;
        VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
        address = pte_val(entry) & PAGE_MASK;
        /*
         * Set page access key and fetch protection bit from pgste.
         * The guest C/R information is still in the PGSTE, set real
         * key C/R to 0.
         */
        nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
        nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
        page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
        if ((pte_val(entry) & _PAGE_PRESENT) &&
            (pte_val(entry) & _PAGE_WRITE) &&
            !(pte_val(entry) & _PAGE_INVALID)) {
                if (!MACHINE_HAS_ESOP) {
                        /*
                         * Without enhanced suppression-on-protection force
                         * the dirty bit on for all writable ptes.
                         */
                        pte_val(entry) |= _PAGE_DIRTY;
                        pte_val(entry) &= ~_PAGE_PROTECT;
                }
                if (!(pte_val(entry) & _PAGE_PROTECT))
                        /* This pte allows write access, set user-dirty */
                        pgste_val(pgste) |= PGSTE_UC_BIT;
        }
#endif
        *ptep = entry;
        return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
                                       unsigned long addr,
                                       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        unsigned long bits;

        bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
        if (bits) {
                pgste_val(pgste) ^= bits;
                ptep_notify(mm, addr, ptep, bits);
        }
#endif
        return pgste;
}
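
/*
 * A pte exchange is bracketed by ptep_xchg_start() and ptep_xchg_commit():
 * start locks the pgste and fires any pending notifier bits, the caller
 * flushes the old pte, and commit propagates storage-key state between
 * pte and pgste before storing the new pte and dropping the pgste lock.
 */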
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        pgste_t pgste = __pgste(0);

        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
                pgste = pgste_pte_notify(mm, addr, ptep, pgste);
        }
        return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
                                     unsigned long addr, pte_t *ptep,
                                     pgste_t pgste, pte_t old, pte_t new)
{
        if (mm_has_pgste(mm)) {
                if (pte_val(old) & _PAGE_INVALID)
                        pgste_set_key(ptep, pgste, new, mm);
                if (pte_val(new) & _PAGE_INVALID) {
                        pgste = pgste_update_all(old, pgste, mm);
                        if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
                            _PGSTE_GPS_USAGE_UNUSED)
                                pte_val(old) |= _PAGE_UNUSED;
                }
                pgste = pgste_set_pte(ptep, pgste, new);
                pgste_set_unlock(ptep, pgste);
        } else {
                *ptep = new;
        }
        return old;
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pte_t *ptep, pte_t new)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_direct(mm, addr, ptep);
        old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t new)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_lazy(mm, addr, ptep);
        old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);
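
/*
 * ptep_modify_prot_start() and ptep_modify_prot_commit() form a pair:
 * start disables preemption and leaves the pgste locked (pgste_set()
 * stashes its value with the PCL bit still set), commit stores the new
 * pte, unlocks the pgste and re-enables preemption.  The two must
 * always be called together.
 */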
pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_lazy(mm, addr, ptep);
        if (mm_has_pgste(mm)) {
                pgste = pgste_update_all(old, pgste, mm);
                pgste_set(ptep, pgste);
        }
        return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep, pte_t pte)
{
        pgste_t pgste;

        if (mm_has_pgste(mm)) {
                pgste = pgste_get(ptep);
                pgste_set_key(ptep, pgste, pte, mm);
                pgste = pgste_set_pte(ptep, pgste, pte);
                pgste_set_unlock(ptep, pgste);
        } else {
                *ptep = pte;
        }
        preempt_enable();
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
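
/*
 * Segment (pmd) and region (pud) entries are invalidated with IDTE when
 * the machine has it; as a fallback, __pmdp_csp() replaces the entry
 * with a compare-and-swap-and-purge, which also purges the TLB.
 */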
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pmd_t *pmdp)
{
        pmd_t old;

        old = *pmdp;
        if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
                return old;
        if (!MACHINE_HAS_IDTE) {
                __pmdp_csp(pmdp);
                return old;
        }
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __pmdp_idte(addr, pmdp, IDTE_LOCAL);
        else
                __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
        atomic_dec(&mm->context.flush_count);
        return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
                                    unsigned long addr, pmd_t *pmdp)
{
        pmd_t old;

        old = *pmdp;
        if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
                return old;
        atomic_inc(&mm->context.flush_count);
        if (cpumask_equal(&mm->context.cpu_attach_mask,
                          cpumask_of(smp_processor_id()))) {
                pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
                mm->context.flush_mm = 1;
        } else if (MACHINE_HAS_IDTE)
                __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
        else
                __pmdp_csp(pmdp);
        atomic_dec(&mm->context.flush_count);
        return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pmd_t *pmdp, pmd_t new)
{
        pmd_t old;

        preempt_disable();
        old = pmdp_flush_direct(mm, addr, pmdp);
        *pmdp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                     pmd_t *pmdp, pmd_t new)
{
        pmd_t old;

        preempt_disable();
        old = pmdp_flush_lazy(mm, addr, pmdp);
        *pmdp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pud_t *pudp)
{
        pud_t old;

        old = *pudp;
        if (pud_val(old) & _REGION_ENTRY_INVALID)
                return old;
        if (!MACHINE_HAS_IDTE) {
                /*
                 * Invalid bit position is the same for pmd and pud, so
                 * we can re-use __pmdp_csp() here
                 */
                __pmdp_csp((pmd_t *) pudp);
                return old;
        }
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __pudp_idte(addr, pudp, IDTE_LOCAL);
        else
                __pudp_idte(addr, pudp, IDTE_GLOBAL);
        atomic_dec(&mm->context.flush_count);
        return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pud_t *pudp, pud_t new)
{
        pud_t old;

        preempt_disable();
        old = pudp_flush_direct(mm, addr, pudp);
        *pudp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);
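
/*
 * For transparent huge pages the page table that used to map the range
 * is kept deposited in case the huge pmd is split again.  The deposited
 * tables are chained through a list_head placed in the (currently
 * unused) pgtable page itself; deposit and withdraw keep FIFO order.
 */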
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                pgtable_t pgtable)
{
        struct list_head *lh = (struct list_head *) pgtable;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        if (!pmd_huge_pte(mm, pmdp))
                INIT_LIST_HEAD(lh);
        else
                list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
        pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
        struct list_head *lh;
        pgtable_t pgtable;
        pte_t *ptep;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        pgtable = pmd_huge_pte(mm, pmdp);
        lh = (struct list_head *) pgtable;
        if (list_empty(lh))
                pmd_huge_pte(mm, pmdp) = NULL;
        else {
                pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
                list_del(lh);
        }
        ptep = (pte_t *) pgtable;
        pte_val(*ptep) = _PAGE_INVALID;
        ptep++;
        pte_val(*ptep) = _PAGE_INVALID;
        return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
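
/*
 * Everything below is built only with CONFIG_PGSTE and implements the
 * pte/pgste primitives used for virtualization: invalidation
 * notification, access enforcement, pte shadowing for VSIE, and guest
 * storage-key emulation.
 */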
#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        pgste_t pgste;

        /* the mm_has_pgste() check is done in set_pte_at() */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
        pgste_set_key(ptep, pgste, entry, mm);
        pgste = pgste_set_pte(ptep, pgste, entry);
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        pgste_t pgste;

        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) |= PGSTE_IN_BIT;
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, int prot, unsigned long bit)
{
        pte_t entry;
        pgste_t pgste;
        int pte_i, pte_p;

        pgste = pgste_get_lock(ptep);
        entry = *ptep;
        /* Check pte entry after all locks have been acquired */
        pte_i = pte_val(entry) & _PAGE_INVALID;
        pte_p = pte_val(entry) & _PAGE_PROTECT;
        if ((pte_i && (prot != PROT_NONE)) ||
            (pte_p && (prot & PROT_WRITE))) {
                pgste_set_unlock(ptep, pgste);
                return -EAGAIN;
        }
        /* Change access rights and set pgste bit */
        if (prot == PROT_NONE && !pte_i) {
                ptep_flush_direct(mm, addr, ptep);
                pgste = pgste_update_all(entry, pgste, mm);
                pte_val(entry) |= _PAGE_INVALID;
        }
        if (prot == PROT_READ && !pte_p) {
                ptep_flush_direct(mm, addr, ptep);
                pte_val(entry) &= ~_PAGE_INVALID;
                pte_val(entry) |= _PAGE_PROTECT;
        }
        pgste_val(pgste) |= bit;
        pgste = pgste_set_pte(ptep, pgste, entry);
        pgste_set_unlock(ptep, pgste);
        return 0;
}
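
/*
 * ptep_shadow_pte - make a target pte shadow a source pte (for VSIE).
 * Returns 1 if the target pte was set up as a shadow of the source pte,
 * 0 if the target pte was already valid (already shadowed), and -EAGAIN
 * if the source pte is invalid or write-protected while write access is
 * requested.  PGSTE_VSIE_BIT in the source pgste marks the entry as
 * referenced by a shadow table so later changes trigger notification.
 */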
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
                    pte_t *sptep, pte_t *tptep, pte_t pte)
{
        pgste_t spgste, tpgste;
        pte_t spte, tpte;
        int rc = -EAGAIN;

        if (!(pte_val(*tptep) & _PAGE_INVALID))
                return 0;       /* already shadowed */
        spgste = pgste_get_lock(sptep);
        spte = *sptep;
        if (!(pte_val(spte) & _PAGE_INVALID) &&
            !((pte_val(spte) & _PAGE_PROTECT) &&
              !(pte_val(pte) & _PAGE_PROTECT))) {
                pgste_val(spgste) |= PGSTE_VSIE_BIT;
                tpgste = pgste_get_lock(tptep);
                pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
                                (pte_val(pte) & _PAGE_PROTECT);
                /* don't touch the storage key - it belongs to parent pgste */
                tpgste = pgste_set_pte(tptep, tpgste, tpte);
                pgste_set_unlock(tptep, tpgste);
                rc = 1;
        }
        pgste_set_unlock(sptep, spgste);
        return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
        pgste_t pgste;

        pgste = pgste_get_lock(ptep);
        /* notifier is called by the caller */
        ptep_flush_direct(mm, saddr, ptep);
        /* don't touch the storage key - it belongs to parent pgste */
        pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
        pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
        if (!non_swap_entry(entry))
                dec_mm_counter(mm, MM_SWAPENTS);
        else if (is_migration_entry(entry)) {
                struct page *page = migration_entry_to_page(entry);

                dec_mm_counter(mm, mm_counter(page));
        }
        free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, int reset)
{
        unsigned long pgstev;
        pgste_t pgste;
        pte_t pte;

        /* Zap unused and logically-zero pages */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgstev = pgste_val(pgste);
        pte = *ptep;
        if (!reset && pte_swap(pte) &&
            ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
             (pgstev & _PGSTE_GPS_ZERO))) {
                ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
                pte_clear(mm, addr, ptep);
        }
        if (reset)
                pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        unsigned long ptev;
        pgste_t pgste;

        /* Clear storage key */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
                              PGSTE_GR_BIT | PGSTE_GC_BIT);
        ptev = pte_val(*ptep);
        if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
                page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
        spinlock_t *ptl;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pgste_t pgste;
        pte_t *ptep;
        pte_t pte;
        bool dirty;

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        if (!pud)
                return false;
        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                return false;
        /* We can't run guests backed by huge pages, but userspace can
         * still set them up and then try to migrate them without any
         * migration support.
         */
        if (pmd_large(*pmd))
                return true;
        ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (unlikely(!ptep))
                return false;
        pgste = pgste_get_lock(ptep);
        dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
        pgste_val(pgste) &= ~PGSTE_UC_BIT;
        pte = *ptep;
        if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
                pgste = pgste_pte_notify(mm, addr, ptep, pgste);
                __ptep_ipte(addr, ptep, IPTE_GLOBAL);
                if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
                        pte_val(pte) |= _PAGE_PROTECT;
                else
                        pte_val(pte) |= _PAGE_INVALID;
                *ptep = pte;
        }
        pgste_set_unlock(ptep, pgste);
        spin_unlock(ptl);
        return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
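
/*
 * set_guest_storage_key - set the guest's view of the storage key.
 * 'key' is in storage-key format: access-control bits, fetch-protection
 * bit, reference bit and change bit.  ACC and FP are written to the real
 * storage key; R and C are kept as guest bits in the pgste.  'nq'
 * requests the nonquiescing variant of the key update.  Returns -EFAULT
 * if no pte could be located.
 */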
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                          unsigned char key, bool nq)
{
        unsigned long keyul;
        spinlock_t *ptl;
        pgste_t old, new;
        pte_t *ptep;

        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;

        new = old = pgste_get_lock(ptep);
        pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
                            PGSTE_ACC_BITS | PGSTE_FP_BIT);
        keyul = (unsigned long) key;
        pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
        pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
        if (!(pte_val(*ptep) & _PAGE_INVALID)) {
                unsigned long address, bits, skey;

                address = pte_val(*ptep) & PAGE_MASK;
                skey = (unsigned long) page_get_storage_key(address);
                bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
                skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
                /* Set storage key ACC and FP */
                page_set_storage_key(address, skey, !nq);
                /* Merge host changed & referenced into pgste */
                pgste_val(new) |= bits << 52;
        }
        /* changing the guest storage key is considered a change of the page */
        if ((pgste_val(new) ^ pgste_val(old)) &
            (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
                pgste_val(new) |= PGSTE_UC_BIT;

        pgste_set_unlock(ptep, new);
        pte_unmap_unlock(ptep, ptl);
        return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/**
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                               unsigned char key, unsigned char *oldkey,
                               bool nq, bool mr, bool mc)
{
        unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
        int rc;

        /* we can drop the pgste lock between getting and setting the key */
        if (mr | mc) {
                rc = get_guest_storage_key(current->mm, addr, &tmp);
                if (rc)
                        return rc;
                if (oldkey)
                        *oldkey = tmp;
                if (!mr)
                        mask |= _PAGE_REFERENCED;
                if (!mc)
                        mask |= _PAGE_CHANGED;
                if (!((tmp ^ key) & mask))
                        return 0;
        }
        rc = set_guest_storage_key(current->mm, addr, key, nq);
        return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/**
 * Reset a guest reference bit (rrbe), returning the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
        spinlock_t *ptl;
        pgste_t old, new;
        pte_t *ptep;
        int cc = 0;

        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;

        new = old = pgste_get_lock(ptep);
        /* Reset guest reference bit only */
        pgste_val(new) &= ~PGSTE_GR_BIT;

        if (!(pte_val(*ptep) & _PAGE_INVALID)) {
                cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
                /* Merge real referenced bit into host-set */
                pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
        }
        /* Reflect guest's logical view, not physical */
        cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
        /* Changing the guest storage key is considered a change of the page */
        if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
                pgste_val(new) |= PGSTE_UC_BIT;

        pgste_set_unlock(ptep, new);
        pte_unmap_unlock(ptep, ptl);
        return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);
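
/*
 * get_guest_storage_key - read the guest's view of the storage key.
 * For a mapped page the ACC and FP bits come from the real storage key,
 * for an unmapped page from the pgste; the guest R and C bits tracked
 * in the pgste are merged in either way.  Returns -EFAULT if no pte
 * could be located.
 */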
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                          unsigned char *key)
{
        spinlock_t *ptl;
        pgste_t pgste;
        pte_t *ptep;

        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep))
                return -EFAULT;

        pgste = pgste_get_lock(ptep);
        *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
        if (!(pte_val(*ptep) & _PAGE_INVALID))
                *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
        /* Reflect guest's logical view, not physical */
        *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
        pgste_set_unlock(ptep, pgste);
        pte_unmap_unlock(ptep, ptl);
        return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);
#endif