/*
 * Page table allocation functions
 *
 * Copyright IBM Corp. 2016
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/mm.h>
#include <linux/sysctl.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

#ifdef CONFIG_PGSTE

static int page_table_allocate_pgste_min = 0;
static int page_table_allocate_pgste_max = 1;
int page_table_allocate_pgste = 0;
EXPORT_SYMBOL(page_table_allocate_pgste);

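/*
 * The vm.allocate_pgste sysctl makes new processes allocate full 4K
 * page tables whose upper 2K hold the page status table extension
 * (PGSTE) entries needed to run KVM guests. Without it, s390 uses
 * space-saving 2K page table fragments (see page_table_alloc() below).
 */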
static struct ctl_table page_table_sysctl[] = {
        {
                .procname       = "allocate_pgste",
                .data           = &page_table_allocate_pgste,
                .maxlen         = sizeof(int),
                .mode           = S_IRUGO | S_IWUSR,
                /* proc_dointvec_minmax honors extra1/extra2; plain
                 * proc_dointvec would silently ignore the 0..1 bounds. */
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &page_table_allocate_pgste_min,
                .extra2         = &page_table_allocate_pgste_max,
        },
        { }
};

static struct ctl_table page_table_sysctl_dir[] = {
        {
                .procname       = "vm",
                .maxlen         = 0,
                .mode           = 0555,
                .child          = page_table_sysctl,
        },
        { }
};

static int __init page_table_register_sysctl(void)
{
        return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
}
__initcall(page_table_register_sysctl);

#endif /* CONFIG_PGSTE */

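/*
 * CRST (region/segment) tables have 2048 eight-byte entries and are
 * therefore 16K in size, hence the order-2 allocation. The s390 kernel
 * runs with a 1:1 mapping of physical memory, which is why the
 * page_to_phys() result can be used directly as a table address.
 */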
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
        struct page *page = alloc_pages(GFP_KERNEL, 2);

        if (!page)
                return NULL;
        return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
        free_pages((unsigned long) table, 2);
}

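/*
 * Runs on every CPU after an upgrade: CPUs currently executing with
 * the upgraded mm reload the user ASCE so that it points at the new
 * top-level table, and every CPU flushes its local TLB.
 */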
static void __crst_table_upgrade(void *arg)
{
        struct mm_struct *mm = arg;

        if (current->active_mm == mm) {
                clear_user_asce();
                set_user_asce(mm);
        }
        __tlb_flush_local();
}

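/*
 * Grow a 3-level address space to 4 levels: allocate a region-second
 * table, make it point at the old top-level (region-third) table,
 * and install it as the new pgd, raising the limit from 4TB
 * (1UL << 42) to 8PB (1UL << 53).
 */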
int crst_table_upgrade(struct mm_struct *mm)
{
        unsigned long *table, *pgd;

        /* upgrade should only happen from 3 to 4 levels */
        BUG_ON(mm->context.asce_limit != (1UL << 42));

        table = crst_table_alloc(mm);
        if (!table)
                return -ENOMEM;

        spin_lock_bh(&mm->page_table_lock);
        pgd = (unsigned long *) mm->pgd;
        crst_table_init(table, _REGION2_ENTRY_EMPTY);
        pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
        mm->pgd = (pgd_t *) table;
        mm->context.asce_limit = 1UL << 53;
        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
                           _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
        mm->task_size = mm->context.asce_limit;
        spin_unlock_bh(&mm->page_table_lock);

        on_each_cpu(__crst_table_upgrade, mm, 0);
        return 0;
}

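/*
 * Shrink a 3-level address space to the 2 levels a 31-bit compat task
 * needs: the segment table that the old top-level entry points to
 * becomes the pgd, and the limit drops to 2GB (1UL << 31).
 */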
void crst_table_downgrade(struct mm_struct *mm)
{
        pgd_t *pgd;

        /* downgrade should only happen from 3 to 2 levels (compat only) */
        BUG_ON(mm->context.asce_limit != (1UL << 42));

        if (current->active_mm == mm) {
                clear_user_asce();
                __tlb_flush_mm(mm);
        }

        pgd = mm->pgd;
        mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
        mm->context.asce_limit = 1UL << 31;
        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
                           _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
        mm->task_size = mm->context.asce_limit;
        crst_table_free(mm, (unsigned long *) pgd);

        if (current->active_mm == mm)
                set_user_asce(mm);
}

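/*
 * Lock-free read-modify-write: toggle @bits in @v with a cmpxchg
 * retry loop and return the new value. Used below to flip the page
 * table fragment state bits kept in page->_mapcount.
 */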
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
        unsigned int old, new;

        do {
                old = atomic_read(v);
                new = old ^ bits;
        } while (atomic_cmpxchg(v, old, new) != old);
        return new;
}

#ifdef CONFIG_PGSTE

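/*
 * A pgste page table is always a full 4K page: the lower 2K hold the
 * 256 pte entries (initialized to invalid), the upper 2K hold the
 * matching PGSTE entries (zeroed) that KVM uses for guest storage
 * keys and status bits.
 */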
struct page *page_table_alloc_pgste(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;

        page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
        if (page) {
                table = (unsigned long *) page_to_phys(page);
                clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
                clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
        }
        return page;
}

void page_table_free_pgste(struct page *page)
{
        __free_page(page);
}

#endif /* CONFIG_PGSTE */

/*
 * page table entry allocation/free routines.
 */

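/*
 * An s390 page table has 256 entries and occupies only 2K, so two of
 * them are packed into one 4K page. The bookkeeping lives in
 * page->_mapcount: bits 0 and 1 record which 2K halves are allocated,
 * bits 4 and 5 mark a half that is queued for RCU freeing and must
 * not be handed out again yet. Pages that are not fully allocated
 * sit on mm->context.pgtable_list.
 */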
unsigned long *page_table_alloc(struct mm_struct *mm)
{
        unsigned long *table;
        struct page *page;
        unsigned int mask, bit;

        /* Try to get a fragment of a 4K page as a 2K page table */
        if (!mm_alloc_pgste(mm)) {
                table = NULL;
                spin_lock_bh(&mm->context.pgtable_lock);
                if (!list_empty(&mm->context.pgtable_list)) {
                        page = list_first_entry(&mm->context.pgtable_list,
                                                struct page, lru);
                        /* Fold the pending-free bits into the allocated
                         * bits so that a half still waiting for its RCU
                         * grace period counts as busy. */
                        mask = atomic_read(&page->_mapcount);
                        mask = (mask | (mask >> 4)) & 3;
                        if (mask != 3) {
                                table = (unsigned long *) page_to_phys(page);
                                bit = mask & 1;         /* =1 -> second 2K */
                                if (bit)
                                        table += PTRS_PER_PTE;
                                atomic_xor_bits(&page->_mapcount, 1U << bit);
                                list_del(&page->lru);
                        }
                }
                spin_unlock_bh(&mm->context.pgtable_lock);
                if (table)
                        return table;
        }
        /* Allocate a fresh page */
        page = alloc_page(GFP_KERNEL);
        if (!page)
                return NULL;
        if (!pgtable_page_ctor(page)) {
                __free_page(page);
                return NULL;
        }
        /* Initialize page table */
        table = (unsigned long *) page_to_phys(page);
        if (mm_alloc_pgste(mm)) {
                /* Return 4K page table with PGSTEs */
                atomic_set(&page->_mapcount, 3);
                clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
                clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
        } else {
                /* Return the first 2K fragment of the page */
                atomic_set(&page->_mapcount, 1);
                clear_table(table, _PAGE_INVALID, PAGE_SIZE);
                spin_lock_bh(&mm->context.pgtable_lock);
                list_add(&page->lru, &mm->context.pgtable_list);
                spin_unlock_bh(&mm->context.pgtable_lock);
        }
        return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
        struct page *page;
        unsigned int bit, mask;

        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        if (!mm_alloc_pgste(mm)) {
                /* Free 2K page table fragment of a 4K page */
                bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
                spin_lock_bh(&mm->context.pgtable_lock);
                mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
                if (mask & 3)
                        list_add(&page->lru, &mm->context.pgtable_list);
                else
                        list_del(&page->lru);
                spin_unlock_bh(&mm->context.pgtable_lock);
                if (mask != 0)
                        return;
        }

        pgtable_page_dtor(page);
        atomic_set(&page->_mapcount, -1);
        __free_page(page);
}

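/*
 * Deferred variant used while tearing down address spaces: the xor
 * with 0x11U << bit clears the allocated bit and sets the pending-free
 * bit for the 2K half in one step, and the half's index is encoded in
 * the low bits of the table pointer so that __tlb_remove_table() can
 * finish the job after the RCU grace period.
 */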
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
                         unsigned long vmaddr)
{
        struct mm_struct *mm;
        struct page *page;
        unsigned int bit, mask;

        mm = tlb->mm;
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        if (mm_alloc_pgste(mm)) {
                gmap_unlink(mm, table, vmaddr);
                table = (unsigned long *) (__pa(table) | 3);
                tlb_remove_table(tlb, table);
                return;
        }
        bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
        spin_lock_bh(&mm->context.pgtable_lock);
        mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
        if (mask & 3)
                list_add_tail(&page->lru, &mm->context.pgtable_list);
        else
                list_del(&page->lru);
        spin_unlock_bh(&mm->context.pgtable_lock);
        table = (unsigned long *) (__pa(table) | (1U << bit));
        tlb_remove_table(tlb, table);
}

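/*
 * The two low bits of a batched pointer encode what is being freed:
 * 0 is a 16K CRST table, 1 or 2 is the lower or upper 2K half of a
 * 4K page table, 3 is a full 4K page table with PGSTEs.
 */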
static void __tlb_remove_table(void *_table)
{
        unsigned int mask = (unsigned long) _table & 3;
        void *table = (void *)((unsigned long) _table ^ mask);
        struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);

        switch (mask) {
        case 0:         /* pmd or pud */
                free_pages((unsigned long) table, 2);
                break;
        case 1:         /* lower 2K of a 4K page table */
        case 2:         /* higher 2K of a 4K page table */
                /* Clear the pending-free bit; the page is only
                 * released once both halves are gone. */
                if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
                        break;
                /* fallthrough */
        case 3:         /* 4K page table with pgstes */
                pgtable_page_dtor(page);
                atomic_set(&page->_mapcount, -1);
                __free_page(page);
                break;
        }
}

static void tlb_remove_table_smp_sync(void *arg)
{
        /* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
        /*
         * This isn't an RCU grace period and hence the page-tables cannot be
         * assumed to be actually RCU-freed.
         *
         * It is however sufficient for software page-table walkers that rely
         * on IRQ disabling. See the comment near struct mmu_table_batch.
         */
        smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
        __tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
        struct mmu_table_batch *batch;
        int i;

        batch = container_of(head, struct mmu_table_batch, rcu);
        for (i = 0; i < batch->nr; i++)
                __tlb_remove_table(batch->tables[i]);
        free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
        struct mmu_table_batch **batch = &tlb->batch;

        if (*batch) {
                call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
                *batch = NULL;
        }
}

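/*
 * Queue a page table for freeing after the next RCU-sched grace
 * period. Tables are batched into a page-sized array; if no batch
 * page can be allocated, fall back to an IPI broadcast via
 * tlb_remove_table_one() to synchronize with lockless walkers before
 * freeing immediately.
 */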
void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
        struct mmu_table_batch **batch = &tlb->batch;

        tlb->mm->context.flush_mm = 1;
        if (*batch == NULL) {
                *batch = (struct mmu_table_batch *)
                        __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                if (*batch == NULL) {
                        __tlb_flush_mm_lazy(tlb->mm);
                        tlb_remove_table_one(table);
                        return;
                }
                (*batch)->nr = 0;
        }
        (*batch)->tables[(*batch)->nr++] = table;
        if ((*batch)->nr == MAX_TABLE_BATCH)
                tlb_flush_mmu(tlb);
}