/*
 * mmu_audit.c:
 *
 * Audit code for KVM MMU
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Avi Kivity <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/ratelimit.h>

char const *audit_point_name[] = {
	"pre page fault",
	"post page fault",
	"pre pte write",
	"post pte write",
	"pre sync",
	"post sync"
};

#define audit_printk(kvm, fmt, args...)		\
	printk(KERN_ERR "audit: (%s) error: "	\
	       fmt, audit_point_name[kvm->arch.audit_point], ##args)

typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);

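/*
 * Recursively visit every spte of the shadow page @sp: call @fn on each
 * entry, then descend into the child shadow page for entries that are
 * present but not leaves.
 */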
static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			    inspect_spte_fn fn, int level)
{
	int i;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		u64 *ent = sp->spt;

		fn(vcpu, ent + i, level);

		if (is_shadow_present_pte(ent[i]) &&
		    !is_last_spte(ent[i], level)) {
			struct kvm_mmu_page *child;

			child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
			__mmu_spte_walk(vcpu, child, fn, level - 1);
		}
	}
}

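/*
 * Walk the vcpu's whole shadow page table, starting either from the single
 * 64-bit root or, when the guest uses PAE paging, from each of the four
 * pae_root entries.
 */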
static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
{
	int i;
	struct kvm_mmu_page *sp;

	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
		return;

	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->arch.mmu.root_hpa;

		sp = page_header(root);
		__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
		return;
	}

	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->arch.mmu.pae_root[i];

		if (root && VALID_PAGE(root)) {
			root &= PT64_BASE_ADDR_MASK;
			sp = page_header(root);
			__mmu_spte_walk(vcpu, sp, fn, 2);
		}
	}

	return;
}

typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);

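/* Apply @fn to every shadow page on the VM-wide active_mmu_pages list. */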
static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
{
	struct kvm_mmu_page *sp;

	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
		fn(kvm, sp);
}

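/*
 * Check that a present leaf spte really maps the host page backing its gfn:
 * re-translate the gfn with kvm_vcpu_gfn_to_pfn_atomic() and compare the
 * resulting hpa with the address stored in the spte. An unsync shadow page
 * is only legal at the last level.
 */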
static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp;
	gfn_t gfn;
	kvm_pfn_t pfn;
	hpa_t hpa;

	sp = page_header(__pa(sptep));

	if (sp->unsync) {
		if (level != PT_PAGE_TABLE_LEVEL) {
			audit_printk(vcpu->kvm, "unsync sp: %p "
				     "level = %d\n", sp, level);
			return;
		}
	}

	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
		return;

	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
	pfn = kvm_vcpu_gfn_to_pfn_atomic(vcpu, gfn);

	if (is_error_pfn(pfn))
		return;

	hpa = pfn << PAGE_SHIFT;

	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
			     "ent %llx\n", vcpu->arch.mmu.root_level, pfn,
			     hpa, *sptep);
}

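/*
 * Verify that the gfn mapped by @sptep has a memslot and a reverse-mapping
 * (rmap) entry, i.e. that the spte can be found again starting from its gfn.
 * Complaints are rate limited to avoid flooding the log.
 */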
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
	struct kvm_rmap_head *rmap_head;
	struct kvm_mmu_page *rev_sp;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *slot;
	gfn_t gfn;

	rev_sp = page_header(__pa(sptep));
	gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

	slots = kvm_memslots_for_spte_role(kvm, rev_sp->role);
	slot = __gfn_to_memslot(slots, gfn);
	if (!slot) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
		audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
			     (long int)(sptep - rev_sp->spt), rev_sp->gfn);
		dump_stack();
		return;
	}

	rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot);
	if (!rmap_head->val) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no rmap for writable spte %llx\n",
			     *sptep);
		dump_stack();
	}
}

static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
		inspect_spte_has_rmap(vcpu->kvm, sptep);
}

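/*
 * After the "post sync" audit point no shadow page should still be marked
 * unsync; report any page that is.
 */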
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp = page_header(__pa(sptep));

	if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
		audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
			     "root.\n", sp);
}

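/* Run the rmap check on every present spte of a last-level shadow page. */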
static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	int i;

	if (sp->role.level != PT_PAGE_TABLE_LEVEL)
		return;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		inspect_spte_has_rmap(kvm, sp->spt + i);
	}
}

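/*
 * A synced, indirect, valid shadow page implies that the guest page table it
 * shadows must be write-protected, so no writable spte may map sp->gfn.
 * Direct, unsync and invalid pages are exempt from this rule.
 */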
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	struct kvm_rmap_head *rmap_head;
	u64 *sptep;
	struct rmap_iterator iter;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *slot;

	if (sp->role.direct || sp->unsync || sp->role.invalid)
		return;

	slots = kvm_memslots_for_spte_role(kvm, sp->role);
	slot = __gfn_to_memslot(slots, sp->gfn);
	rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot);

	for_each_rmap_spte(rmap_head, &iter, sptep) {
		if (is_writable_pte(*sptep))
			audit_printk(kvm, "shadow page has writable "
				     "mappings: gfn %llx role %x\n",
				     sp->gfn, sp->role.word);
	}
}

static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	check_mappings_rmap(kvm, sp);
	audit_write_protection(kvm, sp);
}

static void audit_all_active_sps(struct kvm *kvm)
{
	walk_all_active_sps(kvm, audit_sp);
}

static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	audit_sptes_have_rmaps(vcpu, sptep, level);
	audit_mappings(vcpu, sptep, level);
	audit_spte_after_sync(vcpu, sptep, level);
}

static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
{
	mmu_spte_walk(vcpu, audit_spte);
}

static bool mmu_audit;
static struct static_key mmu_audit_key;

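/*
 * A full audit is expensive, so runs are rate limited to at most ten in any
 * five-second window, and the kvm_mmu_audit() hook itself is a static-key
 * branch that costs nothing unless auditing has been enabled.
 */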
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

	if (!__ratelimit(&ratelimit_state))
		return;

	vcpu->kvm->arch.audit_point = point;
	audit_all_active_sps(vcpu->kvm);
	audit_vcpu_spte(vcpu);
}

static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	if (static_key_false((&mmu_audit_key)))
		__kvm_mmu_audit(vcpu, point);
}

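/*
 * Enable/disable flip the static key at most once per transition, so repeated
 * writes of the same value do not unbalance the key's reference count.
 */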
static void mmu_audit_enable(void)
{
	if (mmu_audit)
		return;

	static_key_slow_inc(&mmu_audit_key);
	mmu_audit = true;
}

static void mmu_audit_disable(void)
{
	if (!mmu_audit)
		return;

	static_key_slow_dec(&mmu_audit_key);
	mmu_audit = false;
}

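/*
 * Module-parameter setter: only "0" (disable) and "1" (enable) are accepted;
 * anything else is rejected with -EINVAL.
 */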
static int mmu_audit_set(const char *val, const struct kernel_param *kp)
{
	int ret;
	unsigned long enable;

	ret = kstrtoul(val, 10, &enable);
	if (ret < 0)
		return -EINVAL;

	switch (enable) {
	case 0:
		mmu_audit_disable();
		break;
	case 1:
		mmu_audit_enable();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct kernel_param_ops audit_param_ops = {
	.set = mmu_audit_set,
	.get = param_get_bool,
};

arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);

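/*
 * Usage sketch (assumes this file is built into the arch KVM module so that
 * the parameter is exposed under /sys/module/kvm/parameters; the exact path
 * depends on the kernel configuration):
 *
 *   # enable MMU auditing at runtime
 *   echo 1 > /sys/module/kvm/parameters/mmu_audit
 *
 *   # disable it again
 *   echo 0 > /sys/module/kvm/parameters/mmu_audit
 *
 * Errors found at each audit point are reported through audit_printk() at
 * the KERN_ERR log level.
 */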