mce-inject.c

/*
 * Machine check injection support.
 * Copyright 2008 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 * Authors:
 * Andi Kleen
 * Ying Huang
 */
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/preempt.h>
#include <linux/smp.h>
#include <linux/notifier.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <asm/mce.h>
#include <asm/apic.h>
#include <asm/nmi.h>

/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
{
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}
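
/* Run a poll-style scan of all banks and consume the injected record. */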
static void raise_poll(struct mce *m)
{
	unsigned long flags;
	mce_banks_t b;

	memset(&b, 0xff, sizeof(mce_banks_t));
	local_irq_save(flags);
	machine_check_poll(0, &b);
	local_irq_restore(flags);
	m->finished = 0;
}
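
/*
 * Raise a machine check exception on this CPU. If no pt_regs are
 * supplied, fake a minimal frame from the injected record.
 */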
static void raise_exception(struct mce *m, struct pt_regs *pregs)
{
	struct pt_regs regs;
	unsigned long flags;

	if (!pregs) {
		memset(&regs, 0, sizeof(struct pt_regs));
		regs.ip = m->ip;
		regs.cs = m->cs;
		pregs = &regs;
	}
	/* In the mcheck exception handler, IRQs will be disabled */
	local_irq_save(flags);
	do_machine_check(pregs, 0);
	local_irq_restore(flags);
	m->finished = 0;
}
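
/* CPUs still waiting to take a broadcast injection, and write-side serialization. */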
static cpumask_var_t mce_inject_cpumask;
static DEFINE_MUTEX(mce_inject_mutex);
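
/* NMI handler: take a pending broadcast injection on this CPU. */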
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);

	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
		return NMI_DONE;
	cpumask_clear_cpu(cpu, mce_inject_cpumask);
	if (m->inject_flags & MCJ_EXCEPTION)
		raise_exception(m, regs);
	else if (m->status)
		raise_poll(m);
	return NMI_HANDLED;
}
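
/* IRQ broadcast handler: raise the injected exception on this CPU. */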
static void mce_irq_ipi(void *info)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);

	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
	    m->inject_flags & MCJ_EXCEPTION) {
		cpumask_clear_cpu(cpu, mce_inject_cpumask);
		raise_exception(m, NULL);
	}
}

/* Inject mce on current CPU */
static int raise_local(void)
{
	struct mce *m = this_cpu_ptr(&injectm);
	int context = MCJ_CTX(m->inject_flags);
	int ret = 0;
	int cpu = m->extcpu;

	if (m->inject_flags & MCJ_EXCEPTION) {
		pr_info("Triggering MCE exception on CPU %d\n", cpu);
		switch (context) {
		case MCJ_CTX_IRQ:
			/*
			 * Could do more to fake interrupts like
			 * calling irq_enter, but the necessary
			 * machinery isn't exported currently.
			 */
			/*FALL THROUGH*/
		case MCJ_CTX_PROCESS:
			raise_exception(m, NULL);
			break;
		default:
			pr_info("Invalid MCE context\n");
			ret = -EINVAL;
		}
		pr_info("MCE exception done on CPU %d\n", cpu);
	} else if (m->status) {
		pr_info("Starting machine check poll CPU %d\n", cpu);
		raise_poll(m);
		mce_notify_irq();
		pr_info("Machine check poll done on CPU %d\n", cpu);
	} else
		m->finished = 0;

	return ret;
}
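
/*
 * Publish the injected record and raise it, optionally broadcasting to
 * other CPUs via IRQ or NMI before raising on the local CPU.
 */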
static void raise_mce(struct mce *m)
{
	int context = MCJ_CTX(m->inject_flags);

	inject_mce(m);

	if (context == MCJ_CTX_RANDOM)
		return;

	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
		unsigned long start;
		int cpu;

		get_online_cpus();
		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
		for_each_online_cpu(cpu) {
			struct mce *mcpu = &per_cpu(injectm, cpu);

			if (!mcpu->finished ||
			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
				cpumask_clear_cpu(cpu, mce_inject_cpumask);
		}
		if (!cpumask_empty(mce_inject_cpumask)) {
			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
				/*
				 * Don't wait for completion: mce_irq_ipi has
				 * to run in parallel with the raise_local()
				 * below.
				 */
				preempt_disable();
				smp_call_function_many(mce_inject_cpumask,
						       mce_irq_ipi, NULL, 0);
				preempt_enable();
			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
				apic->send_IPI_mask(mce_inject_cpumask,
						    NMI_VECTOR);
		}
		start = jiffies;
		while (!cpumask_empty(mce_inject_cpumask)) {
			if (!time_before(jiffies, start + 2*HZ)) {
				pr_err("Timeout waiting for mce inject %lx\n",
				       *cpumask_bits(mce_inject_cpumask));
				break;
			}
			cpu_relax();
		}
		raise_local();
		put_cpu();
		put_online_cpus();
	} else {
		preempt_disable();
		raise_local();
		preempt_enable();
	}
}

/* Error injection interface */
static ssize_t mce_write(struct file *filp, const char __user *ubuf,
			 size_t usize, loff_t *off)
{
	struct mce m;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	/*
	 * There are some cases where real MSR reads could slip
	 * through.
	 */
	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
		return -EIO;

	if ((unsigned long)usize > sizeof(struct mce))
		usize = sizeof(struct mce);
	if (copy_from_user(&m, ubuf, usize))
		return -EFAULT;

	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
		return -EINVAL;

	/*
	 * Need to give user space some time to set everything up,
	 * so do it a jiffie or two later everywhere.
	 */
	schedule_timeout(2);

	mutex_lock(&mce_inject_mutex);
	raise_mce(&m);
	mutex_unlock(&mce_inject_mutex);
	return usize;
}
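
/* Hook the mce_chrdev write path and register the broadcast NMI handler. */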
static int inject_init(void)
{
	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
		return -ENOMEM;
	pr_info("Machine check injector initialized\n");
	register_mce_write_callback(mce_write);
	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
			     "mce_notify");
	return 0;
}

module_init(inject_init);

/*
 * Cannot tolerate unloading currently because we cannot
 * guarantee all openers of mce_chrdev will get a reference to us.
 */
MODULE_LICENSE("GPL");