mce_amd.c

/*
 * (c) 2005-2016 Advanced Micro Devices, Inc.
 * Your use of this code is subject to the terms and conditions of the
 * GNU general public license version 2. See "COPYING" or
 * http://www.gnu.org/licenses/gpl.html
 *
 * Written by Jacob Shin - AMD, Inc.
 * Maintained by: Borislav Petkov <bp@alien8.de>
 *
 * All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/string.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS		5
#define THRESHOLD_MAX		0xFFF
#define INT_TYPE_APIC		0x00020000
#define MASK_VALID_HI		0x80000000
#define MASK_CNTP_HI		0x40000000
#define MASK_LOCKED_HI		0x20000000
#define MASK_LVTOFF_HI		0x00F00000
#define MASK_COUNT_EN_HI	0x00080000
#define MASK_INT_TYPE_HI	0x00060000
#define MASK_OVERFLOW_HI	0x00010000
#define MASK_ERR_COUNT_HI	0x00000FFF
#define MASK_BLKPTR_LO		0xFF000000
#define MCG_XBLK_ADDR		0xC0000400
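
/*
 * The MASK_*_HI masks above are applied to the high 32 bits of an MCi_MISC
 * threshold register, i.e. MSR bits 63:32: Valid (63), CntP (62), Locked (61),
 * LvtOffset (55:52), CntEn (51), IntType (50:49), Overflow (48) and the 12-bit
 * error counter (43:32). MASK_BLKPTR_LO selects the BLKPTR field in the low
 * half, which points at the extended threshold block MSRs starting at
 * MCG_XBLK_ADDR (see get_block_address() below).
 */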

/* Deferred error settings */
#define MSR_CU_DEF_ERR		0xC0000410
#define MASK_DEF_LVTOFF		0x000000F0
#define MASK_DEF_INT_TYPE	0x00000006
#define DEF_LVT_OFF		0x2
#define DEF_INT_TYPE_APIC	0x2

/* Scalable MCA: */

/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF	0xF000

static const char * const th_names[] = {
	"load_store",
	"insn_fetch",
	"combined_unit",
	"decode_unit",
	"northbridge",
	"execution_unit",
};

static const char * const smca_umc_block_names[] = {
	"dram_ecc",
	"misc_umc"
};

struct smca_bank_name smca_bank_names[] = {
	[SMCA_LS]	= { "load_store",	"Load Store Unit" },
	[SMCA_IF]	= { "insn_fetch",	"Instruction Fetch Unit" },
	[SMCA_L2_CACHE]	= { "l2_cache",		"L2 Cache" },
	[SMCA_DE]	= { "decode_unit",	"Decode Unit" },
	[SMCA_EX]	= { "execution_unit",	"Execution Unit" },
	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
	[SMCA_CS]	= { "coherent_slave",	"Coherent Slave" },
	[SMCA_PIE]	= { "pie",		"Power, Interrupts, etc." },
	[SMCA_UMC]	= { "umc",		"Unified Memory Controller" },
	[SMCA_PB]	= { "param_block",	"Parameter Block" },
	[SMCA_PSP]	= { "psp",		"Platform Security Processor" },
	[SMCA_SMU]	= { "smu",		"System Management Unit" },
};
EXPORT_SYMBOL_GPL(smca_bank_names);

static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
	/* { bank_type, hwid_mcatype, xec_bitmap } */

	/* ZN Core (HWID=0xB0) MCA types */
	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3), 0x1FF },
	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5), 0x7FF },
	{ SMCA_FP,	 HWID_MCATYPE(0xB0, 0x6), 0x7F },
	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },

	/* Data Fabric MCA types */
	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0), 0x1FF },
	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0xF },

	/* Unified Memory Controller MCA type */
	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0x3F },

	/* Parameter Block MCA type */
	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0), 0x1 },

	/* Platform Security Processor MCA type */
	{ SMCA_PSP,	 HWID_MCATYPE(0xFF, 0x0), 0x1 },

	/* System Management Unit MCA type */
	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0), 0x1 },
};
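
/*
 * HWID_MCATYPE() (from <asm/mce.h>) packs the hardware ID and the MCA type
 * read out of MCA_IPID into a single lookup key (hardware ID in the upper
 * half, MCA type in the lower half). get_smca_bank_info() below builds the
 * same key from the live register and matches it against this table.
 */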

struct smca_bank_info smca_banks[MAX_NR_BANKS];
EXPORT_SYMBOL_GPL(smca_banks);

/*
 * In SMCA enabled processors, we can have multiple banks for a given IP type.
 * So to define a unique name for each bank, we use a temp c-string to append
 * the MCA_IPID[InstanceId] to type's name in get_name().
 *
 * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
 * is greater than 8 plus 1 (for underscore) plus length of longest type name.
 */
#define MAX_MCATYPE_NAME_LEN	30
static char buf_mcatype[MAX_MCATYPE_NAME_LEN];

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned int, bank_map);	/* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

static void get_smca_bank_info(unsigned int bank)
{
	unsigned int i, hwid_mcatype, cpu = smp_processor_id();
	struct smca_hwid_mcatype *type;
	u32 high, instanceId;
	u16 hwid, mcatype;

	/* Collect bank_info using CPU 0 for now. */
	if (cpu)
		return;

	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
		return;
	}

	hwid = high & MCI_IPID_HWID;
	mcatype = (high & MCI_IPID_MCATYPE) >> 16;
	hwid_mcatype = HWID_MCATYPE(hwid, mcatype);

	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
		type = &smca_hwid_mcatypes[i];
		if (hwid_mcatype == type->hwid_mcatype) {
			smca_banks[bank].type = type;
			smca_banks[bank].type_instance = instanceId;
			break;
		}
	}
}
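
/*
 * Parameters handed to threshold_restart_bank(), either directly during init
 * or on the owning CPU via smp_call_function_single() from the sysfs store
 * handlers: the block to reprogram, whether to reset the error counter, and
 * the new LVT offset or previous threshold limit, as applicable.
 */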
struct thresh_restart {
	struct threshold_block	*b;
	int			reset;
	int			set_lvt_off;
	int			lvt_off;
	u16			old_limit;
};

static inline bool is_shared_bank(int bank)
{
	/*
	 * Scalable MCA provides for only one core to have access to the MSRs of
	 * a shared bank.
	 */
	if (mce_flags.smca)
		return false;

	/* Bank 4 is for northbridge reporting and is thus shared */
	return (bank == 4);
}

static const char *bank4_names(const struct threshold_block *b)
{
	switch (b->address) {
	/* MSR4_MISC0 */
	case 0x00000413:
		return "dram";

	case 0xc0000408:
		return "ht_links";

	case 0xc0000409:
		return "l3_cache";

	default:
		WARN(1, "Funny MSR: 0x%08x\n", b->address);
		return "";
	}
};

static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
	/*
	 * bank 4 supports APIC LVT interrupts implicitly since forever.
	 */
	if (bank == 4)
		return true;

	/*
	 * IntP: interrupt present; if this bit is set, the thresholding
	 * bank can generate APIC LVT interrupts
	 */
	return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
	int msr = (hi & MASK_LVTOFF_HI) >> 20;

	if (apic < 0) {
		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
		       b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	if (apic != msr) {
		/*
		 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
		 * the BIOS provides the value. The original field where LVT offset
		 * was set is reserved. Return early here:
		 */
		if (mce_flags.smca)
			return 0;

		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
		return 0;
	}

	return 1;
};
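
/*
 * The 12-bit error counter in the high half of MCi_MISC counts up from a
 * seed of (THRESHOLD_MAX - threshold_limit), so the threshold interrupt
 * fires once threshold_limit further errors have been counted (see the
 * interrupt handler comment below). show_error_count() recovers the number
 * of errors seen by subtracting that seed from the current count.
 */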
/* Reprogram MCx_MISC MSR behind this threshold bank. */
static void threshold_restart_bank(void *_tr)
{
	struct thresh_restart *tr = _tr;
	u32 hi, lo;

	rdmsr(tr->b->address, lo, hi);

	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
		tr->reset = 1;	/* limit cannot be lower than err count */

	if (tr->reset) {		/* reset err count and overflow bit */
		hi =
		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - tr->b->threshold_limit);
	} else if (tr->old_limit) {	/* change limit w/o reset */
		int new_count = (hi & THRESHOLD_MAX) +
		    (tr->old_limit - tr->b->threshold_limit);

		hi = (hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	/* clear IntType */
	hi &= ~MASK_INT_TYPE_HI;

	if (!tr->b->interrupt_capable)
		goto done;

	if (tr->set_lvt_off) {
		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
			/* set new lvt offset */
			hi &= ~MASK_LVTOFF_HI;
			hi |= tr->lvt_off << 20;
		}
	}

	if (tr->b->interrupt_enable)
		hi |= INT_TYPE_APIC;

 done:
	hi |= MASK_COUNT_EN_HI;
	wrmsr(tr->b->address, lo, hi);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
	struct thresh_restart tr = {
		.b		= b,
		.set_lvt_off	= 1,
		.lvt_off	= offset,
	};

	b->threshold_limit	= THRESHOLD_MAX;
	threshold_restart_bank(&tr);
};

static int setup_APIC_mce_threshold(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
					      APIC_EILVT_MSG_FIX, 0))
		return new;

	return reserved;
}

static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
	u32 low = 0, high = 0;
	int def_offset = -1, def_new;

	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
		return;

	def_new = (low & MASK_DEF_LVTOFF) >> 4;
	if (!(low & MASK_DEF_LVTOFF)) {
		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
		def_new = DEF_LVT_OFF;
		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
	}

	def_offset = setup_APIC_deferred_error(def_offset, def_new);
	if ((def_offset == def_new) &&
	    (deferred_error_int_vector != amd_deferred_error_interrupt))
		deferred_error_int_vector = amd_deferred_error_interrupt;

	low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
	wrmsr(MSR_CU_DEF_ERR, low, high);
}
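
/*
 * A bank can expose up to NR_BLOCKS MISC (threshold) registers. Block 0 is
 * always MCx_MISC itself. On legacy parts the BLKPTR field of block 0 points
 * at the extended registers starting at MCG_XBLK_ADDR and the remaining
 * blocks follow consecutively; on SMCA parts the extra blocks live in the
 * fixed MSR_AMD64_SMCA_MCx_MISCy() range instead.
 */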
static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
			     unsigned int bank, unsigned int block)
{
	u32 addr = 0, offset = 0;

	if (mce_flags.smca) {
		if (!block) {
			addr = MSR_AMD64_SMCA_MCx_MISC(bank);
		} else {
			/*
			 * For SMCA enabled processors, BLKPTR field of the
			 * first MISC register (MCx_MISC0) indicates presence of
			 * additional MISC register set (MISC1-4).
			 */
			u32 low, high;

			if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
				return addr;

			if (!(low & MCI_CONFIG_MCAX))
				return addr;

			if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
			    (low & MASK_BLKPTR_LO))
				addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
		}
		return addr;
	}

	/* Fall back to method we used for older processors: */
	switch (block) {
	case 0:
		addr = msr_ops.misc(bank);
		break;
	case 1:
		offset = ((low & MASK_BLKPTR_LO) >> 21);
		if (offset)
			addr = MCG_XBLK_ADDR + offset;
		break;
	default:
		addr = ++current_addr;
	}
	return addr;
}

static int
prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
			int offset, u32 misc_high)
{
	unsigned int cpu = smp_processor_id();
	u32 smca_low, smca_high, smca_addr;
	struct threshold_block b;
	int new;

	if (!block)
		per_cpu(bank_map, cpu) |= (1 << bank);

	memset(&b, 0, sizeof(b));
	b.cpu			= cpu;
	b.bank			= bank;
	b.block			= block;
	b.address		= addr;
	b.interrupt_capable	= lvt_interrupt_supported(bank, misc_high);

	if (!b.interrupt_capable)
		goto done;

	b.interrupt_enable = 1;

	if (!mce_flags.smca) {
		new = (misc_high & MASK_LVTOFF_HI) >> 20;
		goto set_offset;
	}

	smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);

	if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
		/*
		 * OS is required to set the MCAX bit to acknowledge that it is
		 * now using the new MSR ranges and new registers under each
		 * bank. It also means that the OS will configure deferred
		 * errors in the new MCx_CONFIG register. If the bit is not set,
		 * uncorrectable errors will cause a system panic.
		 *
		 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
		 */
		smca_high |= BIT(0);

		/*
		 * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
		 * registers with the option of additionally logging to
		 * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
		 *
		 * This bit is usually set by BIOS to retain the old behavior
		 * for OSes that don't use the new registers. Linux supports the
		 * new registers so let's disable that additional logging here.
		 *
		 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
		 * portion of the MSR).
		 */
		smca_high &= ~BIT(2);

		/*
		 * SMCA sets the Deferred Error Interrupt type per bank.
		 *
		 * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
		 * if the DeferredIntType bit field is available.
		 *
		 * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
		 * high portion of the MSR). OS should set this to 0x1 to enable
		 * APIC based interrupt. First, check that no interrupt has been
		 * set.
		 */
		if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3))
			smca_high |= BIT(5);

		wrmsr(smca_addr, smca_low, smca_high);
	}

	/* Gather LVT offset for thresholding: */
	if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
		goto out;

	new = (smca_low & SMCA_THR_LVT_OFF) >> 12;

set_offset:
	offset = setup_APIC_mce_threshold(offset, new);

	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
		mce_threshold_vector = amd_threshold_interrupt;

done:
	mce_threshold_block_init(&b, offset);

out:
	return offset;
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block, cpu = smp_processor_id();
	int offset = -1;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (mce_flags.smca)
			get_smca_bank_info(bank);

		for (block = 0; block < NR_BLOCKS; ++block) {
			address = get_block_address(cpu, address, low, high, bank, block);
			if (!address)
				break;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI))
				continue;

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			offset = prepare_threshold_block(bank, block, address, offset, high);
		}
	}

	if (mce_flags.succor)
		deferred_error_interrupt_enable(c);
}

static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
	u32 msr_status = msr_ops.status(bank);
	u32 msr_addr = msr_ops.addr(bank);
	struct mce m;
	u64 status;

	WARN_ON_ONCE(deferred_err && threshold_err);

	if (deferred_err && mce_flags.smca) {
		msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
		msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
	}

	rdmsrl(msr_status, status);

	if (!(status & MCI_STATUS_VAL))
		return;

	mce_setup(&m);

	m.status = status;
	m.bank = bank;

	if (threshold_err)
		m.misc = misc;

	if (m.status & MCI_STATUS_ADDRV) {
		rdmsrl(msr_addr, m.addr);

		/*
		 * Extract [55:<lsb>] where lsb is the least significant
		 * *valid* bit of the address bits.
		 */
		if (mce_flags.smca) {
			u8 lsb = (m.addr >> 56) & 0x3f;

			m.addr &= GENMASK_ULL(55, lsb);
		}
	}

	if (mce_flags.smca) {
		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);

		if (m.status & MCI_STATUS_SYNDV)
			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
	}

	mce_log(&m);

	wrmsrl(msr_status, 0);
}

static inline void __smp_deferred_error_interrupt(void)
{
	inc_irq_stat(irq_deferred_error_count);
	deferred_error_int_vector();
}

asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
{
	entering_irq();
	__smp_deferred_error_interrupt();
	exiting_ack_irq();
}

asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
{
	entering_irq();
	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
	__smp_deferred_error_interrupt();
	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
	exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
	unsigned int bank;
	u32 msr_status;
	u64 status;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
					      : msr_ops.status(bank);

		rdmsrl(msr_status, status);

		if (!(status & MCI_STATUS_VAL) ||
		    !(status & MCI_STATUS_DEFERRED))
			continue;

		__log_error(bank, true, false, 0);
		break;
	}
}

/*
 * APIC Interrupt Handler
 */

/*
 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
 */
static void amd_threshold_interrupt(void)
{
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block, cpu = smp_processor_id();

	/* assume first bank caused it */
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		for (block = 0; block < NR_BLOCKS; ++block) {
			address = get_block_address(cpu, address, low, high, bank, block);
			if (!address)
				break;

			if (rdmsr_safe(address, &low, &high))
				break;

			if (!(high & MASK_VALID_HI)) {
				if (block)
					continue;
				else
					break;
			}

			if (!(high & MASK_CNTP_HI)  ||
			     (high & MASK_LOCKED_HI))
				continue;

			/*
			 * Log the machine check that caused the threshold
			 * event.
			 */
			if (high & MASK_OVERFLOW_HI)
				goto log;
		}
	}
	return;

log:
	__log_error(bank, false, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */
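
/*
 * Each threshold block ends up as a kobject below the per-CPU mce_device,
 * typically visible as
 * /sys/devices/system/machinecheck/machinecheck<cpu>/<bank name>/<block name>/
 * containing error_count, threshold_limit and, when the block supports LVT
 * interrupts, interrupt_enable.
 */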

struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_block *, char *);
	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)						\
static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (!b->interrupt_capable)
		return -EINVAL;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	b->interrupt_enable = !!new;

	memset(&tr, 0, sizeof(tr));
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
	struct thresh_restart tr;
	unsigned long new;

	if (kstrtoul(buf, 0, &new) < 0)
		return -EINVAL;

	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;

	memset(&tr, 0, sizeof(tr));
	tr.old_limit = b->threshold_limit;
	b->threshold_limit = new;
	tr.b = b;

	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

	return size;
}
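
/*
 * error_count is derived from the live register: the counter was seeded with
 * (THRESHOLD_MAX - threshold_limit) by threshold_restart_bank(), so
 * subtracting that seed from the current count yields the number of errors
 * seen since the last reset.
 */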
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
	u32 lo, hi;

	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
				     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
	.attr = {.name = __stringify(error_count), .mode = 0444 },
	.show = show_error_count,
};

#define RW_ATTR(val)							\
static struct threshold_attr val = {					\
	.attr	= {.name = __stringify(val), .mode = 0644 },		\
	.show	= show_## val,						\
	.store	= store_## val,						\
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
	&threshold_limit.attr,
	&error_count.attr,
	NULL,	/* possibly interrupt_enable if supported, see below */
	NULL,
};

#define to_block(k)	container_of(k, struct threshold_block, kobj)
#define to_attr(a)	container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->show ? a->show(b, buf) : -EIO;

	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_block *b = to_block(kobj);
	struct threshold_attr *a = to_attr(attr);
	ssize_t ret;

	ret = a->store ? a->store(b, buf, count) : -EIO;

	return ret;
}

static const struct sysfs_ops threshold_ops = {
	.show	= show,
	.store	= store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops	= &threshold_ops,
	.default_attrs	= default_attrs,
};

static const char *get_name(unsigned int bank, struct threshold_block *b)
{
	unsigned int bank_type;

	if (!mce_flags.smca) {
		if (b && bank == 4)
			return bank4_names(b);

		return th_names[bank];
	}

	if (!smca_banks[bank].type)
		return NULL;

	bank_type = smca_banks[bank].type->bank_type;

	if (b && bank_type == SMCA_UMC) {
		if (b->block < ARRAY_SIZE(smca_umc_block_names))
			return smca_umc_block_names[b->block];
		return NULL;
	}

	snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
		 "%s_%x", smca_bank_names[bank_type].name,
			  smca_banks[bank].type_instance);

	return buf_mcatype;
}
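
/*
 * Recursively allocate a threshold_block for every valid block in this bank,
 * following the block address chain built by get_block_address(), and hook
 * each one up as a kobject below the bank's directory.
 */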
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
				     unsigned int block, u32 address)
{
	struct threshold_block *b = NULL;
	u32 low, high;
	int err;

	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
		return 0;

	if (!(high & MASK_VALID_HI)) {
		if (block)
			goto recurse;
		else
			return 0;
	}

	if (!(high & MASK_CNTP_HI)  ||
	     (high & MASK_LOCKED_HI))
		goto recurse;

	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
	if (!b)
		return -ENOMEM;

	b->block		= block;
	b->bank			= bank;
	b->cpu			= cpu;
	b->address		= address;
	b->interrupt_enable	= 0;
	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
	b->threshold_limit	= THRESHOLD_MAX;

	if (b->interrupt_capable) {
		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
		b->interrupt_enable = 1;
	} else {
		threshold_ktype.default_attrs[2] = NULL;
	}

	INIT_LIST_HEAD(&b->miscj);

	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
		list_add(&b->miscj,
			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
	} else {
		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
	}

	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
				   per_cpu(threshold_banks, cpu)[bank]->kobj,
				   get_name(bank, b));
	if (err)
		goto out_free;
recurse:
	address = get_block_address(cpu, address, low, high, bank, ++block);
	if (!address)
		return 0;

	err = allocate_threshold_blocks(cpu, bank, block, address);
	if (err)
		goto out_free;

	if (b)
		kobject_uevent(&b->kobj, KOBJ_ADD);

	return err;

out_free:
	if (b) {
		kobject_put(&b->kobj);
		list_del(&b->miscj);
		kfree(b);
	}
	return err;
}

static int __threshold_add_blocks(struct threshold_bank *b)
{
	struct list_head *head = &b->blocks->miscj;
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	int err = 0;

	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
	if (err)
		return err;

	list_for_each_entry_safe(pos, tmp, head, miscj) {
		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
		if (err) {
			list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
				kobject_del(&pos->kobj);

			return err;
		}
	}
	return err;
}

static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
	struct device *dev = per_cpu(mce_device, cpu);
	struct amd_northbridge *nb = NULL;
	struct threshold_bank *b = NULL;
	const char *name = get_name(bank, NULL);
	int err = 0;

	if (!dev)
		return -ENODEV;

	if (is_shared_bank(bank)) {
		nb = node_to_amd_nb(amd_get_nb_id(cpu));

		/* threshold descriptor already initialized on this node? */
		if (nb && nb->bank4) {
			/* yes, use it */
			b = nb->bank4;
			err = kobject_add(b->kobj, &dev->kobj, name);
			if (err)
				goto out;

			per_cpu(threshold_banks, cpu)[bank] = b;
			atomic_inc(&b->cpus);

			err = __threshold_add_blocks(b);

			goto out;
		}
	}

	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}

	b->kobj = kobject_create_and_add(name, &dev->kobj);
	if (!b->kobj) {
		err = -EINVAL;
		goto out_free;
	}

	per_cpu(threshold_banks, cpu)[bank] = b;

	if (is_shared_bank(bank)) {
		atomic_set(&b->cpus, 1);

		/* nb is already initialized, see above */
		if (nb) {
			WARN_ON(nb->bank4);

			nb->bank4 = b;
		}
	}

	err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
	if (!err)
		goto out;

 out_free:
	kfree(b);

 out:
	return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
	unsigned int bank;
	struct threshold_bank **bp;
	int err = 0;

	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
		     GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	per_cpu(threshold_banks, cpu) = bp;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			return err;
	}

	return err;
}

static void deallocate_threshold_block(unsigned int cpu,
				       unsigned int bank)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;
	struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

	if (!head)
		return;

	list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
		kobject_put(&pos->kobj);
		list_del(&pos->miscj);
		kfree(pos);
	}

	kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
	struct threshold_block *pos = NULL;
	struct threshold_block *tmp = NULL;

	kobject_del(b->kobj);

	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
		kobject_del(&pos->kobj);
}

static void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct amd_northbridge *nb;
	struct threshold_bank *b;

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;

	if (!b->blocks)
		goto free_out;

	if (is_shared_bank(bank)) {
		if (!atomic_dec_and_test(&b->cpus)) {
			__threshold_remove_blocks(b);
			per_cpu(threshold_banks, cpu)[bank] = NULL;
			return;
		} else {
			/*
			 * the last CPU on this node using the shared bank is
			 * going away, remove that bank now.
			 */
			nb = node_to_amd_nb(amd_get_nb_id(cpu));
			nb->bank4 = NULL;
		}
	}

	deallocate_threshold_block(cpu, bank);

free_out:
	kobject_del(b->kobj);
	kobject_put(b->kobj);
	kfree(b);
	per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
	}
	kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		threshold_create_device(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
}

static __init int threshold_init_device(void)
{
	unsigned lcpu = 0;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		int err = threshold_create_device(lcpu);

		if (err)
			return err;
	}
	threshold_cpu_callback = amd_64_threshold_cpu_callback;

	return 0;
}

/*
 * there are 3 funcs which need to be _initcalled in a logic sequence:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before
 * native mce_chrdev_device registration if running under xen platform;
 *
 * mcheck_init_device should be inited before threshold_init_device to
 * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
 *
 * so we use following _initcalls
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * when running under xen, the initcall order is 1,2,3;
 * on baremetal, we skip 1 and we do only 2 and 3.
 */
late_initcall(threshold_init_device);