perf_event_intel_lbr.c

#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "perf_event.h"

enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
};
/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)

#define LBR_PLM		(LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)
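
/*
 * Illustrative note (a sketch, not part of the original code): LBR_SELECT
 * bits *suppress* capture, so a value built to record only user-level
 * indirect calls would set every filter bit except LBR_IND_CALL, plus
 * LBR_KERNEL to drop ring-0 branches, e.g.:
 *
 *	u64 sel = LBR_KERNEL | (LBR_ANY & ~LBR_IND_CALL);
 *
 * The HW filter setup below derives exactly this kind of value by
 * inverting the "capture" mask against LBR_SEL_MASK.
 */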
#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)

#define for_each_branch_sample_type(x) \
	for ((x) = PERF_SAMPLE_BRANCH_USER; \
	     (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
	X86_BR_NONE     = 0,      /* unknown */

	X86_BR_USER     = 1 << 0, /* branch target is user */
	X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */

	X86_BR_CALL     = 1 << 2, /* call */
	X86_BR_RET      = 1 << 3, /* return */
	X86_BR_SYSCALL  = 1 << 4, /* syscall */
	X86_BR_SYSRET   = 1 << 5, /* syscall return */
	X86_BR_INT      = 1 << 6, /* sw interrupt */
	X86_BR_IRET     = 1 << 7, /* return from interrupt */
	X86_BR_JCC      = 1 << 8, /* conditional */
	X86_BR_JMP      = 1 << 9, /* jump */
	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)

#define X86_BR_ANY       \
	(X86_BR_CALL    |\
	 X86_BR_RET     |\
	 X86_BR_SYSCALL |\
	 X86_BR_SYSRET  |\
	 X86_BR_INT     |\
	 X86_BR_IRET    |\
	 X86_BR_JCC     |\
	 X86_BR_JMP     |\
	 X86_BR_IRQ     |\
	 X86_BR_IND_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL		 \
	(X86_BR_CALL		|\
	 X86_BR_IND_CALL	|\
	 X86_BR_SYSCALL		|\
	 X86_BR_IRQ		|\
	 X86_BR_INT)
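
/*
 * Illustrative example (not part of the original code): a request for
 * PERF_SAMPLE_BRANCH_ANY_RETURN at the user level yields a software mask of
 *
 *	br_sel = X86_BR_USER | X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
 *
 * The software filter below keeps an LBR entry only when its classified
 * type satisfies (br_sel & type) == type, i.e. a user-level near return
 * (X86_BR_RET | X86_BR_USER) passes while a kernel-level one is discarded.
 */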
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
 * otherwise it becomes near impossible to get a reliable stack.
 */
static void __intel_pmu_lbr_enable(void)
{
	u64 debugctl;
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
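
/*
 * Background note (from the SDM description of IA32_DEBUGCTL, not from this
 * file): DEBUGCTLMSR_LBR is bit 0 and DEBUGCTLMSR_FREEZE_LBRS_ON_PMI is
 * bit 11, so the enable path above effectively does, in raw MSR terms:
 *
 *	debugctl |= (1ULL << 0) | (1ULL << 11);
 *
 * i.e. start LBR recording and ask the CPU to freeze the LBR stack when a
 * PMI is raised, so the captured branches match the sampled interrupt.
 */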
static void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to + i, 0);
	}
}

void intel_pmu_lbr_reset(void)
{
	if (!x86_pmu.lbr_nr)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_reset_32();
	else
		intel_pmu_lbr_reset_64();
}
void intel_pmu_lbr_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	/*
	 * Reset the LBR stack if we changed task context to
	 * avoid data leaks.
	 */
	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = event->ctx;
	}
	cpuc->br_sel = event->hw.branch_reg.reg;

	cpuc->lbr_users++;
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);

	if (cpuc->enabled && !cpuc->lbr_users) {
		__intel_pmu_lbr_disable();
		/* avoid stale pointer */
		cpuc->lbr_context = NULL;
	}
}

void intel_pmu_lbr_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_enable();
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}
/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);

	return tos;
}
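
/*
 * Worked example (illustrative, not part of the original code): the read
 * loops below walk the LBR ring from most to least recent using
 *
 *	lbr_idx = (tos - i) & (x86_pmu.lbr_nr - 1);
 *
 * which relies on lbr_nr being a power of two. With lbr_nr = 16 and
 * tos = 3, the indices visited are 3, 2, 1, 0, 15, 14, ..., 4.
 */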
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
		cpuc->lbr_entries[i].mispred	= 0;
		cpuc->lbr_entries[i].predicted	= 0;
		cpuc->lbr_entries[i].reserved	= 0;
	}
	cpuc->lbr_stack.nr = i;
}
/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
			mis = !!(from & LBR_FROM_FLAG_MISPRED);
			pred = !mis;
			/* strip the mispredict flag out of the address */
			from = (u64)((((s64)from) << 1) >> 1);
		}

		cpuc->lbr_entries[i].from	= from;
		cpuc->lbr_entries[i].to		= to;
		cpuc->lbr_entries[i].mispred	= mis;
		cpuc->lbr_entries[i].predicted	= pred;
		cpuc->lbr_entries[i].reserved	= 0;
	}
	cpuc->lbr_stack.nr = i;
}
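
/*
 * Illustrative note (a sketch, not part of the original code): in the
 * EIP_FLAGS format bit 63 of the FROM value holds the mispredict flag, so
 * the address itself only lives in bits 62:0. The shift pair above drops
 * the flag and sign-extends bit 62 back into bit 63, restoring canonical
 * kernel addresses, e.g.:
 *
 *	u64 raw  = 0x7fffffff81000000ULL;           (flag clear)
 *	u64 addr = (u64)((((s64)raw) << 1) >> 1);   (yields 0xffffffff81000000)
 */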
void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_read_32(cpuc);
	else
		intel_pmu_lbr_read_64(cpuc);

	intel_pmu_lbr_filter(cpuc);
}
/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL) {
		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
			return -EACCES;
		mask |= X86_BR_KERNEL;
	}

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
	 */
	event->hw.branch_reg.reg = mask;

	return 0;
}
/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, m;
	u64 v;

	for_each_branch_sample_type(m) {
		if (!(br_type & m))
			continue;

		v = x86_pmu.lbr_sel_map[m];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}
	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	/* LBR_SELECT operates in suppress mode so invert mask */
	reg->config = ~mask & x86_pmu.lbr_sel_mask;

	return 0;
}
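
/*
 * Worked example (illustrative, not part of the original code): on SNB a
 * request for kernel-only indirect calls maps to
 *
 *	mask        = LBR_KERNEL | LBR_IND_CALL;
 *	reg->config = ~mask & LBR_SEL_MASK;      (0x1ff & ~0x011 = 0x1ee)
 *
 * i.e. every other filter bit is set, suppressing everything except ring-0
 * capture and indirect calls, which matches the suppress-mode semantics of
 * MSR_LBR_SELECT.
 */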
int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}
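
/*
 * Usage sketch (illustrative, not part of the original code): the
 * branch_sample_type consumed above comes straight from userspace via
 * perf_event_open(), roughly:
 *
 *	struct perf_event_attr attr = {
 *		.type               = PERF_TYPE_HARDWARE,
 *		.config             = PERF_COUNT_HW_CPU_CYCLES,
 *		.sample_period      = 100000,
 *		.sample_type        = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
 *		.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
 *				      PERF_SAMPLE_BRANCH_ANY_CALL,
 *	};
 *
 * Each sample then carries a PERF_SAMPLE_BRANCH_STACK record filled from
 * cpuc->lbr_stack after the HW and SW filters above have been applied.
 */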
/*
 * return the type of control flow change at address "from"
 * the instruction is not necessarily a branch (in case of interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to)
{
	struct insn insn;
	void *addr;
	int bytes, size = MAX_INSN_SIZE;
	int ret = X86_BR_NONE;
	int ext, to_plm, from_plm;
	u8 buf[MAX_INSN_SIZE];
	int is64 = 0;

	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

	/*
	 * may be zero if lbr did not fill up after a reset by the time
	 * we get a PMU interrupt
	 */
	if (from == 0 || to == 0)
		return X86_BR_NONE;

	if (from_plm == X86_BR_USER) {
		/*
		 * can happen if measuring at the user level only
		 * and we interrupt in a kernel thread, e.g., idle.
		 */
		if (!current->mm)
			return X86_BR_NONE;

		/* may fail if text not present */
		bytes = copy_from_user_nmi(buf, (void __user *)from, size);
		if (bytes != size)
			return X86_BR_NONE;

		addr = buf;
	} else {
		/*
		 * The LBR logs any address in the IP, even if the IP just
		 * faulted. This means userspace can control the from address.
		 * Ensure we don't blindly read any address by validating it is
		 * a known text address.
		 */
		if (kernel_text_address(from))
			addr = (void *)from;
		else
			return X86_BR_NONE;
	}
	/*
	 * decoder needs to know the ABI especially
	 * on 64-bit systems running 32-bit apps
	 */
#ifdef CONFIG_X86_64
	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
	insn_init(&insn, addr, is64);
	insn_get_opcode(&insn);

	switch (insn.opcode.bytes[0]) {
	case 0xf:
		switch (insn.opcode.bytes[1]) {
		case 0x05: /* syscall */
		case 0x34: /* sysenter */
			ret = X86_BR_SYSCALL;
			break;
		case 0x07: /* sysret */
		case 0x35: /* sysexit */
			ret = X86_BR_SYSRET;
			break;
		case 0x80 ... 0x8f: /* conditional */
			ret = X86_BR_JCC;
			break;
		default:
			ret = X86_BR_NONE;
		}
		break;
	case 0x70 ... 0x7f: /* conditional */
		ret = X86_BR_JCC;
		break;
	case 0xc2: /* near ret */
	case 0xc3: /* near ret */
	case 0xca: /* far ret */
	case 0xcb: /* far ret */
		ret = X86_BR_RET;
		break;
	case 0xcf: /* iret */
		ret = X86_BR_IRET;
		break;
	case 0xcc ... 0xce: /* int3, int, into */
		ret = X86_BR_INT;
		break;
	case 0xe8: /* call near rel */
	case 0x9a: /* call far absolute */
		ret = X86_BR_CALL;
		break;
	case 0xe0 ... 0xe3: /* loop jmp */
		ret = X86_BR_JCC;
		break;
	case 0xe9 ... 0xeb: /* jmp */
		ret = X86_BR_JMP;
		break;
	case 0xff: /* call near absolute, call far absolute ind */
		insn_get_modrm(&insn);
		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
		switch (ext) {
		case 2: /* near ind call */
		case 3: /* far ind call */
			ret = X86_BR_IND_CALL;
			break;
		case 4:
		case 5:
			ret = X86_BR_JMP;
			break;
		}
		break;
	default:
		ret = X86_BR_NONE;
	}
	/*
	 * interrupts, traps, faults (and thus ring transitions) may
	 * occur on any instruction. Thus, to classify them correctly,
	 * we need to first look at the from and to priv levels. If they
	 * are different and to is in the kernel, then it indicates
	 * a ring transition. If the from instruction is not a ring
	 * transition instr (syscall, sysenter, int), then it means
	 * it was an irq, trap or fault.
	 *
	 * we have no way of detecting kernel to kernel faults.
	 */
	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
		ret = X86_BR_IRQ;

	/*
	 * branch priv level determined by target as
	 * is done by HW when LBR_SELECT is implemented
	 */
	if (ret != X86_BR_NONE)
		ret |= to_plm;

	return ret;
}
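
/*
 * Example (illustrative, not part of the original code): a user-space
 * "call rel32" (opcode 0xe8) whose target is also in user space is
 * classified as
 *
 *	ret = X86_BR_CALL | X86_BR_USER;
 *
 * while a user-to-kernel transition whose source instruction is neither a
 * syscall/sysenter nor an int falls through to X86_BR_IRQ | X86_BR_KERNEL.
 */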
/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;

		type = branch_type(from, to);

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}
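
/*
 * Worked example (illustrative, not part of the original code): with a
 * 4-entry stack whose "from" fields end up as
 *
 *	{ A, 0, 0, B }        (entries 1 and 2 were discarded above)
 *
 * the compaction loop shifts the survivors down to { A, B } and reduces
 * lbr_stack.nr from 4 to 2, so the sample only exposes branches that match
 * the requested filter.
 */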
/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
					| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
};
static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
					| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
};
/* core */
void intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr     = 4;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("4-deep LBR, ");
}

/* nehalem/westmere */
void intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}
/* sandy bridge */
void intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
	pr_cont("16-deep LBR, ");
}
/* atom */
void intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
	if (boot_cpu_data.x86_mask < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}