ds.c

#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/perf_event.h>
#include <asm/insn.h>

#include "../perf_event.h"

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24

#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
        u32 flags, ip;
        u32 ax, bx, cx, dx;
        u32 si, di, bp, sp;
};

 */
union intel_x86_pebs_dse {
        u64 val;
        struct {
                unsigned int ld_dse:4;
                unsigned int ld_stlb_miss:1;
                unsigned int ld_locked:1;
                unsigned int ld_reserved:26;
        };
        struct {
                unsigned int st_l1d_hit:1;
                unsigned int st_reserved1:3;
                unsigned int st_stlb_miss:1;
                unsigned int st_locked:1;
                unsigned int st_reserved2:26;
        };
};

/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
        P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA), /* 0x00: unknown L3 */
        OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
        OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
        OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
        OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
        OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
        OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
        OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
        OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
        OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm */
        OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
        OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
        OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */
        OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
        OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
        OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
};
/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
        pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT);
        pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
        pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
}
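
/*
 * Decode the precise-store DSE word (L1 hit, STLB miss and lock bits) into a
 * generic perf_mem_data_src value. Stores are always reported relative to the
 * L1D and the second-level TLB, per the base value below.
 */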
static u64 precise_store_data(u64 status)
{
        union intel_x86_pebs_dse dse;
        u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

        dse.val = status;

        /*
         * bit 4: TLB access
         * 1 = store missed 2nd level TLB
         *
         * so it either hit the walker or the OS
         * otherwise hit 2nd level TLB
         */
        if (dse.st_stlb_miss)
                val |= P(TLB, MISS);
        else
                val |= P(TLB, HIT);

        /*
         * bit 0: hit L1 data cache
         * if not set, then all we know is that
         * it missed L1D
         */
        if (dse.st_l1d_hit)
                val |= P(LVL, HIT);
        else
                val |= P(LVL, MISS);

        /*
         * bit 5: Locked prefix
         */
        if (dse.st_locked)
                val |= P(LOCK, LOCKED);

        return val;
}
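
/*
 * Data source for the Haswell "data linear address" events: the memory
 * operation comes from the event's flags, and the L1 hit/miss bit of the
 * record status is only meaningful for the store events listed below.
 */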
static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
        union perf_mem_data_src dse;

        dse.val = PERF_MEM_NA;

        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
                dse.mem_op = PERF_MEM_OP_STORE;
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
                dse.mem_op = PERF_MEM_OP_LOAD;

        /*
         * L1 info is only valid for the following events:
         *
         * MEM_UOPS_RETIRED.STLB_MISS_STORES
         * MEM_UOPS_RETIRED.LOCK_STORES
         * MEM_UOPS_RETIRED.SPLIT_STORES
         * MEM_UOPS_RETIRED.ALL_STORES
         */
        if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
                if (status & 1)
                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
                else
                        dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
        }
        return dse.val;
}
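
/*
 * Decode a load-latency DSE word: bits 0-3 index pebs_data_source[], bit 4 is
 * the STLB miss bit and bit 5 the lock bit. Nehalem parts do not report the
 * TLB and lock bits, so they are marked as not available there.
 */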
static u64 load_latency_data(u64 status)
{
        union intel_x86_pebs_dse dse;
        u64 val;
        int model = boot_cpu_data.x86_model;
        int fam = boot_cpu_data.x86;

        dse.val = status;

        /*
         * use the mapping table for bits 0-3
         */
        val = pebs_data_source[dse.ld_dse];

        /*
         * Nehalem models do not support TLB, Lock info
         */
        if (fam == 0x6 && (model == 26 || model == 30
                || model == 31 || model == 46)) {
                val |= P(TLB, NA) | P(LOCK, NA);
                return val;
        }

        /*
         * bit 4: TLB access
         * 0 = did not miss 2nd level TLB
         * 1 = missed 2nd level TLB
         */
        if (dse.ld_stlb_miss)
                val |= P(TLB, MISS) | P(TLB, L2);
        else
                val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

        /*
         * bit 5: locked prefix
         */
        if (dse.ld_locked)
                val |= P(LOCK, LOCKED);

        return val;
}
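
/* PEBS record layout for format 0: architectural register state only. */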
struct pebs_record_core {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8, r9, r10, r11;
        u64 r12, r13, r14, r15;
};
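
/*
 * PEBS record layout for format 1: adds the status bitfield, the data linear
 * address (dla), the data source encoding (dse) and the load latency.
 */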
struct pebs_record_nhm {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8, r9, r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
};
/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8, r9, r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
        u64 real_ip, tsx_tuning;
};

union hsw_tsx_tuning {
        struct {
                u32 cycles_last_block : 32,
                    hle_abort : 1,
                    rtm_abort : 1,
                    instruction_abort : 1,
                    non_instruction_abort : 1,
                    retry : 1,
                    data_conflict : 1,
                    capacity_writes : 1,
                    capacity_reads : 1;
        };
        u64 value;
};

#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL

/* Same as HSW, plus TSC */
struct pebs_record_skl {
        u64 flags, ip;
        u64 ax, bx, cx, dx;
        u64 si, di, bp, sp;
        u64 r8, r9, r10, r11;
        u64 r12, r13, r14, r15;
        u64 status, dla, dse, lat;
        u64 real_ip, tsx_tuning;
        u64 tsc;
};
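
/* Program the given CPU's IA32_DS_AREA MSR with its debug_store address, if allocated. */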
  213. void init_debug_store_on_cpu(int cpu)
  214. {
  215. struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
  216. if (!ds)
  217. return;
  218. wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
  219. (u32)((u64)(unsigned long)ds),
  220. (u32)((u64)(unsigned long)ds >> 32));
  221. }
  222. void fini_debug_store_on_cpu(int cpu)
  223. {
  224. if (!per_cpu(cpu_hw_events, cpu).ds)
  225. return;
  226. wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
  227. }
  228. static DEFINE_PER_CPU(void *, insn_buffer);
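
/*
 * Allocate this CPU's PEBS buffer and, when the PEBS format is older than 2
 * (no eventing IP in the record), a PEBS_FIXUP_SIZE scratch buffer used by
 * intel_pmu_pebs_fixup_ip() to copy and decode user-space code.
 */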
static int alloc_pebs_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        int node = cpu_to_node(cpu);
        int max;
        void *buffer, *ibuffer;

        if (!x86_pmu.pebs)
                return 0;

        buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
        if (unlikely(!buffer))
                return -ENOMEM;

        /*
         * HSW+ already provides us the eventing ip; no need to allocate this
         * buffer then.
         */
        if (x86_pmu.intel_cap.pebs_format < 2) {
                ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
                if (!ibuffer) {
                        kfree(buffer);
                        return -ENOMEM;
                }
                per_cpu(insn_buffer, cpu) = ibuffer;
        }

        max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;

        ds->pebs_buffer_base = (u64)(unsigned long)buffer;
        ds->pebs_index = ds->pebs_buffer_base;
        ds->pebs_absolute_maximum = ds->pebs_buffer_base +
                max * x86_pmu.pebs_record_size;

        return 0;
}
static void release_pebs_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds || !x86_pmu.pebs)
                return;

        kfree(per_cpu(insn_buffer, cpu));
        per_cpu(insn_buffer, cpu) = NULL;

        kfree((void *)(unsigned long)ds->pebs_buffer_base);
        ds->pebs_buffer_base = 0;
}

static int alloc_bts_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        int node = cpu_to_node(cpu);
        int max, thresh;
        void *buffer;

        if (!x86_pmu.bts)
                return 0;

        buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
        if (unlikely(!buffer)) {
                WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
                return -ENOMEM;
        }

        max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
        thresh = max / 16;

        ds->bts_buffer_base = (u64)(unsigned long)buffer;
        ds->bts_index = ds->bts_buffer_base;
        ds->bts_absolute_maximum = ds->bts_buffer_base +
                max * BTS_RECORD_SIZE;
        ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
                thresh * BTS_RECORD_SIZE;

        return 0;
}

static void release_bts_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds || !x86_pmu.bts)
                return;

        kfree((void *)(unsigned long)ds->bts_buffer_base);
        ds->bts_buffer_base = 0;
}
static int alloc_ds_buffer(int cpu)
{
        int node = cpu_to_node(cpu);
        struct debug_store *ds;

        ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
        if (unlikely(!ds))
                return -ENOMEM;

        per_cpu(cpu_hw_events, cpu).ds = ds;

        return 0;
}

static void release_ds_buffer(int cpu)
{
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

        if (!ds)
                return;

        per_cpu(cpu_hw_events, cpu).ds = NULL;
        kfree(ds);
}

void release_ds_buffers(void)
{
        int cpu;

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        get_online_cpus();
        for_each_online_cpu(cpu)
                fini_debug_store_on_cpu(cpu);

        for_each_possible_cpu(cpu) {
                release_pebs_buffer(cpu);
                release_bts_buffer(cpu);
                release_ds_buffer(cpu);
        }
        put_online_cpus();
}
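
/*
 * Allocate DS, BTS and PEBS buffers for every possible CPU. A feature is
 * marked active only if its buffers could be allocated everywhere; on
 * failure, the partially allocated buffers are torn down again.
 */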
void reserve_ds_buffers(void)
{
        int bts_err = 0, pebs_err = 0;
        int cpu;

        x86_pmu.bts_active = 0;
        x86_pmu.pebs_active = 0;

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        if (!x86_pmu.bts)
                bts_err = 1;

        if (!x86_pmu.pebs)
                pebs_err = 1;

        get_online_cpus();

        for_each_possible_cpu(cpu) {
                if (alloc_ds_buffer(cpu)) {
                        bts_err = 1;
                        pebs_err = 1;
                }

                if (!bts_err && alloc_bts_buffer(cpu))
                        bts_err = 1;

                if (!pebs_err && alloc_pebs_buffer(cpu))
                        pebs_err = 1;

                if (bts_err && pebs_err)
                        break;
        }

        if (bts_err) {
                for_each_possible_cpu(cpu)
                        release_bts_buffer(cpu);
        }

        if (pebs_err) {
                for_each_possible_cpu(cpu)
                        release_pebs_buffer(cpu);
        }

        if (bts_err && pebs_err) {
                for_each_possible_cpu(cpu)
                        release_ds_buffer(cpu);
        } else {
                if (x86_pmu.bts && !bts_err)
                        x86_pmu.bts_active = 1;

                if (x86_pmu.pebs && !pebs_err)
                        x86_pmu.pebs_active = 1;

                for_each_online_cpu(cpu)
                        init_debug_store_on_cpu(cpu);
        }

        put_online_cpus();
}
/*
 * BTS
 */

struct event_constraint bts_constraint =
        EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
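
/*
 * Enable branch trace store: set TR/BTS in DEBUGCTL and honour the event's
 * interrupt, OS and USR configuration bits.
 */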
void intel_pmu_enable_bts(u64 config)
{
        unsigned long debugctlmsr;

        debugctlmsr = get_debugctlmsr();

        debugctlmsr |= DEBUGCTLMSR_TR;
        debugctlmsr |= DEBUGCTLMSR_BTS;
        if (config & ARCH_PERFMON_EVENTSEL_INT)
                debugctlmsr |= DEBUGCTLMSR_BTINT;

        if (!(config & ARCH_PERFMON_EVENTSEL_OS))
                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

        if (!(config & ARCH_PERFMON_EVENTSEL_USR))
                debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

        update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        unsigned long debugctlmsr;

        if (!cpuc->ds)
                return;

        debugctlmsr = get_debugctlmsr();

        debugctlmsr &=
                ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
                  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

        update_debugctlmsr(debugctlmsr);
}
int intel_pmu_drain_bts_buffer(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct bts_record {
                u64 from;
                u64 to;
                u64 flags;
        };
        struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
        struct bts_record *at, *base, *top;
        struct perf_output_handle handle;
        struct perf_event_header header;
        struct perf_sample_data data;
        unsigned long skip = 0;
        struct pt_regs regs;

        if (!event)
                return 0;

        if (!x86_pmu.bts_active)
                return 0;

        base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
        top = (struct bts_record *)(unsigned long)ds->bts_index;

        if (top <= base)
                return 0;

        memset(&regs, 0, sizeof(regs));

        ds->bts_index = ds->bts_buffer_base;

        perf_sample_data_init(&data, 0, event->hw.last_period);

        /*
         * BTS leaks kernel addresses in branches across the cpl boundary,
         * such as traps or system calls, so unless the user is asking for
         * kernel tracing (and right now it's not possible), we'd need to
         * filter them out. But first we need to count how many of those we
         * have in the current batch. This is an extra O(n) pass, however,
         * it's much faster than the other one especially considering that
         * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
         * alloc_bts_buffer()).
         */
        for (at = base; at < top; at++) {
                /*
                 * Note that right now *this* BTS code only works if
                 * attr::exclude_kernel is set, but let's keep this extra
                 * check here in case that changes.
                 */
                if (event->attr.exclude_kernel &&
                    (kernel_ip(at->from) || kernel_ip(at->to)))
                        skip++;
        }

        /*
         * Prepare a generic sample, i.e. fill in the invariant fields.
         * We will overwrite the from and to address before we output
         * the sample.
         */
        rcu_read_lock();
        perf_prepare_sample(&header, &data, event, &regs);

        if (perf_output_begin(&handle, event, header.size *
                              (top - base - skip)))
                goto unlock;

        for (at = base; at < top; at++) {
                /* Filter out any records that contain kernel addresses. */
                if (event->attr.exclude_kernel &&
                    (kernel_ip(at->from) || kernel_ip(at->to)))
                        continue;

                data.ip = at->from;
                data.addr = at->to;

                perf_output_sample(&handle, &header, &data, event);
        }

        perf_output_end(&handle);

        /* There's new data available. */
        event->hw.interrupts++;
        event->pending_kill = POLL_IN;
unlock:
        rcu_read_unlock();
        return 1;
}
static inline void intel_pmu_drain_pebs_buffer(void)
{
        struct pt_regs regs;

        x86_pmu.drain_pebs(&regs);
}

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
        if (!sched_in)
                intel_pmu_drain_pebs_buffer();
}
/*
 * PEBS
 */
struct event_constraint intel_core2_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_glm_pebs_event_constraints[] = {
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
};
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
        INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_bdw_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};

struct event_constraint intel_skl_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
        /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
        INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf), /* MEM_LOAD_L3_MISS_RETIRED.* */
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
        EVENT_CONSTRAINT_END
};
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
        struct event_constraint *c;

        if (!event->attr.precise_ip)
                return NULL;

        if (x86_pmu.pebs_constraints) {
                for_each_event_constraint(c, x86_pmu.pebs_constraints) {
                        if ((event->hw.config & c->cmask) == c->code) {
                                event->hw.flags |= c->flags;
                                return c;
                        }
                }
        }

        return &emptyconstraint;
}

/*
 * We need the sched_task callback even for per-cpu events when we use
 * the large interrupt threshold, such that we can provide PID and TID
 * to PEBS samples.
 */
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
        return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}
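
/*
 * Pick the PEBS interrupt threshold: if every PEBS event on this CPU can use
 * the large buffer, interrupt only when the buffer is nearly full; otherwise
 * interrupt after every single record.
 */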
static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
        struct debug_store *ds = cpuc->ds;
        u64 threshold;

        if (cpuc->n_pebs == cpuc->n_large_pebs) {
                threshold = ds->pebs_absolute_maximum -
                        x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
        } else {
                threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
        }

        ds->pebs_interrupt_threshold = threshold;
}

static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
{
        /*
         * Make sure we get updated with the first PEBS
         * event. It will trigger also during removal, but
         * that does not hurt:
         */
        bool update = cpuc->n_pebs == 1;

        if (needed_cb != pebs_needs_sched_cb(cpuc)) {
                if (!needed_cb)
                        perf_sched_cb_inc(pmu);
                else
                        perf_sched_cb_dec(pmu);

                update = true;
        }

        if (update)
                pebs_update_threshold(cpuc);
}
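
/*
 * pmu::add / pmu::del hooks: track how many PEBS events (and how many of them
 * may use the large threshold) are on this CPU and keep the threshold and
 * sched_task callback state in sync.
 */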
void intel_pmu_pebs_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        bool needed_cb = pebs_needs_sched_cb(cpuc);

        cpuc->n_pebs++;
        if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
                cpuc->n_large_pebs++;

        pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}

void intel_pmu_pebs_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        struct debug_store *ds = cpuc->ds;

        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

        cpuc->pebs_enabled |= 1ULL << hwc->idx;

        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled |= 1ULL << 63;

        /*
         * Use auto-reload if possible to save an MSR write in the PMI.
         * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD
         * can change the sample period.
         */
        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
                ds->pebs_event_reset[hwc->idx] =
                        (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
        }
}
void intel_pmu_pebs_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        bool needed_cb = pebs_needs_sched_cb(cpuc);

        cpuc->n_pebs--;
        if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
                cpuc->n_large_pebs--;

        pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}

void intel_pmu_pebs_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        if (cpuc->n_pebs == cpuc->n_large_pebs)
                intel_pmu_drain_pebs_buffer();

        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled &= ~(1ULL << 63);

        if (cpuc->enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->pebs_enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->pebs_enabled)
                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
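
/*
 * The PEBS trap-like assist records the address of the instruction *after*
 * the one that caused the event. Starting from the last LBR branch target,
 * decode forward instruction by instruction to recover the eventing IP.
 * Returns 1 when the resulting IP is exact, 0 when it could not be recovered.
 */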
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        unsigned long from = cpuc->lbr_entries[0].from;
        unsigned long old_to, to = cpuc->lbr_entries[0].to;
        unsigned long ip = regs->ip;
        int is_64bit = 0;
        void *kaddr;
        int size;

        /*
         * We don't need to fix up if the PEBS assist is fault-like
         */
        if (!x86_pmu.intel_cap.pebs_trap)
                return 1;

        /*
         * No LBR entry, no basic block, no rewinding
         */
        if (!cpuc->lbr_stack.nr || !from || !to)
                return 0;

        /*
         * Basic blocks should never cross user/kernel boundaries
         */
        if (kernel_ip(ip) != kernel_ip(to))
                return 0;

        /*
         * unsigned math, either ip is before the start (impossible) or
         * the basic block is larger than 1 page (sanity)
         */
        if ((ip - to) > PEBS_FIXUP_SIZE)
                return 0;

        /*
         * We sampled a branch insn, rewind using the LBR stack
         */
        if (ip == to) {
                set_linear_ip(regs, from);
                return 1;
        }

        size = ip - to;
        if (!kernel_ip(ip)) {
                int bytes;
                u8 *buf = this_cpu_read(insn_buffer);

                /* 'size' must fit our buffer, see above */
                bytes = copy_from_user_nmi(buf, (void __user *)to, size);
                if (bytes != 0)
                        return 0;

                kaddr = buf;
        } else {
                kaddr = (void *)to;
        }

        do {
                struct insn insn;

                old_to = to;

#ifdef CONFIG_X86_64
                is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
                insn_init(&insn, kaddr, size, is_64bit);
                insn_get_length(&insn);

                /*
                 * Make sure there was not a problem decoding the
                 * instruction and getting the length. This is
                 * doubly important because we have an infinite
                 * loop if insn.length=0.
                 */
                if (!insn.length)
                        break;

                to += insn.length;
                kaddr += insn.length;
                size -= insn.length;
        } while (to < ip);

        if (to == ip) {
                set_linear_ip(regs, old_to);
                return 1;
        }

        /*
         * Even though we decoded the basic block, the instruction stream
         * never matched the given IP, either the TO or the IP got corrupted.
         */
        return 0;
}
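
/*
 * Extract the TSX weight (cycles in the last transactional block) and the
 * transaction flags/abort code from the HSW+ tsx_tuning and AX fields.
 */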
static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
{
        if (pebs->tsx_tuning) {
                union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
                return tsx.cycles_last_block;
        }
        return 0;
}

static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
{
        u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

        /* For RTM XABORTs also log the abort code from AX */
        if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
                txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
        return txn;
}
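
/*
 * Fill in perf_sample_data and a pt_regs from one raw PEBS record: weight,
 * data source, address, transaction info and the (fixed-up) register state
 * used for the sample IP and, if requested, PERF_SAMPLE_REGS_INTR.
 */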
static void setup_pebs_sample_data(struct perf_event *event,
                                   struct pt_regs *iregs, void *__pebs,
                                   struct perf_sample_data *data,
                                   struct pt_regs *regs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
                (PERF_X86_EVENT_PEBS_ST_HSW | \
                 PERF_X86_EVENT_PEBS_LD_HSW | \
                 PERF_X86_EVENT_PEBS_NA_HSW)
        /*
         * We cast to the biggest pebs_record but are careful not to
         * unconditionally access the 'extra' entries.
         */
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct pebs_record_skl *pebs = __pebs;
        u64 sample_type;
        int fll, fst, dsrc;
        int fl = event->hw.flags;

        if (pebs == NULL)
                return;

        sample_type = event->attr.sample_type;
        dsrc = sample_type & PERF_SAMPLE_DATA_SRC;

        fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
        fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

        perf_sample_data_init(data, 0, event->hw.last_period);

        data->period = event->hw.last_period;

        /*
         * Use latency for weight (only avail with PEBS-LL)
         */
        if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
                data->weight = pebs->lat;

        /*
         * data.data_src encodes the data source
         */
        if (dsrc) {
                u64 val = PERF_MEM_NA;
                if (fll)
                        val = load_latency_data(pebs->dse);
                else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
                        val = precise_datala_hsw(event, pebs->dse);
                else if (fst)
                        val = precise_store_data(pebs->dse);
                data->data_src.val = val;
        }

        /*
         * We use the interrupt regs as a base because the PEBS record does not
         * contain a full regs set, specifically it seems to lack segment
         * descriptors, which get used by things like user_mode().
         *
         * In the simple case fix up only the IP for PERF_SAMPLE_IP.
         *
         * We must however always use BP,SP from iregs for the unwinder to stay
         * sane; the record BP,SP can point into thin air when the record is
         * from a previous PMI context or an (I)RET happened between the record
         * and PMI.
         */
        *regs = *iregs;
        regs->flags = pebs->flags;
        set_linear_ip(regs, pebs->ip);

        if (sample_type & PERF_SAMPLE_REGS_INTR) {
                regs->ax = pebs->ax;
                regs->bx = pebs->bx;
                regs->cx = pebs->cx;
                regs->dx = pebs->dx;
                regs->si = pebs->si;
                regs->di = pebs->di;

                /*
                 * Per the above; only set BP,SP if we don't need callchains.
                 *
                 * XXX: does this make sense?
                 */
                if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
                        regs->bp = pebs->bp;
                        regs->sp = pebs->sp;
                }

                /*
                 * Preserve PERF_EFLAGS_VM from set_linear_ip().
                 */
                regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
#ifndef CONFIG_X86_32
                regs->r8 = pebs->r8;
                regs->r9 = pebs->r9;
                regs->r10 = pebs->r10;
                regs->r11 = pebs->r11;
                regs->r12 = pebs->r12;
                regs->r13 = pebs->r13;
                regs->r14 = pebs->r14;
                regs->r15 = pebs->r15;
#endif
        }

        if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
                regs->ip = pebs->real_ip;
                regs->flags |= PERF_EFLAGS_EXACT;
        } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
                regs->flags |= PERF_EFLAGS_EXACT;
        else
                regs->flags &= ~PERF_EFLAGS_EXACT;

        if ((sample_type & PERF_SAMPLE_ADDR) &&
            x86_pmu.intel_cap.pebs_format >= 1)
                data->addr = pebs->dla;

        if (x86_pmu.intel_cap.pebs_format >= 2) {
                /* Only set the TSX weight when no memory weight. */
                if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
                        data->weight = intel_hsw_weight(pebs);

                if (sample_type & PERF_SAMPLE_TRANSACTION)
                        data->txn = intel_hsw_transaction(pebs);
        }

        /*
         * PEBS v3 supplies an accurate TSC-based time stamp, so we use
         * it for the sample timestamp.
         *
         * We can only do this for the default trace clock.
         */
        if (x86_pmu.intel_cap.pebs_format >= 3 &&
            event->attr.use_clockid == 0)
                data->time = native_sched_clock_from_tsc(pebs->tsc);

        if (has_branch_stack(event))
                data->br_stack = &cpuc->lbr_stack;
}
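
/*
 * Find the next record in [base, top) whose status attributes it to PEBS
 * counter 'bit'. Formats before v3 can have stale status bits, so the raw
 * status is additionally masked with the currently enabled PEBS counters.
 */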
static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        void *at;
        u64 pebs_status;

        /*
         * fmt0 does not have a status bitfield (does not use
         * the pebs_record_nhm format)
         */
        if (x86_pmu.intel_cap.pebs_format < 1)
                return base;

        if (base == NULL)
                return NULL;

        for (at = base; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;

                if (test_bit(bit, (unsigned long *)&p->status)) {
                        /* PEBS v3 has accurate status bits */
                        if (x86_pmu.intel_cap.pebs_format >= 3)
                                return at;

                        if (p->status == (1 << bit))
                                return at;

                        /* clear non-PEBS bit and re-check */
                        pebs_status = p->status & cpuc->pebs_enabled;
                        pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
                        if (pebs_status == (1 << bit))
                                return at;
                }
        }
        return NULL;
}
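
/*
 * Output 'count' samples for one event from the records in [base, top).
 * All but the last record go straight to perf_event_output(); the last one
 * goes through perf_event_overflow() so period/throttling handling applies.
 */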
static void __intel_pmu_pebs_event(struct perf_event *event,
                                   struct pt_regs *iregs,
                                   void *base, void *top,
                                   int bit, int count)
{
        struct perf_sample_data data;
        struct pt_regs regs;
        void *at = get_next_pebs_record_by_bit(base, top, bit);

        if (!intel_pmu_save_and_restart(event) &&
            !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
                return;

        while (count > 1) {
                setup_pebs_sample_data(event, iregs, at, &data, &regs);
                perf_event_output(event, &data, &regs);
                at += x86_pmu.pebs_record_size;
                at = get_next_pebs_record_by_bit(at, top, bit);
                count--;
        }

        setup_pebs_sample_data(event, iregs, at, &data, &regs);

        /*
         * All but the last records are processed.
         * The last one is left to be able to call the overflow handler.
         */
        if (perf_event_overflow(event, &data, &regs)) {
                x86_pmu_stop(event, 0);
                return;
        }
}
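
/*
 * Drain handler for PEBS format 0: only counter 0 (PMC0) can do PEBS and the
 * records carry no status field, so every record belongs to that event.
 */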
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event = cpuc->events[0]; /* PMC0 only */
        struct pebs_record_core *at, *top;
        int n;

        if (!x86_pmu.pebs_active)
                return;

        at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

        /*
         * Whatever else happens, drain the thing
         */
        ds->pebs_index = ds->pebs_buffer_base;

        if (!test_bit(0, cpuc->active_mask))
                return;

        WARN_ON_ONCE(!event);

        if (!event->attr.precise_ip)
                return;

        n = top - at;
        if (n <= 0)
                return;

        __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}
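
/*
 * Drain handler for PEBS format 1 and later: walk all records, attribute
 * each one to a counter via its status bits, then emit the per-counter
 * samples. Records whose ownership is ambiguous (collisions) are dropped
 * and reported via perf_log_lost_samples().
 */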
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct perf_event *event;
        void *base, *at, *top;
        short counts[MAX_PEBS_EVENTS] = {};
        short error[MAX_PEBS_EVENTS] = {};
        int bit, i;

        if (!x86_pmu.pebs_active)
                return;

        base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

        ds->pebs_index = ds->pebs_buffer_base;

        if (unlikely(base >= top))
                return;

        for (at = base; at < top; at += x86_pmu.pebs_record_size) {
                struct pebs_record_nhm *p = at;
                u64 pebs_status;

                pebs_status = p->status & cpuc->pebs_enabled;
                pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;

                /* PEBS v3 has more accurate status bits */
                if (x86_pmu.intel_cap.pebs_format >= 3) {
                        for_each_set_bit(bit, (unsigned long *)&pebs_status,
                                         x86_pmu.max_pebs_events)
                                counts[bit]++;

                        continue;
                }

                /*
                 * On some CPUs the PEBS status can be zero when PEBS is
                 * racing with clearing of GLOBAL_STATUS.
                 *
                 * Normally we would drop that record, but in the
                 * case when there is only a single active PEBS event
                 * we can assume it's for that event.
                 */
                if (!pebs_status && cpuc->pebs_enabled &&
                    !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
                        pebs_status = cpuc->pebs_enabled;

                bit = find_first_bit((unsigned long *)&pebs_status,
                                     x86_pmu.max_pebs_events);
                if (bit >= x86_pmu.max_pebs_events)
                        continue;

                /*
                 * The PEBS hardware does not deal well with the situation
                 * when events happen near to each other and multiple bits
                 * are set. But it should happen rarely.
                 *
                 * If these events include one PEBS and multiple non-PEBS
                 * events, it doesn't impact the PEBS record. The record will
                 * be handled normally. (slow path)
                 *
                 * If these events include two or more PEBS events, the
                 * records for the events can be collapsed into a single
                 * one, and it's not possible to reconstruct all events
                 * that caused the PEBS record. It's called collision.
                 * If collision happened, the record will be dropped.
                 */
                if (p->status != (1ULL << bit)) {
                        for_each_set_bit(i, (unsigned long *)&pebs_status,
                                         x86_pmu.max_pebs_events)
                                error[i]++;
                        continue;
                }

                counts[bit]++;
        }

        for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
                if ((counts[bit] == 0) && (error[bit] == 0))
                        continue;

                event = cpuc->events[bit];
                if (WARN_ON_ONCE(!event))
                        continue;

                if (WARN_ON_ONCE(!event->attr.precise_ip))
                        continue;

                /* log dropped samples number */
                if (error[bit])
                        perf_log_lost_samples(event, error[bit]);

                if (counts[bit]) {
                        __intel_pmu_pebs_event(event, iregs, base,
                                               top, bit, counts[bit]);
                }
        }
}
/*
 * BTS, PEBS probe and setup
 */

void __init intel_ds_init(void)
{
        /*
         * No support for 32bit formats
         */
        if (!boot_cpu_has(X86_FEATURE_DTES64))
                return;

        x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
        if (x86_pmu.pebs) {
                char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
                int format = x86_pmu.intel_cap.pebs_format;

                switch (format) {
                case 0:
                        pr_cont("PEBS fmt0%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
                        /*
                         * Using >PAGE_SIZE buffers makes the WRMSR to
                         * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
                         * mysteriously hang on Core2.
                         *
                         * As a workaround, we don't do this.
                         */
                        x86_pmu.pebs_buffer_size = PAGE_SIZE;
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
                        break;

                case 1:
                        pr_cont("PEBS fmt1%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;

                case 2:
                        pr_cont("PEBS fmt2%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        break;

                case 3:
                        pr_cont("PEBS fmt3%c, ", pebs_type);
                        x86_pmu.pebs_record_size =
                                sizeof(struct pebs_record_skl);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
                        break;

                default:
                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
                        x86_pmu.pebs = 0;
                }
        }
}
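
/*
 * Re-program MSR_IA32_DS_AREA from the saved per-CPU debug_store pointer,
 * e.g. when the MSR contents have been lost across a suspend/resume cycle.
 */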
void perf_restore_debug_store(void)
{
        struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

        if (!x86_pmu.bts && !x86_pmu.pebs)
                return;

        wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}