perf_event_intel.c

  1. /*
  2. * Per core/cpu state
  3. *
  4. * Used to coordinate shared registers between HT threads or
  5. * among events on a single PMU.
  6. */
  7. #include <linux/stddef.h>
  8. #include <linux/types.h>
  9. #include <linux/init.h>
  10. #include <linux/slab.h>
  11. #include <linux/export.h>
  12. #include <asm/hardirq.h>
  13. #include <asm/apic.h>
  14. #include "perf_event.h"
  15. /*
  16. * Intel PerfMon, used on Core and later.
  17. */
  18. static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
  19. {
  20. [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
  21. [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
  22. [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
  23. [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
  24. [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
  25. [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
  26. [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
  27. [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
  28. };
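/*
 * Note on the encodings above: each entry is the low 16 bits of a raw
 * PERFEVTSEL value, i.e. (umask << 8) | event_select.  0x412e, for example,
 * is event 0x2e with umask 0x41 (the architectural LLC-misses event).
 * 0x0300 is a pseudo-encoding with no hardware counterpart; the fixed
 * counter constraint tables below map it onto fixed counter 2
 * (CPU_CLK_UNHALTED.REF).
 */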
  29. static struct event_constraint intel_core_event_constraints[] __read_mostly =
  30. {
  31. INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
  32. INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
  33. INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
  34. INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
  35. INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
  36. INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
  37. EVENT_CONSTRAINT_END
  38. };
  39. static struct event_constraint intel_core2_event_constraints[] __read_mostly =
  40. {
  41. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  42. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  43. FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
  44. INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
  45. INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
  46. INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
  47. INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
  48. INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
  49. INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
  50. INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
  51. INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
  52. INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
  53. INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
  54. EVENT_CONSTRAINT_END
  55. };
  56. static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
  57. {
  58. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  59. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  60. FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
  61. INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
  62. INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
  63. INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
  64. INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
  65. INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
  66. INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
  67. INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
  68. INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
  69. EVENT_CONSTRAINT_END
  70. };
  71. static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
  72. {
  73. INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
  74. EVENT_EXTRA_END
  75. };
  76. static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
  77. {
  78. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  79. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  80. FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
  81. INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
  82. INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
  83. INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
  84. INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
  85. EVENT_CONSTRAINT_END
  86. };
  87. static struct event_constraint intel_snb_event_constraints[] __read_mostly =
  88. {
  89. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  90. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  91. FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
  92. INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
  93. INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
  94. INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
  95. EVENT_CONSTRAINT_END
  96. };
  97. static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
  98. {
  99. INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
  100. INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
  101. EVENT_EXTRA_END
  102. };
  103. static struct event_constraint intel_v1_event_constraints[] __read_mostly =
  104. {
  105. EVENT_CONSTRAINT_END
  106. };
  107. static struct event_constraint intel_gen_event_constraints[] __read_mostly =
  108. {
  109. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  110. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  111. FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
  112. EVENT_CONSTRAINT_END
  113. };
  114. static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
  115. INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
  116. INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
  117. EVENT_EXTRA_END
  118. };
  119. static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
  120. INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
  121. INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
  122. EVENT_EXTRA_END
  123. };
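/*
 * The third argument to INTEL_EVENT_EXTRA_REG() is the mask of bits that
 * userspace may set in the extra MSR (supplied via attr.config1 and exposed
 * as the "offcore_rsp" format attribute further down); requests with bits
 * outside this valid mask are rejected.  The SNB-EP mask is wider than the
 * client SNB one, presumably because the server part implements additional
 * response/snoop-type bits in MSR_OFFCORE_RSP_*.
 */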
  124. static u64 intel_pmu_event_map(int hw_event)
  125. {
  126. return intel_perfmon_event_map[hw_event];
  127. }
  128. static __initconst const u64 snb_hw_cache_event_ids
  129. [PERF_COUNT_HW_CACHE_MAX]
  130. [PERF_COUNT_HW_CACHE_OP_MAX]
  131. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  132. {
  133. [ C(L1D) ] = {
  134. [ C(OP_READ) ] = {
  135. [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
  136. [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
  137. },
  138. [ C(OP_WRITE) ] = {
  139. [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
  140. [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
  141. },
  142. [ C(OP_PREFETCH) ] = {
  143. [ C(RESULT_ACCESS) ] = 0x0,
  144. [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
  145. },
  146. },
  147. [ C(L1I ) ] = {
  148. [ C(OP_READ) ] = {
  149. [ C(RESULT_ACCESS) ] = 0x0,
  150. [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
  151. },
  152. [ C(OP_WRITE) ] = {
  153. [ C(RESULT_ACCESS) ] = -1,
  154. [ C(RESULT_MISS) ] = -1,
  155. },
  156. [ C(OP_PREFETCH) ] = {
  157. [ C(RESULT_ACCESS) ] = 0x0,
  158. [ C(RESULT_MISS) ] = 0x0,
  159. },
  160. },
  161. [ C(LL ) ] = {
  162. [ C(OP_READ) ] = {
  163. /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
  164. [ C(RESULT_ACCESS) ] = 0x01b7,
  165. /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
  166. [ C(RESULT_MISS) ] = 0x01b7,
  167. },
  168. [ C(OP_WRITE) ] = {
  169. /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
  170. [ C(RESULT_ACCESS) ] = 0x01b7,
  171. /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
  172. [ C(RESULT_MISS) ] = 0x01b7,
  173. },
  174. [ C(OP_PREFETCH) ] = {
  175. /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
  176. [ C(RESULT_ACCESS) ] = 0x01b7,
  177. /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
  178. [ C(RESULT_MISS) ] = 0x01b7,
  179. },
  180. },
  181. [ C(DTLB) ] = {
  182. [ C(OP_READ) ] = {
  183. [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
  184. [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
  185. },
  186. [ C(OP_WRITE) ] = {
  187. [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
  188. [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
  189. },
  190. [ C(OP_PREFETCH) ] = {
  191. [ C(RESULT_ACCESS) ] = 0x0,
  192. [ C(RESULT_MISS) ] = 0x0,
  193. },
  194. },
  195. [ C(ITLB) ] = {
  196. [ C(OP_READ) ] = {
  197. [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
  198. [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
  199. },
  200. [ C(OP_WRITE) ] = {
  201. [ C(RESULT_ACCESS) ] = -1,
  202. [ C(RESULT_MISS) ] = -1,
  203. },
  204. [ C(OP_PREFETCH) ] = {
  205. [ C(RESULT_ACCESS) ] = -1,
  206. [ C(RESULT_MISS) ] = -1,
  207. },
  208. },
  209. [ C(BPU ) ] = {
  210. [ C(OP_READ) ] = {
  211. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
  212. [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
  213. },
  214. [ C(OP_WRITE) ] = {
  215. [ C(RESULT_ACCESS) ] = -1,
  216. [ C(RESULT_MISS) ] = -1,
  217. },
  218. [ C(OP_PREFETCH) ] = {
  219. [ C(RESULT_ACCESS) ] = -1,
  220. [ C(RESULT_MISS) ] = -1,
  221. },
  222. },
  223. [ C(NODE) ] = {
  224. [ C(OP_READ) ] = {
  225. [ C(RESULT_ACCESS) ] = -1,
  226. [ C(RESULT_MISS) ] = -1,
  227. },
  228. [ C(OP_WRITE) ] = {
  229. [ C(RESULT_ACCESS) ] = -1,
  230. [ C(RESULT_MISS) ] = -1,
  231. },
  232. [ C(OP_PREFETCH) ] = {
  233. [ C(RESULT_ACCESS) ] = -1,
  234. [ C(RESULT_MISS) ] = -1,
  235. },
  236. },
  237. };
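/*
 * Conventions used by the cache-event tables in this file: 0 means no
 * suitable hardware event exists (the generic x86 code rejects it with
 * -ENOENT), -1 means the op/result combination makes no sense for that
 * cache (-EINVAL), and the 0x01b7 entries are OFFCORE_RESPONSE events that
 * additionally need MSR_OFFCORE_RSP_* programmed through the extra_regs
 * tables above.
 */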
  238. static __initconst const u64 westmere_hw_cache_event_ids
  239. [PERF_COUNT_HW_CACHE_MAX]
  240. [PERF_COUNT_HW_CACHE_OP_MAX]
  241. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  242. {
  243. [ C(L1D) ] = {
  244. [ C(OP_READ) ] = {
  245. [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
  246. [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
  247. },
  248. [ C(OP_WRITE) ] = {
249. [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
  250. [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
  251. },
  252. [ C(OP_PREFETCH) ] = {
  253. [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
  254. [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
  255. },
  256. },
  257. [ C(L1I ) ] = {
  258. [ C(OP_READ) ] = {
  259. [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
  260. [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
  261. },
  262. [ C(OP_WRITE) ] = {
  263. [ C(RESULT_ACCESS) ] = -1,
  264. [ C(RESULT_MISS) ] = -1,
  265. },
  266. [ C(OP_PREFETCH) ] = {
  267. [ C(RESULT_ACCESS) ] = 0x0,
  268. [ C(RESULT_MISS) ] = 0x0,
  269. },
  270. },
  271. [ C(LL ) ] = {
  272. [ C(OP_READ) ] = {
  273. /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
  274. [ C(RESULT_ACCESS) ] = 0x01b7,
  275. /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
  276. [ C(RESULT_MISS) ] = 0x01b7,
  277. },
  278. /*
  279. * Use RFO, not WRITEBACK, because a write miss would typically occur
  280. * on RFO.
  281. */
  282. [ C(OP_WRITE) ] = {
  283. /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
  284. [ C(RESULT_ACCESS) ] = 0x01b7,
  285. /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
  286. [ C(RESULT_MISS) ] = 0x01b7,
  287. },
  288. [ C(OP_PREFETCH) ] = {
  289. /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
  290. [ C(RESULT_ACCESS) ] = 0x01b7,
  291. /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
  292. [ C(RESULT_MISS) ] = 0x01b7,
  293. },
  294. },
  295. [ C(DTLB) ] = {
  296. [ C(OP_READ) ] = {
  297. [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
  298. [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
  299. },
  300. [ C(OP_WRITE) ] = {
301. [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
  302. [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
  303. },
  304. [ C(OP_PREFETCH) ] = {
  305. [ C(RESULT_ACCESS) ] = 0x0,
  306. [ C(RESULT_MISS) ] = 0x0,
  307. },
  308. },
  309. [ C(ITLB) ] = {
  310. [ C(OP_READ) ] = {
  311. [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
  312. [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
  313. },
  314. [ C(OP_WRITE) ] = {
  315. [ C(RESULT_ACCESS) ] = -1,
  316. [ C(RESULT_MISS) ] = -1,
  317. },
  318. [ C(OP_PREFETCH) ] = {
  319. [ C(RESULT_ACCESS) ] = -1,
  320. [ C(RESULT_MISS) ] = -1,
  321. },
  322. },
  323. [ C(BPU ) ] = {
  324. [ C(OP_READ) ] = {
  325. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
  326. [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
  327. },
  328. [ C(OP_WRITE) ] = {
  329. [ C(RESULT_ACCESS) ] = -1,
  330. [ C(RESULT_MISS) ] = -1,
  331. },
  332. [ C(OP_PREFETCH) ] = {
  333. [ C(RESULT_ACCESS) ] = -1,
  334. [ C(RESULT_MISS) ] = -1,
  335. },
  336. },
  337. [ C(NODE) ] = {
  338. [ C(OP_READ) ] = {
  339. [ C(RESULT_ACCESS) ] = 0x01b7,
  340. [ C(RESULT_MISS) ] = 0x01b7,
  341. },
  342. [ C(OP_WRITE) ] = {
  343. [ C(RESULT_ACCESS) ] = 0x01b7,
  344. [ C(RESULT_MISS) ] = 0x01b7,
  345. },
  346. [ C(OP_PREFETCH) ] = {
  347. [ C(RESULT_ACCESS) ] = 0x01b7,
  348. [ C(RESULT_MISS) ] = 0x01b7,
  349. },
  350. },
  351. };
  352. /*
  353. * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
  354. * See IA32 SDM Vol 3B 30.6.1.3
  355. */
  356. #define NHM_DMND_DATA_RD (1 << 0)
  357. #define NHM_DMND_RFO (1 << 1)
  358. #define NHM_DMND_IFETCH (1 << 2)
  359. #define NHM_DMND_WB (1 << 3)
  360. #define NHM_PF_DATA_RD (1 << 4)
  361. #define NHM_PF_DATA_RFO (1 << 5)
  362. #define NHM_PF_IFETCH (1 << 6)
  363. #define NHM_OFFCORE_OTHER (1 << 7)
  364. #define NHM_UNCORE_HIT (1 << 8)
  365. #define NHM_OTHER_CORE_HIT_SNP (1 << 9)
  366. #define NHM_OTHER_CORE_HITM (1 << 10)
  367. /* reserved */
  368. #define NHM_REMOTE_CACHE_FWD (1 << 12)
  369. #define NHM_REMOTE_DRAM (1 << 13)
  370. #define NHM_LOCAL_DRAM (1 << 14)
  371. #define NHM_NON_DRAM (1 << 15)
  372. #define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
  373. #define NHM_REMOTE (NHM_REMOTE_DRAM)
  374. #define NHM_DMND_READ (NHM_DMND_DATA_RD)
  375. #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
  376. #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
  377. #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
  378. #define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
  379. #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
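/*
 * Worked example of the composite masks above: the LL read-miss entry in
 * the table below is NHM_DMND_READ|NHM_L3_MISS, which evaluates to
 * 0x0001 | 0xf000 = 0xf001 -- demand data reads satisfied by local DRAM,
 * remote DRAM, a remote cache forward, or non-DRAM, i.e. anything other
 * than an L3 hit.
 */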
  380. static __initconst const u64 nehalem_hw_cache_extra_regs
  381. [PERF_COUNT_HW_CACHE_MAX]
  382. [PERF_COUNT_HW_CACHE_OP_MAX]
  383. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  384. {
  385. [ C(LL ) ] = {
  386. [ C(OP_READ) ] = {
  387. [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
  388. [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
  389. },
  390. [ C(OP_WRITE) ] = {
  391. [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
  392. [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
  393. },
  394. [ C(OP_PREFETCH) ] = {
  395. [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
  396. [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
  397. },
  398. },
  399. [ C(NODE) ] = {
  400. [ C(OP_READ) ] = {
  401. [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
  402. [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
  403. },
  404. [ C(OP_WRITE) ] = {
  405. [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
  406. [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
  407. },
  408. [ C(OP_PREFETCH) ] = {
  409. [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
  410. [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
  411. },
  412. },
  413. };
  414. static __initconst const u64 nehalem_hw_cache_event_ids
  415. [PERF_COUNT_HW_CACHE_MAX]
  416. [PERF_COUNT_HW_CACHE_OP_MAX]
  417. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  418. {
  419. [ C(L1D) ] = {
  420. [ C(OP_READ) ] = {
  421. [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
  422. [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
  423. },
  424. [ C(OP_WRITE) ] = {
425. [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
  426. [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
  427. },
  428. [ C(OP_PREFETCH) ] = {
  429. [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
  430. [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
  431. },
  432. },
  433. [ C(L1I ) ] = {
  434. [ C(OP_READ) ] = {
  435. [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
  436. [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
  437. },
  438. [ C(OP_WRITE) ] = {
  439. [ C(RESULT_ACCESS) ] = -1,
  440. [ C(RESULT_MISS) ] = -1,
  441. },
  442. [ C(OP_PREFETCH) ] = {
  443. [ C(RESULT_ACCESS) ] = 0x0,
  444. [ C(RESULT_MISS) ] = 0x0,
  445. },
  446. },
  447. [ C(LL ) ] = {
  448. [ C(OP_READ) ] = {
  449. /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
  450. [ C(RESULT_ACCESS) ] = 0x01b7,
  451. /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
  452. [ C(RESULT_MISS) ] = 0x01b7,
  453. },
  454. /*
  455. * Use RFO, not WRITEBACK, because a write miss would typically occur
  456. * on RFO.
  457. */
  458. [ C(OP_WRITE) ] = {
  459. /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
  460. [ C(RESULT_ACCESS) ] = 0x01b7,
  461. /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
  462. [ C(RESULT_MISS) ] = 0x01b7,
  463. },
  464. [ C(OP_PREFETCH) ] = {
  465. /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
  466. [ C(RESULT_ACCESS) ] = 0x01b7,
  467. /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
  468. [ C(RESULT_MISS) ] = 0x01b7,
  469. },
  470. },
  471. [ C(DTLB) ] = {
  472. [ C(OP_READ) ] = {
  473. [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
  474. [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
  475. },
  476. [ C(OP_WRITE) ] = {
  477. [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
  478. [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
  479. },
  480. [ C(OP_PREFETCH) ] = {
  481. [ C(RESULT_ACCESS) ] = 0x0,
  482. [ C(RESULT_MISS) ] = 0x0,
  483. },
  484. },
  485. [ C(ITLB) ] = {
  486. [ C(OP_READ) ] = {
  487. [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
  488. [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
  489. },
  490. [ C(OP_WRITE) ] = {
  491. [ C(RESULT_ACCESS) ] = -1,
  492. [ C(RESULT_MISS) ] = -1,
  493. },
  494. [ C(OP_PREFETCH) ] = {
  495. [ C(RESULT_ACCESS) ] = -1,
  496. [ C(RESULT_MISS) ] = -1,
  497. },
  498. },
  499. [ C(BPU ) ] = {
  500. [ C(OP_READ) ] = {
  501. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
  502. [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
  503. },
  504. [ C(OP_WRITE) ] = {
  505. [ C(RESULT_ACCESS) ] = -1,
  506. [ C(RESULT_MISS) ] = -1,
  507. },
  508. [ C(OP_PREFETCH) ] = {
  509. [ C(RESULT_ACCESS) ] = -1,
  510. [ C(RESULT_MISS) ] = -1,
  511. },
  512. },
  513. [ C(NODE) ] = {
  514. [ C(OP_READ) ] = {
  515. [ C(RESULT_ACCESS) ] = 0x01b7,
  516. [ C(RESULT_MISS) ] = 0x01b7,
  517. },
  518. [ C(OP_WRITE) ] = {
  519. [ C(RESULT_ACCESS) ] = 0x01b7,
  520. [ C(RESULT_MISS) ] = 0x01b7,
  521. },
  522. [ C(OP_PREFETCH) ] = {
  523. [ C(RESULT_ACCESS) ] = 0x01b7,
  524. [ C(RESULT_MISS) ] = 0x01b7,
  525. },
  526. },
  527. };
  528. static __initconst const u64 core2_hw_cache_event_ids
  529. [PERF_COUNT_HW_CACHE_MAX]
  530. [PERF_COUNT_HW_CACHE_OP_MAX]
  531. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  532. {
  533. [ C(L1D) ] = {
  534. [ C(OP_READ) ] = {
  535. [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
  536. [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
  537. },
  538. [ C(OP_WRITE) ] = {
  539. [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
  540. [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
  541. },
  542. [ C(OP_PREFETCH) ] = {
  543. [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
  544. [ C(RESULT_MISS) ] = 0,
  545. },
  546. },
  547. [ C(L1I ) ] = {
  548. [ C(OP_READ) ] = {
  549. [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
  550. [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
  551. },
  552. [ C(OP_WRITE) ] = {
  553. [ C(RESULT_ACCESS) ] = -1,
  554. [ C(RESULT_MISS) ] = -1,
  555. },
  556. [ C(OP_PREFETCH) ] = {
  557. [ C(RESULT_ACCESS) ] = 0,
  558. [ C(RESULT_MISS) ] = 0,
  559. },
  560. },
  561. [ C(LL ) ] = {
  562. [ C(OP_READ) ] = {
  563. [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
  564. [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
  565. },
  566. [ C(OP_WRITE) ] = {
  567. [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
  568. [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
  569. },
  570. [ C(OP_PREFETCH) ] = {
  571. [ C(RESULT_ACCESS) ] = 0,
  572. [ C(RESULT_MISS) ] = 0,
  573. },
  574. },
  575. [ C(DTLB) ] = {
  576. [ C(OP_READ) ] = {
  577. [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
  578. [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
  579. },
  580. [ C(OP_WRITE) ] = {
  581. [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
  582. [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
  583. },
  584. [ C(OP_PREFETCH) ] = {
  585. [ C(RESULT_ACCESS) ] = 0,
  586. [ C(RESULT_MISS) ] = 0,
  587. },
  588. },
  589. [ C(ITLB) ] = {
  590. [ C(OP_READ) ] = {
  591. [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
  592. [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
  593. },
  594. [ C(OP_WRITE) ] = {
  595. [ C(RESULT_ACCESS) ] = -1,
  596. [ C(RESULT_MISS) ] = -1,
  597. },
  598. [ C(OP_PREFETCH) ] = {
  599. [ C(RESULT_ACCESS) ] = -1,
  600. [ C(RESULT_MISS) ] = -1,
  601. },
  602. },
  603. [ C(BPU ) ] = {
  604. [ C(OP_READ) ] = {
  605. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
  606. [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
  607. },
  608. [ C(OP_WRITE) ] = {
  609. [ C(RESULT_ACCESS) ] = -1,
  610. [ C(RESULT_MISS) ] = -1,
  611. },
  612. [ C(OP_PREFETCH) ] = {
  613. [ C(RESULT_ACCESS) ] = -1,
  614. [ C(RESULT_MISS) ] = -1,
  615. },
  616. },
  617. };
  618. static __initconst const u64 atom_hw_cache_event_ids
  619. [PERF_COUNT_HW_CACHE_MAX]
  620. [PERF_COUNT_HW_CACHE_OP_MAX]
  621. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  622. {
  623. [ C(L1D) ] = {
  624. [ C(OP_READ) ] = {
  625. [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
  626. [ C(RESULT_MISS) ] = 0,
  627. },
  628. [ C(OP_WRITE) ] = {
  629. [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
  630. [ C(RESULT_MISS) ] = 0,
  631. },
  632. [ C(OP_PREFETCH) ] = {
  633. [ C(RESULT_ACCESS) ] = 0x0,
  634. [ C(RESULT_MISS) ] = 0,
  635. },
  636. },
  637. [ C(L1I ) ] = {
  638. [ C(OP_READ) ] = {
  639. [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
  640. [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
  641. },
  642. [ C(OP_WRITE) ] = {
  643. [ C(RESULT_ACCESS) ] = -1,
  644. [ C(RESULT_MISS) ] = -1,
  645. },
  646. [ C(OP_PREFETCH) ] = {
  647. [ C(RESULT_ACCESS) ] = 0,
  648. [ C(RESULT_MISS) ] = 0,
  649. },
  650. },
  651. [ C(LL ) ] = {
  652. [ C(OP_READ) ] = {
  653. [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
  654. [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
  655. },
  656. [ C(OP_WRITE) ] = {
  657. [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
  658. [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
  659. },
  660. [ C(OP_PREFETCH) ] = {
  661. [ C(RESULT_ACCESS) ] = 0,
  662. [ C(RESULT_MISS) ] = 0,
  663. },
  664. },
  665. [ C(DTLB) ] = {
  666. [ C(OP_READ) ] = {
  667. [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
  668. [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
  669. },
  670. [ C(OP_WRITE) ] = {
  671. [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
  672. [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
  673. },
  674. [ C(OP_PREFETCH) ] = {
  675. [ C(RESULT_ACCESS) ] = 0,
  676. [ C(RESULT_MISS) ] = 0,
  677. },
  678. },
  679. [ C(ITLB) ] = {
  680. [ C(OP_READ) ] = {
  681. [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
  682. [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
  683. },
  684. [ C(OP_WRITE) ] = {
  685. [ C(RESULT_ACCESS) ] = -1,
  686. [ C(RESULT_MISS) ] = -1,
  687. },
  688. [ C(OP_PREFETCH) ] = {
  689. [ C(RESULT_ACCESS) ] = -1,
  690. [ C(RESULT_MISS) ] = -1,
  691. },
  692. },
  693. [ C(BPU ) ] = {
  694. [ C(OP_READ) ] = {
  695. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
  696. [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
  697. },
  698. [ C(OP_WRITE) ] = {
  699. [ C(RESULT_ACCESS) ] = -1,
  700. [ C(RESULT_MISS) ] = -1,
  701. },
  702. [ C(OP_PREFETCH) ] = {
  703. [ C(RESULT_ACCESS) ] = -1,
  704. [ C(RESULT_MISS) ] = -1,
  705. },
  706. },
  707. };
  708. static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
  709. {
  710. /* user explicitly requested branch sampling */
  711. if (has_branch_stack(event))
  712. return true;
  713. /* implicit branch sampling to correct PEBS skid */
  714. if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
  715. return true;
  716. return false;
  717. }
  718. static void intel_pmu_disable_all(void)
  719. {
  720. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  721. wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
  722. if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
  723. intel_pmu_disable_bts();
  724. intel_pmu_pebs_disable_all();
  725. intel_pmu_lbr_disable_all();
  726. }
  727. static void intel_pmu_enable_all(int added)
  728. {
  729. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  730. intel_pmu_pebs_enable_all();
  731. intel_pmu_lbr_enable_all();
  732. wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
  733. x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
  734. if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
  735. struct perf_event *event =
  736. cpuc->events[X86_PMC_IDX_FIXED_BTS];
  737. if (WARN_ON_ONCE(!event))
  738. return;
  739. intel_pmu_enable_bts(event->hw.config);
  740. }
  741. }
  742. /*
  743. * Workaround for:
  744. * Intel Errata AAK100 (model 26)
  745. * Intel Errata AAP53 (model 30)
  746. * Intel Errata BD53 (model 44)
  747. *
  748. * The official story:
  749. * These chips need to be 'reset' when adding counters by programming the
  750. * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
  751. * in sequence on the same PMC or on different PMCs.
  752. *
753. * In practice it appears some of these events do in fact count, and
754. * we need to program all 4 events.
  755. */
  756. static void intel_pmu_nhm_workaround(void)
  757. {
  758. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  759. static const unsigned long nhm_magic[4] = {
  760. 0x4300B5,
  761. 0x4300D2,
  762. 0x4300B1,
  763. 0x4300B1
  764. };
  765. struct perf_event *event;
  766. int i;
  767. /*
768. * The erratum requires the following steps:
  769. * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
  770. * 2) Configure 4 PERFEVTSELx with the magic events and clear
  771. * the corresponding PMCx;
  772. * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
  773. * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
774. * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
  775. */
  776. /*
  777. * The real steps we choose are a little different from above.
  778. * A) To reduce MSR operations, we don't run step 1) as they
  779. * are already cleared before this function is called;
  780. * B) Call x86_perf_event_update to save PMCx before configuring
  781. * PERFEVTSELx with magic number;
782. * C) With step 5), we clear PERFEVTSELx only when it is
783. * not currently in use.
  784. * D) Call x86_perf_event_set_period to restore PMCx;
  785. */
  786. /* We always operate 4 pairs of PERF Counters */
  787. for (i = 0; i < 4; i++) {
  788. event = cpuc->events[i];
  789. if (event)
  790. x86_perf_event_update(event);
  791. }
  792. for (i = 0; i < 4; i++) {
  793. wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
  794. wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
  795. }
  796. wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
  797. wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
  798. for (i = 0; i < 4; i++) {
  799. event = cpuc->events[i];
  800. if (event) {
  801. x86_perf_event_set_period(event);
  802. __x86_pmu_enable_event(&event->hw,
  803. ARCH_PERFMON_EVENTSEL_ENABLE);
  804. } else
  805. wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
  806. }
  807. }
  808. static void intel_pmu_nhm_enable_all(int added)
  809. {
  810. if (added)
  811. intel_pmu_nhm_workaround();
  812. intel_pmu_enable_all(added);
  813. }
  814. static inline u64 intel_pmu_get_status(void)
  815. {
  816. u64 status;
  817. rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
  818. return status;
  819. }
  820. static inline void intel_pmu_ack_status(u64 ack)
  821. {
  822. wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
  823. }
  824. static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
  825. {
  826. int idx = hwc->idx - X86_PMC_IDX_FIXED;
  827. u64 ctrl_val, mask;
  828. mask = 0xfULL << (idx * 4);
  829. rdmsrl(hwc->config_base, ctrl_val);
  830. ctrl_val &= ~mask;
  831. wrmsrl(hwc->config_base, ctrl_val);
  832. }
  833. static void intel_pmu_disable_event(struct perf_event *event)
  834. {
  835. struct hw_perf_event *hwc = &event->hw;
  836. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  837. if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
  838. intel_pmu_disable_bts();
  839. intel_pmu_drain_bts_buffer();
  840. return;
  841. }
  842. cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
  843. cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
  844. /*
845. * LBR must be disabled before the actual event,
  846. * because any event may be combined with LBR
  847. */
  848. if (intel_pmu_needs_lbr_smpl(event))
  849. intel_pmu_lbr_disable(event);
  850. if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
  851. intel_pmu_disable_fixed(hwc);
  852. return;
  853. }
  854. x86_pmu_disable_event(event);
  855. if (unlikely(event->attr.precise_ip))
  856. intel_pmu_pebs_disable(event);
  857. }
  858. static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
  859. {
  860. int idx = hwc->idx - X86_PMC_IDX_FIXED;
  861. u64 ctrl_val, bits, mask;
  862. /*
  863. * Enable IRQ generation (0x8),
  864. * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
  865. * if requested:
  866. */
  867. bits = 0x8ULL;
  868. if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
  869. bits |= 0x2;
  870. if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
  871. bits |= 0x1;
  872. /*
  873. * ANY bit is supported in v3 and up
  874. */
  875. if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
  876. bits |= 0x4;
  877. bits <<= (idx * 4);
  878. mask = 0xfULL << (idx * 4);
  879. rdmsrl(hwc->config_base, ctrl_val);
  880. ctrl_val &= ~mask;
  881. ctrl_val |= bits;
  882. wrmsrl(hwc->config_base, ctrl_val);
  883. }
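/*
 * Layout recap for MSR_ARCH_PERFMON_FIXED_CTR_CTRL as used above: each
 * fixed counter owns a 4-bit field at bits [idx*4 .. idx*4+3], with bit 0
 * enabling ring-0 counting, bit 1 ring-3, bit 2 the ANY-thread qualifier
 * (v3+) and bit 3 PMI on overflow.  For example, fixed counter 1 counting
 * in both rings with PMI enabled contributes 0xb << 4 = 0xb0 to the
 * control MSR.
 */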
  884. static void intel_pmu_enable_event(struct perf_event *event)
  885. {
  886. struct hw_perf_event *hwc = &event->hw;
  887. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  888. if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
  889. if (!__this_cpu_read(cpu_hw_events.enabled))
  890. return;
  891. intel_pmu_enable_bts(hwc->config);
  892. return;
  893. }
  894. /*
895. * LBR must be enabled before the actual event,
  896. * because any event may be combined with LBR
  897. */
  898. if (intel_pmu_needs_lbr_smpl(event))
  899. intel_pmu_lbr_enable(event);
  900. if (event->attr.exclude_host)
  901. cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
  902. if (event->attr.exclude_guest)
  903. cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
  904. if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
  905. intel_pmu_enable_fixed(hwc);
  906. return;
  907. }
  908. if (unlikely(event->attr.precise_ip))
  909. intel_pmu_pebs_enable(event);
  910. __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
  911. }
  912. /*
  913. * Save and restart an expired event. Called by NMI contexts,
  914. * so it has to be careful about preempting normal event ops:
  915. */
  916. int intel_pmu_save_and_restart(struct perf_event *event)
  917. {
  918. x86_perf_event_update(event);
  919. return x86_perf_event_set_period(event);
  920. }
  921. static void intel_pmu_reset(void)
  922. {
  923. struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
  924. unsigned long flags;
  925. int idx;
  926. if (!x86_pmu.num_counters)
  927. return;
  928. local_irq_save(flags);
  929. printk("clearing PMU state on CPU#%d\n", smp_processor_id());
  930. for (idx = 0; idx < x86_pmu.num_counters; idx++) {
  931. checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
  932. checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
  933. }
  934. for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
  935. checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
  936. if (ds)
  937. ds->bts_index = ds->bts_buffer_base;
  938. local_irq_restore(flags);
  939. }
  940. /*
  941. * This handler is triggered by the local APIC, so the APIC IRQ handling
  942. * rules apply:
  943. */
  944. static int intel_pmu_handle_irq(struct pt_regs *regs)
  945. {
  946. struct perf_sample_data data;
  947. struct cpu_hw_events *cpuc;
  948. int bit, loops;
  949. u64 status;
  950. int handled;
  951. perf_sample_data_init(&data, 0);
  952. cpuc = &__get_cpu_var(cpu_hw_events);
  953. /*
  954. * Some chipsets need to unmask the LVTPC in a particular spot
  955. * inside the nmi handler. As a result, the unmasking was pushed
  956. * into all the nmi handlers.
  957. *
  958. * This handler doesn't seem to have any issues with the unmasking
  959. * so it was left at the top.
  960. */
  961. apic_write(APIC_LVTPC, APIC_DM_NMI);
  962. intel_pmu_disable_all();
  963. handled = intel_pmu_drain_bts_buffer();
  964. status = intel_pmu_get_status();
  965. if (!status) {
  966. intel_pmu_enable_all(0);
  967. return handled;
  968. }
  969. loops = 0;
  970. again:
  971. intel_pmu_ack_status(status);
  972. if (++loops > 100) {
  973. WARN_ONCE(1, "perfevents: irq loop stuck!\n");
  974. perf_event_print_debug();
  975. intel_pmu_reset();
  976. goto done;
  977. }
  978. inc_irq_stat(apic_perf_irqs);
  979. intel_pmu_lbr_read();
  980. /*
  981. * CondChgd bit 63 doesn't mean any overflow status. Ignore
  982. * and clear the bit.
  983. */
  984. if (__test_and_clear_bit(63, (unsigned long *)&status)) {
  985. if (!status)
  986. goto done;
  987. }
  988. /*
  989. * PEBS overflow sets bit 62 in the global status register
  990. */
  991. if (__test_and_clear_bit(62, (unsigned long *)&status)) {
  992. handled++;
  993. x86_pmu.drain_pebs(regs);
  994. }
  995. for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
  996. struct perf_event *event = cpuc->events[bit];
  997. handled++;
  998. if (!test_bit(bit, cpuc->active_mask))
  999. continue;
  1000. if (!intel_pmu_save_and_restart(event))
  1001. continue;
  1002. data.period = event->hw.last_period;
  1003. if (has_branch_stack(event))
  1004. data.br_stack = &cpuc->lbr_stack;
  1005. if (perf_event_overflow(event, &data, regs))
  1006. x86_pmu_stop(event, 0);
  1007. }
  1008. /*
  1009. * Repeat if there is more work to be done:
  1010. */
  1011. status = intel_pmu_get_status();
  1012. if (status)
  1013. goto again;
  1014. done:
  1015. intel_pmu_enable_all(0);
  1016. return handled;
  1017. }
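/*
 * Summary of the GLOBAL_STATUS bits consumed by the handler above: the
 * per-counter overflow bits (generic counters in the low bits, fixed
 * counters from bit 32) are walked with for_each_set_bit(), bit 62 signals
 * a PEBS buffer overflow and is routed to drain_pebs(), and bit 63
 * (CondChgd) carries no overflow information and is simply cleared.
 */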
  1018. static struct event_constraint *
  1019. intel_bts_constraints(struct perf_event *event)
  1020. {
  1021. struct hw_perf_event *hwc = &event->hw;
  1022. unsigned int hw_event, bts_event;
  1023. if (event->attr.freq)
  1024. return NULL;
  1025. hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
  1026. bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
  1027. if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
  1028. return &bts_constraint;
  1029. return NULL;
  1030. }
  1031. static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
  1032. {
  1033. if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
  1034. return false;
  1035. if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
  1036. event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
  1037. event->hw.config |= 0x01bb;
  1038. event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
  1039. event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
  1040. } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
  1041. event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
  1042. event->hw.config |= 0x01b7;
  1043. event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
  1044. event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
  1045. }
  1046. if (event->hw.extra_reg.idx == orig_idx)
  1047. return false;
  1048. return true;
  1049. }
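/*
 * Background for the swap above: OFFCORE_RESPONSE_0 (event 0x01b7) and
 * OFFCORE_RESPONSE_1 (event 0x01bb) are interchangeable encodings that
 * differ only in which MSR_OFFCORE_RSP_* register they consume.  So when
 * the shared register an event asked for is already taken with a different
 * value, we retry with the sibling encoding (provided the part actually
 * has MSR_OFFCORE_RSP_1, i.e. ERF_HAS_RSP_1) before giving up.
 */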
  1050. /*
  1051. * manage allocation of shared extra msr for certain events
  1052. *
  1053. * sharing can be:
  1054. * per-cpu: to be shared between the various events on a single PMU
  1055. * per-core: per-cpu + shared by HT threads
  1056. */
  1057. static struct event_constraint *
  1058. __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
  1059. struct perf_event *event,
  1060. struct hw_perf_event_extra *reg)
  1061. {
  1062. struct event_constraint *c = &emptyconstraint;
  1063. struct er_account *era;
  1064. unsigned long flags;
  1065. int orig_idx = reg->idx;
  1066. /* already allocated shared msr */
  1067. if (reg->alloc)
  1068. return NULL; /* call x86_get_event_constraint() */
  1069. again:
  1070. era = &cpuc->shared_regs->regs[reg->idx];
  1071. /*
1072. * we use raw_spin_lock_irqsave() to avoid lockdep issues when
  1073. * passing a fake cpuc
  1074. */
  1075. raw_spin_lock_irqsave(&era->lock, flags);
  1076. if (!atomic_read(&era->ref) || era->config == reg->config) {
  1077. /* lock in msr value */
  1078. era->config = reg->config;
  1079. era->reg = reg->reg;
  1080. /* one more user */
  1081. atomic_inc(&era->ref);
  1082. /* no need to reallocate during incremental event scheduling */
  1083. reg->alloc = 1;
  1084. /*
  1085. * need to call x86_get_event_constraint()
  1086. * to check if associated event has constraints
  1087. */
  1088. c = NULL;
  1089. } else if (intel_try_alt_er(event, orig_idx)) {
  1090. raw_spin_unlock_irqrestore(&era->lock, flags);
  1091. goto again;
  1092. }
  1093. raw_spin_unlock_irqrestore(&era->lock, flags);
  1094. return c;
  1095. }
  1096. static void
  1097. __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
  1098. struct hw_perf_event_extra *reg)
  1099. {
  1100. struct er_account *era;
  1101. /*
  1102. * only put constraint if extra reg was actually
1103. * allocated. Also takes care of events which do
  1104. * not use an extra shared reg
  1105. */
  1106. if (!reg->alloc)
  1107. return;
  1108. era = &cpuc->shared_regs->regs[reg->idx];
  1109. /* one fewer user */
  1110. atomic_dec(&era->ref);
  1111. /* allocate again next time */
  1112. reg->alloc = 0;
  1113. }
  1114. static struct event_constraint *
  1115. intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
  1116. struct perf_event *event)
  1117. {
  1118. struct event_constraint *c = NULL, *d;
  1119. struct hw_perf_event_extra *xreg, *breg;
  1120. xreg = &event->hw.extra_reg;
  1121. if (xreg->idx != EXTRA_REG_NONE) {
  1122. c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
  1123. if (c == &emptyconstraint)
  1124. return c;
  1125. }
  1126. breg = &event->hw.branch_reg;
  1127. if (breg->idx != EXTRA_REG_NONE) {
  1128. d = __intel_shared_reg_get_constraints(cpuc, event, breg);
  1129. if (d == &emptyconstraint) {
  1130. __intel_shared_reg_put_constraints(cpuc, xreg);
  1131. c = d;
  1132. }
  1133. }
  1134. return c;
  1135. }
  1136. struct event_constraint *
  1137. x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
  1138. {
  1139. struct event_constraint *c;
  1140. if (x86_pmu.event_constraints) {
  1141. for_each_event_constraint(c, x86_pmu.event_constraints) {
  1142. if ((event->hw.config & c->cmask) == c->code)
  1143. return c;
  1144. }
  1145. }
  1146. return &unconstrained;
  1147. }
  1148. static struct event_constraint *
  1149. intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
  1150. {
  1151. struct event_constraint *c;
  1152. c = intel_bts_constraints(event);
  1153. if (c)
  1154. return c;
  1155. c = intel_pebs_constraints(event);
  1156. if (c)
  1157. return c;
  1158. c = intel_shared_regs_constraints(cpuc, event);
  1159. if (c)
  1160. return c;
  1161. return x86_get_event_constraints(cpuc, event);
  1162. }
  1163. static void
  1164. intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
  1165. struct perf_event *event)
  1166. {
  1167. struct hw_perf_event_extra *reg;
  1168. reg = &event->hw.extra_reg;
  1169. if (reg->idx != EXTRA_REG_NONE)
  1170. __intel_shared_reg_put_constraints(cpuc, reg);
  1171. reg = &event->hw.branch_reg;
  1172. if (reg->idx != EXTRA_REG_NONE)
  1173. __intel_shared_reg_put_constraints(cpuc, reg);
  1174. }
  1175. static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
  1176. struct perf_event *event)
  1177. {
  1178. intel_put_shared_regs_event_constraints(cpuc, event);
  1179. }
  1180. static int intel_pmu_hw_config(struct perf_event *event)
  1181. {
  1182. int ret = x86_pmu_hw_config(event);
  1183. if (ret)
  1184. return ret;
  1185. if (event->attr.precise_ip &&
  1186. (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
  1187. /*
  1188. * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
  1189. * (0x003c) so that we can use it with PEBS.
  1190. *
  1191. * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
  1192. * PEBS capable. However we can use INST_RETIRED.ANY_P
  1193. * (0x00c0), which is a PEBS capable event, to get the same
  1194. * count.
  1195. *
1196. * INST_RETIRED.ANY_P counts the number of cycles that retire
1197. * CNTMASK instructions. By setting CNTMASK to a value (16)
1198. * larger than the maximum number of instructions that can be
1199. * retired per cycle (4) and then inverting the condition, we
1200. * count all cycles that retire 16 or fewer instructions, which
  1201. * is every cycle.
  1202. *
  1203. * Thereby we gain a PEBS capable cycle counter.
  1204. */
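/*
 * With the PERFEVTSEL layout advertised by the format attributes in this
 * file (event in bits 0-7, inv at bit 23, cmask in bits 24-31), the
 * alternative encoding built here works out to 0x108000c0.
 */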
  1205. u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
  1206. alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
  1207. event->hw.config = alt_config;
  1208. }
  1209. if (intel_pmu_needs_lbr_smpl(event)) {
  1210. ret = intel_pmu_setup_lbr_filter(event);
  1211. if (ret)
  1212. return ret;
  1213. }
  1214. if (event->attr.type != PERF_TYPE_RAW)
  1215. return 0;
  1216. if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
  1217. return 0;
  1218. if (x86_pmu.version < 3)
  1219. return -EINVAL;
  1220. if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
  1221. return -EACCES;
  1222. event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
  1223. return 0;
  1224. }
  1225. struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
  1226. {
  1227. if (x86_pmu.guest_get_msrs)
  1228. return x86_pmu.guest_get_msrs(nr);
  1229. *nr = 0;
  1230. return NULL;
  1231. }
  1232. EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
  1233. static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
  1234. {
  1235. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  1236. struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
  1237. arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
  1238. arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
  1239. arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
  1240. *nr = 1;
  1241. return arr;
  1242. }
  1243. static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
  1244. {
  1245. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  1246. struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
  1247. int idx;
  1248. for (idx = 0; idx < x86_pmu.num_counters; idx++) {
  1249. struct perf_event *event = cpuc->events[idx];
  1250. arr[idx].msr = x86_pmu_config_addr(idx);
  1251. arr[idx].host = arr[idx].guest = 0;
  1252. if (!test_bit(idx, cpuc->active_mask))
  1253. continue;
  1254. arr[idx].host = arr[idx].guest =
  1255. event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
  1256. if (event->attr.exclude_host)
  1257. arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
  1258. else if (event->attr.exclude_guest)
  1259. arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
  1260. }
  1261. *nr = x86_pmu.num_counters;
  1262. return arr;
  1263. }
  1264. static void core_pmu_enable_event(struct perf_event *event)
  1265. {
  1266. if (!event->attr.exclude_host)
  1267. x86_pmu_enable_event(event);
  1268. }
  1269. static void core_pmu_enable_all(int added)
  1270. {
  1271. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  1272. int idx;
  1273. for (idx = 0; idx < x86_pmu.num_counters; idx++) {
  1274. struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
  1275. if (!test_bit(idx, cpuc->active_mask) ||
  1276. cpuc->events[idx]->attr.exclude_host)
  1277. continue;
  1278. __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
  1279. }
  1280. }
  1281. PMU_FORMAT_ATTR(event, "config:0-7" );
  1282. PMU_FORMAT_ATTR(umask, "config:8-15" );
  1283. PMU_FORMAT_ATTR(edge, "config:18" );
  1284. PMU_FORMAT_ATTR(pc, "config:19" );
  1285. PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
  1286. PMU_FORMAT_ATTR(inv, "config:23" );
  1287. PMU_FORMAT_ATTR(cmask, "config:24-31" );
  1288. static struct attribute *intel_arch_formats_attr[] = {
  1289. &format_attr_event.attr,
  1290. &format_attr_umask.attr,
  1291. &format_attr_edge.attr,
  1292. &format_attr_pc.attr,
  1293. &format_attr_inv.attr,
  1294. &format_attr_cmask.attr,
  1295. NULL,
  1296. };
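/*
 * These format attributes are exported under
 * /sys/bus/event_source/devices/cpu/format/ and let the perf tool build
 * raw encodings symbolically.  A hypothetical invocation (not taken from
 * this file) would be:
 *
 *   perf stat -e cpu/event=0xc0,umask=0x00,cmask=0x10,inv=0x01/ -- <cmd>
 *
 * which constructs the same PEBS-friendly cycles encoding discussed in
 * intel_pmu_hw_config() above.
 */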
  1297. static __initconst const struct x86_pmu core_pmu = {
  1298. .name = "core",
  1299. .handle_irq = x86_pmu_handle_irq,
  1300. .disable_all = x86_pmu_disable_all,
  1301. .enable_all = core_pmu_enable_all,
  1302. .enable = core_pmu_enable_event,
  1303. .disable = x86_pmu_disable_event,
  1304. .hw_config = x86_pmu_hw_config,
  1305. .schedule_events = x86_schedule_events,
  1306. .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
  1307. .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
  1308. .event_map = intel_pmu_event_map,
  1309. .max_events = ARRAY_SIZE(intel_perfmon_event_map),
  1310. .apic = 1,
  1311. /*
  1312. * Intel PMCs cannot be accessed sanely above 32 bit width,
  1313. * so we install an artificial 1<<31 period regardless of
  1314. * the generic event period:
  1315. */
  1316. .max_period = (1ULL << 31) - 1,
  1317. .get_event_constraints = intel_get_event_constraints,
  1318. .put_event_constraints = intel_put_event_constraints,
  1319. .event_constraints = intel_core_event_constraints,
  1320. .guest_get_msrs = core_guest_get_msrs,
  1321. .format_attrs = intel_arch_formats_attr,
  1322. };
  1323. struct intel_shared_regs *allocate_shared_regs(int cpu)
  1324. {
  1325. struct intel_shared_regs *regs;
  1326. int i;
  1327. regs = kzalloc_node(sizeof(struct intel_shared_regs),
  1328. GFP_KERNEL, cpu_to_node(cpu));
  1329. if (regs) {
  1330. /*
  1331. * initialize the locks to keep lockdep happy
  1332. */
  1333. for (i = 0; i < EXTRA_REG_MAX; i++)
  1334. raw_spin_lock_init(&regs->regs[i].lock);
  1335. regs->core_id = -1;
  1336. }
  1337. return regs;
  1338. }
  1339. static int intel_pmu_cpu_prepare(int cpu)
  1340. {
  1341. struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
  1342. if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
  1343. return NOTIFY_OK;
  1344. cpuc->shared_regs = allocate_shared_regs(cpu);
  1345. if (!cpuc->shared_regs)
  1346. return NOTIFY_BAD;
  1347. return NOTIFY_OK;
  1348. }
  1349. static void intel_pmu_cpu_starting(int cpu)
  1350. {
  1351. struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
  1352. int core_id = topology_core_id(cpu);
  1353. int i;
  1354. init_debug_store_on_cpu(cpu);
  1355. /*
  1356. * Deal with CPUs that don't clear their LBRs on power-up.
  1357. */
  1358. intel_pmu_lbr_reset();
  1359. cpuc->lbr_sel = NULL;
  1360. if (!cpuc->shared_regs)
  1361. return;
  1362. if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
  1363. for_each_cpu(i, topology_thread_cpumask(cpu)) {
  1364. struct intel_shared_regs *pc;
  1365. pc = per_cpu(cpu_hw_events, i).shared_regs;
  1366. if (pc && pc->core_id == core_id) {
  1367. cpuc->kfree_on_online = cpuc->shared_regs;
  1368. cpuc->shared_regs = pc;
  1369. break;
  1370. }
  1371. }
  1372. cpuc->shared_regs->core_id = core_id;
  1373. cpuc->shared_regs->refcnt++;
  1374. }
  1375. if (x86_pmu.lbr_sel_map)
  1376. cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
  1377. }
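/*
 * Net effect of the loop above on HT parts: the second sibling to come
 * online finds the first sibling's intel_shared_regs via the core_id
 * match, hands its own freshly allocated copy back through
 * kfree_on_online, and bumps the refcount, so both hyperthreads end up
 * sharing one set of er_account structures (and thus one view of the
 * OFFCORE_RSP/LBR_SELECT allocations).
 */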
  1378. static void intel_pmu_cpu_dying(int cpu)
  1379. {
  1380. struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
  1381. struct intel_shared_regs *pc;
  1382. pc = cpuc->shared_regs;
  1383. if (pc) {
  1384. if (pc->core_id == -1 || --pc->refcnt == 0)
  1385. kfree(pc);
  1386. cpuc->shared_regs = NULL;
  1387. }
  1388. fini_debug_store_on_cpu(cpu);
  1389. }
  1390. static void intel_pmu_flush_branch_stack(void)
  1391. {
  1392. /*
  1393. * Intel LBR does not tag entries with the
1394. * PID of the current task, so we need to
1395. * flush it on context switch.
1396. * For now, we simply reset it.
  1397. */
  1398. if (x86_pmu.lbr_nr)
  1399. intel_pmu_lbr_reset();
  1400. }
  1401. PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
  1402. static struct attribute *intel_arch3_formats_attr[] = {
  1403. &format_attr_event.attr,
  1404. &format_attr_umask.attr,
  1405. &format_attr_edge.attr,
  1406. &format_attr_pc.attr,
  1407. &format_attr_any.attr,
  1408. &format_attr_inv.attr,
  1409. &format_attr_cmask.attr,
  1410. &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
  1411. NULL,
  1412. };
static __initconst const struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_event,
	.disable		= intel_pmu_disable_event,
	.hw_config		= intel_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,
	.format_attrs		= intel_arch3_formats_attr,
	.cpu_prepare		= intel_pmu_cpu_prepare,
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
	.guest_get_msrs		= intel_guest_get_msrs,
	.flush_branch_stack	= intel_pmu_flush_branch_stack,
};

static __init void intel_clovertown_quirk(void)
{
	/*
	 * PEBS is unreliable due to:
	 *
	 *  AJ67  - PEBS may experience CPL leaks
	 *  AJ68  - PEBS PMI may be delayed by one event
	 *  AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
	 *  AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
	 *
	 * AJ67 could be worked around by restricting the OS/USR flags.
	 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
	 *
	 * AJ106 could possibly be worked around by not allowing LBR
	 *       usage from PEBS, including the fixup.
	 * AJ68  could possibly be worked around by always programming
	 *       a pebs_event_reset[0] value and coping with the lost events.
	 *
	 * But taken together it might just make sense to not enable PEBS on
	 * these chips.
	 */
	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
	x86_pmu.pebs = 0;
	x86_pmu.pebs_constraints = NULL;
}

static __init void intel_sandybridge_quirk(void)
{
	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
	x86_pmu.pebs = 0;
	x86_pmu.pebs_constraints = NULL;
}
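
/*
 * Architectural events enumerated by CPUID leaf 0xA: the table index
 * corresponds to the event's bit position in EBX, where a set bit means
 * the event is *not* available (see intel_arch_events_quirk() below).
 */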
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
	{ PERF_COUNT_HW_CPU_CYCLES,		"cpu cycles" },
	{ PERF_COUNT_HW_INSTRUCTIONS,		"instructions" },
	{ PERF_COUNT_HW_BUS_CYCLES,		"bus cycles" },
	{ PERF_COUNT_HW_CACHE_REFERENCES,	"cache references" },
	{ PERF_COUNT_HW_CACHE_MISSES,		"cache misses" },
	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS,	"branch instructions" },
	{ PERF_COUNT_HW_BRANCH_MISSES,		"branch misses" },
};

static __init void intel_arch_events_quirk(void)
{
	int bit;

	/* disable events that are reported as not present by cpuid */
	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
		printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
		       intel_arch_events_map[bit].name);
	}
}

static __init void intel_nehalem_quirk(void)
{
	union cpuid10_ebx ebx;

	ebx.full = x86_pmu.events_maskl;
	if (ebx.split.no_branch_misses_retired) {
		/*
		 * Erratum AAJ80 detected; we work around it by using
		 * the BR_MISP_EXEC.ANY event. This will over-count
		 * branch-misses, but it's still much better than the
		 * architectural event which is often completely bogus:
		 */
		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
		ebx.split.no_branch_misses_retired = 0;
		x86_pmu.events_maskl = ebx.full;
		printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
	}
}
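
/*
 * Probe CPUID leaf 0xA, select the core_pmu (v1) or intel_pmu (v2+)
 * template, record the counter geometry advertised by CPUID, register the
 * quirks and install the model-specific event tables.  Returns 0 on
 * success, -ENODEV if no supported PMU is found.
 */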
__init int intel_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	union cpuid10_ebx ebx;
	unsigned int unused;
	int version;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
		switch (boot_cpu_data.x86) {
		case 0x6:
			return p6_pmu_init();
		case 0xf:
			return p4_pmu_init();
		}
		return -ENODEV;
	}

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
		return -ENODEV;

	version = eax.split.version_id;
	if (version < 2)
		x86_pmu = core_pmu;
	else
		x86_pmu = intel_pmu;

	x86_pmu.version = version;
	x86_pmu.num_counters = eax.split.num_counters;
	x86_pmu.cntval_bits = eax.split.bit_width;
	x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;

	x86_pmu.events_maskl = ebx.full;
	x86_pmu.events_mask_len = eax.split.mask_length;

	/*
	 * Quirk: v2 perfmon does not report fixed-purpose events, so
	 * assume at least 3 events:
	 */
	if (version > 1)
		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);

	/*
	 * v2 and above have a perf capabilities MSR
	 */
	if (version > 1) {
		u64 capabilities;

		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
		x86_pmu.intel_cap.capabilities = capabilities;
	}

	intel_ds_init();

	x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */

	/*
	 * Install the hw-cache-events table:
	 */
	switch (boot_cpu_data.x86_model) {
	case 14: /* 65 nm core solo/duo, "Yonah" */
		pr_cont("Core events, ");
		break;

	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
		x86_add_quirk(intel_clovertown_quirk);
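		/* fall through: shares the Core2 setup below */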
	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
	case 29: /* six-core 45 nm xeon "Dunnington" */
		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_core();

		x86_pmu.event_constraints = intel_core2_event_constraints;
		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
		pr_cont("Core2 events, ");
		break;

	case 26: /* 45 nm nehalem, "Bloomfield" */
	case 30: /* 45 nm nehalem, "Lynnfield" */
	case 46: /* 45 nm nehalem-ex, "Beckton" */
		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_nehalem_event_constraints;
		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.extra_regs = intel_nehalem_extra_regs;

		/* UOPS_ISSUED.STALLED_CYCLES */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);

		x86_add_quirk(intel_nehalem_quirk);

		pr_cont("Nehalem events, ");
		break;

	case 28: /* Atom */
		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_atom();

		x86_pmu.event_constraints = intel_gen_event_constraints;
		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
		pr_cont("Atom events, ");
		break;

	case 37: /* 32 nm nehalem, "Clarkdale" */
	case 44: /* 32 nm nehalem, "Gulftown" */
	case 47: /* 32 nm Xeon E7 */
		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_westmere_event_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
		x86_pmu.extra_regs = intel_westmere_extra_regs;
		x86_pmu.er_flags |= ERF_HAS_RSP_1;

		/* UOPS_ISSUED.STALLED_CYCLES */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);

		pr_cont("Westmere events, ");
		break;

	case 42: /* SandyBridge */
		x86_add_quirk(intel_sandybridge_quirk);
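		/* fall through: shares the SandyBridge setup below */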
	case 45: /* SandyBridge, "Romley-EP" */
		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_snb();

		x86_pmu.event_constraints = intel_snb_event_constraints;
		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
		if (boot_cpu_data.x86_model == 45)
			x86_pmu.extra_regs = intel_snbep_extra_regs;
		else
			x86_pmu.extra_regs = intel_snb_extra_regs;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.er_flags |= ERF_HAS_RSP_1;
		x86_pmu.er_flags |= ERF_NO_HT_SHARING;

		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);

		pr_cont("SandyBridge events, ");
		break;

	default:
		switch (x86_pmu.version) {
		case 1:
			x86_pmu.event_constraints = intel_v1_event_constraints;
			pr_cont("generic architected perfmon v1, ");
			break;
		default:
			/*
			 * default constraints for v2 and up
			 */
			x86_pmu.event_constraints = intel_gen_event_constraints;
			pr_cont("generic architected perfmon, ");
			break;
		}
	}

	return 0;
}