/*
 * @file op_model_amd.c
 * athlon / K7 / K8 / Family 10h model-specific MSR operations
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Robert Richter <robert.richter@amd.com>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Jason Yeh <jason.yeh@amd.com>
 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/percpu.h>

#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>

#include "op_x86_model.h"
#include "op_counter.h"

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
#define NUM_VIRT_COUNTERS	32
#else
#define NUM_VIRT_COUNTERS	0
#endif

#define OP_EVENT_MASK		0x0FFF
#define OP_CTR_OVERFLOW		(1ULL << 31)
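/*
 * Event-select MSR bits that are preserved (via val &= model->reserved)
 * whenever the control registers are rewritten below.
 */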
#define MSR_AMD_EVENTSEL_RESERVED	((0xFFFFFCF0ULL << 32) | (1ULL << 21))
static int num_counters;
static unsigned long reset_value[OP_MAX_COUNTER];

#define IBS_FETCH_SIZE			6
#define IBS_OP_SIZE			12

static u32 ibs_caps;

struct ibs_config {
	unsigned long op_enabled;
	unsigned long fetch_enabled;
	unsigned long max_cnt_fetch;
	unsigned long max_cnt_op;
	unsigned long rand_en;
	unsigned long dispatched_ops;
	unsigned long branch_target;
};
struct ibs_state {
	u64		ibs_op_ctl;
	int		branch_target;
	unsigned long	sample_size;
};
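/*
 * ibs_config is filled in from userland through the oprofilefs files
 * created in setup_ibs_files(); ibs_state tracks the currently active
 * IBS sampling session.
 */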
static struct ibs_config ibs_config;
static struct ibs_state ibs_state;

/*
 * IBS randomization macros
 */
#define IBS_RANDOM_BITS			12
#define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
#define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))
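/*
 * IBS_RANDOM_MAXCNT_OFFSET is half of the randomized range expressed
 * in IbsOpMaxCnt units: the field counts in steps of 16 events, so
 * (2^IBS_RANDOM_BITS / 2) / 16 = 2^(IBS_RANDOM_BITS - 5) = 128.
 */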
/*
 * 16-bit Linear Feedback Shift Register (LFSR)
 *
 * Feedback polynomial = X^16 + X^14 + X^13 + X^11 + 1
 */
static unsigned int lfsr_random(void)
{
	/* any non-zero seed works; an all-zero LFSR state never changes */
	static unsigned int lfsr_value = 0xF00D;
	unsigned int bit;

	/* Compute next bit to shift in */
	bit = ((lfsr_value >> 0) ^
	       (lfsr_value >> 2) ^
	       (lfsr_value >> 3) ^
	       (lfsr_value >> 5)) & 0x0001;

	/* Advance to next register value */
	lfsr_value = (lfsr_value >> 1) | (bit << 15);

	return lfsr_value;
}
/*
 * IBS software randomization
 *
 * The IBS periodic op counter is randomized in software. The lower 12
 * bits of the 20 bit counter are randomized. IbsOpCurCnt is
 * initialized with a 12 bit random value.
 */
static inline u64 op_amd_randomize_ibs_op(u64 val)
{
	unsigned int random = lfsr_random();

	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
		/*
		 * Work around hardware that cannot write to IbsOpCurCnt.
		 *
		 * Randomize the lower 8 bits of the 16 bit
		 * IbsOpMaxCnt [15:0] value in the range of -128 to
		 * +127 by adding/subtracting an offset to the
		 * maximum count (IbsOpMaxCnt).
		 *
		 * To avoid over or underflows and protect upper bits
		 * starting at bit 16, the initial value for
		 * IbsOpMaxCnt must fit in the range from 0x0081 to
		 * 0xff80.
		 */
		val += (s8)(random >> 4);
	else
		val |= (u64)(random & IBS_RANDOM_MASK) << 32;

	return val;
}
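/*
 * Worked example (illustration only): if lfsr_random() returns 0x0ACE,
 * the workaround path adds (s8)(0x0ACE >> 4) = (s8)0xAC = -84 to the
 * maximum count, while the IbsOpCurCnt path ORs 0x0ACE into bits
 * [43:32] of the control value.
 */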
static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
		  struct op_msrs const * const msrs)
{
	u64 val, ctl;
	struct op_entry entry;

	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled) {
		rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		if (ctl & IBS_FETCH_VAL) {
			rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
			oprofile_write_reserve(&entry, regs, val,
					       IBS_FETCH_CODE, IBS_FETCH_SIZE);
			oprofile_add_data64(&entry, val);
			oprofile_add_data64(&entry, ctl);
			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
			oprofile_add_data64(&entry, val);
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
			ctl |= IBS_FETCH_ENABLE;
			wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		}
	}

	if (ibs_config.op_enabled) {
		rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
		if (ctl & IBS_OP_VAL) {
			rdmsrl(MSR_AMD64_IBSOPRIP, val);
			oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
					       ibs_state.sample_size);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA2, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA3, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCLINAD, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
			oprofile_add_data64(&entry, val);
			if (ibs_state.branch_target) {
				rdmsrl(MSR_AMD64_IBSBRTARGET, val);
				oprofile_add_data(&entry, (unsigned long)val);
			}
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
		}
	}
}
static inline void op_amd_start_ibs(void)
{
	u64 val;

	if (!ibs_caps)
		return;

	memset(&ibs_state, 0, sizeof(ibs_state));

	/*
	 * Note: Since the max count settings may be out of range, we
	 * write back the actual values used so that userland can read
	 * them.
	 */

	if (ibs_config.fetch_enabled) {
		val = ibs_config.max_cnt_fetch >> 4;
		val = min(val, IBS_FETCH_MAX_CNT);
		ibs_config.max_cnt_fetch = val << 4;
		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
		val |= IBS_FETCH_ENABLE;
		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
	}

	if (ibs_config.op_enabled) {
		val = ibs_config.max_cnt_op >> 4;
		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
			/*
			 * IbsOpCurCnt not supported. See
			 * op_amd_randomize_ibs_op() for details.
			 */
			val = clamp(val, 0x0081ULL, 0xFF80ULL);
			ibs_config.max_cnt_op = val << 4;
		} else {
			/*
			 * The start value is randomized with a
			 * positive offset; compensate for it with
			 * half of the randomized range. Also avoid
			 * underflows.
			 */
			val += IBS_RANDOM_MAXCNT_OFFSET;
			if (ibs_caps & IBS_CAPS_OPCNTEXT)
				val = min(val, IBS_OP_MAX_CNT_EXT);
			else
				val = min(val, IBS_OP_MAX_CNT);
			ibs_config.max_cnt_op =
				(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
		}
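		/*
		 * With the OPCNTEXT extension, MaxCnt bits above [15:0]
		 * live in IbsOpCtl[26:20]; splice the extended bits into
		 * place (a no-op when the extension is absent).
		 */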
		val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
		val |= IBS_OP_ENABLE;
		ibs_state.ibs_op_ctl = val;
		ibs_state.sample_size = IBS_OP_SIZE;
		if (ibs_config.branch_target) {
			ibs_state.branch_target = 1;
			ibs_state.sample_size++;
		}
		val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
		wrmsrl(MSR_AMD64_IBSOPCTL, val);
	}
}
static void op_amd_stop_ibs(void)
{
	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled)
		/* clear max count and enable */
		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);

	if (ibs_config.op_enabled)
		/* clear max count and enable */
		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
			       struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}

#endif
/* functions for op_amd_spec */

static void op_amd_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!msrs->counters[i].addr)
			continue;
		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
	}
}
static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
			goto fail;
		if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
			goto fail;
		}
		/* both registers must be reserved */
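		/*
		 * Family 15h interleaves its perf MSRs as CTL0, CTR0,
		 * CTL1, CTR1, ..., hence the stride of 2 below.
		 */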
		if (num_counters == AMD64_NUM_COUNTERS_F15H) {
			msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
		} else {
			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
		}
		continue;
	fail:
		if (!counter_config[i].enabled)
			continue;
		op_x86_warn_reserved(i);
		op_amd_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}
static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
			      struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* setup reset_value */
	for (i = 0; i < OP_MAX_COUNTER; ++i) {
		if (counter_config[i].enabled
		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
			reset_value[i] = counter_config[i].count;
		else
			reset_value[i] = 0;
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (!msrs->controls[i].addr)
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			op_x86_warn_in_use(i);
		val &= model->reserved;
		wrmsrl(msrs->controls[i].addr, val);
		/*
		 * avoid a false detection of ctr overflows in NMI
		 * handler
		 */
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		/*
		 * Program the counter with the negated count; it counts
		 * up and overflows after reset_value[virt] events.
		 */
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
		/* setup control registers */
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}
static int op_amd_check_ctrs(struct pt_regs * const regs,
			     struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		/* bit is clear if overflowed: */
		if (val & OP_CTR_OVERFLOW)
			continue;
		oprofile_add_sample(regs, virt);
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
	}

	op_amd_handle_ibs(regs, msrs);

	/* See op_model_ppro.c: always report the NMI as handled */
	return 1;
}
static void op_amd_start(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_start_ibs();
}
static void op_amd_stop(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/*
	 * Subtle: stop on all counters to avoid race with setting our
	 * pm callback
	 */
	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_stop_ibs();
}
/*
 * check and reserve APIC extended interrupt LVT offset for IBS if
 * available
 */
static void init_ibs(void)
{
	ibs_caps = get_ibs_caps();

	if (!ibs_caps)
		return;

	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
}
static int (*create_arch_files)(struct super_block *sb, struct dentry *root);

static int setup_ibs_files(struct super_block *sb, struct dentry *root)
{
	struct dentry *dir;
	int ret = 0;

	/* architecture specific files */
	if (create_arch_files)
		ret = create_arch_files(sb, root);

	if (ret)
		return ret;
	if (!ibs_caps)
		return ret;

	/* model specific files */

	/* setup some reasonable defaults */
	memset(&ibs_config, 0, sizeof(ibs_config));
	ibs_config.max_cnt_fetch = 250000;
	ibs_config.max_cnt_op = 250000;

	if (ibs_caps & IBS_CAPS_FETCHSAM) {
		dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
		oprofilefs_create_ulong(sb, dir, "enable",
					&ibs_config.fetch_enabled);
		oprofilefs_create_ulong(sb, dir, "max_count",
					&ibs_config.max_cnt_fetch);
		oprofilefs_create_ulong(sb, dir, "rand_enable",
					&ibs_config.rand_en);
	}

	if (ibs_caps & IBS_CAPS_OPSAM) {
		dir = oprofilefs_mkdir(sb, root, "ibs_op");
		oprofilefs_create_ulong(sb, dir, "enable",
					&ibs_config.op_enabled);
		oprofilefs_create_ulong(sb, dir, "max_count",
					&ibs_config.max_cnt_op);
		if (ibs_caps & IBS_CAPS_OPCNT)
			oprofilefs_create_ulong(sb, dir, "dispatched_ops",
						&ibs_config.dispatched_ops);
		if (ibs_caps & IBS_CAPS_BRNTRGT)
			oprofilefs_create_ulong(sb, dir, "branch_target",
						&ibs_config.branch_target);
	}

	return 0;
}
struct op_x86_model_spec op_amd_spec;

static int op_amd_init(struct oprofile_operations *ops)
{
	init_ibs();
	create_arch_files = ops->create_files;
	ops->create_files = setup_ibs_files;

	if (boot_cpu_data.x86 == 0x15)
		num_counters = AMD64_NUM_COUNTERS_F15H;
	else
		num_counters = AMD64_NUM_COUNTERS;

	op_amd_spec.num_counters = num_counters;
	op_amd_spec.num_controls = num_counters;
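	/*
	 * Without CONFIG_OPROFILE_EVENT_MULTIPLEX, NUM_VIRT_COUNTERS is 0
	 * and the virtual counters collapse onto the physical ones.
	 */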
	op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);

	return 0;
}
struct op_x86_model_spec op_amd_spec = {
	/* num_counters/num_controls filled in at runtime */
	.reserved		= MSR_AMD_EVENTSEL_RESERVED,
	.event_mask		= OP_EVENT_MASK,
	.init			= op_amd_init,
	.fill_in_addresses	= &op_amd_fill_in_addresses,
	.setup_ctrs		= &op_amd_setup_ctrs,
	.check_ctrs		= &op_amd_check_ctrs,
	.start			= &op_amd_start,
	.stop			= &op_amd_stop,
	.shutdown		= &op_amd_shutdown,
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
	.switch_ctrl		= &op_mux_switch_ctrl,
#endif
};