op_model_ppro.c

/*
 * @file op_model_ppro.c
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 * @author Robert Richter <robert.richter@amd.com>
 */
#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>

#include "op_x86_model.h"
#include "op_counter.h"

static int num_counters = 2;
static int counter_width = 32;

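/*
 * Event-select bits this model treats as reserved: the upper 32 bits of
 * the 64-bit MSR plus bit 21.  ppro_setup_ctrs() keeps their current
 * values and clears everything else before writing a new configuration.
 */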
#define MSR_PPRO_EVENTSEL_RESERVED	((0xFFFFFFFFULL<<32)|(1ULL<<21))

static u64 reset_value[OP_MAX_COUNTER];
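
/*
 * Release the perfctr/evntsel MSR reservations taken in
 * ppro_fill_in_addresses().  Counters that were never reserved have a
 * zero address and are skipped.
 */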
static void ppro_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!msrs->counters[i].addr)
			continue;
		release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
		release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
	}
}
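
/*
 * Reserve the counter/event-select MSR pair for each counter and record
 * the MSR addresses.  A counter whose MSRs cannot be reserved keeps
 * addr == 0 and is ignored later; if such a counter was enabled by the
 * user, warn, undo all reservations and return -EBUSY.
 */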
static int ppro_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
			goto fail;
		if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) {
			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
			goto fail;
		}
		/* both registers must be reserved */
		msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
		msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
		continue;
	fail:
		if (!counter_config[i].enabled)
			continue;
		op_x86_warn_reserved(i);
		ppro_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}
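
/*
 * Program the counters.  When architectural perfmon is available, the
 * real counter width is taken from CPUID 0xA.  All event-select MSRs are
 * cleared (preserving reserved bits) and every enabled counter is armed
 * with -count, i.e. left 'count' increments away from wrapping; the wrap
 * clears the counter's top bit, which ppro_check_ctrs() uses as the
 * overflow test.
 */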
static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
			    struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
		union cpuid10_eax eax;
		eax.full = cpuid_eax(0xa);

		/*
		 * For Core2 (family 6, model 15), don't reset the
		 * counter width:
		 */
		if (!(eax.split.version_id == 0 &&
			__this_cpu_read(cpu_info.x86) == 6 &&
				__this_cpu_read(cpu_info.x86_model) == 15)) {
			if (counter_width < eax.split.bit_width)
				counter_width = eax.split.bit_width;
		}
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (!msrs->controls[i].addr)
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			op_x86_warn_in_use(i);
		val &= model->reserved;
		wrmsrl(msrs->controls[i].addr, val);
		/*
		 * avoid a false detection of ctr overflows in NMI
		 * handler
		 */
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		if (counter_config[i].enabled && msrs->counters[i].addr) {
			reset_value[i] = counter_config[i].count;
			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
			rdmsrl(msrs->controls[i].addr, val);
			val &= model->reserved;
			val |= op_x86_get_ctrl(model, &counter_config[i]);
			wrmsrl(msrs->controls[i].addr, val);
		} else {
			reset_value[i] = 0;
		}
	}
}
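
/*
 * NMI callback: a counter whose top bit is still set has not wrapped yet.
 * For every counter that has wrapped, record a sample and re-arm it with
 * -reset_value.
 */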
static int ppro_check_ctrs(struct pt_regs * const regs,
			   struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		if (val & (1ULL << (counter_width - 1)))
			continue;
		oprofile_add_sample(regs, i);
		wrmsrl(msrs->counters[i].addr, -reset_value[i]);
	}

	/*
	 * Only P6-based Pentium M needs to re-unmask the apic vector, but
	 * it doesn't hurt the other P6 variants.
	 */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/*
	 * We can't work out if we really handled an interrupt.  We might
	 * have caught a *second* counter just after it overflowed; the
	 * interrupt for this counter then arrives and we don't find a
	 * counter that's overflowed, so we would return 0 and get dazed
	 * + confused.  Instead we always assume we found an overflow.
	 * This sucks.
	 */
	return 1;
}
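
/* Set the enable bit in the event-select MSR of every active counter. */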
static void ppro_start(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (reset_value[i]) {
			rdmsrl(msrs->controls[i].addr, val);
			val |= ARCH_PERFMON_EVENTSEL_ENABLE;
			wrmsrl(msrs->controls[i].addr, val);
		}
	}
}
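
/* Clear the enable bit in the event-select MSR of every active counter. */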
static void ppro_stop(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}
}

struct op_x86_model_spec op_ppro_spec = {
	.num_counters		= 2,
	.num_controls		= 2,
	.reserved		= MSR_PPRO_EVENTSEL_RESERVED,
	.fill_in_addresses	= &ppro_fill_in_addresses,
	.setup_ctrs		= &ppro_setup_ctrs,
	.check_ctrs		= &ppro_check_ctrs,
	.start			= &ppro_start,
	.stop			= &ppro_stop,
	.shutdown		= &ppro_shutdown
};

/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core1+) have support for architectural
 * events described in CPUID 0xA.  See the IA32 SDM Vol3b.18 for details.
 * The advantage of this is that it can be done without knowing about
 * the specific CPU.
 */
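
/*
 * Query CPUID 0xA for the number of general-purpose counters (with the
 * perfmon2 workaround for family 6 model 15 BIOSes that report version 0)
 * and clamp it to OP_MAX_COUNTER.
 */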
static void arch_perfmon_setup_counters(void)
{
	union cpuid10_eax eax;

	eax.full = cpuid_eax(0xa);

	/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
	if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model == 15) {
		eax.split.version_id = 2;
		eax.split.num_counters = 2;
		eax.split.bit_width = 40;
	}

	num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER);

	op_arch_perfmon_spec.num_counters = num_counters;
	op_arch_perfmon_spec.num_controls = num_counters;
}

static int arch_perfmon_init(struct oprofile_operations *ignore)
{
	arch_perfmon_setup_counters();
	return 0;
}

struct op_x86_model_spec op_arch_perfmon_spec = {
	.reserved		= MSR_PPRO_EVENTSEL_RESERVED,
	.init			= &arch_perfmon_init,
	/* num_counters/num_controls filled in at runtime */
	.fill_in_addresses	= &ppro_fill_in_addresses,
	/* user space does the cpuid check for available events */
	.setup_ctrs		= &ppro_setup_ctrs,
	.check_ctrs		= &ppro_check_ctrs,
	.start			= &ppro_start,
	.stop			= &ppro_stop,
	.shutdown		= &ppro_shutdown
};