trace_event_perf.c

/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
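
/*
 * One scratch buffer per perf recursion context (task, softirq, hardirq,
 * NMI), shared by every trace event that perf currently has in use.
 */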
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
        perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int      total_ref_count;
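
/*
 * Decide whether @p_event may use @tp_event. Raw sample payloads can
 * expose kernel internals, so they are reserved for CAP_SYS_ADMIN unless
 * the event is per-task and explicitly flagged TRACE_EVENT_FL_CAP_ANY.
 */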
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
{
        /* No tracing, just counting, so no obvious leak */
        if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
                return 0;

        /* Some events are ok to be traced by non-root users... */
        if (p_event->attach_state == PERF_ATTACH_TASK) {
                if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
                        return 0;
        }

        /*
         * ...otherwise raw tracepoint data can be a severe data leak,
         * only allow root to have these.
         */
        if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}
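
/*
 * The first perf user of an event id allocates the per-CPU list heads
 * that track active events and, if this is the first trace event used by
 * perf at all, the shared per-context scratch buffers; the event class is
 * then registered with perf via its ->reg() callback.
 */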
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
{
        struct hlist_head __percpu *list;
        int ret;
        int cpu;

        ret = perf_trace_event_perm(tp_event, p_event);
        if (ret)
                return ret;

        p_event->tp_event = tp_event;
        if (tp_event->perf_refcount++ > 0)
                return 0;

        ret = -ENOMEM;

        list = alloc_percpu(struct hlist_head);
        if (!list)
                goto fail;

        for_each_possible_cpu(cpu)
                INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

        tp_event->perf_events = list;

        if (!total_ref_count) {
                char __percpu *buf;
                int i;

                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        buf = (char __percpu *)alloc_percpu(perf_trace_t);
                        if (!buf)
                                goto fail;

                        perf_trace_buf[i] = buf;
                }
        }

        ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
        if (ret)
                goto fail;

        total_ref_count++;
        return 0;

fail:
        if (!total_ref_count) {
                int i;

                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        free_percpu(perf_trace_buf[i]);
                        perf_trace_buf[i] = NULL;
                }
        }

        if (!--tp_event->perf_refcount) {
                free_percpu(tp_event->perf_events);
                tp_event->perf_events = NULL;
        }

        return ret;
}
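
/*
 * Entry point from the perf core: resolve attr.config to a registered
 * ftrace event, pin its owning module, and set up perf state for it.
 */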
int perf_trace_init(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event;
        int event_id = p_event->attr.config;
        int ret = -EINVAL;

        mutex_lock(&event_mutex);
        list_for_each_entry(tp_event, &ftrace_events, list) {
                if (tp_event->event.type == event_id &&
                    tp_event->class && tp_event->class->reg &&
                    try_module_get(tp_event->mod)) {
                        ret = perf_trace_event_init(tp_event, p_event);
                        if (ret)
                                module_put(tp_event->mod);
                        break;
                }
        }
        mutex_unlock(&event_mutex);

        return ret;
}
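
/*
 * pmu ->add() callback: the event is being scheduled in on this CPU, so
 * hook it into the per-CPU list that the tracepoint probes walk. Unless
 * PERF_EF_START is set, the event is added in the stopped state.
 */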
int perf_trace_add(struct perf_event *p_event, int flags)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        struct hlist_head __percpu *pcpu_list;
        struct hlist_head *list;

        pcpu_list = tp_event->perf_events;
        if (WARN_ON_ONCE(!pcpu_list))
                return -EINVAL;

        if (!(flags & PERF_EF_START))
                p_event->hw.state = PERF_HES_STOPPED;

        list = this_cpu_ptr(pcpu_list);
        hlist_add_head_rcu(&p_event->hlist_entry, list);

        return 0;
}
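
/*
 * pmu ->del() counterpart: unlink under RCU; concurrent probes may still
 * see the event until a grace period has elapsed.
 */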
void perf_trace_del(struct perf_event *p_event, int flags)
{
        hlist_del_rcu(&p_event->hlist_entry);
}
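
/*
 * Drop the reference taken in perf_trace_event_init(). The last user of
 * an event id unregisters the class from perf, and once no trace event is
 * used by perf at all, the shared per-context buffers are freed as well.
 */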
void perf_trace_destroy(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        int i;

        mutex_lock(&event_mutex);
        if (--tp_event->perf_refcount > 0)
                goto out;

        tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);

        /*
         * Ensure our callback won't be called anymore. The buffers
         * will be freed after that.
         */
        tracepoint_synchronize_unregister();

        free_percpu(tp_event->perf_events);
        tp_event->perf_events = NULL;

        if (!--total_ref_count) {
                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        free_percpu(perf_trace_buf[i]);
                        perf_trace_buf[i] = NULL;
                }
        }

out:
        module_put(tp_event->mod);
        mutex_unlock(&event_mutex);
}
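
/*
 * Reserve space in the current recursion context's scratch buffer and
 * initialize the generic trace entry header in it. Returns the buffer, or
 * NULL if this context is already busy (recursion). On success the caller
 * owns the recursion context and is expected to release it by submitting
 * the record, e.g. through perf_trace_buf_submit().
 */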
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
                                       struct pt_regs *regs, int *rctxp)
{
        struct trace_entry *entry;
        unsigned long flags;
        char *raw_data;
        int pc;

        BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

        pc = preempt_count();

        *rctxp = perf_swevent_get_recursion_context();
        if (*rctxp < 0)
                return NULL;

        raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

        /* zero the dead bytes from the alignment so we don't leak stack to user */
        memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

        entry = (struct trace_entry *)raw_data;
        local_save_flags(flags);
        tracing_generic_entry_update(entry, flags, pc);
        entry->type = type;

        return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);