trace_syscalls.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <trace/syscall.h>
  3. #include <trace/events/syscalls.h>
  4. #include <linux/syscalls.h>
  5. #include <linux/slab.h>
  6. #include <linux/kernel.h>
  7. #include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
  8. #include <linux/ftrace.h>
  9. #include <linux/perf_event.h>
  10. #include <asm/syscall.h>
  11. #include "trace_output.h"
  12. #include "trace.h"
  /* Held around every probe registration/unregistration done in this file. */
  static DEFINE_MUTEX(syscall_trace_lock);

  /* Forward declarations; both callbacks are defined later in this file. */
  static int syscall_enter_register(struct trace_event_call *event,
  				  enum trace_reg type, void *data);
  static int syscall_exit_register(struct trace_event_call *event,
  				 enum trace_reg type, void *data);
  18. static struct list_head *
  19. syscall_get_enter_fields(struct trace_event_call *call)
  20. {
  21. struct syscall_metadata *entry = call->data;
  22. return &entry->enter_fields;
  23. }
  /*
   * Start/stop markers of the array of syscall metadata pointers.
   * Both symbols are extern: they are not defined in this file.
   */
  extern struct syscall_metadata *__start_syscalls_metadata[];
  extern struct syscall_metadata *__stop_syscalls_metadata[];

  /* Lookup table indexed by syscall number; allocated by init_ftrace_syscalls(). */
  static struct syscall_metadata **syscalls_metadata;
  #ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
  /*
   * Decide whether a symbol name and a syscall metadata name refer to the
   * same syscall.
   *
   * @sym:  symbol name (NUL-terminated).
   * @name: syscall name to compare against (NUL-terminated).
   *
   * Only compare after the "sys" prefix. Archs that use syscall wrappers
   * may have syscall symbol aliases prefixed with ".SyS" or ".sys" instead
   * of "sys", leading to an unwanted mismatch.
   *
   * Returns true when everything after the first three characters is
   * identical, false otherwise.
   */
  static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
  {
  	/*
  	 * A string shorter than the three-character prefix cannot match, and
  	 * skipping three characters would step over its terminator and make
  	 * strcmp() read bytes that are not part of the string.
  	 */
  	if (!sym[0] || !sym[1] || !sym[2])
  		return false;
  	if (!name[0] || !name[1] || !name[2])
  		return false;

  	return !strcmp(sym + 3, name + 3);
  }
  #endif
  #ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
  /*
   * Some architectures that allow 32-bit applications to run on a
   * 64-bit kernel do not map the syscalls for 32-bit tasks the same
   * way they do for 64-bit tasks.
   *
   * *cough*x86*cough*
   *
   * In such a case, instead of reporting the wrong syscalls, simply
   * ignore them.
   *
   * For an arch to ignore the compat syscalls it needs to define
   * ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as the function
   * arch_trace_is_compat_syscall() to let the tracing system know
   * that it should ignore them.
   *
   * Returns -1 for a compat syscall, otherwise the value of
   * syscall_get_nr().
   */
  static int
  trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
  {
  	return unlikely(arch_trace_is_compat_syscall(regs)) ?
  		-1 : syscall_get_nr(task, regs);
  }
  #else
  /* No compat filtering configured: plain pass-through to syscall_get_nr(). */
  static inline int
  trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
  {
  	int nr = syscall_get_nr(task, regs);

  	return nr;
  }
  #endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
  69. static __init struct syscall_metadata *
  70. find_syscall_meta(unsigned long syscall)
  71. {
  72. struct syscall_metadata **start;
  73. struct syscall_metadata **stop;
  74. char str[KSYM_SYMBOL_LEN];
  75. start = __start_syscalls_metadata;
  76. stop = __stop_syscalls_metadata;
  77. kallsyms_lookup(syscall, NULL, NULL, NULL, str);
  78. if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
  79. return NULL;
  80. for ( ; start < stop; start++) {
  81. if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
  82. return *start;
  83. }
  84. return NULL;
  85. }
  86. static struct syscall_metadata *syscall_nr_to_meta(int nr)
  87. {
  88. if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
  89. return NULL;
  90. return syscalls_metadata[nr];
  91. }
  92. const char *get_syscall_name(int syscall)
  93. {
  94. struct syscall_metadata *entry;
  95. entry = syscall_nr_to_meta(syscall);
  96. if (!entry)
  97. return NULL;
  98. return entry->name;
  99. }
  100. static enum print_line_t
  101. print_syscall_enter(struct trace_iterator *iter, int flags,
  102. struct trace_event *event)
  103. {
  104. struct trace_array *tr = iter->tr;
  105. struct trace_seq *s = &iter->seq;
  106. struct trace_entry *ent = iter->ent;
  107. struct syscall_trace_enter *trace;
  108. struct syscall_metadata *entry;
  109. int i, syscall;
  110. trace = (typeof(trace))ent;
  111. syscall = trace->nr;
  112. entry = syscall_nr_to_meta(syscall);
  113. if (!entry)
  114. goto end;
  115. if (entry->enter_event->event.type != ent->type) {
  116. WARN_ON_ONCE(1);
  117. goto end;
  118. }
  119. trace_seq_printf(s, "%s(", entry->name);
  120. for (i = 0; i < entry->nb_args; i++) {
  121. if (trace_seq_has_overflowed(s))
  122. goto end;
  123. /* parameter types */
  124. if (tr->trace_flags & TRACE_ITER_VERBOSE)
  125. trace_seq_printf(s, "%s ", entry->types[i]);
  126. /* parameter values */
  127. trace_seq_printf(s, "%s: %lx%s", entry->args[i],
  128. trace->args[i],
  129. i == entry->nb_args - 1 ? "" : ", ");
  130. }
  131. trace_seq_putc(s, ')');
  132. end:
  133. trace_seq_putc(s, '\n');
  134. return trace_handle_return(s);
  135. }
  136. static enum print_line_t
  137. print_syscall_exit(struct trace_iterator *iter, int flags,
  138. struct trace_event *event)
  139. {
  140. struct trace_seq *s = &iter->seq;
  141. struct trace_entry *ent = iter->ent;
  142. struct syscall_trace_exit *trace;
  143. int syscall;
  144. struct syscall_metadata *entry;
  145. trace = (typeof(trace))ent;
  146. syscall = trace->nr;
  147. entry = syscall_nr_to_meta(syscall);
  148. if (!entry) {
  149. trace_seq_putc(s, '\n');
  150. goto out;
  151. }
  152. if (entry->exit_event->event.type != ent->type) {
  153. WARN_ON_ONCE(1);
  154. return TRACE_TYPE_UNHANDLED;
  155. }
  156. trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
  157. trace->ret);
  158. out:
  159. return trace_handle_return(s);
  160. }
  /*
   * Only declared here; no definition is visible in this file.
   * NOTE(review): presumably left undefined on purpose so a size
   * mismatch in SYSCALL_FIELD() fails at link time - confirm.
   */
  extern char *__bad_type_size(void);

  /*
   * Expands to the (type, name, offset, size, is_signed) argument run of
   * trace_define_field() for member @member of a local variable that
   * must be called 'trace'. When sizeof(@ctype) differs from the member
   * size, the type argument becomes a call to __bad_type_size().
   */
  #define SYSCALL_FIELD(ctype, member, label)				\
  	sizeof(ctype) != sizeof(trace.member) ?				\
  		__bad_type_size() :					\
  		#ctype, #label, offsetof(typeof(trace), member),	\
  		sizeof(trace.member), is_signed_type(ctype)
  167. static int __init
  168. __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
  169. {
  170. int i;
  171. int pos = 0;
  172. /* When len=0, we just calculate the needed length */
  173. #define LEN_OR_ZERO (len ? len - pos : 0)
  174. pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
  175. for (i = 0; i < entry->nb_args; i++) {
  176. pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
  177. entry->args[i], sizeof(unsigned long),
  178. i == entry->nb_args - 1 ? "" : ", ");
  179. }
  180. pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
  181. for (i = 0; i < entry->nb_args; i++) {
  182. pos += snprintf(buf + pos, LEN_OR_ZERO,
  183. ", ((unsigned long)(REC->%s))", entry->args[i]);
  184. }
  185. #undef LEN_OR_ZERO
  186. /* return the length of print_fmt */
  187. return pos;
  188. }
  189. static int __init set_syscall_print_fmt(struct trace_event_call *call)
  190. {
  191. char *print_fmt;
  192. int len;
  193. struct syscall_metadata *entry = call->data;
  194. if (entry->enter_event != call) {
  195. call->print_fmt = "\"0x%lx\", REC->ret";
  196. return 0;
  197. }
  198. /* First: called with 0 length to calculate the needed length */
  199. len = __set_enter_print_fmt(entry, NULL, 0);
  200. print_fmt = kmalloc(len + 1, GFP_KERNEL);
  201. if (!print_fmt)
  202. return -ENOMEM;
  203. /* Second: actually write the @print_fmt */
  204. __set_enter_print_fmt(entry, print_fmt, len + 1);
  205. call->print_fmt = print_fmt;
  206. return 0;
  207. }
  208. static void __init free_syscall_print_fmt(struct trace_event_call *call)
  209. {
  210. struct syscall_metadata *entry = call->data;
  211. if (entry->enter_event == call)
  212. kfree(call->print_fmt);
  213. }
  214. static int __init syscall_enter_define_fields(struct trace_event_call *call)
  215. {
  216. struct syscall_trace_enter trace;
  217. struct syscall_metadata *meta = call->data;
  218. int ret;
  219. int i;
  220. int offset = offsetof(typeof(trace), args);
  221. ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
  222. FILTER_OTHER);
  223. if (ret)
  224. return ret;
  225. for (i = 0; i < meta->nb_args; i++) {
  226. ret = trace_define_field(call, meta->types[i],
  227. meta->args[i], offset,
  228. sizeof(unsigned long), 0,
  229. FILTER_OTHER);
  230. offset += sizeof(unsigned long);
  231. }
  232. return ret;
  233. }
  234. static int __init syscall_exit_define_fields(struct trace_event_call *call)
  235. {
  236. struct syscall_trace_exit trace;
  237. int ret;
  238. ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
  239. FILTER_OTHER);
  240. if (ret)
  241. return ret;
  242. ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
  243. FILTER_OTHER);
  244. return ret;
  245. }
  246. static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
  247. {
  248. struct trace_array *tr = data;
  249. struct trace_event_file *trace_file;
  250. struct syscall_trace_enter *entry;
  251. struct syscall_metadata *sys_data;
  252. struct ring_buffer_event *event;
  253. struct ring_buffer *buffer;
  254. unsigned long irq_flags;
  255. int pc;
  256. int syscall_nr;
  257. int size;
  258. syscall_nr = trace_get_syscall_nr(current, regs);
  259. if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
  260. return;
  261. /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
  262. trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
  263. if (!trace_file)
  264. return;
  265. if (trace_trigger_soft_disabled(trace_file))
  266. return;
  267. sys_data = syscall_nr_to_meta(syscall_nr);
  268. if (!sys_data)
  269. return;
  270. size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
  271. local_save_flags(irq_flags);
  272. pc = preempt_count();
  273. buffer = tr->trace_buffer.buffer;
  274. event = trace_buffer_lock_reserve(buffer,
  275. sys_data->enter_event->event.type, size, irq_flags, pc);
  276. if (!event)
  277. return;
  278. entry = ring_buffer_event_data(event);
  279. entry->nr = syscall_nr;
  280. syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
  281. event_trigger_unlock_commit(trace_file, buffer, event, entry,
  282. irq_flags, pc);
  283. }
  284. static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
  285. {
  286. struct trace_array *tr = data;
  287. struct trace_event_file *trace_file;
  288. struct syscall_trace_exit *entry;
  289. struct syscall_metadata *sys_data;
  290. struct ring_buffer_event *event;
  291. struct ring_buffer *buffer;
  292. unsigned long irq_flags;
  293. int pc;
  294. int syscall_nr;
  295. syscall_nr = trace_get_syscall_nr(current, regs);
  296. if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
  297. return;
  298. /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
  299. trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
  300. if (!trace_file)
  301. return;
  302. if (trace_trigger_soft_disabled(trace_file))
  303. return;
  304. sys_data = syscall_nr_to_meta(syscall_nr);
  305. if (!sys_data)
  306. return;
  307. local_save_flags(irq_flags);
  308. pc = preempt_count();
  309. buffer = tr->trace_buffer.buffer;
  310. event = trace_buffer_lock_reserve(buffer,
  311. sys_data->exit_event->event.type, sizeof(*entry),
  312. irq_flags, pc);
  313. if (!event)
  314. return;
  315. entry = ring_buffer_event_data(event);
  316. entry->nr = syscall_nr;
  317. entry->ret = syscall_get_return_value(current, regs);
  318. event_trigger_unlock_commit(trace_file, buffer, event, entry,
  319. irq_flags, pc);
  320. }
  321. static int reg_event_syscall_enter(struct trace_event_file *file,
  322. struct trace_event_call *call)
  323. {
  324. struct trace_array *tr = file->tr;
  325. int ret = 0;
  326. int num;
  327. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  328. if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
  329. return -ENOSYS;
  330. mutex_lock(&syscall_trace_lock);
  331. if (!tr->sys_refcount_enter)
  332. ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
  333. if (!ret) {
  334. rcu_assign_pointer(tr->enter_syscall_files[num], file);
  335. tr->sys_refcount_enter++;
  336. }
  337. mutex_unlock(&syscall_trace_lock);
  338. return ret;
  339. }
  340. static void unreg_event_syscall_enter(struct trace_event_file *file,
  341. struct trace_event_call *call)
  342. {
  343. struct trace_array *tr = file->tr;
  344. int num;
  345. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  346. if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
  347. return;
  348. mutex_lock(&syscall_trace_lock);
  349. tr->sys_refcount_enter--;
  350. RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
  351. if (!tr->sys_refcount_enter)
  352. unregister_trace_sys_enter(ftrace_syscall_enter, tr);
  353. mutex_unlock(&syscall_trace_lock);
  354. }
  355. static int reg_event_syscall_exit(struct trace_event_file *file,
  356. struct trace_event_call *call)
  357. {
  358. struct trace_array *tr = file->tr;
  359. int ret = 0;
  360. int num;
  361. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  362. if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
  363. return -ENOSYS;
  364. mutex_lock(&syscall_trace_lock);
  365. if (!tr->sys_refcount_exit)
  366. ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
  367. if (!ret) {
  368. rcu_assign_pointer(tr->exit_syscall_files[num], file);
  369. tr->sys_refcount_exit++;
  370. }
  371. mutex_unlock(&syscall_trace_lock);
  372. return ret;
  373. }
  374. static void unreg_event_syscall_exit(struct trace_event_file *file,
  375. struct trace_event_call *call)
  376. {
  377. struct trace_array *tr = file->tr;
  378. int num;
  379. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  380. if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
  381. return;
  382. mutex_lock(&syscall_trace_lock);
  383. tr->sys_refcount_exit--;
  384. RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
  385. if (!tr->sys_refcount_exit)
  386. unregister_trace_sys_exit(ftrace_syscall_exit, tr);
  387. mutex_unlock(&syscall_trace_lock);
  388. }
  389. static int __init init_syscall_trace(struct trace_event_call *call)
  390. {
  391. int id;
  392. int num;
  393. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  394. if (num < 0 || num >= NR_syscalls) {
  395. pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
  396. ((struct syscall_metadata *)call->data)->name);
  397. return -ENOSYS;
  398. }
  399. if (set_syscall_print_fmt(call) < 0)
  400. return -ENOMEM;
  401. id = trace_event_raw_init(call);
  402. if (id < 0) {
  403. free_syscall_print_fmt(call);
  404. return id;
  405. }
  406. return id;
  407. }
  408. struct trace_event_functions enter_syscall_print_funcs = {
  409. .trace = print_syscall_enter,
  410. };
  411. struct trace_event_functions exit_syscall_print_funcs = {
  412. .trace = print_syscall_exit,
  413. };
  414. struct trace_event_class __refdata event_class_syscall_enter = {
  415. .system = "syscalls",
  416. .reg = syscall_enter_register,
  417. .define_fields = syscall_enter_define_fields,
  418. .get_fields = syscall_get_enter_fields,
  419. .raw_init = init_syscall_trace,
  420. };
  421. struct trace_event_class __refdata event_class_syscall_exit = {
  422. .system = "syscalls",
  423. .reg = syscall_exit_register,
  424. .define_fields = syscall_exit_define_fields,
  425. .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
  426. .raw_init = init_syscall_trace,
  427. };
  428. unsigned long __init __weak arch_syscall_addr(int nr)
  429. {
  430. return (unsigned long)sys_call_table[nr];
  431. }
  432. void __init init_ftrace_syscalls(void)
  433. {
  434. struct syscall_metadata *meta;
  435. unsigned long addr;
  436. int i;
  437. syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
  438. GFP_KERNEL);
  439. if (!syscalls_metadata) {
  440. WARN_ON(1);
  441. return;
  442. }
  443. for (i = 0; i < NR_syscalls; i++) {
  444. addr = arch_syscall_addr(i);
  445. meta = find_syscall_meta(addr);
  446. if (!meta)
  447. continue;
  448. meta->syscall_nr = i;
  449. syscalls_metadata[i] = meta;
  450. }
  451. }
  452. #ifdef CONFIG_PERF_EVENTS
  /* One bit per syscall number: set while perf has that enter/exit event enabled. */
  static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
  static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);

  /* Number of perf-enabled enter/exit events; modified under syscall_trace_lock. */
  static int sys_perf_refcount_enter;
  static int sys_perf_refcount_exit;
  457. static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
  458. struct syscall_metadata *sys_data,
  459. struct syscall_trace_enter *rec)
  460. {
  461. struct syscall_tp_t {
  462. unsigned long long regs;
  463. unsigned long syscall_nr;
  464. unsigned long args[SYSCALL_DEFINE_MAXARGS];
  465. } param;
  466. int i;
  467. *(struct pt_regs **)&param = regs;
  468. param.syscall_nr = rec->nr;
  469. for (i = 0; i < sys_data->nb_args; i++)
  470. param.args[i] = rec->args[i];
  471. return trace_call_bpf(call, &param);
  472. }
  473. static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
  474. {
  475. struct syscall_metadata *sys_data;
  476. struct syscall_trace_enter *rec;
  477. struct hlist_head *head;
  478. bool valid_prog_array;
  479. int syscall_nr;
  480. int rctx;
  481. int size;
  482. syscall_nr = trace_get_syscall_nr(current, regs);
  483. if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
  484. return;
  485. if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
  486. return;
  487. sys_data = syscall_nr_to_meta(syscall_nr);
  488. if (!sys_data)
  489. return;
  490. head = this_cpu_ptr(sys_data->enter_event->perf_events);
  491. valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
  492. if (!valid_prog_array && hlist_empty(head))
  493. return;
  494. /* get the size after alignment with the u32 buffer size field */
  495. size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
  496. size = ALIGN(size + sizeof(u32), sizeof(u64));
  497. size -= sizeof(u32);
  498. rec = perf_trace_buf_alloc(size, NULL, &rctx);
  499. if (!rec)
  500. return;
  501. rec->nr = syscall_nr;
  502. syscall_get_arguments(current, regs, 0, sys_data->nb_args,
  503. (unsigned long *)&rec->args);
  504. if ((valid_prog_array &&
  505. !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
  506. hlist_empty(head)) {
  507. perf_swevent_put_recursion_context(rctx);
  508. return;
  509. }
  510. perf_trace_buf_submit(rec, size, rctx,
  511. sys_data->enter_event->event.type, 1, regs,
  512. head, NULL, NULL);
  513. }
  514. static int perf_sysenter_enable(struct trace_event_call *call)
  515. {
  516. int ret = 0;
  517. int num;
  518. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  519. mutex_lock(&syscall_trace_lock);
  520. if (!sys_perf_refcount_enter)
  521. ret = register_trace_sys_enter(perf_syscall_enter, NULL);
  522. if (ret) {
  523. pr_info("event trace: Could not activate syscall entry trace point");
  524. } else {
  525. set_bit(num, enabled_perf_enter_syscalls);
  526. sys_perf_refcount_enter++;
  527. }
  528. mutex_unlock(&syscall_trace_lock);
  529. return ret;
  530. }
  531. static void perf_sysenter_disable(struct trace_event_call *call)
  532. {
  533. int num;
  534. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  535. mutex_lock(&syscall_trace_lock);
  536. sys_perf_refcount_enter--;
  537. clear_bit(num, enabled_perf_enter_syscalls);
  538. if (!sys_perf_refcount_enter)
  539. unregister_trace_sys_enter(perf_syscall_enter, NULL);
  540. mutex_unlock(&syscall_trace_lock);
  541. }
  542. static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
  543. struct syscall_trace_exit *rec)
  544. {
  545. struct syscall_tp_t {
  546. unsigned long long regs;
  547. unsigned long syscall_nr;
  548. unsigned long ret;
  549. } param;
  550. *(struct pt_regs **)&param = regs;
  551. param.syscall_nr = rec->nr;
  552. param.ret = rec->ret;
  553. return trace_call_bpf(call, &param);
  554. }
  555. static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
  556. {
  557. struct syscall_metadata *sys_data;
  558. struct syscall_trace_exit *rec;
  559. struct hlist_head *head;
  560. bool valid_prog_array;
  561. int syscall_nr;
  562. int rctx;
  563. int size;
  564. syscall_nr = trace_get_syscall_nr(current, regs);
  565. if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
  566. return;
  567. if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
  568. return;
  569. sys_data = syscall_nr_to_meta(syscall_nr);
  570. if (!sys_data)
  571. return;
  572. head = this_cpu_ptr(sys_data->exit_event->perf_events);
  573. valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
  574. if (!valid_prog_array && hlist_empty(head))
  575. return;
  576. /* We can probably do that at build time */
  577. size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
  578. size -= sizeof(u32);
  579. rec = perf_trace_buf_alloc(size, NULL, &rctx);
  580. if (!rec)
  581. return;
  582. rec->nr = syscall_nr;
  583. rec->ret = syscall_get_return_value(current, regs);
  584. if ((valid_prog_array &&
  585. !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) ||
  586. hlist_empty(head)) {
  587. perf_swevent_put_recursion_context(rctx);
  588. return;
  589. }
  590. perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
  591. 1, regs, head, NULL, NULL);
  592. }
  593. static int perf_sysexit_enable(struct trace_event_call *call)
  594. {
  595. int ret = 0;
  596. int num;
  597. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  598. mutex_lock(&syscall_trace_lock);
  599. if (!sys_perf_refcount_exit)
  600. ret = register_trace_sys_exit(perf_syscall_exit, NULL);
  601. if (ret) {
  602. pr_info("event trace: Could not activate syscall exit trace point");
  603. } else {
  604. set_bit(num, enabled_perf_exit_syscalls);
  605. sys_perf_refcount_exit++;
  606. }
  607. mutex_unlock(&syscall_trace_lock);
  608. return ret;
  609. }
  610. static void perf_sysexit_disable(struct trace_event_call *call)
  611. {
  612. int num;
  613. num = ((struct syscall_metadata *)call->data)->syscall_nr;
  614. mutex_lock(&syscall_trace_lock);
  615. sys_perf_refcount_exit--;
  616. clear_bit(num, enabled_perf_exit_syscalls);
  617. if (!sys_perf_refcount_exit)
  618. unregister_trace_sys_exit(perf_syscall_exit, NULL);
  619. mutex_unlock(&syscall_trace_lock);
  620. }
  621. #endif /* CONFIG_PERF_EVENTS */
  622. static int syscall_enter_register(struct trace_event_call *event,
  623. enum trace_reg type, void *data)
  624. {
  625. struct trace_event_file *file = data;
  626. switch (type) {
  627. case TRACE_REG_REGISTER:
  628. return reg_event_syscall_enter(file, event);
  629. case TRACE_REG_UNREGISTER:
  630. unreg_event_syscall_enter(file, event);
  631. return 0;
  632. #ifdef CONFIG_PERF_EVENTS
  633. case TRACE_REG_PERF_REGISTER:
  634. return perf_sysenter_enable(event);
  635. case TRACE_REG_PERF_UNREGISTER:
  636. perf_sysenter_disable(event);
  637. return 0;
  638. case TRACE_REG_PERF_OPEN:
  639. case TRACE_REG_PERF_CLOSE:
  640. case TRACE_REG_PERF_ADD:
  641. case TRACE_REG_PERF_DEL:
  642. return 0;
  643. #endif
  644. }
  645. return 0;
  646. }
  647. static int syscall_exit_register(struct trace_event_call *event,
  648. enum trace_reg type, void *data)
  649. {
  650. struct trace_event_file *file = data;
  651. switch (type) {
  652. case TRACE_REG_REGISTER:
  653. return reg_event_syscall_exit(file, event);
  654. case TRACE_REG_UNREGISTER:
  655. unreg_event_syscall_exit(file, event);
  656. return 0;
  657. #ifdef CONFIG_PERF_EVENTS
  658. case TRACE_REG_PERF_REGISTER:
  659. return perf_sysexit_enable(event);
  660. case TRACE_REG_PERF_UNREGISTER:
  661. perf_sysexit_disable(event);
  662. return 0;
  663. case TRACE_REG_PERF_OPEN:
  664. case TRACE_REG_PERF_CLOSE:
  665. case TRACE_REG_PERF_ADD:
  666. case TRACE_REG_PERF_DEL:
  667. return 0;
  668. #endif
  669. }
  670. return 0;
  671. }