bpf_trace.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751
  1. /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
  2. * Copyright (c) 2016 Facebook
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of version 2 of the GNU General Public
  6. * License as published by the Free Software Foundation.
  7. */
  8. #include <linux/kernel.h>
  9. #include <linux/types.h>
  10. #include <linux/slab.h>
  11. #include <linux/bpf.h>
  12. #include <linux/bpf_perf_event.h>
  13. #include <linux/filter.h>
  14. #include <linux/uaccess.h>
  15. #include <linux/ctype.h>
  16. #include "trace.h"
  17. /**
  18. * trace_call_bpf - invoke BPF program
  19. * @call: tracepoint event
  20. * @ctx: opaque context pointer
  21. *
  22. * kprobe handlers execute BPF programs via this helper.
  23. * Can be used from static tracepoints in the future.
  24. *
  25. * Return: BPF programs always return an integer which is interpreted by
  26. * kprobe handler as:
  27. * 0 - return from kprobe (event is filtered out)
  28. * 1 - store kprobe event into ring buffer
  29. * Other values are reserved and currently alias to 1
  30. */
  31. unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
  32. {
  33. unsigned int ret;
  34. if (in_nmi()) /* not supported yet */
  35. return 1;
  36. preempt_disable();
  37. if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
  38. /*
  39. * since some bpf program is already running on this cpu,
  40. * don't call into another bpf program (same or different)
  41. * and don't send kprobe event into ring-buffer,
  42. * so return zero here
  43. */
  44. ret = 0;
  45. goto out;
  46. }
  47. /*
  48. * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
  49. * to all call sites, we did a bpf_prog_array_valid() there to check
  50. * whether call->prog_array is empty or not, which is
  51. * a heurisitc to speed up execution.
  52. *
  53. * If bpf_prog_array_valid() fetched prog_array was
  54. * non-NULL, we go into trace_call_bpf() and do the actual
  55. * proper rcu_dereference() under RCU lock.
  56. * If it turns out that prog_array is NULL then, we bail out.
  57. * For the opposite, if the bpf_prog_array_valid() fetched pointer
  58. * was NULL, you'll skip the prog_array with the risk of missing
  59. * out of events when it was updated in between this and the
  60. * rcu_dereference() which is accepted risk.
  61. */
  62. ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
  63. out:
  64. __this_cpu_dec(bpf_prog_active);
  65. preempt_enable();
  66. return ret;
  67. }
  68. EXPORT_SYMBOL_GPL(trace_call_bpf);
  69. BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
  70. {
  71. int ret;
  72. ret = probe_kernel_read(dst, unsafe_ptr, size);
  73. if (unlikely(ret < 0))
  74. memset(dst, 0, size);
  75. return ret;
  76. }
  77. static const struct bpf_func_proto bpf_probe_read_proto = {
  78. .func = bpf_probe_read,
  79. .gpl_only = true,
  80. .ret_type = RET_INTEGER,
  81. .arg1_type = ARG_PTR_TO_UNINIT_MEM,
  82. .arg2_type = ARG_CONST_SIZE,
  83. .arg3_type = ARG_ANYTHING,
  84. };
  85. BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
  86. u32, size)
  87. {
  88. /*
  89. * Ensure we're in user context which is safe for the helper to
  90. * run. This helper has no business in a kthread.
  91. *
  92. * access_ok() should prevent writing to non-user memory, but in
  93. * some situations (nommu, temporary switch, etc) access_ok() does
  94. * not provide enough validation, hence the check on KERNEL_DS.
  95. */
  96. if (unlikely(in_interrupt() ||
  97. current->flags & (PF_KTHREAD | PF_EXITING)))
  98. return -EPERM;
  99. if (unlikely(uaccess_kernel()))
  100. return -EPERM;
  101. if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
  102. return -EPERM;
  103. return probe_kernel_write(unsafe_ptr, src, size);
  104. }
  105. static const struct bpf_func_proto bpf_probe_write_user_proto = {
  106. .func = bpf_probe_write_user,
  107. .gpl_only = true,
  108. .ret_type = RET_INTEGER,
  109. .arg1_type = ARG_ANYTHING,
  110. .arg2_type = ARG_PTR_TO_MEM,
  111. .arg3_type = ARG_CONST_SIZE,
  112. };
  113. static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
  114. {
  115. pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
  116. current->comm, task_pid_nr(current));
  117. return &bpf_probe_write_user_proto;
  118. }
  119. /*
  120. * Only limited trace_printk() conversion specifiers allowed:
  121. * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
  122. */
  123. BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
  124. u64, arg2, u64, arg3)
  125. {
  126. bool str_seen = false;
  127. int mod[3] = {};
  128. int fmt_cnt = 0;
  129. u64 unsafe_addr;
  130. char buf[64];
  131. int i;
  132. /*
  133. * bpf_check()->check_func_arg()->check_stack_boundary()
  134. * guarantees that fmt points to bpf program stack,
  135. * fmt_size bytes of it were initialized and fmt_size > 0
  136. */
  137. if (fmt[--fmt_size] != 0)
  138. return -EINVAL;
  139. /* check format string for allowed specifiers */
  140. for (i = 0; i < fmt_size; i++) {
  141. if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
  142. return -EINVAL;
  143. if (fmt[i] != '%')
  144. continue;
  145. if (fmt_cnt >= 3)
  146. return -EINVAL;
  147. /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
  148. i++;
  149. if (fmt[i] == 'l') {
  150. mod[fmt_cnt]++;
  151. i++;
  152. } else if (fmt[i] == 'p' || fmt[i] == 's') {
  153. mod[fmt_cnt]++;
  154. /* disallow any further format extensions */
  155. if (fmt[i + 1] != 0 &&
  156. !isspace(fmt[i + 1]) &&
  157. !ispunct(fmt[i + 1]))
  158. return -EINVAL;
  159. fmt_cnt++;
  160. if (fmt[i] == 's') {
  161. if (str_seen)
  162. /* allow only one '%s' per fmt string */
  163. return -EINVAL;
  164. str_seen = true;
  165. switch (fmt_cnt) {
  166. case 1:
  167. unsafe_addr = arg1;
  168. arg1 = (long) buf;
  169. break;
  170. case 2:
  171. unsafe_addr = arg2;
  172. arg2 = (long) buf;
  173. break;
  174. case 3:
  175. unsafe_addr = arg3;
  176. arg3 = (long) buf;
  177. break;
  178. }
  179. buf[0] = 0;
  180. strncpy_from_unsafe(buf,
  181. (void *) (long) unsafe_addr,
  182. sizeof(buf));
  183. }
  184. continue;
  185. }
  186. if (fmt[i] == 'l') {
  187. mod[fmt_cnt]++;
  188. i++;
  189. }
  190. if (fmt[i] != 'i' && fmt[i] != 'd' &&
  191. fmt[i] != 'u' && fmt[i] != 'x')
  192. return -EINVAL;
  193. fmt_cnt++;
  194. }
  195. /* Horrid workaround for getting va_list handling working with different
  196. * argument type combinations generically for 32 and 64 bit archs.
  197. */
  198. #define __BPF_TP_EMIT() __BPF_ARG3_TP()
  199. #define __BPF_TP(...) \
  200. __trace_printk(1 /* Fake ip will not be printed. */, \
  201. fmt, ##__VA_ARGS__)
  202. #define __BPF_ARG1_TP(...) \
  203. ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
  204. ? __BPF_TP(arg1, ##__VA_ARGS__) \
  205. : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
  206. ? __BPF_TP((long)arg1, ##__VA_ARGS__) \
  207. : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
  208. #define __BPF_ARG2_TP(...) \
  209. ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
  210. ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
  211. : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
  212. ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
  213. : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
  214. #define __BPF_ARG3_TP(...) \
  215. ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
  216. ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
  217. : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
  218. ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
  219. : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
  220. return __BPF_TP_EMIT();
  221. }
  222. static const struct bpf_func_proto bpf_trace_printk_proto = {
  223. .func = bpf_trace_printk,
  224. .gpl_only = true,
  225. .ret_type = RET_INTEGER,
  226. .arg1_type = ARG_PTR_TO_MEM,
  227. .arg2_type = ARG_CONST_SIZE,
  228. };
  229. const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
  230. {
  231. /*
  232. * this program might be calling bpf_trace_printk,
  233. * so allocate per-cpu printk buffers
  234. */
  235. trace_printk_init_buffers();
  236. return &bpf_trace_printk_proto;
  237. }
  238. BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
  239. {
  240. struct bpf_array *array = container_of(map, struct bpf_array, map);
  241. unsigned int cpu = smp_processor_id();
  242. u64 index = flags & BPF_F_INDEX_MASK;
  243. struct bpf_event_entry *ee;
  244. u64 value = 0;
  245. int err;
  246. if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
  247. return -EINVAL;
  248. if (index == BPF_F_CURRENT_CPU)
  249. index = cpu;
  250. if (unlikely(index >= array->map.max_entries))
  251. return -E2BIG;
  252. ee = READ_ONCE(array->ptrs[index]);
  253. if (!ee)
  254. return -ENOENT;
  255. err = perf_event_read_local(ee->event, &value);
  256. /*
  257. * this api is ugly since we miss [-22..-2] range of valid
  258. * counter values, but that's uapi
  259. */
  260. if (err)
  261. return err;
  262. return value;
  263. }
  264. static const struct bpf_func_proto bpf_perf_event_read_proto = {
  265. .func = bpf_perf_event_read,
  266. .gpl_only = true,
  267. .ret_type = RET_INTEGER,
  268. .arg1_type = ARG_CONST_MAP_PTR,
  269. .arg2_type = ARG_ANYTHING,
  270. };
  271. static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
  272. static __always_inline u64
  273. __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
  274. u64 flags, struct perf_sample_data *sd)
  275. {
  276. struct bpf_array *array = container_of(map, struct bpf_array, map);
  277. unsigned int cpu = smp_processor_id();
  278. u64 index = flags & BPF_F_INDEX_MASK;
  279. struct bpf_event_entry *ee;
  280. struct perf_event *event;
  281. if (index == BPF_F_CURRENT_CPU)
  282. index = cpu;
  283. if (unlikely(index >= array->map.max_entries))
  284. return -E2BIG;
  285. ee = READ_ONCE(array->ptrs[index]);
  286. if (!ee)
  287. return -ENOENT;
  288. event = ee->event;
  289. if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
  290. event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
  291. return -EINVAL;
  292. if (unlikely(event->oncpu != cpu))
  293. return -EOPNOTSUPP;
  294. perf_event_output(event, sd, regs);
  295. return 0;
  296. }
  297. BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
  298. u64, flags, void *, data, u64, size)
  299. {
  300. struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
  301. struct perf_raw_record raw = {
  302. .frag = {
  303. .size = size,
  304. .data = data,
  305. },
  306. };
  307. if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
  308. return -EINVAL;
  309. perf_sample_data_init(sd, 0, 0);
  310. sd->raw = &raw;
  311. return __bpf_perf_event_output(regs, map, flags, sd);
  312. }
  313. static const struct bpf_func_proto bpf_perf_event_output_proto = {
  314. .func = bpf_perf_event_output,
  315. .gpl_only = true,
  316. .ret_type = RET_INTEGER,
  317. .arg1_type = ARG_PTR_TO_CTX,
  318. .arg2_type = ARG_CONST_MAP_PTR,
  319. .arg3_type = ARG_ANYTHING,
  320. .arg4_type = ARG_PTR_TO_MEM,
  321. .arg5_type = ARG_CONST_SIZE,
  322. };
  323. static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
  324. static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
  325. u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
  326. void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
  327. {
  328. struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
  329. struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
  330. struct perf_raw_frag frag = {
  331. .copy = ctx_copy,
  332. .size = ctx_size,
  333. .data = ctx,
  334. };
  335. struct perf_raw_record raw = {
  336. .frag = {
  337. {
  338. .next = ctx_size ? &frag : NULL,
  339. },
  340. .size = meta_size,
  341. .data = meta,
  342. },
  343. };
  344. perf_fetch_caller_regs(regs);
  345. perf_sample_data_init(sd, 0, 0);
  346. sd->raw = &raw;
  347. return __bpf_perf_event_output(regs, map, flags, sd);
  348. }
  349. BPF_CALL_0(bpf_get_current_task)
  350. {
  351. return (long) current;
  352. }
  353. static const struct bpf_func_proto bpf_get_current_task_proto = {
  354. .func = bpf_get_current_task,
  355. .gpl_only = true,
  356. .ret_type = RET_INTEGER,
  357. };
  358. BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
  359. {
  360. struct bpf_array *array = container_of(map, struct bpf_array, map);
  361. struct cgroup *cgrp;
  362. if (unlikely(in_interrupt()))
  363. return -EINVAL;
  364. if (unlikely(idx >= array->map.max_entries))
  365. return -E2BIG;
  366. cgrp = READ_ONCE(array->ptrs[idx]);
  367. if (unlikely(!cgrp))
  368. return -EAGAIN;
  369. return task_under_cgroup_hierarchy(current, cgrp);
  370. }
  371. static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
  372. .func = bpf_current_task_under_cgroup,
  373. .gpl_only = false,
  374. .ret_type = RET_INTEGER,
  375. .arg1_type = ARG_CONST_MAP_PTR,
  376. .arg2_type = ARG_ANYTHING,
  377. };
  378. BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
  379. const void *, unsafe_ptr)
  380. {
  381. int ret;
  382. /*
  383. * The strncpy_from_unsafe() call will likely not fill the entire
  384. * buffer, but that's okay in this circumstance as we're probing
  385. * arbitrary memory anyway similar to bpf_probe_read() and might
  386. * as well probe the stack. Thus, memory is explicitly cleared
  387. * only in error case, so that improper users ignoring return
  388. * code altogether don't copy garbage; otherwise length of string
  389. * is returned that can be used for bpf_perf_event_output() et al.
  390. */
  391. ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
  392. if (unlikely(ret < 0))
  393. memset(dst, 0, size);
  394. return ret;
  395. }
  396. static const struct bpf_func_proto bpf_probe_read_str_proto = {
  397. .func = bpf_probe_read_str,
  398. .gpl_only = true,
  399. .ret_type = RET_INTEGER,
  400. .arg1_type = ARG_PTR_TO_UNINIT_MEM,
  401. .arg2_type = ARG_CONST_SIZE,
  402. .arg3_type = ARG_ANYTHING,
  403. };
  404. static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
  405. {
  406. switch (func_id) {
  407. case BPF_FUNC_map_lookup_elem:
  408. return &bpf_map_lookup_elem_proto;
  409. case BPF_FUNC_map_update_elem:
  410. return &bpf_map_update_elem_proto;
  411. case BPF_FUNC_map_delete_elem:
  412. return &bpf_map_delete_elem_proto;
  413. case BPF_FUNC_probe_read:
  414. return &bpf_probe_read_proto;
  415. case BPF_FUNC_ktime_get_ns:
  416. return &bpf_ktime_get_ns_proto;
  417. case BPF_FUNC_ktime_get_boot_ns:
  418. return &bpf_ktime_get_boot_ns_proto;
  419. case BPF_FUNC_tail_call:
  420. return &bpf_tail_call_proto;
  421. case BPF_FUNC_get_current_pid_tgid:
  422. return &bpf_get_current_pid_tgid_proto;
  423. case BPF_FUNC_get_current_task:
  424. return &bpf_get_current_task_proto;
  425. case BPF_FUNC_get_current_uid_gid:
  426. return &bpf_get_current_uid_gid_proto;
  427. case BPF_FUNC_get_current_comm:
  428. return &bpf_get_current_comm_proto;
  429. case BPF_FUNC_trace_printk:
  430. return bpf_get_trace_printk_proto();
  431. case BPF_FUNC_get_smp_processor_id:
  432. return &bpf_get_smp_processor_id_proto;
  433. case BPF_FUNC_get_numa_node_id:
  434. return &bpf_get_numa_node_id_proto;
  435. case BPF_FUNC_perf_event_read:
  436. return &bpf_perf_event_read_proto;
  437. case BPF_FUNC_probe_write_user:
  438. return bpf_get_probe_write_proto();
  439. case BPF_FUNC_current_task_under_cgroup:
  440. return &bpf_current_task_under_cgroup_proto;
  441. case BPF_FUNC_get_prandom_u32:
  442. return &bpf_get_prandom_u32_proto;
  443. case BPF_FUNC_probe_read_str:
  444. return &bpf_probe_read_str_proto;
  445. default:
  446. return NULL;
  447. }
  448. }
  449. static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
  450. {
  451. switch (func_id) {
  452. case BPF_FUNC_perf_event_output:
  453. return &bpf_perf_event_output_proto;
  454. case BPF_FUNC_get_stackid:
  455. return &bpf_get_stackid_proto;
  456. default:
  457. return tracing_func_proto(func_id);
  458. }
  459. }
  460. /* bpf+kprobe programs can access fields of 'struct pt_regs' */
  461. static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
  462. struct bpf_insn_access_aux *info)
  463. {
  464. if (off < 0 || off >= sizeof(struct pt_regs))
  465. return false;
  466. if (type != BPF_READ)
  467. return false;
  468. if (off % size != 0)
  469. return false;
  470. /*
  471. * Assertion for 32 bit to make sure last 8 byte access
  472. * (BPF_DW) to the last 4 byte member is disallowed.
  473. */
  474. if (off + size > sizeof(struct pt_regs))
  475. return false;
  476. return true;
  477. }
  478. const struct bpf_verifier_ops kprobe_prog_ops = {
  479. .get_func_proto = kprobe_prog_func_proto,
  480. .is_valid_access = kprobe_prog_is_valid_access,
  481. };
  482. BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
  483. u64, flags, void *, data, u64, size)
  484. {
  485. struct pt_regs *regs = *(struct pt_regs **)tp_buff;
  486. /*
  487. * r1 points to perf tracepoint buffer where first 8 bytes are hidden
  488. * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
  489. * from there and call the same bpf_perf_event_output() helper inline.
  490. */
  491. return ____bpf_perf_event_output(regs, map, flags, data, size);
  492. }
  493. static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
  494. .func = bpf_perf_event_output_tp,
  495. .gpl_only = true,
  496. .ret_type = RET_INTEGER,
  497. .arg1_type = ARG_PTR_TO_CTX,
  498. .arg2_type = ARG_CONST_MAP_PTR,
  499. .arg3_type = ARG_ANYTHING,
  500. .arg4_type = ARG_PTR_TO_MEM,
  501. .arg5_type = ARG_CONST_SIZE,
  502. };
  503. BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
  504. u64, flags)
  505. {
  506. struct pt_regs *regs = *(struct pt_regs **)tp_buff;
  507. /*
  508. * Same comment as in bpf_perf_event_output_tp(), only that this time
  509. * the other helper's function body cannot be inlined due to being
  510. * external, thus we need to call raw helper function.
  511. */
  512. return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
  513. flags, 0, 0);
  514. }
  515. static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
  516. .func = bpf_get_stackid_tp,
  517. .gpl_only = true,
  518. .ret_type = RET_INTEGER,
  519. .arg1_type = ARG_PTR_TO_CTX,
  520. .arg2_type = ARG_CONST_MAP_PTR,
  521. .arg3_type = ARG_ANYTHING,
  522. };
  523. static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
  524. {
  525. switch (func_id) {
  526. case BPF_FUNC_perf_event_output:
  527. return &bpf_perf_event_output_proto_tp;
  528. case BPF_FUNC_get_stackid:
  529. return &bpf_get_stackid_proto_tp;
  530. default:
  531. return tracing_func_proto(func_id);
  532. }
  533. }
  534. static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
  535. struct bpf_insn_access_aux *info)
  536. {
  537. if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
  538. return false;
  539. if (type != BPF_READ)
  540. return false;
  541. if (off % size != 0)
  542. return false;
  543. BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
  544. return true;
  545. }
  546. const struct bpf_verifier_ops tracepoint_prog_ops = {
  547. .get_func_proto = tp_prog_func_proto,
  548. .is_valid_access = tp_prog_is_valid_access,
  549. };
  550. static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
  551. struct bpf_insn_access_aux *info)
  552. {
  553. const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
  554. sample_period);
  555. if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
  556. return false;
  557. if (type != BPF_READ)
  558. return false;
  559. if (off % size != 0)
  560. return false;
  561. switch (off) {
  562. case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
  563. bpf_ctx_record_field_size(info, size_sp);
  564. if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
  565. return false;
  566. break;
  567. default:
  568. if (size != sizeof(long))
  569. return false;
  570. }
  571. return true;
  572. }
  573. static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
  574. const struct bpf_insn *si,
  575. struct bpf_insn *insn_buf,
  576. struct bpf_prog *prog, u32 *target_size)
  577. {
  578. struct bpf_insn *insn = insn_buf;
  579. switch (si->off) {
  580. case offsetof(struct bpf_perf_event_data, sample_period):
  581. *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
  582. data), si->dst_reg, si->src_reg,
  583. offsetof(struct bpf_perf_event_data_kern, data));
  584. *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
  585. bpf_target_off(struct perf_sample_data, period, 8,
  586. target_size));
  587. break;
  588. default:
  589. *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
  590. regs), si->dst_reg, si->src_reg,
  591. offsetof(struct bpf_perf_event_data_kern, regs));
  592. *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
  593. si->off);
  594. break;
  595. }
  596. return insn - insn_buf;
  597. }
  598. const struct bpf_verifier_ops perf_event_prog_ops = {
  599. .get_func_proto = tp_prog_func_proto,
  600. .is_valid_access = pe_prog_is_valid_access,
  601. .convert_ctx_access = pe_prog_convert_ctx_access,
  602. };
  603. static DEFINE_MUTEX(bpf_event_mutex);
  604. int perf_event_attach_bpf_prog(struct perf_event *event,
  605. struct bpf_prog *prog)
  606. {
  607. struct bpf_prog_array __rcu *old_array;
  608. struct bpf_prog_array *new_array;
  609. int ret = -EEXIST;
  610. mutex_lock(&bpf_event_mutex);
  611. if (event->prog)
  612. goto out;
  613. old_array = rcu_dereference_protected(event->tp_event->prog_array,
  614. lockdep_is_held(&bpf_event_mutex));
  615. ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
  616. if (ret < 0)
  617. goto out;
  618. /* set the new array to event->tp_event and set event->prog */
  619. event->prog = prog;
  620. rcu_assign_pointer(event->tp_event->prog_array, new_array);
  621. bpf_prog_array_free(old_array);
  622. out:
  623. mutex_unlock(&bpf_event_mutex);
  624. return ret;
  625. }
  626. void perf_event_detach_bpf_prog(struct perf_event *event)
  627. {
  628. struct bpf_prog_array __rcu *old_array;
  629. struct bpf_prog_array *new_array;
  630. int ret;
  631. mutex_lock(&bpf_event_mutex);
  632. if (!event->prog)
  633. goto out;
  634. old_array = rcu_dereference_protected(event->tp_event->prog_array,
  635. lockdep_is_held(&bpf_event_mutex));
  636. ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
  637. if (ret < 0) {
  638. bpf_prog_array_delete_safe(old_array, event->prog);
  639. } else {
  640. rcu_assign_pointer(event->tp_event->prog_array, new_array);
  641. bpf_prog_array_free(old_array);
  642. }
  643. bpf_prog_put(event->prog);
  644. event->prog = NULL;
  645. out:
  646. mutex_unlock(&bpf_event_mutex);
  647. }