/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>

#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
        u64 ret_addr;
        u64 timestamp;
        u64 ref;
        u64 branch_count;
        struct call_path *cp;
        bool no_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
        struct thread_stack_entry *stack;
        size_t cnt;
        size_t sz;
        u64 trace_nr;
        u64 branch_count;
        u64 kernel_start;
        u64 last_time;
        struct call_return_processor *crp;
        struct comm *comm;
};

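/* Enlarge the stack array by STACK_GROWTH entries, keeping existing entries. */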
static int thread_stack__grow(struct thread_stack *ts)
{
        struct thread_stack_entry *new_stack;
        size_t sz, new_sz;

        new_sz = ts->sz + STACK_GROWTH;
        sz = new_sz * sizeof(struct thread_stack_entry);

        new_stack = realloc(ts->stack, sz);
        if (!new_stack)
                return -ENOMEM;

        ts->stack = new_stack;
        ts->sz = new_sz;

        return 0;
}

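/*
 * Allocate and initialize a thread stack.  The kernel start address is taken
 * from the thread's machine when it is known, otherwise a default is used.
 */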
static struct thread_stack *thread_stack__new(struct thread *thread,
                                              struct call_return_processor *crp)
{
        struct thread_stack *ts;

        ts = zalloc(sizeof(struct thread_stack));
        if (!ts)
                return NULL;

        if (thread_stack__grow(ts)) {
                free(ts);
                return NULL;
        }

        if (thread->mg && thread->mg->machine)
                ts->kernel_start = machine__kernel_start(thread->mg->machine);
        else
                ts->kernel_start = 1ULL << 63;
        ts->crp = crp;

        return ts;
}

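/*
 * Push a return address.  If growing the stack fails, the existing entries
 * are discarded but the new entry is still stored in the previously
 * allocated array.
 */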
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
        int err = 0;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err) {
                        pr_warning("Out of memory: discarding thread stack\n");
                        ts->cnt = 0;
                }
        }

        ts->stack[ts->cnt++].ret_addr = ret_addr;

        return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
        size_t i;

        /*
         * In some cases there may be functions which are not seen to return.
         * For example when setjmp / longjmp has been used.  Or the perf
         * context switch in the kernel which doesn't stop and start tracing
         * in exactly the same code path.  When that happens the return
         * address will be further down the stack.  If the return address is
         * not found at all, we assume the opposite (i.e. this is a return
         * for a call that wasn't seen for some reason) and leave the stack
         * alone.
         */
        for (i = ts->cnt; i; ) {
                if (ts->stack[--i].ret_addr == ret_addr) {
                        ts->cnt = i;
                        return;
                }
        }
}

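/* Report whether the call path on top of the stack is in the kernel. */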
static bool thread_stack__in_kernel(struct thread_stack *ts)
{
        if (!ts->cnt)
                return false;

        return ts->stack[ts->cnt - 1].cp->in_kernel;
}

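/*
 * Pair the stack entry at @idx with the return information and pass the
 * resulting call/return to the call/return processor.
 */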
static int thread_stack__call_return(struct thread *thread,
                                     struct thread_stack *ts, size_t idx,
                                     u64 timestamp, u64 ref, bool no_return)
{
        struct call_return_processor *crp = ts->crp;
        struct thread_stack_entry *tse;
        struct call_return cr = {
                .thread = thread,
                .comm = ts->comm,
                .db_id = 0,
        };

        tse = &ts->stack[idx];
        cr.cp = tse->cp;
        cr.call_time = tse->timestamp;
        cr.return_time = timestamp;
        cr.branch_count = ts->branch_count - tse->branch_count;
        cr.call_ref = tse->ref;
        cr.return_ref = ref;
        if (tse->no_call)
                cr.flags |= CALL_RETURN_NO_CALL;
        if (no_return)
                cr.flags |= CALL_RETURN_NO_RETURN;

        return crp->process(&cr, crp->data);
}

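/*
 * Empty the stack.  When a call/return processor is in use, each remaining
 * entry is reported as a call with no seen return.
 */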
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
        struct call_return_processor *crp = ts->crp;
        int err;

        if (!crp) {
                ts->cnt = 0;
                return 0;
        }

        while (ts->cnt) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                ts->last_time, 0, true);
                if (err) {
                        pr_err("Error flushing thread stack!\n");
                        ts->cnt = 0;
                        return err;
                }
        }

        return 0;
}

int thread_stack__flush(struct thread *thread)
{
        if (thread->ts)
                return __thread_stack__flush(thread, thread->ts);

        return 0;
}

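/*
 * Update the stack from a branch sample's flags and addresses.  This path
 * keeps only return addresses; it stops early once thread_stack__process()
 * has attached a call/return processor.
 */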
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                        u64 to_ip, u16 insn_len, u64 trace_nr)
{
        if (!thread)
                return -EINVAL;

        if (!thread->ts) {
                thread->ts = thread_stack__new(thread, NULL);
                if (!thread->ts) {
                        pr_warning("Out of memory: no thread stack\n");
                        return -ENOMEM;
                }
                thread->ts->trace_nr = trace_nr;
        }

        /*
         * When the trace is discontinuous, the trace_nr changes.  In that
         * case the stack might be completely invalid.  Better to report
         * nothing than to report something misleading, so flush the stack.
         */
        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }

        /* Stop here if thread_stack__process() is in use */
        if (thread->ts->crp)
                return 0;

        if (flags & PERF_IP_FLAG_CALL) {
                u64 ret_addr;

                if (!to_ip)
                        return 0;
                ret_addr = from_ip + insn_len;
                if (ret_addr == to_ip)
                        return 0; /* Zero-length calls are excluded */
                return thread_stack__push(thread->ts, ret_addr);
        } else if (flags & PERF_IP_FLAG_RETURN) {
                if (!from_ip)
                        return 0;
                thread_stack__pop(thread->ts, to_ip);
        }

        return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
        if (!thread || !thread->ts)
                return;

        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }
}

void thread_stack__free(struct thread *thread)
{
        if (thread->ts) {
                __thread_stack__flush(thread, thread->ts);
                zfree(&thread->ts->stack);
                zfree(&thread->ts);
        }
}

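/*
 * Synthesize a callchain from the current stack contents: @ip first, then
 * the stacked return addresses from the most recent downwards, limited to
 * @sz entries.
 */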
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
                          size_t sz, u64 ip)
{
        size_t i;

        if (!thread || !thread->ts)
                chain->nr = 1;
        else
                chain->nr = min(sz, thread->ts->cnt + 1);

        chain->ips[0] = ip;

        for (i = 1; i < chain->nr; i++)
                chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}

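/*
 * Allocate a call/return processor that passes each completed call/return
 * to @process along with @data.
 */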
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
                           void *data)
{
        struct call_return_processor *crp;

        crp = zalloc(sizeof(struct call_return_processor));
        if (!crp)
                return NULL;
        crp->cpr = call_path_root__new();
        if (!crp->cpr)
                goto out_free;
        crp->process = process;
        crp->data = data;
        return crp;

out_free:
        free(crp);
        return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
        if (crp) {
                call_path_root__free(crp->cpr);
                free(crp);
        }
}

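/* Push an entry together with its call path, timestamp and branch count. */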
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
                                 u64 timestamp, u64 ref, struct call_path *cp,
                                 bool no_call)
{
        struct thread_stack_entry *tse;
        int err;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err)
                        return err;
        }

        tse = &ts->stack[ts->cnt++];
        tse->ret_addr = ret_addr;
        tse->timestamp = timestamp;
        tse->ref = ref;
        tse->branch_count = ts->branch_count;
        tse->cp = cp;
        tse->no_call = no_call;

        return 0;
}

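/*
 * Pop entries down to the one matching @ret_addr (or @sym for the bottom
 * entry), reporting skipped entries as calls with no seen return.  Returns 1
 * if no matching entry is found, in which case the stack is left unchanged.
 */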
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
                                u64 ret_addr, u64 timestamp, u64 ref,
                                struct symbol *sym)
{
        int err;

        if (!ts->cnt)
                return 1;

        if (ts->cnt == 1) {
                struct thread_stack_entry *tse = &ts->stack[0];

                if (tse->cp->sym == sym)
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
        }

        if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
                return thread_stack__call_return(thread, ts, --ts->cnt,
                                                 timestamp, ref, false);
        } else {
                size_t i = ts->cnt - 1;

                while (i--) {
                        if (ts->stack[i].ret_addr != ret_addr)
                                continue;
                        i += 1;
                        while (ts->cnt > i) {
                                err = thread_stack__call_return(thread, ts,
                                                                --ts->cnt,
                                                                timestamp, ref,
                                                                true);
                                if (err)
                                        return err;
                        }
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
                }
        }

        return 1;
}

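/*
 * Start an empty stack with an entry for the sample's 'from' location, or
 * its 'to' location if the 'from' ip is zero.
 */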
static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
                                struct perf_sample *sample,
                                struct addr_location *from_al,
                                struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        struct symbol *sym;
        u64 ip;

        if (sample->ip) {
                ip = sample->ip;
                sym = from_al->sym;
        } else if (sample->addr) {
                ip = sample->addr;
                sym = to_al->sym;
        } else {
                return 0;
        }

        cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
                                     true);
}

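/*
 * Handle a 'return' that did not match any 'call' on the stack: pop kernel
 * entries when the sample shows a return to userspace, then push and
 * immediately pop a call path for the return so it is still reported.
 */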
static int thread_stack__no_call_return(struct thread *thread,
                                        struct thread_stack *ts,
                                        struct perf_sample *sample,
                                        struct addr_location *from_al,
                                        struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp, *parent;
        u64 ks = ts->kernel_start;
        int err;

        if (sample->ip >= ks && sample->addr < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }

                /* If the stack is empty, push the userspace address */
                if (!ts->cnt) {
                        cp = call_path__findnew(cpr, &cpr->call_path,
                                                to_al->sym, sample->addr,
                                                ts->kernel_start);
                        if (!cp)
                                return -ENOMEM;
                        return thread_stack__push_cp(ts, 0, sample->time, ref,
                                                     cp, true);
                }
        } else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }
        }

        if (ts->cnt)
                parent = ts->stack[ts->cnt - 1].cp;
        else
                parent = &cpr->call_path;

        /* This 'return' had no 'call', so push and pop top of stack */
        cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
                                    true);
        if (err)
                return err;

        return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
                                    to_al->sym);
}

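/*
 * At a 'trace begin' branch, pop the placeholder entry that
 * thread_stack__trace_end() may have left on top of the stack.
 */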
static int thread_stack__trace_begin(struct thread *thread,
                                     struct thread_stack *ts, u64 timestamp,
                                     u64 ref)
{
        struct thread_stack_entry *tse;
        int err;

        if (!ts->cnt)
                return 0;

        /* Pop trace end */
        tse = &ts->stack[ts->cnt - 1];
        if (tse->cp->sym == NULL && tse->cp->ip == 0) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                timestamp, ref, false);
                if (err)
                        return err;
        }

        return 0;
}

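/*
 * At a 'trace end' branch, push a placeholder entry (NULL symbol, zero ip)
 * representing the point at which tracing stopped.
 */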
static int thread_stack__trace_end(struct thread_stack *ts,
                                   struct perf_sample *sample, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        u64 ret_addr;

        /* No point having 'trace end' on the bottom of the stack */
        if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
                return 0;

        cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        ret_addr = sample->ip + sample->insn_len;

        return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
                                     false);
}

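/*
 * Process one branch sample with call path information, pairing calls with
 * returns and passing each completed pair to the call/return processor @crp.
 */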
int thread_stack__process(struct thread *thread, struct comm *comm,
                          struct perf_sample *sample,
                          struct addr_location *from_al,
                          struct addr_location *to_al, u64 ref,
                          struct call_return_processor *crp)
{
        struct thread_stack *ts = thread->ts;
        int err = 0;

        if (ts) {
                if (!ts->crp) {
                        /* Supersede thread_stack__event() */
                        thread_stack__free(thread);
                        thread->ts = thread_stack__new(thread, crp);
                        if (!thread->ts)
                                return -ENOMEM;
                        ts = thread->ts;
                        ts->comm = comm;
                }
        } else {
                thread->ts = thread_stack__new(thread, crp);
                if (!thread->ts)
                        return -ENOMEM;
                ts = thread->ts;
                ts->comm = comm;
        }

        /* Flush stack on exec */
        if (ts->comm != comm && thread->pid_ == thread->tid) {
                err = __thread_stack__flush(thread, ts);
                if (err)
                        return err;
                ts->comm = comm;
        }

        /* If the stack is empty, put the current symbol on the stack */
        if (!ts->cnt) {
                err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
                                           ref);
                if (err)
                        return err;
        }

        ts->branch_count += 1;
        ts->last_time = sample->time;

        if (sample->flags & PERF_IP_FLAG_CALL) {
                struct call_path_root *cpr = ts->crp->cpr;
                struct call_path *cp;
                u64 ret_addr;

                if (!sample->ip || !sample->addr)
                        return 0;
                ret_addr = sample->ip + sample->insn_len;
                if (ret_addr == sample->addr)
                        return 0; /* Zero-length calls are excluded */
                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
                                        to_al->sym, sample->addr,
                                        ts->kernel_start);
                if (!cp)
                        return -ENOMEM;
                err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
                                            cp, false);
        } else if (sample->flags & PERF_IP_FLAG_RETURN) {
                if (!sample->ip || !sample->addr)
                        return 0;
                err = thread_stack__pop_cp(thread, ts, sample->addr,
                                           sample->time, ref, from_al->sym);
                if (err) {
                        if (err < 0)
                                return err;
                        err = thread_stack__no_call_return(thread, ts, sample,
                                                           from_al, to_al, ref);
                }
        } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
                err = thread_stack__trace_begin(thread, ts, sample->time, ref);
        } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
                err = thread_stack__trace_end(ts, sample, ref);
        }

        return err;
}

size_t thread_stack__depth(struct thread *thread)
{
        if (!thread->ts)
                return 0;
        return thread->ts->cnt;
}