spu_task_sync.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. /*
  2. * Cell Broadband Engine OProfile Support
  3. *
  4. * (C) Copyright IBM Corporation 2006
  5. *
  6. * Author: Maynard Johnson <maynardj@us.ibm.com>
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License
  10. * as published by the Free Software Foundation; either version
  11. * 2 of the License, or (at your option) any later version.
  12. */
  13. /* The purpose of this file is to handle SPU event task switching
  14. * and to record SPU context information into the OProfile
  15. * event buffer.
  16. *
  17. * Additionally, the spu_sync_buffer function is provided as a helper
  18. * for recording actual SPU program counter samples to the event buffer.
  19. */
  20. #include <linux/dcookies.h>
  21. #include <linux/kref.h>
  22. #include <linux/mm.h>
  23. #include <linux/fs.h>
  24. #include <linux/file.h>
  25. #include <linux/module.h>
  26. #include <linux/notifier.h>
  27. #include <linux/numa.h>
  28. #include <linux/oprofile.h>
  29. #include <linux/slab.h>
  30. #include <linux/spinlock.h>
  31. #include "pr_util.h"
  32. #define RELEASE_ALL 9999
  33. static DEFINE_SPINLOCK(buffer_lock);
  34. static DEFINE_SPINLOCK(cache_lock);
  35. static int num_spu_nodes;
  36. static int spu_prof_num_nodes;
  37. struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
  38. struct delayed_work spu_work;
  39. static unsigned max_spu_buff;
  40. static void spu_buff_add(unsigned long int value, int spu)
  41. {
  42. /* spu buff is a circular buffer. Add entries to the
  43. * head. Head is the index to store the next value.
  44. * The buffer is full when there is one available entry
  45. * in the queue, i.e. head and tail can't be equal.
  46. * That way we can tell the difference between the
  47. * buffer being full versus empty.
  48. *
  49. * ASSUMPTION: the buffer_lock is held when this function
  50. * is called to lock the buffer, head and tail.
  51. */
  52. int full = 1;
  53. if (spu_buff[spu].head >= spu_buff[spu].tail) {
  54. if ((spu_buff[spu].head - spu_buff[spu].tail)
  55. < (max_spu_buff - 1))
  56. full = 0;
  57. } else if (spu_buff[spu].tail > spu_buff[spu].head) {
  58. if ((spu_buff[spu].tail - spu_buff[spu].head)
  59. > 1)
  60. full = 0;
  61. }
  62. if (!full) {
  63. spu_buff[spu].buff[spu_buff[spu].head] = value;
  64. spu_buff[spu].head++;
  65. if (spu_buff[spu].head >= max_spu_buff)
  66. spu_buff[spu].head = 0;
  67. } else {
  68. /* From the user's perspective make the SPU buffer
  69. * size management/overflow look like we are using
  70. * per cpu buffers. The user uses the same
  71. * per cpu parameter to adjust the SPU buffer size.
  72. * Increment the sample_lost_overflow to inform
  73. * the user the buffer size needs to be increased.
  74. */
  75. oprofile_cpu_buffer_inc_smpl_lost();
  76. }
  77. }
  78. /* This function copies the per SPU buffers to the
  79. * OProfile kernel buffer.
  80. */
  81. static void sync_spu_buff(void)
  82. {
  83. int spu;
  84. unsigned long flags;
  85. int curr_head;
  86. for (spu = 0; spu < num_spu_nodes; spu++) {
  87. /* In case there was an issue and the buffer didn't
  88. * get created skip it.
  89. */
  90. if (spu_buff[spu].buff == NULL)
  91. continue;
  92. /* Hold the lock to make sure the head/tail
  93. * doesn't change while spu_buff_add() is
  94. * deciding if the buffer is full or not.
  95. * Being a little paranoid.
  96. */
  97. spin_lock_irqsave(&buffer_lock, flags);
  98. curr_head = spu_buff[spu].head;
  99. spin_unlock_irqrestore(&buffer_lock, flags);
  100. /* Transfer the current contents to the kernel buffer.
  101. * data can still be added to the head of the buffer.
  102. */
  103. oprofile_put_buff(spu_buff[spu].buff,
  104. spu_buff[spu].tail,
  105. curr_head, max_spu_buff);
  106. spin_lock_irqsave(&buffer_lock, flags);
  107. spu_buff[spu].tail = curr_head;
  108. spin_unlock_irqrestore(&buffer_lock, flags);
  109. }
  110. }
  111. static void wq_sync_spu_buff(struct work_struct *work)
  112. {
  113. /* move data from spu buffers to kernel buffer */
  114. sync_spu_buff();
  115. /* only reschedule if profiling is not done */
  116. if (spu_prof_running)
  117. schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
  118. }
  119. /* Container for caching information about an active SPU task. */
  120. struct cached_info {
  121. struct vma_to_fileoffset_map *map;
  122. struct spu *the_spu; /* needed to access pointer to local_store */
  123. struct kref cache_ref;
  124. };
  125. static struct cached_info *spu_info[MAX_NUMNODES * 8];
  126. static void destroy_cached_info(struct kref *kref)
  127. {
  128. struct cached_info *info;
  129. info = container_of(kref, struct cached_info, cache_ref);
  130. vma_map_free(info->map);
  131. kfree(info);
  132. module_put(THIS_MODULE);
  133. }
  134. /* Return the cached_info for the passed SPU number.
  135. * ATTENTION: Callers are responsible for obtaining the
  136. * cache_lock if needed prior to invoking this function.
  137. */
  138. static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
  139. {
  140. struct kref *ref;
  141. struct cached_info *ret_info;
  142. if (spu_num >= num_spu_nodes) {
  143. printk(KERN_ERR "SPU_PROF: "
  144. "%s, line %d: Invalid index %d into spu info cache\n",
  145. __func__, __LINE__, spu_num);
  146. ret_info = NULL;
  147. goto out;
  148. }
  149. if (!spu_info[spu_num] && the_spu) {
  150. ref = spu_get_profile_private_kref(the_spu->ctx);
  151. if (ref) {
  152. spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
  153. kref_get(&spu_info[spu_num]->cache_ref);
  154. }
  155. }
  156. ret_info = spu_info[spu_num];
  157. out:
  158. return ret_info;
  159. }
  160. /* Looks for cached info for the passed spu. If not found, the
  161. * cached info is created for the passed spu.
  162. * Returns 0 for success; otherwise, -1 for error.
  163. */
  164. static int
  165. prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
  166. {
  167. unsigned long flags;
  168. struct vma_to_fileoffset_map *new_map;
  169. int retval = 0;
  170. struct cached_info *info;
  171. /* We won't bother getting cache_lock here since
  172. * don't do anything with the cached_info that's returned.
  173. */
  174. info = get_cached_info(spu, spu->number);
  175. if (info) {
  176. pr_debug("Found cached SPU info.\n");
  177. goto out;
  178. }
  179. /* Create cached_info and set spu_info[spu->number] to point to it.
  180. * spu->number is a system-wide value, not a per-node value.
  181. */
  182. info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
  183. if (!info) {
  184. printk(KERN_ERR "SPU_PROF: "
  185. "%s, line %d: create vma_map failed\n",
  186. __func__, __LINE__);
  187. retval = -ENOMEM;
  188. goto err_alloc;
  189. }
  190. new_map = create_vma_map(spu, objectId);
  191. if (!new_map) {
  192. printk(KERN_ERR "SPU_PROF: "
  193. "%s, line %d: create vma_map failed\n",
  194. __func__, __LINE__);
  195. retval = -ENOMEM;
  196. goto err_alloc;
  197. }
  198. pr_debug("Created vma_map\n");
  199. info->map = new_map;
  200. info->the_spu = spu;
  201. kref_init(&info->cache_ref);
  202. spin_lock_irqsave(&cache_lock, flags);
  203. spu_info[spu->number] = info;
  204. /* Increment count before passing off ref to SPUFS. */
  205. kref_get(&info->cache_ref);
  206. /* We increment the module refcount here since SPUFS is
  207. * responsible for the final destruction of the cached_info,
  208. * and it must be able to access the destroy_cached_info()
  209. * function defined in the OProfile module. We decrement
  210. * the module refcount in destroy_cached_info.
  211. */
  212. try_module_get(THIS_MODULE);
  213. spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
  214. destroy_cached_info);
  215. spin_unlock_irqrestore(&cache_lock, flags);
  216. goto out;
  217. err_alloc:
  218. kfree(info);
  219. out:
  220. return retval;
  221. }
  222. /*
  223. * NOTE: The caller is responsible for locking the
  224. * cache_lock prior to calling this function.
  225. */
  226. static int release_cached_info(int spu_index)
  227. {
  228. int index, end;
  229. if (spu_index == RELEASE_ALL) {
  230. end = num_spu_nodes;
  231. index = 0;
  232. } else {
  233. if (spu_index >= num_spu_nodes) {
  234. printk(KERN_ERR "SPU_PROF: "
  235. "%s, line %d: "
  236. "Invalid index %d into spu info cache\n",
  237. __func__, __LINE__, spu_index);
  238. goto out;
  239. }
  240. end = spu_index + 1;
  241. index = spu_index;
  242. }
  243. for (; index < end; index++) {
  244. if (spu_info[index]) {
  245. kref_put(&spu_info[index]->cache_ref,
  246. destroy_cached_info);
  247. spu_info[index] = NULL;
  248. }
  249. }
  250. out:
  251. return 0;
  252. }
  253. /* The source code for fast_get_dcookie was "borrowed"
  254. * from drivers/oprofile/buffer_sync.c.
  255. */
  256. /* Optimisation. We can manage without taking the dcookie sem
  257. * because we cannot reach this code without at least one
  258. * dcookie user still being registered (namely, the reader
  259. * of the event buffer).
  260. */
  261. static inline unsigned long fast_get_dcookie(struct path *path)
  262. {
  263. unsigned long cookie;
  264. if (path->dentry->d_flags & DCACHE_COOKIE)
  265. return (unsigned long)path->dentry;
  266. get_dcookie(path, &cookie);
  267. return cookie;
  268. }
  269. /* Look up the dcookie for the task's mm->exe_file,
  270. * which corresponds loosely to "application name". Also, determine
  271. * the offset for the SPU ELF object. If computed offset is
  272. * non-zero, it implies an embedded SPU object; otherwise, it's a
  273. * separate SPU binary, in which case we retrieve it's dcookie.
  274. * For the embedded case, we must determine if SPU ELF is embedded
  275. * in the executable application or another file (i.e., shared lib).
  276. * If embedded in a shared lib, we must get the dcookie and return
  277. * that to the caller.
  278. */
  279. static unsigned long
  280. get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
  281. unsigned long *spu_bin_dcookie,
  282. unsigned long spu_ref)
  283. {
  284. unsigned long app_cookie = 0;
  285. unsigned int my_offset = 0;
  286. struct vm_area_struct *vma;
  287. struct file *exe_file;
  288. struct mm_struct *mm = spu->mm;
  289. if (!mm)
  290. goto out;
  291. exe_file = get_mm_exe_file(mm);
  292. if (exe_file) {
  293. app_cookie = fast_get_dcookie(&exe_file->f_path);
  294. pr_debug("got dcookie for %pD\n", exe_file);
  295. fput(exe_file);
  296. }
  297. down_read(&mm->mmap_sem);
  298. for (vma = mm->mmap; vma; vma = vma->vm_next) {
  299. if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
  300. continue;
  301. my_offset = spu_ref - vma->vm_start;
  302. if (!vma->vm_file)
  303. goto fail_no_image_cookie;
  304. pr_debug("Found spu ELF at %X(object-id:%lx) for file %pD\n",
  305. my_offset, spu_ref, vma->vm_file);
  306. *offsetp = my_offset;
  307. break;
  308. }
  309. *spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path);
  310. pr_debug("got dcookie for %pD\n", vma->vm_file);
  311. up_read(&mm->mmap_sem);
  312. out:
  313. return app_cookie;
  314. fail_no_image_cookie:
  315. up_read(&mm->mmap_sem);
  316. printk(KERN_ERR "SPU_PROF: "
  317. "%s, line %d: Cannot find dcookie for SPU binary\n",
  318. __func__, __LINE__);
  319. goto out;
  320. }
  321. /* This function finds or creates cached context information for the
  322. * passed SPU and records SPU context information into the OProfile
  323. * event buffer.
  324. */
  325. static int process_context_switch(struct spu *spu, unsigned long objectId)
  326. {
  327. unsigned long flags;
  328. int retval;
  329. unsigned int offset = 0;
  330. unsigned long spu_cookie = 0, app_dcookie;
  331. retval = prepare_cached_spu_info(spu, objectId);
  332. if (retval)
  333. goto out;
  334. /* Get dcookie first because a mutex_lock is taken in that
  335. * code path, so interrupts must not be disabled.
  336. */
  337. app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
  338. if (!app_dcookie || !spu_cookie) {
  339. retval = -ENOENT;
  340. goto out;
  341. }
  342. /* Record context info in event buffer */
  343. spin_lock_irqsave(&buffer_lock, flags);
  344. spu_buff_add(ESCAPE_CODE, spu->number);
  345. spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
  346. spu_buff_add(spu->number, spu->number);
  347. spu_buff_add(spu->pid, spu->number);
  348. spu_buff_add(spu->tgid, spu->number);
  349. spu_buff_add(app_dcookie, spu->number);
  350. spu_buff_add(spu_cookie, spu->number);
  351. spu_buff_add(offset, spu->number);
  352. /* Set flag to indicate SPU PC data can now be written out. If
  353. * the SPU program counter data is seen before an SPU context
  354. * record is seen, the postprocessing will fail.
  355. */
  356. spu_buff[spu->number].ctx_sw_seen = 1;
  357. spin_unlock_irqrestore(&buffer_lock, flags);
  358. smp_wmb(); /* insure spu event buffer updates are written */
  359. /* don't want entries intermingled... */
  360. out:
  361. return retval;
  362. }
  363. /*
  364. * This function is invoked on either a bind_context or unbind_context.
  365. * If called for an unbind_context, the val arg is 0; otherwise,
  366. * it is the object-id value for the spu context.
  367. * The data arg is of type 'struct spu *'.
  368. */
  369. static int spu_active_notify(struct notifier_block *self, unsigned long val,
  370. void *data)
  371. {
  372. int retval;
  373. unsigned long flags;
  374. struct spu *the_spu = data;
  375. pr_debug("SPU event notification arrived\n");
  376. if (!val) {
  377. spin_lock_irqsave(&cache_lock, flags);
  378. retval = release_cached_info(the_spu->number);
  379. spin_unlock_irqrestore(&cache_lock, flags);
  380. } else {
  381. retval = process_context_switch(the_spu, val);
  382. }
  383. return retval;
  384. }
  385. static struct notifier_block spu_active = {
  386. .notifier_call = spu_active_notify,
  387. };
  388. static int number_of_online_nodes(void)
  389. {
  390. u32 cpu; u32 tmp;
  391. int nodes = 0;
  392. for_each_online_cpu(cpu) {
  393. tmp = cbe_cpu_to_node(cpu) + 1;
  394. if (tmp > nodes)
  395. nodes++;
  396. }
  397. return nodes;
  398. }
  399. static int oprofile_spu_buff_create(void)
  400. {
  401. int spu;
  402. max_spu_buff = oprofile_get_cpu_buffer_size();
  403. for (spu = 0; spu < num_spu_nodes; spu++) {
  404. /* create circular buffers to store the data in.
  405. * use locks to manage accessing the buffers
  406. */
  407. spu_buff[spu].head = 0;
  408. spu_buff[spu].tail = 0;
  409. /*
  410. * Create a buffer for each SPU. Can't reliably
  411. * create a single buffer for all spus due to not
  412. * enough contiguous kernel memory.
  413. */
  414. spu_buff[spu].buff = kzalloc((max_spu_buff
  415. * sizeof(unsigned long)),
  416. GFP_KERNEL);
  417. if (!spu_buff[spu].buff) {
  418. printk(KERN_ERR "SPU_PROF: "
  419. "%s, line %d: oprofile_spu_buff_create "
  420. "failed to allocate spu buffer %d.\n",
  421. __func__, __LINE__, spu);
  422. /* release the spu buffers that have been allocated */
  423. while (spu >= 0) {
  424. kfree(spu_buff[spu].buff);
  425. spu_buff[spu].buff = 0;
  426. spu--;
  427. }
  428. return -ENOMEM;
  429. }
  430. }
  431. return 0;
  432. }
  433. /* The main purpose of this function is to synchronize
  434. * OProfile with SPUFS by registering to be notified of
  435. * SPU task switches.
  436. *
  437. * NOTE: When profiling SPUs, we must ensure that only
  438. * spu_sync_start is invoked and not the generic sync_start
  439. * in drivers/oprofile/oprof.c. A return value of
  440. * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
  441. * accomplish this.
  442. */
  443. int spu_sync_start(void)
  444. {
  445. int spu;
  446. int ret = SKIP_GENERIC_SYNC;
  447. int register_ret;
  448. unsigned long flags = 0;
  449. spu_prof_num_nodes = number_of_online_nodes();
  450. num_spu_nodes = spu_prof_num_nodes * 8;
  451. INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);
  452. /* create buffer for storing the SPU data to put in
  453. * the kernel buffer.
  454. */
  455. ret = oprofile_spu_buff_create();
  456. if (ret)
  457. goto out;
  458. spin_lock_irqsave(&buffer_lock, flags);
  459. for (spu = 0; spu < num_spu_nodes; spu++) {
  460. spu_buff_add(ESCAPE_CODE, spu);
  461. spu_buff_add(SPU_PROFILING_CODE, spu);
  462. spu_buff_add(num_spu_nodes, spu);
  463. }
  464. spin_unlock_irqrestore(&buffer_lock, flags);
  465. for (spu = 0; spu < num_spu_nodes; spu++) {
  466. spu_buff[spu].ctx_sw_seen = 0;
  467. spu_buff[spu].last_guard_val = 0;
  468. }
  469. /* Register for SPU events */
  470. register_ret = spu_switch_event_register(&spu_active);
  471. if (register_ret) {
  472. ret = SYNC_START_ERROR;
  473. goto out;
  474. }
  475. pr_debug("spu_sync_start -- running.\n");
  476. out:
  477. return ret;
  478. }
  479. /* Record SPU program counter samples to the oprofile event buffer. */
  480. void spu_sync_buffer(int spu_num, unsigned int *samples,
  481. int num_samples)
  482. {
  483. unsigned long long file_offset;
  484. unsigned long flags;
  485. int i;
  486. struct vma_to_fileoffset_map *map;
  487. struct spu *the_spu;
  488. unsigned long long spu_num_ll = spu_num;
  489. unsigned long long spu_num_shifted = spu_num_ll << 32;
  490. struct cached_info *c_info;
  491. /* We need to obtain the cache_lock here because it's
  492. * possible that after getting the cached_info, the SPU job
  493. * corresponding to this cached_info may end, thus resulting
  494. * in the destruction of the cached_info.
  495. */
  496. spin_lock_irqsave(&cache_lock, flags);
  497. c_info = get_cached_info(NULL, spu_num);
  498. if (!c_info) {
  499. /* This legitimately happens when the SPU task ends before all
  500. * samples are recorded.
  501. * No big deal -- so we just drop a few samples.
  502. */
  503. pr_debug("SPU_PROF: No cached SPU contex "
  504. "for SPU #%d. Dropping samples.\n", spu_num);
  505. goto out;
  506. }
  507. map = c_info->map;
  508. the_spu = c_info->the_spu;
  509. spin_lock(&buffer_lock);
  510. for (i = 0; i < num_samples; i++) {
  511. unsigned int sample = *(samples+i);
  512. int grd_val = 0;
  513. file_offset = 0;
  514. if (sample == 0)
  515. continue;
  516. file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
  517. /* If overlays are used by this SPU application, the guard
  518. * value is non-zero, indicating which overlay section is in
  519. * use. We need to discard samples taken during the time
  520. * period which an overlay occurs (i.e., guard value changes).
  521. */
  522. if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
  523. spu_buff[spu_num].last_guard_val = grd_val;
  524. /* Drop the rest of the samples. */
  525. break;
  526. }
  527. /* We must ensure that the SPU context switch has been written
  528. * out before samples for the SPU. Otherwise, the SPU context
  529. * information is not available and the postprocessing of the
  530. * SPU PC will fail with no available anonymous map information.
  531. */
  532. if (spu_buff[spu_num].ctx_sw_seen)
  533. spu_buff_add((file_offset | spu_num_shifted),
  534. spu_num);
  535. }
  536. spin_unlock(&buffer_lock);
  537. out:
  538. spin_unlock_irqrestore(&cache_lock, flags);
  539. }
  540. int spu_sync_stop(void)
  541. {
  542. unsigned long flags = 0;
  543. int ret;
  544. int k;
  545. ret = spu_switch_event_unregister(&spu_active);
  546. if (ret)
  547. printk(KERN_ERR "SPU_PROF: "
  548. "%s, line %d: spu_switch_event_unregister " \
  549. "returned %d\n",
  550. __func__, __LINE__, ret);
  551. /* flush any remaining data in the per SPU buffers */
  552. sync_spu_buff();
  553. spin_lock_irqsave(&cache_lock, flags);
  554. ret = release_cached_info(RELEASE_ALL);
  555. spin_unlock_irqrestore(&cache_lock, flags);
  556. /* remove scheduled work queue item rather then waiting
  557. * for every queued entry to execute. Then flush pending
  558. * system wide buffer to event buffer.
  559. */
  560. cancel_delayed_work(&spu_work);
  561. for (k = 0; k < num_spu_nodes; k++) {
  562. spu_buff[k].ctx_sw_seen = 0;
  563. /*
  564. * spu_sys_buff will be null if there was a problem
  565. * allocating the buffer. Only delete if it exists.
  566. */
  567. kfree(spu_buff[k].buff);
  568. spu_buff[k].buff = 0;
  569. }
  570. pr_debug("spu_sync_stop -- done.\n");
  571. return ret;
  572. }