scif_mmap.c

/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include "scif_main.h"

/*
 * struct scif_vma_info - Information about a remote memory mapping
 *			  created via scif_mmap(..)
 * @vma: VM area struct
 * @list: link to list of active vmas
 */
struct scif_vma_info {
	struct vm_area_struct *vma;
	struct list_head list;
};

/*
 * scif_recv_munmap - Handle a SCIF_MUNMAP message from the peer: look up
 * the local window named in the message, drop the mapping references and
 * release the window once its ref count reaches zero.
 */
void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_window *recv_window =
		(struct scif_window *)msg->payload[0];
	struct scif_endpt *ep;

	ep = (struct scif_endpt *)recv_window->ep;
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = recv_window->prot;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.reg_list;
	msg->payload[0] = ep->remote_ep;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	if (scif_query_window(&req)) {
		dev_err(&scifdev->sdev->dev,
			"%s %d -ENXIO\n", __func__, __LINE__);
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}

	scif_put_window(window, window->nr_pages);

	if (!window->ref_count) {
		atomic_inc(&ep->rma_info.tw_refcount);
		ep->rma_info.async_list_del = 1;
		list_del_init(&window->list);
		scif_free_window_offset(ep, window, window->offset);
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (window && !window->ref_count)
		scif_queue_for_cleanup(window, &scif_info.rma);
}

/*
 * Remove valid remote memory mappings created via scif_mmap(..) from the
 * process address space since the remote node is lost
 */
static void __scif_zap_mmaps(struct scif_endpt *ep)
{
	struct list_head *item;
	struct scif_vma_info *info;
	struct vm_area_struct *vma;
	unsigned long size;

	spin_lock(&ep->lock);
	list_for_each(item, &ep->rma_info.vma_list) {
		info = list_entry(item, struct scif_vma_info, list);
		vma = info->vma;
		size = vma->vm_end - vma->vm_start;
		zap_vma_ptes(vma, vma->vm_start, size);
		dev_dbg(scif_info.mdev.this_device,
			"%s ep %p zap vma %p size 0x%lx\n",
			__func__, ep, info->vma, size);
	}
	spin_unlock(&ep->lock);
}

/*
 * Traverse the list of endpoints for a particular remote node and
 * zap valid remote memory mappings since the remote node is lost
 */
static void _scif_zap_mmaps(int node, struct list_head *head)
{
	struct scif_endpt *ep;
	struct list_head *item;

	mutex_lock(&scif_info.connlock);
	list_for_each(item, head) {
		ep = list_entry(item, struct scif_endpt, list);
		if (ep->remote_dev->node == node)
			__scif_zap_mmaps(ep);
	}
	mutex_unlock(&scif_info.connlock);
}

/*
 * Wrapper for removing remote memory mappings for a particular node. This API
 * is called by peer nodes as part of handling a lost node.
 */
void scif_zap_mmaps(int node)
{
	_scif_zap_mmaps(node, &scif_info.connected);
	_scif_zap_mmaps(node, &scif_info.disconnected);
}

/*
 * This API is only called while handling a lost node:
 * a) Remote node is dead.
 * b) Remote memory mappings have been zapped.
 * So we can traverse the remote_reg_list without any locks. Since
 * the window has not yet been unregistered we can drop the ref count
 * and queue it to the cleanup thread.
 */
static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
{
	struct list_head *pos, *tmp;
	struct scif_window *window;

	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
		window = list_entry(pos, struct scif_window, list);
		if (window->ref_count)
			scif_put_window(window, window->nr_pages);
		else
			dev_err(scif_info.mdev.this_device,
				"%s %d unexpected\n",
				__func__, __LINE__);
		if (!window->ref_count) {
			atomic_inc(&ep->rma_info.tw_refcount);
			list_del_init(&window->list);
			scif_queue_for_cleanup(window, &scif_info.rma);
		}
	}
}

/* Cleanup remote registration lists for zombie endpoints */
void scif_cleanup_rma_for_zombies(int node)
{
	struct scif_endpt *ep;
	struct list_head *item;

	mutex_lock(&scif_info.eplock);
	list_for_each(item, &scif_info.zombie) {
		ep = list_entry(item, struct scif_endpt, list);
		if (ep->remote_dev && ep->remote_dev->node == node)
			__scif_cleanup_rma_for_zombies(ep);
	}
	mutex_unlock(&scif_info.eplock);
	flush_work(&scif_info.misc_work);
}

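/*
 * Call-order sketch for lost-node handling (illustrative; per the comment
 * above, the caller lives in the node removal path outside this file):
 *
 *	scif_zap_mmaps(node);			// (b) zap user mappings first
 *	scif_cleanup_rma_for_zombies(node);	// then drop window references
 */
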
/* Insert the VMA into the per endpoint VMA list */
static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
{
	struct scif_vma_info *info;
	int err = 0;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		err = -ENOMEM;
		goto done;
	}
	info->vma = vma;
	spin_lock(&ep->lock);
	list_add_tail(&info->list, &ep->rma_info.vma_list);
	spin_unlock(&ep->lock);
done:
	return err;
}

/* Delete the VMA from the per endpoint VMA list */
static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
{
	struct list_head *item;
	struct scif_vma_info *info;

	spin_lock(&ep->lock);
	list_for_each(item, &ep->rma_info.vma_list) {
		info = list_entry(item, struct scif_vma_info, list);
		if (info->vma == vma) {
			list_del(&info->list);
			kfree(info);
			break;
		}
	}
	spin_unlock(&ep->lock);
}

static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
{
	struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
	struct scif_hw_dev *sdev = scifdev->sdev;
	phys_addr_t out_phys, apt_base = 0;

	/*
	 * If the DMA address is card relative then we need to add the
	 * aperture base for mmap to work correctly
	 */
	if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
		apt_base = sdev->aper->pa;

	out_phys = apt_base + phys;
	return out_phys;
}

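/*
 * Worked example (values illustrative, not from real hardware): with a
 * card-relative DMA address phys == 0x1000 and an aperture base
 * sdev->aper->pa == 0x80000000, scif_get_phys() returns 0x80001000, the
 * host physical address that remap_pfn_range() below can map. In the
 * loopback case (scifdev_self()) apt_base stays 0 and the address passes
 * through unchanged.
 */
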
int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
		   struct scif_range **pages)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	int nr_pages, err, i;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
		ep, offset, len);
	err = scif_verify_epd(ep);
	if (err)
		return err;

	if (!len || (offset < 0) ||
	    (offset + len < offset) ||
	    (ALIGN(offset, PAGE_SIZE) != offset) ||
	    (ALIGN(len, PAGE_SIZE) != len))
		return -EINVAL;

	nr_pages = len >> PAGE_SHIFT;

	req.out_window = &window;
	req.offset = offset;
	req.prot = 0;
	req.nr_bytes = len;
	req.type = SCIF_WINDOW_SINGLE;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error;
	}

	/* Allocate scif_range */
	*pages = kzalloc(sizeof(**pages), GFP_KERNEL);
	if (!*pages) {
		err = -ENOMEM;
		goto error;
	}

	/* Allocate phys addr array */
	(*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
	if (!(*pages)->phys_addr) {
		err = -ENOMEM;
		goto error;
	}

	if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
		/* Allocate virtual address array */
		(*pages)->va = scif_zalloc(nr_pages * sizeof(void *));
		if (!(*pages)->va) {
			err = -ENOMEM;
			goto error;
		}
	}
	/* Populate the values */
	(*pages)->cookie = window;
	(*pages)->nr_pages = nr_pages;
	(*pages)->prot_flags = window->prot;

	for (i = 0; i < nr_pages; i++) {
		(*pages)->phys_addr[i] =
			__scif_off_to_dma_addr(window, offset +
					       (i * PAGE_SIZE));
		(*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
						       ep);
		if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
			(*pages)->va[i] =
				ep->remote_dev->sdev->aper->va +
				(*pages)->phys_addr[i] -
				ep->remote_dev->sdev->aper->pa;
	}

	scif_get_window(window, nr_pages);
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (err) {
		if (*pages) {
			scif_free((*pages)->phys_addr,
				  nr_pages * sizeof(dma_addr_t));
			scif_free((*pages)->va,
				  nr_pages * sizeof(void *));
			kfree(*pages);
			*pages = NULL;
		}
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
	}
	return err;
}
EXPORT_SYMBOL_GPL(scif_get_pages);

int scif_put_pages(struct scif_range *pages)
{
	struct scif_endpt *ep;
	struct scif_window *window;
	struct scifmsg msg;

	if (!pages || !pages->cookie)
		return -EINVAL;

	window = pages->cookie;

	if (!window || window->magic != SCIFEP_MAGIC)
		return -EINVAL;

	ep = (struct scif_endpt *)window->ep;
	/*
	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
	 * callee should be allowed to release references to the pages,
	 * else the endpoint was not connected in the first place,
	 * hence the ENOTCONN.
	 */
	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
		return -ENOTCONN;

	mutex_lock(&ep->rma_info.rma_lock);

	scif_put_window(window, pages->nr_pages);

	/* Initiate window destruction if ref count is zero */
	if (!window->ref_count) {
		list_del(&window->list);
		mutex_unlock(&ep->rma_info.rma_lock);
		scif_drain_dma_intr(ep->remote_dev->sdev,
				    ep->rma_info.dma_chan);
		/* Inform the peer about this window being destroyed. */
		msg.uop = SCIF_MUNMAP;
		msg.src = ep->port;
		msg.payload[0] = window->peer_window;
		/* No error handling for notification messages */
		scif_nodeqp_send(ep->remote_dev, &msg);
		/* Destroy this window from the peer's registered AS */
		scif_destroy_remote_window(window);
	} else {
		mutex_unlock(&ep->rma_info.rma_lock);
	}

	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
	scif_free(pages->va, pages->nr_pages * sizeof(void *));
	kfree(pages);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_put_pages);

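/*
 * Usage sketch for the exported pair above (illustrative only; error
 * handling is elided, and "epd", "offset", "len" and use_pages() are
 * hypothetical -- they stand for an established SCIF connection whose peer
 * has registered a window covering a page-aligned [offset, offset + len)):
 *
 *	struct scif_range *range;
 *
 *	if (!scif_get_pages(epd, offset, len, &range)) {
 *		// range->phys_addr[i] is the host physical address of
 *		// page i of the remote window; on the mgmt node,
 *		// range->va[i] is the matching aperture kernel mapping.
 *		use_pages(range);
 *		scif_put_pages(range);	// drops the window references
 *	}
 */
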
/*
 * scif_rma_list_mmap:
 *
 * Traverse the remote registration list starting from start_window:
 * 1) Create VtoP mappings via remap_pfn_range(..).
 * 2) Once step 1) completes successfully, traverse the range of windows
 *    again and bump the reference count.
 * RMA lock must be held.
 */
static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
			      int nr_pages, struct vm_area_struct *vma)
{
	s64 end_offset, loop_offset = offset;
	struct scif_window *window = start_window;
	int loop_nr_pages, nr_pages_left = nr_pages;
	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
	struct list_head *head = &ep->rma_info.remote_reg_list;
	int i, err = 0;
	dma_addr_t phys_addr;
	struct scif_window_iter src_win_iter;
	size_t contig_bytes = 0;

	might_sleep();
	list_for_each_entry_from(window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_init_window_iter(window, &src_win_iter);
		for (i = 0; i < loop_nr_pages; i++) {
			phys_addr = scif_off_to_dma_addr(window, loop_offset,
							 &contig_bytes,
							 &src_win_iter);
			phys_addr = scif_get_phys(phys_addr, ep);
			err = remap_pfn_range(vma,
					      vma->vm_start +
					      loop_offset - offset,
					      phys_addr >> PAGE_SHIFT,
					      PAGE_SIZE,
					      vma->vm_page_prot);
			if (err)
				goto error;
			loop_offset += PAGE_SIZE;
		}
		nr_pages_left -= loop_nr_pages;
		if (!nr_pages_left)
			break;
	}
	/*
	 * No more failures expected. Bump up the ref count for all
	 * the windows. Doing this in a second traversal from start_window
	 * keeps the error handling for remap_pfn_range(..) failures in the
	 * first pass simple.
	 */
	loop_offset = offset;
	nr_pages_left = nr_pages;
	window = start_window;
	head = &ep->rma_info.remote_reg_list;
	list_for_each_entry_from(window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_get_window(window, loop_nr_pages);
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
error:
	if (err)
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
	return err;
}

/*
 * scif_rma_list_munmap:
 *
 * Traverse the remote registration list starting from window:
 * 1) Decrement ref count.
 * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
 * RMA lock must be held.
 */
static void scif_rma_list_munmap(struct scif_window *start_window,
				 s64 offset, int nr_pages)
{
	struct scifmsg msg;
	s64 loop_offset = offset, end_offset;
	int loop_nr_pages, nr_pages_left = nr_pages;
	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
	struct list_head *head = &ep->rma_info.remote_reg_list;
	struct scif_window *window = start_window, *_window;

	msg.uop = SCIF_MUNMAP;
	msg.src = ep->port;
	loop_offset = offset;
	nr_pages_left = nr_pages;
	list_for_each_entry_safe_from(window, _window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_put_window(window, loop_nr_pages);
		if (!window->ref_count) {
			struct scif_dev *rdev = ep->remote_dev;

			scif_drain_dma_intr(rdev->sdev,
					    ep->rma_info.dma_chan);
			/* Inform the peer about this munmap */
			msg.payload[0] = window->peer_window;
			/* No error handling for notification messages. */
			scif_nodeqp_send(ep->remote_dev, &msg);
			list_del(&window->list);
			/* Destroy this window from the peer's registered AS */
			scif_destroy_remote_window(window);
		}
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
}

/*
 * The private data field of each VMA used to mmap a remote window
 * points to an instance of struct vma_pvt
 */
struct vma_pvt {
	struct scif_endpt *ep;	/* End point for remote window */
	s64 offset;		/* offset within remote window */
	bool valid_offset;	/* offset is valid only if the original
				 * mmap request was for a single page
				 * else the offset within the vma is
				 * the correct offset
				 */
	struct kref ref;
};

static void vma_pvt_release(struct kref *ref)
{
	struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);

	kfree(vmapvt);
}

/**
 * scif_vma_open - VMA open driver callback
 * @vma: VMM memory area.
 *
 * The open method is called by the kernel to allow the subsystem
 * implementing the VMA to initialize the area. This method is invoked any
 * time a new reference to the VMA is made (when a process forks, for
 * example). The one exception happens when the VMA is first created by
 * mmap; in this case, the driver's mmap method is called instead.
 * This function is also invoked when an existing VMA is split by the
 * kernel due to a call to munmap on a subset of the VMA resulting in two
 * VMAs. The kernel invokes this function only on one of the two VMAs.
 */
static void scif_vma_open(struct vm_area_struct *vma)
{
	struct vma_pvt *vmapvt = vma->vm_private_data;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
		vma->vm_start, vma->vm_end);
	scif_insert_vma(vmapvt->ep, vma);
	kref_get(&vmapvt->ref);
}

/**
 * scif_munmap - VMA close driver callback.
 * @vma: VMM memory area.
 *
 * When an area is destroyed, the kernel calls its close operation.
 * Note that there's no usage count associated with VMAs; the area
 * is opened and closed exactly once by each process that uses it.
 */
static void scif_munmap(struct vm_area_struct *vma)
{
	struct scif_endpt *ep;
	struct vma_pvt *vmapvt = vma->vm_private_data;
	int nr_pages = vma_pages(vma);
	s64 offset;
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	int err;

	might_sleep();
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
		vma->vm_start, vma->vm_end);
	ep = vmapvt->ep;
	offset = vmapvt->valid_offset ? vmapvt->offset :
		(vma->vm_pgoff) << PAGE_SHIFT;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
		ep, nr_pages, offset);

	req.out_window = &window;
	req.offset = offset;
	req.nr_bytes = vma->vm_end - vma->vm_start;
	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
	req.type = SCIF_WINDOW_PARTIAL;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);

	err = scif_query_window(&req);
	if (err)
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
	else
		scif_rma_list_munmap(window, offset, nr_pages);

	mutex_unlock(&ep->rma_info.rma_lock);
	/*
	 * The kernel probably zeroes these out but we still want
	 * to clean up our own mess just in case.
	 */
	vma->vm_ops = NULL;
	vma->vm_private_data = NULL;
	kref_put(&vmapvt->ref, vma_pvt_release);
	scif_delete_vma(ep, vma);
}

static const struct vm_operations_struct scif_vm_ops = {
	.open = scif_vma_open,
	.close = scif_munmap,
};

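/*
 * Lifecycle sketch (illustrative): one vma_pvt instance is shared by all
 * VMAs that alias the original mapping, with the kref tracking them:
 *
 *	mmap()   -> scif_mmap()      kref == 1, VMA on the ep's vma_list
 *	fork()   -> scif_vma_open()  kref == 2, child VMA added to the list
 *	munmap() -> scif_munmap()    kref_put(); vma_pvt_release() frees the
 *	                             structure when the last mapping goes away
 */
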
/**
 * scif_mmap - Map pages in virtual address space to a remote window.
 * @vma: VMM memory area.
 * @epd: endpoint descriptor
 *
 * Return: Upon successful completion, scif_mmap() returns zero
 * else an apt error is returned as documented in scif.h
 */
int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
	int nr_pages = vma_pages(vma);
	int err;
	struct vma_pvt *vmapvt;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
		ep, start_offset, nr_pages);
	err = scif_verify_epd(ep);
	if (err)
		return err;

	might_sleep();

	err = scif_insert_vma(ep, vma);
	if (err)
		return err;

	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
	if (!vmapvt) {
		scif_delete_vma(ep, vma);
		return -ENOMEM;
	}

	vmapvt->ep = ep;
	kref_init(&vmapvt->ref);

	req.out_window = &window;
	req.offset = start_offset;
	req.nr_bytes = vma->vm_end - vma->vm_start;
	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
	req.type = SCIF_WINDOW_PARTIAL;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unlock;
	}

	/* Default page prot is kept for loopback; write combining otherwise */
	if (!scifdev_self(ep->remote_dev))
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

	/*
	 * VM_DONTCOPY - Do not copy this vma on fork
	 * VM_DONTEXPAND - Cannot expand with mremap()
	 * VM_DONTDUMP - Do not include this vma in core dumps
	 * VM_PFNMAP - Page-ranges managed without "struct page"
	 * VM_IO - Memory mapped I/O or similar
	 *
	 * We do not want to copy this VMA automatically on a fork(),
	 * expand this VMA due to mremap() or swap out these pages since
	 * the VMA is actually backed by physical pages in the remote
	 * node's physical memory and not via a struct page.
	 */
	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;

	if (!scifdev_self(ep->remote_dev))
		vma->vm_flags |= VM_IO | VM_PFNMAP;

	/* Map this range of windows */
	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unlock;
	}
	/* Set up the driver call back */
	vma->vm_ops = &scif_vm_ops;
	vma->vm_private_data = vmapvt;
error_unlock:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (err) {
		kfree(vmapvt);
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		scif_delete_vma(ep, vma);
	}
	return err;
}
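
/*
 * Usage sketch (illustrative, assuming a character device whose file
 * private_data holds an open SCIF endpoint; the handler name and the
 * private_data layout are hypothetical):
 *
 *	static int my_scif_fop_mmap(struct file *f,
 *				    struct vm_area_struct *vma)
 *	{
 *		scif_epd_t epd = f->private_data;
 *
 *		// vma->vm_pgoff selects the offset into the remote
 *		// registered address space; the peer must have registered
 *		// a window covering [offset, offset + vma size).
 *		return scif_mmap(vma, epd);
 *	}
 */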