scif_rma.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. /*
  2. * Intel MIC Platform Software Stack (MPSS)
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * Copyright(c) 2015 Intel Corporation.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of version 2 of the GNU General Public License as
  13. * published by the Free Software Foundation.
  14. *
  15. * This program is distributed in the hope that it will be useful, but
  16. * WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * BSD LICENSE
  21. *
  22. * Copyright(c) 2015 Intel Corporation.
  23. *
  24. * Redistribution and use in source and binary forms, with or without
  25. * modification, are permitted provided that the following conditions
  26. * are met:
  27. *
  28. * * Redistributions of source code must retain the above copyright
  29. * notice, this list of conditions and the following disclaimer.
  30. * * Redistributions in binary form must reproduce the above copyright
  31. * notice, this list of conditions and the following disclaimer in
  32. * the documentation and/or other materials provided with the
  33. * distribution.
  34. * * Neither the name of Intel Corporation nor the names of its
  35. * contributors may be used to endorse or promote products derived
  36. * from this software without specific prior written permission.
  37. *
  38. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  39. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  40. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  41. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  42. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  43. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  44. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  45. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  46. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  47. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  48. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  49. *
  50. * Intel SCIF driver.
  51. *
  52. */
  53. #ifndef SCIF_RMA_H
  54. #define SCIF_RMA_H
  55. #include <linux/dma_remapping.h>
  56. #include <linux/mmu_notifier.h>
  57. #include "../bus/scif_bus.h"
  /* A mark with this bit set is a remote fence mark */
  #define SCIF_REMOTE_FENCE_BIT           31
  /* Magic value that indicates a remote fence request */
  #define SCIF_REMOTE_FENCE               BIT_ULL(SCIF_REMOTE_FENCE_BIT)

  /* 1 MiB */
  #define SCIF_MAX_UNALIGNED_BUF_SIZE     (1024 * 1024ULL)
  /* SCIF_MAX_UNALIGNED_BUF_SIZE plus twice L1_CACHE_BYTES */
  #define SCIF_KMEM_UNALIGNED_BUF_SIZE \
          (SCIF_MAX_UNALIGNED_BUF_SIZE + (L1_CACHE_BYTES << 1))

  #define SCIF_IOVA_START_PFN             (1)
  /* Page frame number of @addr: the address shifted right by PAGE_SHIFT */
  #define SCIF_IOVA_PFN(addr)             ((addr) >> PAGE_SHIFT)
  /* Page frame numbers of the 64-bit and 63-bit DMA masks */
  #define SCIF_DMA_64BIT_PFN              SCIF_IOVA_PFN(DMA_BIT_MASK(64))
  #define SCIF_DMA_63BIT_PFN              SCIF_IOVA_PFN(DMA_BIT_MASK(63))
  69. /*
  70. * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
  71. *
  72. * @reg_list: List of registration windows for self
  73. * @remote_reg_list: List of registration windows for peer
  74. * @iovad: Offset generator
  75. * @rma_lock: Synchronizes access to self/remote list and also protects the
  76. * window from being destroyed while RMAs are in progress.
  77. * @tc_lock: Synchronizes access to temporary cached windows list
  78. * for SCIF Registration Caching.
  79. * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
  80. * @tw_refcount: Keeps track of number of outstanding temporary registered
  81. * windows created by scif_vreadfrom/scif_vwriteto which have
  82. * not been destroyed.
  83. * @tcw_refcount: Same as tw_refcount but for temporary cached windows
  84. * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
  85. * @mmn_list: MMU notifier so that we can destroy the windows when required
  86. * @fence_refcount: Keeps track of number of outstanding remote fence
  87. * requests which have been received by the peer.
  88. * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
  89. * @async_list_del: Detect asynchronous list entry deletion
  90. * @vma_list: List of vmas with remote memory mappings
  91. * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
  92. */
  93. struct scif_endpt_rma_info {
  94. struct list_head reg_list;
  95. struct list_head remote_reg_list;
  96. struct iova_domain iovad;
  97. struct mutex rma_lock;
  98. spinlock_t tc_lock;
  99. struct mutex mmn_lock;
  100. atomic_t tw_refcount;
  101. atomic_t tcw_refcount;
  102. atomic_t tcw_total_pages;
  103. struct list_head mmn_list;
  104. atomic_t fence_refcount;
  105. struct dma_chan *dma_chan;
  106. int async_list_del;
  107. struct list_head vma_list;
  108. wait_queue_head_t markwq;
  109. };
  110. /*
  111. * struct scif_fence_info - used for tracking fence requests
  112. *
  113. * @state: State of this transfer
  114. * @wq: Fences wait on this queue
  115. * @dma_mark: Used for storing the DMA mark
  116. */
  117. struct scif_fence_info {
  118. enum scif_msg_state state;
  119. struct completion comp;
  120. int dma_mark;
  121. };
  122. /*
  123. * struct scif_remote_fence_info - used for tracking remote fence requests
  124. *
  125. * @msg: List of SCIF node QP fence messages
  126. * @list: Link to list of remote fence requests
  127. */
  128. struct scif_remote_fence_info {
  129. struct scifmsg msg;
  130. struct list_head list;
  131. };
  /*
   * enum scif_window_type - how an RMA operation may span windows, and
   * whether a window is a self or a peer window.
   *
   * An RMA operation can span across partial windows, a single window or
   * multiple contiguous windows:
   *   - mmaps can span across partial windows,
   *   - unregistration can span across complete windows,
   *   - scif_get_pages() can span a single window.
   * A window can also be of type self or peer.
   */
  enum scif_window_type {
          SCIF_WINDOW_PARTIAL,
          SCIF_WINDOW_SINGLE,
          SCIF_WINDOW_FULL,
          SCIF_WINDOW_SELF,
          SCIF_WINDOW_PEER,
  };
  /*
   * Number of physical addresses that can be stored in a page:
   * 0x1000 bytes (4K) divided by 8 bytes per address, i.e. 512.
   */
  #define SCIF_NR_ADDR_IN_PAGE    (0x1000 >> 3)
  147. /*
  148. * struct scif_rma_lookup - RMA lookup data structure for page list transfers
  149. *
  150. * Store an array of lookup offsets. Each offset in this array maps
  151. * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
  152. * offsets in a 4K page will correspond to 1GB of registered address space.
  153. * @lookup: Array of offsets
  154. * @offset: DMA offset of lookup array
  155. */
  156. struct scif_rma_lookup {
  157. dma_addr_t *lookup;
  158. dma_addr_t offset;
  159. };
  160. /*
  161. * struct scif_pinned_pages - A set of pinned pages obtained with
  162. * scif_pin_pages() which could be part of multiple registered
  163. * windows across different end points.
  164. *
  165. * @nr_pages: Number of pages which is defined as a s64 instead of an int
  166. * to avoid sign extension with buffers >= 2GB
  167. * @prot: read/write protections
  168. * @map_flags: Flags specified during the pin operation
  169. * @ref_count: Reference count bumped in terms of number of pages
  170. * @magic: A magic value
  171. * @pages: Array of pointers to struct pages populated with get_user_pages(..)
  172. */
  173. struct scif_pinned_pages {
  174. s64 nr_pages;
  175. int prot;
  176. int map_flags;
  177. atomic_t ref_count;
  178. u64 magic;
  179. struct page **pages;
  180. };
  181. /*
  182. * struct scif_status - Stores DMA status update information
  183. *
  184. * @src_dma_addr: Source buffer DMA address
  185. * @val: src location for value to be written to the destination
  186. * @ep: SCIF endpoint
  187. */
  188. struct scif_status {
  189. dma_addr_t src_dma_addr;
  190. u64 val;
  191. struct scif_endpt *ep;
  192. };
  193. /*
  194. * struct scif_window - Registration Window for Self and Remote
  195. *
  196. * @nr_pages: Number of pages which is defined as a s64 instead of an int
  197. * to avoid sign extension with buffers >= 2GB
  198. * @nr_contig_chunks: Number of contiguous physical chunks
  199. * @prot: read/write protections
  200. * @ref_count: reference count in terms of number of pages
  201. * @magic: Cookie to detect corruption
  202. * @offset: registered offset
  203. * @va_for_temp: va address that this window represents
  204. * @dma_mark: Used to determine if all DMAs against the window are done
  205. * @ep: Pointer to EP. Useful for passing EP around with messages to
  206. avoid expensive list traversals.
  207. * @list: link to list of windows for the endpoint
  208. * @type: self or peer window
  209. * @peer_window: Pointer to peer window. Useful for sending messages to peer
  210. * without requiring an extra list traversal
  211. * @unreg_state: unregistration state
  212. * @offset_freed: True if the offset has been freed
  213. * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
  214. * @mm: memory descriptor for the task_struct which initiated the RMA
  215. * @st: scatter gather table for DMA mappings with IOMMU enabled
  216. * @pinned_pages: The set of pinned_pages backing this window
  217. * @alloc_handle: Handle for sending ALLOC_REQ
  218. * @regwq: Wait Queue for an registration (N)ACK
  219. * @reg_state: Registration state
  220. * @unregwq: Wait Queue for an unregistration (N)ACK
  221. * @dma_addr_lookup: Lookup for physical addresses used for DMA
  222. * @nr_lookup: Number of entries in lookup
  223. * @mapped_offset: Offset used to map the window by the peer
  224. * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
  225. * @num_pages: Array specifying number of pages for each physical address
  226. */
  227. struct scif_window {
  228. s64 nr_pages;
  229. int nr_contig_chunks;
  230. int prot;
  231. int ref_count;
  232. u64 magic;
  233. s64 offset;
  234. unsigned long va_for_temp;
  235. int dma_mark;
  236. u64 ep;
  237. struct list_head list;
  238. enum scif_window_type type;
  239. u64 peer_window;
  240. enum scif_msg_state unreg_state;
  241. bool offset_freed;
  242. bool temp;
  243. struct mm_struct *mm;
  244. struct sg_table *st;
  245. union {
  246. struct {
  247. struct scif_pinned_pages *pinned_pages;
  248. struct scif_allocmsg alloc_handle;
  249. wait_queue_head_t regwq;
  250. enum scif_msg_state reg_state;
  251. wait_queue_head_t unregwq;
  252. };
  253. struct {
  254. struct scif_rma_lookup dma_addr_lookup;
  255. struct scif_rma_lookup num_pages_lookup;
  256. int nr_lookup;
  257. dma_addr_t mapped_offset;
  258. };
  259. };
  260. dma_addr_t *dma_addr;
  261. u64 *num_pages;
  262. } __packed;
  263. /*
  264. * scif_mmu_notif - SCIF mmu notifier information
  265. *
  266. * @mmu_notifier ep_mmu_notifier: MMU notifier operations
  267. * @tc_reg_list: List of temp registration windows for self
  268. * @mm: memory descriptor for the task_struct which initiated the RMA
  269. * @ep: SCIF endpoint
  270. * @list: link to list of MMU notifier information
  271. */
  272. struct scif_mmu_notif {
  273. #ifdef CONFIG_MMU_NOTIFIER
  274. struct mmu_notifier ep_mmu_notifier;
  275. #endif
  276. struct list_head tc_reg_list;
  277. struct mm_struct *mm;
  278. struct scif_endpt *ep;
  279. struct list_head list;
  280. };
  /*
   * enum scif_rma_dir - the two RMA directions, named from the local side:
   * local to remote, and remote to local.
   */
  enum scif_rma_dir {
          SCIF_LOCAL_TO_REMOTE,
          SCIF_REMOTE_TO_LOCAL,
  };
  285. extern struct kmem_cache *unaligned_cache;
  286. /* Initialize RMA for this EP */
  287. void scif_rma_ep_init(struct scif_endpt *ep);
  288. /* Check if epd can be uninitialized */
  289. int scif_rma_ep_can_uninit(struct scif_endpt *ep);
  290. /* Obtain a new offset. Callee must grab RMA lock */
  291. int scif_get_window_offset(struct scif_endpt *ep, int flags,
  292. s64 offset, int nr_pages, s64 *out_offset);
  293. /* Free offset. Callee must grab RMA lock */
  294. void scif_free_window_offset(struct scif_endpt *ep,
  295. struct scif_window *window, s64 offset);
  296. /* Create self registration window */
  297. struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
  298. s64 offset, bool temp);
  299. /* Destroy self registration window.*/
  300. int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
  301. void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
  302. /* Map pages of self window to Aperture/PCI */
  303. int scif_map_window(struct scif_dev *remote_dev,
  304. struct scif_window *window);
  305. /* Unregister a self window */
  306. int scif_unregister_window(struct scif_window *window);
  307. /* Destroy remote registration window */
  308. void
  309. scif_destroy_remote_window(struct scif_window *window);
  310. /* remove valid remote memory mappings from process address space */
  311. void scif_zap_mmaps(int node);
  312. /* Query if any applications have remote memory mappings */
  313. bool scif_rma_do_apps_have_mmaps(int node);
  314. /* Cleanup remote registration lists for zombie endpoints */
  315. void scif_cleanup_rma_for_zombies(int node);
  316. /* Reserve a DMA channel for a particular endpoint */
  317. int scif_reserve_dma_chan(struct scif_endpt *ep);
  318. /* Setup a DMA mark for an endpoint */
  319. int _scif_fence_mark(scif_epd_t epd, int *mark);
  320. int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
  321. enum scif_window_type type);
  322. void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
  323. void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
  324. void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
  325. void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
  326. void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
  327. void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
  328. void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
  329. void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
  330. void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
  331. void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
  332. void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
  333. void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
  334. void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
  335. void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
  336. void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
  337. void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
  338. void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
  339. void scif_mmu_notif_handler(struct work_struct *work);
  340. void scif_rma_handle_remote_fences(void);
  341. void scif_rma_destroy_windows(void);
  342. void scif_rma_destroy_tcw_invalid(void);
  343. int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
  344. struct scif_window_iter {
  345. s64 offset;
  346. int index;
  347. };
  348. static inline void
  349. scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
  350. {
  351. iter->offset = window->offset;
  352. iter->index = 0;
  353. }
  354. dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
  355. size_t *nr_bytes,
  356. struct scif_window_iter *iter);
  357. static inline
  358. dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
  359. {
  360. return scif_off_to_dma_addr(window, off, NULL, NULL);
  361. }
  362. static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
  363. {
  364. src_offset = src_offset & (L1_CACHE_BYTES - 1);
  365. dst_offset = dst_offset & (L1_CACHE_BYTES - 1);
  366. return !(src_offset == dst_offset);
  367. }
  368. /*
  369. * scif_zalloc:
  370. * @size: Size of the allocation request.
  371. *
  372. * Helper API which attempts to allocate zeroed pages via
  373. * __get_free_pages(..) first and then falls back on
  374. * vzalloc(..) if that fails.
  375. */
  376. static inline void *scif_zalloc(size_t size)
  377. {
  378. void *ret = NULL;
  379. size_t align = ALIGN(size, PAGE_SIZE);
  380. if (align && get_order(align) < MAX_ORDER)
  381. ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
  382. get_order(align));
  383. return ret ? ret : vzalloc(align);
  384. }
  385. /*
  386. * scif_free:
  387. * @addr: Address to be freed.
  388. * @size: Size of the allocation.
  389. * Helper API which frees memory allocated via scif_zalloc().
  390. */
  391. static inline void scif_free(void *addr, size_t size)
  392. {
  393. size_t align = ALIGN(size, PAGE_SIZE);
  394. if (is_vmalloc_addr(addr))
  395. vfree(addr);
  396. else
  397. free_pages((unsigned long)addr, get_order(align));
  398. }
  399. static inline void scif_get_window(struct scif_window *window, int nr_pages)
  400. {
  401. window->ref_count += nr_pages;
  402. }
  403. static inline void scif_put_window(struct scif_window *window, int nr_pages)
  404. {
  405. window->ref_count -= nr_pages;
  406. }
  407. static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
  408. {
  409. window->ref_count = nr_pages;
  410. }
  411. static inline void
  412. scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
  413. {
  414. spin_lock(&scif_info.rmalock);
  415. list_add_tail(&window->list, list);
  416. spin_unlock(&scif_info.rmalock);
  417. schedule_work(&scif_info.misc_work);
  418. }
  419. static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
  420. {
  421. list_del_init(&window->list);
  422. scif_queue_for_cleanup(window, &scif_info.rma_tc);
  423. }
  /*
   * scif_is_iommu_enabled:
   *
   * Returns the value of intel_iommu_enabled when CONFIG_INTEL_IOMMU is
   * defined, and false otherwise.
   */
  static inline bool scif_is_iommu_enabled(void)
  {
          bool enabled = false;

  #ifdef CONFIG_INTEL_IOMMU
          enabled = intel_iommu_enabled;
  #endif
          return enabled;
  }
  432. #endif /* SCIF_RMA_H */