scif.h 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340
  1. /*
  2. * Intel MIC Platform Software Stack (MPSS)
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * Copyright(c) 2014 Intel Corporation.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of version 2 of the GNU General Public License as
  13. * published by the Free Software Foundation.
  14. *
  15. * This program is distributed in the hope that it will be useful, but
  16. * WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * BSD LICENSE
  21. *
  22. * Copyright(c) 2014 Intel Corporation.
  23. *
  24. * Redistribution and use in source and binary forms, with or without
  25. * modification, are permitted provided that the following conditions
  26. * are met:
  27. *
  28. * * Redistributions of source code must retain the above copyright
  29. * notice, this list of conditions and the following disclaimer.
  30. * * Redistributions in binary form must reproduce the above copyright
  31. * notice, this list of conditions and the following disclaimer in
  32. * the documentation and/or other materials provided with the
  33. * distribution.
  34. * * Neither the name of Intel Corporation nor the names of its
  35. * contributors may be used to endorse or promote products derived
  36. * from this software without specific prior written permission.
  37. *
  38. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  39. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  40. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  41. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  42. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  43. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  44. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  45. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  46. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  47. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  48. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  49. *
  50. * Intel SCIF driver.
  51. *
  52. */
  53. #ifndef __SCIF_H__
  54. #define __SCIF_H__
  55. #include <linux/types.h>
  56. #include <linux/poll.h>
  57. #include <linux/device.h>
  58. #include <linux/scif_ioctl.h>
  59. #define SCIF_ACCEPT_SYNC 1 /* scif_accept() flag: block until a connection request is presented */
  60. #define SCIF_SEND_BLOCK 1 /* scif_send() flag: block until the entire message is sent */
  61. #define SCIF_RECV_BLOCK 1 /* scif_recv() flag: block until the entire message is received */
  62. enum { /* prot_flags bits for scif_register(); OR one or more together */
  63. SCIF_PROT_READ = (1 << 0), /* allow read operations from the window */
  64. SCIF_PROT_WRITE = (1 << 1) /* allow write operations to the window */
  65. };
  66. enum { /* mapping flags (map_flags) */
  67. SCIF_MAP_FIXED = 0x10, /* scif_register(): use offset exactly; offset must be a multiple of the page size */
  68. SCIF_MAP_KERNEL = 0x20, /* NOTE(review): not described in the visible part of this header - confirm meaning */
  69. };
  70. enum { /* NOTE(review): presumably flags for the scif_fence_*() calls, which lie outside this excerpt - confirm */
  71. SCIF_FENCE_INIT_SELF = (1 << 0),
  72. SCIF_FENCE_INIT_PEER = (1 << 1),
  73. SCIF_SIGNAL_LOCAL = (1 << 4), /* bits 2 and 3 are not assigned by this enum */
  74. SCIF_SIGNAL_REMOTE = (1 << 5)
  75. };
  76. enum { /* rma_flags bits for RMA transfers such as scif_readfrom() */
  77. SCIF_RMA_USECPU = (1 << 0), /* copy using programmed reads/writes; without it the data is copied using DMA */
  78. SCIF_RMA_USECACHE = (1 << 1), /* NOTE(review): not described in the visible part of this header - confirm meaning */
  79. SCIF_RMA_SYNC = (1 << 2), /* return only after the transfer is complete; otherwise it may be asynchronous */
  80. SCIF_RMA_ORDERED = (1 << 3) /* NOTE(review): not described in the visible part of this header - confirm meaning */
  81. };
  82. /* End of SCIF Admin Reserved Ports; matches the "ports less than 1024" privileged-bind rule documented for scif_bind() */
  83. #define SCIF_ADMIN_PORT_END 1024
  84. /* End of SCIF Reserved Ports; scif_bind() with pn == 0 assigns a port number >= this value */
  85. #define SCIF_PORT_RSVD 1088
  86. typedef struct scif_endpt *scif_epd_t; /* endpoint descriptor; struct scif_endpt is not defined in the visible part of this header */
  87. typedef struct scif_pinned_pages *scif_pinned_pages_t; /* pointer to struct scif_pinned_pages, likewise not defined in the visible part */
  88. /**
  89. * struct scif_range - SCIF registered range used in kernel mode
  90. * @cookie: cookie used internally by SCIF; opaque to users of this struct
  91. * @nr_pages: number of PAGE_SIZE pages in the range
  92. * @prot_flags: R/W protection (presumably SCIF_PROT_* bits - confirm)
  93. * @phys_addr: Array of bus addresses (presumably nr_pages entries - confirm)
  94. * @va: Array of kernel virtual addresses backed by the pages in the phys_addr
  95. * array. The va is populated only when called on the host for a remote
  96. * SCIF connection on MIC. This is required to support the use case of DMA
  97. * between MIC and another device which is not a SCIF node, e.g., an IB or
  98. * ethernet NIC.
  99. */
  100. struct scif_range {
  101. void *cookie;
  102. int nr_pages;
  103. int prot_flags;
  104. dma_addr_t *phys_addr;
  105. void __iomem **va;
  106. };
  107. /**
  108. * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll()
  109. * @epd: SCIF endpoint descriptor to monitor
  110. * @events: requested events (presumably POLL* bits from <linux/poll.h> - confirm)
  111. * @revents: returned events (presumably filled in by scif_poll() - confirm)
  112. */
  113. struct scif_pollepd {
  114. scif_epd_t epd;
  115. short events;
  116. short revents;
  117. };
  118. /**
  119. * struct scif_peer_dev - representation of a peer SCIF device
  120. *
  121. * Peer devices show up as PCIe devices for the mgmt node but not the cards.
  122. * The mgmt node discovers all the cards on the PCIe bus and informs the other
  123. * cards about their peers. Upon notification of a peer a node adds a peer
  124. * device to the peer bus to maintain symmetry in the way devices are
  125. * discovered across all nodes in the SCIF network.
  126. *
  127. * @dev: underlying device
  128. * @dnode: The destination node which this device will communicate with.
  129. */
  130. struct scif_peer_dev {
  131. struct device dev;
  132. u8 dnode;
  133. };
  134. /**
  135. * struct scif_client - representation of a SCIF client
  136. * @name: client name
  137. * @probe: client method called when a peer device is registered
  138. * @remove: client method called when a peer device is unregistered
  139. * @si: subsys_interface used internally for implementing SCIF clients
  140. */
  141. struct scif_client {
  142. const char *name;
  143. void (*probe)(struct scif_peer_dev *spdev);
  144. void (*remove)(struct scif_peer_dev *spdev);
  145. struct subsys_interface si;
  146. };
  147. #define SCIF_OPEN_FAILED ((scif_epd_t)-1) /* user-mode failure return of scif_open(); kernel mode returns NULL instead */
  148. #define SCIF_REGISTER_FAILED ((off_t)-1) /* user-mode failure return of scif_register() */
  149. #define SCIF_MMAP_FAILED ((void *)-1) /* NOTE(review): presumably the failure value of a SCIF mmap call outside this excerpt - confirm */
  150. /**
  151. * scif_open() - Create an endpoint
  152. *
  153. * Return:
  154. * Upon successful completion, scif_open() returns an endpoint descriptor to
  155. * be used in subsequent SCIF function calls to refer to that endpoint;
  156. * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is
  157. * returned and errno is set to indicate the error; in kernel mode a NULL
  158. * scif_epd_t is returned.
  159. *
  160. * Errors:
  161. * ENOMEM - Insufficient kernel memory was available
  162. */
  163. scif_epd_t scif_open(void);
  164. /**
  165. * scif_bind() - Bind an endpoint to a port
  166. * @epd: endpoint descriptor
  167. * @pn: port number
  168. *
  169. * scif_bind() binds endpoint epd to port pn, where pn is a port number on the
  170. * local node. If pn is zero, a port number greater than or equal to
  171. * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to
  172. * exactly one local port. Ports less than 1024 when requested can only be bound
  173. * by system (or root) processes or by processes executed by privileged users.
  174. *
  175. * Return:
  176. * Upon successful completion, scif_bind() returns the port number to which epd
  177. * is bound; otherwise in user mode -1 is returned and errno is set to
  178. * indicate the error; in kernel mode the negative of one of the following
  179. * errors is returned.
  180. *
  181. * Errors:
  182. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  183. * EINVAL - the endpoint or the port is already bound
  184. * EISCONN - The endpoint is already connected
  185. * ENOSPC - No port number available for assignment
  186. * EACCES - The port requested is protected and the user is not the superuser
  187. */
  188. int scif_bind(scif_epd_t epd, u16 pn);
  189. /**
  190. * scif_listen() - Listen for connections on an endpoint
  191. * @epd: endpoint descriptor
  192. * @backlog: maximum pending connection requests
  193. *
  194. * scif_listen() marks the endpoint epd as a listening endpoint - that is, as
  195. * an endpoint that will be used to accept incoming connection requests. Once
  196. * so marked, the endpoint is said to be in the listening state and may not be
  197. * used as the endpoint of a connection.
  198. *
  199. * The endpoint, epd, must have been bound to a port.
  200. *
  201. * The backlog argument defines the maximum length to which the queue of
  202. * pending connections for epd may grow. If a connection request arrives when
  203. * the queue is full, the client may receive an error with an indication that
  204. * the connection was refused.
  205. *
  206. * Return:
  207. * Upon successful completion, scif_listen() returns 0; otherwise in user mode
  208. * -1 is returned and errno is set to indicate the error; in kernel mode the
  209. * negative of one of the following errors is returned.
  210. *
  211. * Errors:
  212. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  213. * EINVAL - the endpoint is not bound to a port
  214. * EISCONN - The endpoint is already connected or listening
  215. */
  216. int scif_listen(scif_epd_t epd, int backlog);
  217. /**
  218. * scif_connect() - Initiate a connection on a port
  219. * @epd: endpoint descriptor
  220. * @dst: global id of port to which to connect
  221. *
  222. * The scif_connect() function requests the connection of endpoint epd to remote
  223. * port dst. If the connection is successful, a peer endpoint, bound to dst, is
  224. * created on node dst.node. On successful return, the connection is complete.
  225. *
  226. * If the endpoint epd has not already been bound to a port, scif_connect()
  227. * will bind it to an unused local port.
  228. *
  229. * A connection is terminated when an endpoint of the connection is closed,
  230. * either explicitly by scif_close(), or when a process that owns one of the
  231. * endpoints of the connection is terminated.
  232. *
  233. * In user space, scif_connect() supports an asynchronous connection mode
  234. * if the application has set the O_NONBLOCK flag on the endpoint via the
  235. * fcntl() system call. Setting this flag causes the calling process not to
  236. * wait during scif_connect().
  237. *
  238. * Return:
  239. * Upon successful completion, scif_connect() returns the port ID to which the
  240. * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is
  241. * set to indicate the error; in kernel mode the negative of one of the
  242. * following errors is returned.
  243. *
  244. * Errors:
  245. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  246. * ECONNREFUSED - The destination was not listening for connections or refused
  247. * the connection request
  248. * EINVAL - dst.port is not a valid port ID
  249. * EISCONN - The endpoint is already connected
  250. * ENOMEM - No buffer space is available
  251. * ENODEV - The destination node does not exist, or the node is lost, or it
  252. * existed but is not currently in the network since it may have crashed
  253. * ENOSPC - No port number available for assignment
  254. * EOPNOTSUPP - The endpoint is listening and cannot be connected
  255. */
  256. int scif_connect(scif_epd_t epd, struct scif_port_id *dst);
  257. /**
  258. * scif_accept() - Accept a connection on an endpoint
  259. * @epd: endpoint descriptor
  260. * @peer: global id of port to which connected
  261. * @newepd: new connected endpoint descriptor
  262. * @flags: flags
  263. *
  264. * The scif_accept() call extracts the first connection request from the queue
  265. * of pending connections for the port on which epd is listening. scif_accept()
  266. * creates a new endpoint, bound to the same port as epd, and allocates a new
  267. * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new
  268. * endpoint is connected to the endpoint through which the connection was
  269. * requested. epd is unaffected by this call, and remains in the listening
  270. * state.
  271. *
  272. * On successful return, peer holds the global port identifier (node id and
  273. * local port number) of the port which requested the connection.
  274. *
  275. * A connection is terminated when an endpoint of the connection is closed,
  276. * either explicitly by scif_close(), or when a process that owns one of the
  277. * endpoints of the connection is terminated.
  278. *
  279. * The number of connections that can (subsequently) be accepted on epd is only
  280. * limited by system resources (memory).
  281. *
  282. * The flags argument is formed by OR'ing together zero or more of the
  283. * following values.
  284. * SCIF_ACCEPT_SYNC - block until a connection request is presented. If
  285. * SCIF_ACCEPT_SYNC is not in flags, and no pending
  286. * connections are present on the queue, scif_accept()
  287. * fails with an EAGAIN error
  288. *
  289. * In user mode, the select() and poll() functions can be used to determine
  290. * when there is a connection request. In kernel mode, the scif_poll()
  291. * function may be used for this purpose. A readable event will be delivered
  292. * when a connection is requested.
  293. *
  294. * Return:
  295. * Upon successful completion, scif_accept() returns 0; otherwise in user mode
  296. * -1 is returned and errno is set to indicate the error; in kernel mode the
  297. * negative of one of the following errors is returned.
  298. *
  299. * Errors:
  300. * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be
  301. * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete
  302. * its connection request
  303. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  304. * EINTR - Interrupted function
  305. * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is
  306. * NULL, or newepd is NULL
  307. * ENODEV - The requesting node is lost, or it existed but is not currently in
  308. * the network since it may have crashed
  309. * ENOMEM - Not enough space
  310. * ENOENT - Secondary part of epd registration failed
  311. */
  312. int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t
  313. *newepd, int flags);
  314. /**
  315. * scif_close() - Close an endpoint
  316. * @epd: endpoint descriptor
  317. *
  318. * scif_close() closes an endpoint and performs necessary teardown of
  319. * facilities associated with that endpoint.
  320. *
  321. * If epd is a listening endpoint then it will no longer accept connection
  322. * requests on the port to which it is bound. Any pending connection requests
  323. * are rejected.
  324. *
  325. * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs
  326. * which are in-process through epd or its peer endpoint will complete before
  327. * scif_close() returns. Registered windows of the local and peer endpoints are
  328. * released as if scif_unregister() was called against each window.
  329. *
  330. * Closing a SCIF endpoint does not affect local registered memory mapped by
  331. * a SCIF endpoint on a remote node. The local memory remains mapped by the peer
  332. * SCIF endpoint until it is explicitly removed by the peer calling munmap(..).
  333. *
  334. * If the peer endpoint's receive queue is not empty at the time that epd is
  335. * closed, then the peer endpoint can be passed as the endpoint parameter to
  336. * scif_recv() until the receive queue is empty.
  337. *
  338. * epd is freed and may no longer be accessed.
  339. *
  340. * Return:
  341. * Upon successful completion, scif_close() returns 0; otherwise in user mode
  342. * -1 is returned and errno is set to indicate the error; in kernel mode the
  343. * negative of one of the following errors is returned.
  344. *
  345. * Errors:
  346. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  347. */
  348. int scif_close(scif_epd_t epd);
  349. /**
  350. * scif_send() - Send a message
  351. * @epd: endpoint descriptor
  352. * @msg: message buffer address
  353. * @len: message length
  354. * @flags: blocking mode flags
  355. *
  356. * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data
  357. * are copied from memory starting at address msg. On successful execution the
  358. * return value of scif_send() is the number of bytes that were sent, and is
  359. * zero if no bytes were sent because len was zero. scif_send() may be called
  360. * only when the endpoint is in a connected state.
  361. *
  362. * If a scif_send() call is non-blocking, then it sends only those bytes which
  363. * can be sent without waiting, up to a maximum of len bytes.
  364. *
  365. * If a scif_send() call is blocking, then it normally returns after sending
  366. * all len bytes. If a blocking call is interrupted or the connection is
  367. * reset, the call is considered successful if some bytes were sent or len is
  368. * zero, otherwise the call is considered unsuccessful.
  369. *
  370. * In user mode, the select() and poll() functions can be used to determine
  371. * when the send queue is not full. In kernel mode, the scif_poll() function
  372. * may be used for this purpose.
  373. *
  374. * It is recommended that scif_send()/scif_recv() only be used for short
  375. * control-type message communication between SCIF endpoints. The SCIF RMA
  376. * APIs are expected to provide better performance for transfer sizes of
  377. * 1024 bytes or longer for the current MIC hardware and software
  378. * implementation.
  379. *
  380. * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK
  381. * is passed as the flags argument.
  382. *
  383. * Return:
  384. * Upon successful completion, scif_send() returns the number of bytes sent;
  385. * otherwise in user mode -1 is returned and errno is set to indicate the
  386. * error; in kernel mode the negative of one of the following errors is
  387. * returned.
  388. *
  389. * Errors:
  390. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  391. * ECONNRESET - Connection reset by peer
  392. * EINVAL - flags is invalid, or len is negative
  393. * ENODEV - The remote node is lost, or it existed but is not currently in the
  394. * network since it may have crashed
  395. * ENOMEM - Not enough space
  396. * ENOTCONN - The endpoint is not connected
  397. */
  398. int scif_send(scif_epd_t epd, void *msg, int len, int flags);
  399. /**
  400. * scif_recv() - Receive a message
  401. * @epd: endpoint descriptor
  402. * @msg: message buffer address
  403. * @len: message buffer length
  404. * @flags: blocking mode flags
  405. *
  406. * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of
  407. * data are copied to memory starting at address msg. On successful execution
  408. * the return value of scif_recv() is the number of bytes that were received,
  409. * and is zero if no bytes were received because len was zero. scif_recv() may
  410. * be called only when the endpoint is in a connected state.
  411. *
  412. * If a scif_recv() call is non-blocking, then it receives only those bytes
  413. * which can be received without waiting, up to a maximum of len bytes.
  414. *
  415. * If a scif_recv() call is blocking, then it normally returns after receiving
  416. * all len bytes. If the blocking call was interrupted due to a disconnection,
  417. * subsequent calls to scif_recv() will copy all bytes received up to the point
  418. * of disconnection.
  419. *
  420. * In user mode, the select() and poll() functions can be used to determine
  421. * when data is available to be received. In kernel mode, the scif_poll()
  422. * function may be used for this purpose.
  423. *
  424. * It is recommended that scif_send()/scif_recv() only be used for short
  425. * control-type message communication between SCIF endpoints. The SCIF RMA
  426. * APIs are expected to provide better performance for transfer sizes of
  427. * 1024 bytes or longer for the current MIC hardware and software
  428. * implementation.
  429. *
  430. * scif_recv() will block until the entire message is received if
  431. * SCIF_RECV_BLOCK is passed as the flags argument.
  432. *
  433. * Return:
  434. * Upon successful completion, scif_recv() returns the number of bytes
  435. * received; otherwise in user mode -1 is returned and errno is set to
  436. * indicate the error; in kernel mode the negative of one of the following
  437. * errors is returned.
  438. *
  439. * Errors:
  440. * EAGAIN - The destination node is returning from a low power state
  441. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  442. * ECONNRESET - Connection reset by peer
  443. * EINVAL - flags is invalid, or len is negative
  444. * ENODEV - The remote node is lost, or it existed but is not currently in the
  445. * network since it may have crashed
  446. * ENOMEM - Not enough space
  447. * ENOTCONN - The endpoint is not connected
  448. */
  449. int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
  450. /**
  451. * scif_register() - Mark a memory region for remote access.
  452. * @epd: endpoint descriptor
  453. * @addr: starting virtual address
  454. * @len: length of range
  455. * @offset: offset of window
  456. * @prot_flags: read/write protection flags
  457. * @map_flags: mapping flags
  458. *
  459. * The scif_register() function opens a window, a range of whole pages of the
  460. * registered address space of the endpoint epd, starting at offset po and
  461. * continuing for len bytes. The value of po, further described below, is a
  462. * function of the parameters offset and len, and the value of map_flags. Each
  463. * page of the window represents the physical memory page which backs the
  464. * corresponding page of the range of virtual address pages starting at addr
  465. * and continuing for len bytes. addr and len are constrained to be multiples
  466. * of the page size. A successful scif_register() call returns po.
  467. *
  468. * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
  469. * exactly, and offset is constrained to be a multiple of the page size. The
  470. * mapping established by scif_register() will not replace any existing
  471. * registration; an error is returned if any page within the range [offset,
  472. * offset + len - 1] intersects an existing window.
  473. *
  474. * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
  475. * implementation-defined manner to arrive at po. The po value so chosen will
  476. * be an area of the registered address space that the implementation deems
  477. * suitable for a mapping of len bytes. An offset value of 0 is interpreted as
  478. * granting the implementation complete freedom in selecting po, subject to
  479. * constraints described below. A non-zero value of offset is taken to be a
  480. * suggestion of an offset near which the mapping should be placed. When the
  481. * implementation selects a value for po, it does not replace any extant
  482. * window. In all cases, po will be a multiple of the page size.
  483. *
  484. * The physical pages which are so represented by a window are available for
  485. * access in calls to mmap(), scif_readfrom(), scif_writeto(),
  486. * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
  487. * physical pages represented by the window will not be reused by the memory
  488. * subsystem for any other purpose. Note that the same physical page may be
  489. * represented by multiple windows.
  490. *
  491. * Subsequent operations which change the memory pages to which virtual
  492. * addresses are mapped (such as mmap(), munmap()) have no effect on
  493. * existing windows.
  494. *
  495. * If the process will fork(), it is recommended that the registered
  496. * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
  497. * problems due to copy-on-write semantics.
  498. *
  499. * The prot_flags argument is formed by OR'ing together one or more of the
  500. * following values.
  501. * SCIF_PROT_READ - allow read operations from the window
  502. * SCIF_PROT_WRITE - allow write operations to the window
  503. *
  504. * Return:
  505. * Upon successful completion, scif_register() returns the offset at which the
  506. * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that
  507. * is ((off_t)-1)) is returned and errno is set to indicate the error; in
  508. * kernel mode the negative of one of the following errors is returned.
  509. *
  510. * Errors:
  511. * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range
  512. * [offset, offset + len -1] are already registered
  513. * EAGAIN - The mapping could not be performed due to lack of resources
  514. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  515. * ECONNRESET - Connection reset by peer
  516. * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is
  517. * set in map_flags, and offset is not a multiple of the page size, or addr is not a
  518. * multiple of the page size, or len is not a multiple of the page size, or is
  519. * 0, or offset is negative
  520. * ENODEV - The remote node is lost, or it existed but is not currently in the
  521. * network since it may have crashed
  522. * ENOMEM - Not enough space
  523. * ENOTCONN - The endpoint is not connected
  524. */
  525. off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
  526. int prot_flags, int map_flags);
  527. /**
  528. * scif_unregister() - Unregister a memory region previously marked for remote access.
  529. * @epd: endpoint descriptor
  530. * @offset: start of range to unregister
  531. * @len: length of range to unregister
  532. *
  533. * The scif_unregister() function closes those previously registered windows
  534. * which are entirely within the range [offset, offset + len - 1]. It is an
  535. * error to specify a range which intersects only a subrange of a window.
  536. *
  537. * On a successful return, pages within the window may no longer be specified
  538. * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(),
  539. * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window,
  540. * however, continues to exist until all previous references against it are
  541. * removed. A window is referenced if there is a mapping to it created by
  542. * mmap(), or if scif_get_pages() was called against the window
  543. * (and the pages have not been returned via scif_put_pages()). A window is
  544. * also referenced while an RMA, in which some range of the window is a source
  545. * or destination, is in progress. Finally a window is referenced while some
  546. * offset in that window was specified to scif_fence_signal(), and the RMAs
  547. * marked by that call to scif_fence_signal() have not completed. While a
  548. * window is in this state, its registered address space pages are not
  549. * available for use in a new registered window.
  550. *
  551. * When all such references to the window have been removed, its references to
  552. * all the physical pages which it represents are removed. Similarly, the
  553. * registered address space pages of the window become available for
  554. * registration in a new window.
  555. *
  556. * Return:
  557. * Upon successful completion, scif_unregister() returns 0; otherwise in user
  558. * mode -1 is returned and errno is set to indicate the error; in kernel mode
  559. * the negative of one of the following errors is returned. In the event of an
  560. * error, no windows are unregistered.
  561. *
  562. * Errors:
  563. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  564. * ECONNRESET - Connection reset by peer
  565. * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a
  566. * window, or offset is negative
  567. * ENODEV - The remote node is lost or existed, but is not currently in the
  568. * network since it may have crashed
  569. * ENOTCONN - The endpoint is not connected
  570. * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the
  571. * registered address space of epd
  572. */
  573. int scif_unregister(scif_epd_t epd, off_t offset, size_t len);
  574. /**
  575. * scif_readfrom() - Copy from a remote address space
  576. * @epd: endpoint descriptor
  577. * @loffset: offset in local registered address space to
  578. * which to copy
  579. * @len: length of range to copy
  580. * @roffset: offset in remote registered address space
  581. * from which to copy
  582. * @rma_flags: transfer mode flags
  583. *
  584. * scif_readfrom() copies len bytes from the remote registered address space of
  585. * the peer of endpoint epd, starting at the offset roffset to the local
  586. * registered address space of epd, starting at the offset loffset.
  587. *
  588. * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
  589. * roffset + len - 1] must be within some registered window or windows of the
  590. * local and remote nodes. A range may intersect multiple registered windows,
  591. * but only if those windows are contiguous in the registered address space.
  592. *
  593. * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
  594. * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
  595. * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the
  596. * transfer is complete. Otherwise, the transfer may be performed asynchron-
  597. * ously. The order in which any two asynchronous RMA operations complete
  598. * is non-deterministic. The synchronization functions, scif_fence_mark()/
  599. * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
  600. * the completion of asynchronous RMA operations on the same endpoint.
  601. *
  602. * The DMA transfer of individual bytes is not guaranteed to complete in
  603. * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
  604. * cacheline or partial cacheline of the source range will become visible on
  605. * the destination node after all other transferred data in the source
  606. * range has become visible on the destination node.
  607. *
  608. * The optimal DMA performance will likely be realized if both
  609. * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
  610. * performance will likely be realized if loffset and roffset are not
  611. * cacheline aligned but are separated by some multiple of 64. The lowest level
  612. * of performance is likely if loffset and roffset are not separated by a
  613. * multiple of 64.
  614. *
  615. * The rma_flags argument is formed by ORing together zero or more of the
  616. * following values.
  617. * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
  618. * engine.
  619. * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
  620. * transfer has completed. Passing this flag results in the
  621. * current implementation busy waiting and consuming CPU cycles
  622. * while the DMA transfer is in progress for best performance by
  623. * avoiding the interrupt latency.
  624. * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
  625. * the source range becomes visible on the destination node
  626. * after all other transferred data in the source range has
  627. * become visible on the destination
  628. *
  629. * Return:
  630. * Upon successful completion, scif_readfrom() returns 0; otherwise in user
  631. * mode -1 is returned and errno is set to indicate the error; in kernel mode
  632. * the negative of one of the following errors is returned.
  633. *
  634. * Errors:
  635. * EACCES - Attempt to write to a read-only range
  636. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  637. * ECONNRESET - Connection reset by peer
  638. * EINVAL - rma_flags is invalid
  639. * ENODEV - The remote node is lost or existed, but is not currently in the
  640. * network since it may have crashed
  641. * ENOTCONN - The endpoint is not connected
  642. * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
  643. * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
  644. * for the registered address space of the peer of epd, or loffset or roffset
  645. * is negative
  646. */
  647. int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
  648. roffset, int rma_flags);
  649. /**
  650. * scif_writeto() - Copy to a remote address space
  651. * @epd: endpoint descriptor
  652. * @loffset: offset in local registered address space
  653. * from which to copy
  654. * @len: length of range to copy
  655. * @roffset: offset in remote registered address space to
  656. * which to copy
  657. * @rma_flags: transfer mode flags
  658. *
  659. * scif_writeto() copies len bytes from the local registered address space of
  660. * epd, starting at the offset loffset to the remote registered address space
  661. * of the peer of endpoint epd, starting at the offset roffset.
  662. *
  663. * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
  664. * roffset + len - 1] must be within some registered window or windows of the
  665. * local and remote nodes. A range may intersect multiple registered windows,
  666. * but only if those windows are contiguous in the registered address space.
  667. *
  668. * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
  669. * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
  670. * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the
  671. * transfer is complete. Otherwise, the transfer may be performed asynchron-
  672. * ously. The order in which any two asynchronous RMA operations complete
  673. * is non-deterministic. The synchronization functions, scif_fence_mark()/
  674. * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
  675. * the completion of asynchronous RMA operations on the same endpoint.
  676. *
  677. * The DMA transfer of individual bytes is not guaranteed to complete in
  678. * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
  679. * cacheline or partial cacheline of the source range will become visible on
  680. * the destination node after all other transferred data in the source
  681. * range has become visible on the destination node.
  682. *
  683. * The optimal DMA performance will likely be realized if both
  684. * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
  685. * performance will likely be realized if loffset and roffset are not cacheline
  686. * aligned but are separated by some multiple of 64. The lowest level of
  687. * performance is likely if loffset and roffset are not separated by a multiple
  688. * of 64.
  689. *
  690. * The rma_flags argument is formed by ORing together zero or more of the
  691. * following values.
  692. * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
  693. * engine.
  694. * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
  695. * transfer has completed. Passing this flag results in the
  696. * current implementation busy waiting and consuming CPU cycles
  697. * while the DMA transfer is in progress for best performance by
  698. * avoiding the interrupt latency.
  699. * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
  700. * the source range becomes visible on the destination node
  701. * after all other transferred data in the source range has
  702. * become visible on the destination
  703. *
  704. * Return:
  705. * Upon successful completion, scif_writeto() returns 0; otherwise in user
  706. * mode -1 is returned and errno is set to indicate the error; in kernel mode
  707. * the negative of one of the following errors is returned.
  708. *
  709. * Errors:
  710. * EACCES - Attempt to write to a read-only range
  711. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  712. * ECONNRESET - Connection reset by peer
  713. * EINVAL - rma_flags is invalid
  714. * ENODEV - The remote node is lost or existed, but is not currently in the
  715. * network since it may have crashed
  716. * ENOTCONN - The endpoint is not connected
  717. * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
  718. * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
  719. * for the registered address space of the peer of epd, or loffset or roffset
  720. * is negative
  721. */
  722. int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
  723. roffset, int rma_flags);
  724. /**
  725. * scif_vreadfrom() - Copy from a remote address space
  726. * @epd: endpoint descriptor
  727. * @addr: address to which to copy
  728. * @len: length of range to copy
  729. * @roffset: offset in remote registered address space
  730. * from which to copy
  731. * @rma_flags: transfer mode flags
  732. *
  733. * scif_vreadfrom() copies len bytes from the remote registered address
  734. * space of the peer of endpoint epd, starting at the offset roffset, to local
  735. * memory, starting at addr.
  736. *
  737. * The specified range [roffset, roffset + len - 1] must be within some
  738. * registered window or windows of the remote nodes. The range may
  739. * intersect multiple registered windows, but only if those windows are
  740. * contiguous in the registered address space.
  741. *
  742. * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
  743. * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
  744. * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the
  745. * transfer is complete. Otherwise, the transfer may be performed asynchron-
  746. * ously. The order in which any two asynchronous RMA operations complete
  747. * is non-deterministic. The synchronization functions, scif_fence_mark()/
  748. * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
  749. * the completion of asynchronous RMA operations on the same endpoint.
  750. *
  751. * The DMA transfer of individual bytes is not guaranteed to complete in
  752. * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
  753. * cacheline or partial cacheline of the source range will become visible on
  754. * the destination node after all other transferred data in the source
  755. * range has become visible on the destination node.
  756. *
  757. * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
  758. * the specified local memory range may remain in a pinned state even after
  759. * the specified transfer completes. This may reduce overhead if some or all of
  760. * the same virtual address range is referenced in a subsequent call of
  761. * scif_vreadfrom() or scif_vwriteto().
  762. *
  763. * The optimal DMA performance will likely be realized if both
  764. * addr and roffset are cacheline aligned (are a multiple of 64). Lower
  765. * performance will likely be realized if addr and roffset are not
  766. * cacheline aligned but are separated by some multiple of 64. The lowest level
  767. * of performance is likely if addr and roffset are not separated by a
  768. * multiple of 64.
  769. *
  770. * The rma_flags argument is formed by ORing together zero or more of the
  771. * following values.
  772. * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
  773. * engine.
  774. * SCIF_RMA_USECACHE - enable registration caching
  775. * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
  776. * transfer has completed. Passing this flag results in the
  777. * current implementation busy waiting and consuming CPU cycles
  778. * while the DMA transfer is in progress for best performance by
  779. * avoiding the interrupt latency.
  780. * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
  781. * the source range becomes visible on the destination node
  782. * after all other transferred data in the source range has
  783. * become visible on the destination
  784. *
  785. * Return:
  786. * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user
  787. * mode -1 is returned and errno is set to indicate the error; in kernel mode
  788. * the negative of one of the following errors is returned.
  789. *
  790. * Errors:
  791. * EACCES - Attempt to write to a read-only range
  792. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  793. * ECONNRESET - Connection reset by peer
  794. * EINVAL - rma_flags is invalid
  795. * ENODEV - The remote node is lost or existed, but is not currently in the
  796. * network since it may have crashed
  797. * ENOTCONN - The endpoint is not connected
  798. * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
  799. * registered address space of the peer of epd
  800. */
  801. int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset,
  802. int rma_flags);
  803. /**
  804. * scif_vwriteto() - Copy to a remote address space
  805. * @epd: endpoint descriptor
  806. * @addr: address from which to copy
  807. * @len: length of range to copy
  808. * @roffset: offset in remote registered address space to
  809. * which to copy
  810. * @rma_flags: transfer mode flags
  811. *
  812. * scif_vwriteto() copies len bytes from the local memory, starting at addr, to
  813. * the remote registered address space of the peer of endpoint epd, starting at
  814. * the offset roffset.
  815. *
  816. * The specified range [roffset, roffset + len - 1] must be within some
  817. * registered window or windows of the remote nodes. The range may intersect
  818. * multiple registered windows, but only if those windows are contiguous in the
  819. * registered address space.
  820. *
  821. * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
  822. * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
  823. * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the
  824. * transfer is complete. Otherwise, the transfer may be performed asynchron-
  825. * ously. The order in which any two asynchronous RMA operations complete
  826. * is non-deterministic. The synchronization functions, scif_fence_mark()/
  827. * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
  828. * the completion of asynchronous RMA operations on the same endpoint.
  829. *
  830. * The DMA transfer of individual bytes is not guaranteed to complete in
  831. * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
  832. * cacheline or partial cacheline of the source range will become visible on
  833. * the destination node after all other transferred data in the source
  834. * range has become visible on the destination node.
  835. *
  836. * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
  837. * the specified local memory range may remain in a pinned state even after
  838. * the specified transfer completes. This may reduce overhead if some or all of
  839. * the same virtual address range is referenced in a subsequent call of
  840. * scif_vreadfrom() or scif_vwriteto().
  841. *
  842. * The optimal DMA performance will likely be realized if both
  843. * addr and roffset are cacheline aligned (are a multiple of 64). Lower
  844. * performance will likely be realized if addr and roffset are not cacheline
  845. * aligned but are separated by some multiple of 64. The lowest level of
  846. * performance is likely if addr and roffset are not separated by a multiple of
  847. * 64.
  848. *
  849. * The rma_flags argument is formed by ORing together zero or more of the
  850. * following values.
  851. * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
  852. * engine.
  853. * SCIF_RMA_USECACHE - allow registration caching
  854. * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
  855. * transfer has completed. Passing this flag results in the
  856. * current implementation busy waiting and consuming CPU cycles
  857. * while the DMA transfer is in progress for best performance by
  858. * avoiding the interrupt latency.
  859. * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
  860. * the source range becomes visible on the destination node
  861. * after all other transferred data in the source range has
  862. * become visible on the destination
  863. *
  864. * Return:
  865. * Upon successful completion, scif_vwriteto() returns 0; otherwise in user
  866. * mode -1 is returned and errno is set to indicate the error; in kernel mode
  867. * the negative of one of the following errors is returned.
  868. *
  869. * Errors:
  870. * EACCES - Attempt to write to a read-only range
  871. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  872. * ECONNRESET - Connection reset by peer
  873. * EINVAL - rma_flags is invalid
  874. * ENODEV - The remote node is lost or existed, but is not currently in the
  875. * network since it may have crashed
  876. * ENOTCONN - The endpoint is not connected
  877. * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
  878. * registered address space of the peer of epd
  879. */
  880. int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset,
  881. int rma_flags);
  882. /**
  883. * scif_fence_mark() - Mark previously issued RMAs
  884. * @epd: endpoint descriptor
  885. * @flags: control flags
  886. * @mark: marked value returned as output.
  887. *
  888. * scif_fence_mark() returns after marking the current set of all uncompleted
  889. * RMAs initiated through the endpoint epd or the current set of all
  890. * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are
  891. * marked with a value returned at mark. The application may subsequently call
  892. * scif_fence_wait(), passing the value returned at mark, to await completion
  893. * of all RMAs so marked.
  894. *
  895. * The flags argument has exactly one of the following values.
  896. * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
  897. * epd are marked
  898. * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
  899. * of endpoint epd are marked
  900. *
  901. * Return:
  902. * Upon successful completion, scif_fence_mark() returns 0; otherwise in user
  903. * mode -1 is returned and errno is set to indicate the error; in kernel mode
  904. * the negative of one of the following errors is returned.
  905. *
  906. * Errors:
  907. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  908. * ECONNRESET - Connection reset by peer
  909. * EINVAL - flags is invalid
  910. * ENODEV - The remote node is lost or existed, but is not currently in the
  911. * network since it may have crashed
  912. * ENOTCONN - The endpoint is not connected
  913. * ENOMEM - Insufficient kernel memory was available
  914. */
  915. int scif_fence_mark(scif_epd_t epd, int flags, int *mark);
  916. /**
  917. * scif_fence_wait() - Wait for completion of marked RMAs
  918. * @epd: endpoint descriptor
  919. * @mark: mark request
  920. *
  921. * scif_fence_wait() returns after all RMAs marked with mark have completed.
  922. * The value passed in mark must have been obtained in a previous call to
  923. * scif_fence_mark().
  924. *
  925. * Return:
  926. * Upon successful completion, scif_fence_wait() returns 0; otherwise in user
  927. * mode -1 is returned and errno is set to indicate the error; in kernel mode
  928. * the negative of one of the following errors is returned.
  929. *
  930. * Errors:
  931. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  932. * ECONNRESET - Connection reset by peer
  933. * ENODEV - The remote node is lost or existed, but is not currently in the
  934. * network since it may have crashed
  935. * ENOTCONN - The endpoint is not connected
  936. * ENOMEM - Insufficient kernel memory was available
  937. */
  938. int scif_fence_wait(scif_epd_t epd, int mark);
  939. /**
  940. * scif_fence_signal() - Request a memory update on completion of RMAs
  941. * @epd: endpoint descriptor
  942. * @loff: local offset
  943. * @lval: local value to write to loff
  944. * @roff: remote offset
  945. * @rval: remote value to write to roff
  946. * @flags: flags
  947. *
  948. * scif_fence_signal() returns after marking the current set of all uncompleted
  949. * RMAs initiated through the endpoint epd or marking the current set of all
  950. * uncompleted RMAs initiated through the peer of endpoint epd.
  951. *
  952. * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the
  953. * marked set, lval is written to memory at the address corresponding to offset
  954. * loff in the local registered address space of epd. loff must be within a
  955. * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion
  956. * of the RMAs in the marked set, rval is written to memory at the address
  957. * corresponding to offset roff in the remote registered address space of epd.
  958. * roff must be within a remote registered window of the peer of epd. Note
  959. * that any specified offset must be DWORD (4 byte / 32 bit) aligned.
  960. *
  961. * The flags argument is formed by OR'ing together the following.
  962. * Exactly one of the following values.
  963. * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
  964. * epd are marked
  965. * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
  966. * of endpoint epd are marked
  967. * One or more of the following values.
  968. * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to
  969. * memory at the address corresponding to offset loff in the local
  970. * registered address space of epd.
  971. * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to
  972. * memory at the address corresponding to offset roff in the remote
  973. * registered address space of epd.
  974. *
  975. * Return:
  976. * Upon successful completion, scif_fence_signal() returns 0; otherwise in
  977. * user mode -1 is returned and errno is set to indicate the error; in kernel
  978. * mode the negative of one of the following errors is returned.
  979. *
  980. * Errors:
  981. * EBADF, ENOTTY - epd is not a valid endpoint descriptor
  982. * ECONNRESET - Connection reset by peer
  983. * EINVAL - flags is invalid, or loff or roff are not DWORD aligned
  984. * ENODEV - The remote node is lost or existed, but is not currently in the
  985. * network since it may have crashed
  986. * ENOTCONN - The endpoint is not connected
  987. * ENXIO - loff is invalid for the registered address space of epd, or roff is
  988. * invalid for the registered address space of the peer of epd
  989. */
  990. int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff,
  991. u64 rval, int flags);
  992. /**
  993. * scif_get_node_ids() - Return information about online nodes
  994. * @nodes: array in which to return online node IDs
  995. * @len: number of entries in the nodes array
  996. * @self: address to place the node ID of the local node
  997. *
  998. * scif_get_node_ids() fills in the nodes array with up to len node IDs of the
  999. * nodes in the SCIF network. If there is not enough space in nodes, as
  1000. * indicated by the len parameter, only len node IDs are returned in nodes. The
  1001. * return value of scif_get_node_ids() is the total number of nodes currently in
  1002. * the SCIF network. By checking the return value against the len parameter,
  1003. * the user may determine if enough space for nodes was allocated.
  1004. *
  1005. * The node ID of the local node is returned at self.
  1006. *
  1007. * Return:
  1008. * Upon successful completion, scif_get_node_ids() returns the actual number of
  1009. * online nodes in the SCIF network including 'self'; otherwise in user mode
  1010. * -1 is returned and errno is set to indicate the error; in kernel mode no
  1011. * errors are returned.
  1012. */
  1013. int scif_get_node_ids(u16 *nodes, int len, u16 *self);
  1014. /**
  1015. * scif_pin_pages() - Pin a set of pages
  1016. * @addr: Virtual address of range to pin
  1017. * @len: Length of range to pin
  1018. * @prot_flags: Page protection flags
  1019. * @map_flags: Page classification flags
  1020. * @pinned_pages: Handle to pinned pages
  1021. *
  1022. * scif_pin_pages() pins (locks in physical memory) the physical pages which
  1023. * back the range of virtual address pages starting at addr and continuing for
  1024. * len bytes. addr and len are constrained to be multiples of the page size. A
  1025. * successful scif_pin_pages() call returns a handle to pinned_pages which may
  1026. * be used in subsequent calls to scif_register_pinned_pages().
  1027. *
  1028. * The pages will remain pinned as long as there is a reference against the
  1029. * scif_pinned_pages_t value returned by scif_pin_pages() and until
  1030. * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A
  1031. * reference is added to a scif_pinned_pages_t value each time a window is
  1032. * created by calling scif_register_pinned_pages() and passing the
  1033. * scif_pinned_pages_t value. A reference is removed from a
  1034. * scif_pinned_pages_t value each time such a window is deleted.
  1035. *
  1036. * Subsequent operations which change the memory pages to which virtual
  1037. * addresses are mapped (such as mmap(), munmap()) have no effect on the
  1038. * scif_pinned_pages_t value or windows created against it.
  1039. *
  1040. * If the process will fork(), it is recommended that the registered
  1041. * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
  1042. * problems due to copy-on-write semantics.
  1043. *
  1044. * The prot_flags argument is formed by OR'ing together one or more of the
  1045. * following values.
  1046. * SCIF_PROT_READ - allow read operations against the pages
  1047. * SCIF_PROT_WRITE - allow write operations against the pages
  1048. * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a
  1049. * kernel space address. By default, addr is interpreted as a user space
  1050. * address.
  1051. *
  1052. * Return:
  1053. * Upon successful completion, scif_pin_pages() returns 0; otherwise the
  1054. * negative of one of the following errors is returned.
  1055. *
  1056. * Errors:
  1057. * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative
  1058. * ENOMEM - Not enough space
  1059. */
  1060. int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags,
  1061. scif_pinned_pages_t *pinned_pages);
  1062. /**
  1063. * scif_unpin_pages() - Unpin a set of pages
  1064. * @pinned_pages: Handle to pinned pages to be unpinned
  1065. *
  1066. * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new
  1067. * windows against pinned_pages. The physical pages represented by pinned_pages
  1068. * will remain pinned until all windows previously registered against
  1069. * pinned_pages are deleted (the window is scif_unregister()'d and all
  1070. * references to the window are removed; see scif_unregister()).
  1071. *
  1072. * pinned_pages must have been obtained from a previous call to scif_pin_pages().
  1073. * After calling scif_unpin_pages(), it is an error to pass pinned_pages to
  1074. * scif_register_pinned_pages().
  1075. *
  1076. * Return:
  1077. * Upon successful completion, scif_unpin_pages() returns 0; otherwise the
  1078. * negative of one of the following errors is returned.
  1079. *
  1080. * Errors:
  1081. * EINVAL - pinned_pages is not valid
  1082. */
  1083. int scif_unpin_pages(scif_pinned_pages_t pinned_pages);
  1084. /**
  1085. * scif_register_pinned_pages() - Mark a memory region for remote access.
  1086. * @epd: endpoint descriptor
  1087. * @pinned_pages: Handle to pinned pages
  1088. * @offset: Registered address space offset
  1089. * @map_flags: Flags which control where pages are mapped
  1090. *
  1091. * The scif_register_pinned_pages() function opens a window, a range of whole
  1092. * pages of the registered address space of the endpoint epd, starting at
  1093. * offset po. The value of po, further described below, is a function of the
  1094. * parameters offset and pinned_pages, and the value of map_flags. Each page of
  1095. * the window represents a corresponding physical memory page of the range
  1096. * represented by pinned_pages; the length of the window is the same as the
  1097. * length of range represented by pinned_pages. A successful
  1098. * scif_register_pinned_pages() call returns po as the return value.
  1099. *
  1100. * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
  1101. * exactly, and offset is constrained to be a multiple of the page size. The
  1102. * mapping established by scif_register_pinned_pages() will not replace any
  1103. * existing registration; an error is returned if any page of the new window
  1104. * would intersect an existing window.
  1105. *
  1106. * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
  1107. * implementation-defined manner to arrive at po. The po so chosen will be an
  1108. * area of the registered address space that the implementation deems suitable
  1109. * for a mapping of the required size. An offset value of 0 is interpreted as
  1110. * granting the implementation complete freedom in selecting po, subject to
  1111. * constraints described below. A non-zero value of offset is taken to be a
  1112. * suggestion of an offset near which the mapping should be placed. When the
  1113. * implementation selects a value for po, it does not replace any extant
  1114. * window. In all cases, po will be a multiple of the page size.
  1115. *
  1116. * The physical pages which are so represented by a window are available for
  1117. * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(),
  1118. * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
  1119. * physical pages represented by the window will not be reused by the memory
  1120. * subsystem for any other purpose. Note that the same physical page may be
  1121. * represented by multiple windows.
  1122. *
  1123. * Windows created by scif_register_pinned_pages() are unregistered by
  1124. * scif_unregister().
  1125. *
  1126. * The map_flags argument can be set to SCIF_MAP_FIXED, in which case offset
  1127. * is interpreted as a fixed offset.
  1128. *
  1129. * Return:
  1130. * Upon successful completion, scif_register_pinned_pages() returns the offset
  1131. * at which the mapping was placed (po); otherwise the negative of one of the
  1132. * following errors is returned.
  1133. *
  1134. * Errors:
  1135. * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window
  1136. * would intersect an existing window
  1137. * EAGAIN - The mapping could not be performed due to lack of resources
  1138. * ECONNRESET - Connection reset by peer
  1139. * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and
  1140. * offset is not a multiple of the page size, or offset is negative
  1141. * ENODEV - The remote node is lost, or existed but is not currently in the
  1142. * network since it may have crashed
  1143. * ENOMEM - Not enough space
  1144. * ENOTCONN - The endpoint is not connected
  1145. */
  1146. off_t scif_register_pinned_pages(scif_epd_t epd,
  1147. scif_pinned_pages_t pinned_pages,
  1148. off_t offset, int map_flags);
  1149. /**
  1150. * scif_get_pages() - Add references to remote registered pages
  1151. * @epd: endpoint descriptor
  1152. * @offset: remote registered offset
  1153. * @len: length of range of pages
  1154. * @pages: returned scif_range structure
  1155. *
  1156. * scif_get_pages() returns the addresses of the physical pages represented by
  1157. * those pages of the registered address space of the peer of epd, starting at
  1158. * offset and continuing for len bytes. offset and len are constrained to be
  1159. * multiples of the page size.
  1160. *
  1161. * All of the pages in the specified range [offset, offset + len - 1] must be
  1162. * within a single window of the registered address space of the peer of epd.
  1163. *
  1164. * The addresses are returned as a virtually contiguous array pointed to by the
  1165. * phys_addr component of the scif_range structure whose address is returned in
  1166. * pages. The nr_pages component of scif_range is the length of the array. The
  1167. * prot_flags component of scif_range holds the protection flag value passed
  1168. * when the pages were registered.
  1169. *
  1170. * Each physical page whose address is returned by scif_get_pages() remains
  1171. * available and will not be released for reuse until the scif_range structure
  1172. * is returned in a call to scif_put_pages(). The scif_range structure returned
  1173. * by scif_get_pages() must not be modified.
  1174. *
  1175. * It is an error to call scif_close() on an endpoint while a scif_range
  1176. * structure obtained from it has not been returned to scif_put_pages().
  1177. *
  1178. * Return:
  1179. * Upon successful completion, scif_get_pages() returns 0; otherwise the
  1180. * negative of one of the following errors is returned.
  1181. * Errors:
  1182. * ECONNRESET - Connection reset by peer.
  1183. * EINVAL - offset is not a multiple of the page size, or offset is negative, or
  1184. * len is not a multiple of the page size
  1185. * ENODEV - The remote node is lost, or existed but is not currently in the
  1186. * network since it may have crashed
  1187. * ENOTCONN - The endpoint is not connected
  1188. * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid
  1189. * for the registered address space of the peer of epd
  1190. */
  1191. int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
  1192. struct scif_range **pages);
  1193. /**
  1194. * scif_put_pages() - Remove references from remote registered pages
  1195. * @pages: pages to be returned
  1196. *
  1197. * scif_put_pages() releases a scif_range structure previously obtained by
  1198. * calling scif_get_pages(). The physical pages represented by pages may
  1199. * be reused when the window which represented those pages is unregistered.
  1200. * Therefore, those pages must not be accessed after calling scif_put_pages().
  1201. *
  1202. * Return:
  1203. * Upon successful completion, scif_put_pages() returns 0; otherwise the
  1204. * negative of one of the following errors is returned.
  1205. * Errors:
  1206. * EINVAL - pages does not point to a valid scif_range structure, or
  1207. * the scif_range structure pointed to by pages was already returned
  1208. * ENODEV - The remote node is lost, or existed but is not currently in the
  1209. * network since it may have crashed
  1210. * ENOTCONN - The endpoint is not connected
  1211. */
  1212. int scif_put_pages(struct scif_range *pages);
  1213. /**
  1214. * scif_poll() - Wait for some event on an endpoint
  1215. * @epds: Array of endpoint descriptors
  1216. * @nepds: Length of epds
  1217. * @timeout: Upper limit on time for which scif_poll() will block
  1218. *
  1219. * scif_poll() waits for one of a set of endpoints to become ready to perform
  1220. * an I/O operation.
  1221. *
  1222. * The epds argument specifies the endpoint descriptors to be examined and the
  1223. * events of interest for each endpoint descriptor. epds is a pointer to an
  1224. * array with one member for each open endpoint descriptor of interest.
  1225. *
  1226. * The number of items in the epds array is specified in nepds. The epd field
  1227. * of scif_pollepd is an endpoint descriptor of an open endpoint. The field
  1228. * events is a bitmask specifying the events which the application is
  1229. * interested in. The field revents is an output parameter, filled by the
  1230. * kernel with the events that actually occurred. The bits returned in revents
  1231. * can include any of those specified in events, or one of the values POLLERR,
  1232. * POLLHUP, or POLLNVAL. (These three bits are meaningless in the events
  1233. * field, and will be set in the revents field whenever the corresponding
  1234. * condition is true.)
  1235. *
  1236. * If none of the events requested (and no error) has occurred for any of the
  1237. * endpoint descriptors, then scif_poll() blocks until one of the events occurs.
  1238. *
  1239. * The timeout argument specifies an upper limit on the time for which
  1240. * scif_poll() will block, in milliseconds. Specifying a negative value in
  1241. * timeout means an infinite timeout.
  1242. *
  1243. * The following bits may be set in events and returned in revents.
  1244. * POLLIN - Data may be received without blocking. For a connected
  1245. * endpoint, this means that scif_recv() may be called without blocking. For a
  1246. * listening endpoint, this means that scif_accept() may be called without
  1247. * blocking.
  1248. * POLLOUT - Data may be sent without blocking. For a connected endpoint, this
  1249. * means that scif_send() may be called without blocking. POLLOUT may also be
  1250. * used to block waiting for a non-blocking connect to complete. This bit value
  1251. * has no meaning for a listening endpoint and is ignored if specified.
  1252. *
  1253. * The following bits are only returned in revents, and are ignored if set in
  1254. * events.
  1255. * POLLERR - An error occurred on the endpoint
  1256. * POLLHUP - The connection to the peer endpoint was disconnected
  1257. * POLLNVAL - The specified endpoint descriptor is invalid.
  1258. *
  1259. * Return:
  1260. * Upon successful completion, scif_poll() returns a non-negative value. A
  1261. * positive value indicates the total number of endpoint descriptors that have
  1262. * been selected (that is, endpoint descriptors for which the revents member is
  1263. * non-zero). A value of 0 indicates that the call timed out and no endpoint
  1264. * descriptors have been selected. Otherwise in user mode -1 is returned and
  1265. * errno is set to indicate the error; in kernel mode the negative of one of
  1266. * the following errors is returned.
  1267. *
  1268. * Errors:
  1269. * EINTR - A signal occurred before any requested event
  1270. * EINVAL - The nepds argument is greater than {OPEN_MAX}
  1271. * ENOMEM - There was no space to allocate file descriptor tables
  1272. */
  1273. int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout);
  1274. /**
  1275. * scif_client_register() - Register a SCIF client
  1276. * @client: client to be registered
  1277. *
  1278. * scif_client_register() registers a SCIF client. The probe() method
  1279. * of the client is called when SCIF peer devices come online and the
  1280. * remove() method is called when the peer devices disappear.
  1281. *
  1282. * Return:
  1283. * Upon successful completion, scif_client_register() returns a non-negative
  1284. * value. Otherwise the return value is the same as subsys_interface_register()
  1285. * in the kernel.
  1286. */
  1287. int scif_client_register(struct scif_client *client);
  1288. /**
  1289. * scif_client_unregister() - Unregister a SCIF client
  1290. * @client: client to be unregistered
  1291. *
  1292. * scif_client_unregister() unregisters a SCIF client.
  1293. *
  1294. * Return:
  1295. * None
  1296. */
  1297. void scif_client_unregister(struct scif_client *client);
  1298. #endif /* __SCIF_H__ */