verbs.c
/*
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <asm/bitops.h>
#include <linux/module.h> /* try_module_get()/module_put() */

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/*
 * internal functions
 */

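/* Receive completions are handed off to this workqueue (see
 * rpcrdma_wc_receive) so that RPC reply processing runs in process
 * context rather than in the CQ's softirq polling context.
 * WQ_MEM_RECLAIM guarantees a rescuer thread, so replies can still be
 * processed under memory pressure.
 */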
static struct workqueue_struct *rpcrdma_receive_wq;

int
rpcrdma_alloc_wq(void)
{
	struct workqueue_struct *recv_wq;

	recv_wq = alloc_workqueue("xprtrdma_receive",
				  WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
				  0);
	if (!recv_wq)
		return -ENOMEM;

	rpcrdma_receive_wq = recv_wq;
	return 0;
}

void
rpcrdma_destroy_wq(void)
{
	struct workqueue_struct *wq;

	if (rpcrdma_receive_wq) {
		wq = rpcrdma_receive_wq;
		rpcrdma_receive_wq = NULL;
		destroy_workqueue(wq);
	}
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ib_event_msg(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

/**
 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
}

/* Perform basic sanity checking to avoid using garbage
 * to update the credit grant value.
 */
static void
rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
{
	struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
	struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
	u32 credits;

	if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
		return;

	credits = be32_to_cpu(rmsgp->rm_credit);
	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > buffer->rb_max_requests)
		credits = buffer->rb_max_requests;

	atomic_set(&buffer->rb_credits, credits);
}

/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
					       rr_cqe);

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	rep->rr_wc_flags = wc->wc_flags;
	rep->rr_inv_rkey = wc->ex.invalidate_rkey;

	ib_dma_sync_single_for_cpu(rep->rr_device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);

	rpcrdma_update_granted_credits(rep);

out_schedule:
	queue_work(rpcrdma_receive_wq, &rep->rr_work);
	return;

out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
	rep->rr_len = RPCRDMA_BAD_LEN;
	goto out_schedule;
}

static void
rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
			       struct rdma_conn_param *param)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	const struct rpcrdma_connect_private *pmsg = param->private_data;
	unsigned int rsize, wsize;

	/* Default settings for RPC-over-RDMA Version One */
	r_xprt->rx_ia.ri_reminv_expected = false;
	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;

	if (pmsg &&
	    pmsg->cp_magic == rpcrdma_cmp_magic &&
	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
		r_xprt->rx_ia.ri_reminv_expected = true;
		r_xprt->rx_ia.ri_implicit_roundup = true;
		rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
	}

	if (rsize < cdata->inline_rsize)
		cdata->inline_rsize = rsize;
	if (wsize < cdata->inline_wsize)
		cdata->inline_wsize = wsize;
	dprintk("RPC: %s: max send %u, max recv %u\n",
		__func__, cdata->inline_wsize, cdata->inline_rsize);
	rpcrdma_set_max_header_sizes(r_xprt);
}

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
	struct ib_qp_attr *attr = &ia->ri_qp_attr;
	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr->max_dest_rd_atomic,
			attr->max_rd_atomic);
		rpcrdma_update_connect_private(xprt, &event->param.conn);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		atomic_set(&xprt->rx_buf.rb_credits, 1);
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
			__func__, sap, rpc_get_port(sap), ep,
			rdma_event_msg(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr->max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;

		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
			sap, rpc_get_port(sap),
			ia->ri_device->name,
			ia->ri_ops->ro_displayname,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
			sap, rpc_get_port(sap), connstate);
	}
#endif

	return 0;
}

static void rpcrdma_destroy_id(struct rdma_cm_id *id)
{
	if (id) {
		module_put(id->device->owner);
		rdma_destroy_id(id);
	}
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
		  struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
			    IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);

	/* FIXME:
	 * Until xprtrdma supports DEVICE_REMOVAL, the provider must
	 * be pinned while there are active NFS/RDMA mounts to prevent
	 * hangs and crashes at umount time.
	 */
	if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
		dprintk("RPC: %s: Failed to get device module\n",
			__func__);
		ia->ri_async_rc = -ENODEV;
	}
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto put;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto put;

	return id;
put:
	module_put(id->device->owner);
out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	int rc;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}
	ia->ri_device = ia->ri_id->device;

	ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
		goto out2;
	}

	switch (memreg) {
	case RPCRDMA_FRMR:
		if (frwr_is_supported(ia)) {
			ia->ri_ops = &rpcrdma_frwr_memreg_ops;
			break;
		}
		/*FALLTHROUGH*/
	case RPCRDMA_MTHCAFMR:
		if (fmr_is_supported(ia)) {
			ia->ri_ops = &rpcrdma_fmr_memreg_ops;
			break;
		}
		/*FALLTHROUGH*/
	default:
		pr_err("rpcrdma: Unsupported memory registration mode: %d\n",
		       memreg);
		rc = -EINVAL;
		goto out3;
	}

	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rpcrdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rpcrdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}

	/* If the pd is still busy, xprtrdma missed freeing a resource */
	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
		ib_dealloc_pd(ia->ri_pd);
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
		  struct rpcrdma_create_data_internal *cdata)
{
	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
	unsigned int max_qp_wr, max_sge;
	struct ib_cq *sendcq, *recvcq;
	int rc;

	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
			RPCRDMA_MAX_SEND_SGES);
	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
		return -ENOMEM;
	}
	ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;

	if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
		dprintk("RPC: %s: insufficient wqe's available\n",
			__func__);
		return -ENOMEM;
	}
	max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > max_qp_wr)
		cdata->max_requests = max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_send_wr += 1;	/* drain cqe */
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_recv_wr += 1;	/* drain cqe */
	ep->rep_attr.cap.max_send_sge = max_sge;
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

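	/* A Send completion is requested only about once per
	 * (max_send_wr / 2) Sends, which bounds the rate of Send
	 * completion interrupts. A queue too shallow to batch
	 * (rep_cqinit of zero) requests a completion for every Send.
	 */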
	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;	/* always signal? */
	rpcrdma_init_cqcount(ep, 0);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_send_wr + 1,
			     0, IB_POLL_SOFTIRQ);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	recvcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_recv_wr + 1,
			     0, IB_POLL_SOFTIRQ);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */
	memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));

	/* Prepare RDMA-CM private message */
	pmsg->cp_magic = rpcrdma_cmp_magic;
	pmsg->cp_version = RPCRDMA_CMP_VERSION;
	pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
	pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
	pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
	ep->rep_remote_cma.private_data = pmsg;
	ep->rep_remote_cma.private_data_len = sizeof(*pmsg);

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (ia->ri_device->attrs.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						ia->ri_device->attrs.max_qp_rd_atom;

	/* Limit transport retries so client can detect server
	 * GID changes quickly. RPC layer handles re-establishing
	 * transport connection and retransmission.
	 */
	ep->rep_remote_cma.retry_count = 6;

	/* RPC-over-RDMA handles its own flow control. In addition,
	 * make all RNR NAKs visible so we know that RPC-over-RDMA
	 * flow control is working correctly (no NAKs should be seen).
	 */
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	ib_free_cq(sendcq);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	ib_free_cq(ep->rep_attr.recv_cq);
	ib_free_cq(ep->rep_attr.send_cq);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_device != id->device) {
			printk("RPC: %s: can't reconnect on "
			       "different device!\n", __func__);
			rpcrdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rpcrdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		old = ia->ri_id;
		ia->ri_id = id;

		rdma_destroy_qp(old);
		rpcrdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use the same nonzero values for ORD and IRD.
		 */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		struct rpcrdma_xprt *r_xprt;
		unsigned int extras;

		dprintk("RPC: %s: connected\n", __func__);

		r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		extras = r_xprt->rx_buf.rb_bc_srv_max_requests;

		if (extras) {
			rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
			if (rc) {
				pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
					__func__, rc);
				rc = 0;
			}
		}
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}

	ib_drain_qp(ia->ri_id->qp);
}

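/* The stale MR list is drained one entry at a time: the recovery op
 * may sleep, so rb_recovery_lock is dropped while each MR is handed
 * to ->ro_recover_mr.
 */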
static void
rpcrdma_mr_recovery_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_recovery_worker.work);
	struct rpcrdma_mw *mw;

	spin_lock(&buf->rb_recovery_lock);
	while (!list_empty(&buf->rb_stale_mrs)) {
		mw = list_first_entry(&buf->rb_stale_mrs,
				      struct rpcrdma_mw, mw_list);
		list_del_init(&mw->mw_list);
		spin_unlock(&buf->rb_recovery_lock);

		dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
		mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);

		spin_lock(&buf->rb_recovery_lock);
	}
	spin_unlock(&buf->rb_recovery_lock);
}

void
rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
{
	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;

	spin_lock(&buf->rb_recovery_lock);
	list_add(&mw->mw_list, &buf->rb_stale_mrs);
	spin_unlock(&buf->rb_recovery_lock);

	schedule_delayed_work(&buf->rb_recovery_worker, 0);
}

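/* Allocate a batch of 32 MWs and splice them onto the free and "all"
 * lists. Called at buffer creation time, and again from the refresh
 * worker whenever rpcrdma_get_mw() finds the free list empty.
 */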
static void
rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	unsigned int count;
	LIST_HEAD(free);
	LIST_HEAD(all);

	for (count = 0; count < 32; count++) {
		struct rpcrdma_mw *mw;
		int rc;

		mw = kzalloc(sizeof(*mw), GFP_KERNEL);
		if (!mw)
			break;

		rc = ia->ri_ops->ro_init_mr(ia, mw);
		if (rc) {
			kfree(mw);
			break;
		}

		mw->mw_xprt = r_xprt;

		list_add(&mw->mw_list, &free);
		list_add(&mw->mw_all, &all);
	}

	spin_lock(&buf->rb_mwlock);
	list_splice(&free, &buf->rb_mws);
	list_splice(&all, &buf->rb_all);
	r_xprt->rx_stats.mrs_allocated += count;
	spin_unlock(&buf->rb_mwlock);

	dprintk("RPC: %s: created %u MRs\n", __func__, count);
}

static void
rpcrdma_mr_refresh_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_refresh_worker.work);
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);

	rpcrdma_create_mrs(r_xprt);
}

struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&req->rl_free);
	spin_lock(&buffer->rb_reqslock);
	list_add(&req->rl_all, &buffer->rb_allreqs);
	spin_unlock(&buffer->rb_reqslock);
	req->rl_cqe.done = rpcrdma_wc_send;
	req->rl_buffer = &r_xprt->rx_buf;
	INIT_LIST_HEAD(&req->rl_registered);
	req->rl_send_wr.next = NULL;
	req->rl_send_wr.wr_cqe = &req->rl_cqe;
	req->rl_send_wr.sg_list = req->rl_send_sge;
	req->rl_send_wr.opcode = IB_WR_SEND;
	return req;
}

struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
					       DMA_FROM_DEVICE, GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);
		goto out_free;
	}

	rep->rr_device = ia->ri_device;
	rep->rr_cqe.done = rpcrdma_wc_receive;
	rep->rr_rxprt = r_xprt;
	INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
	rep->rr_recv_wr.next = NULL;
	rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	rep->rr_recv_wr.num_sge = 1;
	return rep;

out_free:
	kfree(rep);
out:
	return ERR_PTR(rc);
}

int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	int i, rc;

	buf->rb_max_requests = r_xprt->rx_data.max_requests;
	buf->rb_bc_srv_max_requests = 0;
	atomic_set(&buf->rb_credits, 1);
	spin_lock_init(&buf->rb_mwlock);
	spin_lock_init(&buf->rb_lock);
	spin_lock_init(&buf->rb_recovery_lock);
	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	INIT_LIST_HEAD(&buf->rb_stale_mrs);
	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
			  rpcrdma_mr_refresh_worker);
	INIT_DELAYED_WORK(&buf->rb_recovery_worker,
			  rpcrdma_mr_recovery_worker);

	rpcrdma_create_mrs(r_xprt);

	INIT_LIST_HEAD(&buf->rb_send_bufs);
	INIT_LIST_HEAD(&buf->rb_allreqs);
	spin_lock_init(&buf->rb_reqslock);
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;

		req = rpcrdma_create_req(r_xprt);
		if (IS_ERR(req)) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = PTR_ERR(req);
			goto out;
		}
		req->rl_backchannel = false;
		list_add(&req->rl_free, &buf->rb_send_bufs);
	}

	INIT_LIST_HEAD(&buf->rb_recv_bufs);
	for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
		struct rpcrdma_rep *rep;

		rep = rpcrdma_create_rep(r_xprt);
		if (IS_ERR(rep)) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = PTR_ERR(rep);
			goto out;
		}
		list_add(&rep->rr_list, &buf->rb_recv_bufs);
	}

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_req *req;

	req = list_first_entry(&buf->rb_send_bufs,
			       struct rpcrdma_req, rl_free);
	list_del(&req->rl_free);
	return req;
}

static struct rpcrdma_rep *
rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_rep *rep;

	rep = list_first_entry(&buf->rb_recv_bufs,
			       struct rpcrdma_rep, rr_list);
	list_del(&rep->rr_list);
	return rep;
}

static void
rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
	rpcrdma_free_regbuf(rep->rr_rdmabuf);
	kfree(rep);
}

void
rpcrdma_destroy_req(struct rpcrdma_req *req)
{
	rpcrdma_free_regbuf(req->rl_recvbuf);
	rpcrdma_free_regbuf(req->rl_sendbuf);
	rpcrdma_free_regbuf(req->rl_rdmabuf);
	kfree(req);
}

static void
rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *mw;
	unsigned int count;

	count = 0;
	spin_lock(&buf->rb_mwlock);
	while (!list_empty(&buf->rb_all)) {
		mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&mw->mw_all);

		spin_unlock(&buf->rb_mwlock);
		ia->ri_ops->ro_release_mr(mw);
		count++;
		spin_lock(&buf->rb_mwlock);
	}
	spin_unlock(&buf->rb_mwlock);
	r_xprt->rx_stats.mrs_allocated = 0;

	dprintk("RPC: %s: released %u MRs\n", __func__, count);
}

void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	cancel_delayed_work_sync(&buf->rb_recovery_worker);
	cancel_delayed_work_sync(&buf->rb_refresh_worker);

	while (!list_empty(&buf->rb_recv_bufs)) {
		struct rpcrdma_rep *rep;

		rep = rpcrdma_buffer_get_rep_locked(buf);
		rpcrdma_destroy_rep(rep);
	}
	buf->rb_send_count = 0;

	spin_lock(&buf->rb_reqslock);
	while (!list_empty(&buf->rb_allreqs)) {
		struct rpcrdma_req *req;

		req = list_first_entry(&buf->rb_allreqs,
				       struct rpcrdma_req, rl_all);
		list_del(&req->rl_all);

		spin_unlock(&buf->rb_reqslock);
		rpcrdma_destroy_req(req);
		spin_lock(&buf->rb_reqslock);
	}
	spin_unlock(&buf->rb_reqslock);
	buf->rb_recv_count = 0;

	rpcrdma_destroy_mrs(buf);
}

struct rpcrdma_mw *
rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mw *mw = NULL;

	spin_lock(&buf->rb_mwlock);
	if (!list_empty(&buf->rb_mws)) {
		mw = list_first_entry(&buf->rb_mws,
				      struct rpcrdma_mw, mw_list);
		list_del_init(&mw->mw_list);
	}
	spin_unlock(&buf->rb_mwlock);

	if (!mw)
		goto out_nomws;
	return mw;

out_nomws:
	dprintk("RPC: %s: no MWs available\n", __func__);
	schedule_delayed_work(&buf->rb_refresh_worker, 0);

	/* Allow the reply handler and refresh worker to run */
	cond_resched();

	return NULL;
}

void
rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;

	spin_lock(&buf->rb_mwlock);
	list_add_tail(&mw->mw_list, &buf->rb_mws);
	spin_unlock(&buf->rb_mwlock);
}

static struct rpcrdma_rep *
rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
{
	/* If an RPC previously completed without a reply (say, a
	 * credential problem or a soft timeout occurs) then hold off
	 * on supplying more Receive buffers until the number of new
	 * pending RPCs catches up to the number of posted Receives.
	 */
	if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
		return NULL;
	if (unlikely(list_empty(&buffers->rb_recv_bufs)))
		return NULL;
	buffers->rb_recv_count++;
	return rpcrdma_buffer_get_rep_locked(buffers);
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if available) is attached to send buffer upon return.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;

	spin_lock(&buffers->rb_lock);
	if (list_empty(&buffers->rb_send_bufs))
		goto out_reqbuf;
	buffers->rb_send_count++;
	req = rpcrdma_buffer_get_req_locked(buffers);
	req->rl_reply = rpcrdma_buffer_get_rep(buffers);
	spin_unlock(&buffers->rb_lock);
	return req;

out_reqbuf:
	spin_unlock(&buffers->rb_lock);
	pr_warn("RPC: %s: out of request buffers\n", __func__);
	return NULL;
}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_rep *rep = req->rl_reply;

	req->rl_send_wr.num_sge = 0;
	req->rl_reply = NULL;

	spin_lock(&buffers->rb_lock);
	buffers->rb_send_count--;
	list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
	if (rep) {
		buffers->rb_recv_count--;
		list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
	}
	spin_unlock(&buffers->rb_lock);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from disconnect.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;

	spin_lock(&buffers->rb_lock);
	req->rl_reply = rpcrdma_buffer_get_rep(buffers);
	spin_unlock(&buffers->rb_lock);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;

	spin_lock(&buffers->rb_lock);
	buffers->rb_recv_count--;
	list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
	spin_unlock(&buffers->rb_lock);
}

/**
 * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
 * @size: size of buffer to be allocated, in bytes
 * @direction: direction of data movement
 * @flags: GFP flags
 *
 * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
 * can be persistently DMA-mapped for I/O.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. During Long Calls
 * or Replies they may be registered externally via ro_map.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
		     gfp_t flags)
{
	struct rpcrdma_regbuf *rb;

	rb = kmalloc(sizeof(*rb) + size, flags);
	if (rb == NULL)
		return ERR_PTR(-ENOMEM);

	rb->rg_device = NULL;
	rb->rg_direction = direction;
	rb->rg_iov.length = size;

	return rb;
}

/**
 * __rpcrdma_dma_map_regbuf - DMA-map a regbuf
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be mapped
 */
bool
__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	if (rb->rg_direction == DMA_NONE)
		return false;

	rb->rg_iov.addr = ib_dma_map_single(ia->ri_device,
					    (void *)rb->rg_base,
					    rdmab_length(rb),
					    rb->rg_direction);
	if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb)))
		return false;

	rb->rg_device = ia->ri_device;
	rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
	return true;
}

static void
rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
{
	if (!rpcrdma_regbuf_is_mapped(rb))
		return;

	ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
			    rdmab_length(rb), rb->rg_direction);
	rb->rg_device = NULL;
}

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
{
	if (!rb)
		return;

	rpcrdma_dma_unmap_regbuf(rb);
	kfree(rb);
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr *send_wr = &req->rl_send_wr;
	struct ib_send_wr *send_wr_fail;
	int rc;

	if (req->rl_reply) {
		rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
		if (rc)
			return rc;
		req->rl_reply = NULL;
	}

	dprintk("RPC: %s: posting %d s/g entries\n",
		__func__, send_wr->num_sge);

	rpcrdma_set_signaled(ep, send_wr);
	rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
	if (rc)
		goto out_postsend_err;
	return 0;

out_postsend_err:
	pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
	return -ENOTCONN;
}

int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr *recv_wr_fail;
	int rc;

	if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
		goto out_map;
	rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
	if (rc)
		goto out_postrecv;
	return 0;

out_map:
	pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
	return -EIO;

out_postrecv:
	pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
	return -ENOTCONN;
}

/**
 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
 * @r_xprt: transport associated with these backchannel resources
 * @count: minimum number of incoming requests expected
 *
 * Returns zero if all requested buffers were posted, or a negative errno.
 */
int
rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
{
	struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	while (count--) {
		spin_lock(&buffers->rb_lock);
		if (list_empty(&buffers->rb_recv_bufs))
			goto out_reqbuf;
		rep = rpcrdma_buffer_get_rep_locked(buffers);
		spin_unlock(&buffers->rb_lock);

		rc = rpcrdma_ep_post_recv(ia, rep);
		if (rc)
			goto out_rc;
	}

	return 0;

out_reqbuf:
	spin_unlock(&buffers->rb_lock);
	pr_warn("%s: no extra receive buffers\n", __func__);
	return -ENOMEM;

out_rc:
	rpcrdma_recv_buffer_put(rep);
	return rc;
}