svc_rdma_transport.c

/*
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * Neither the name of the Network Appliance, Inc. nor the names of
 * its contributors may be used to endorse or promote products
 * derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/export.h>
#include "xprt_rdma.h"

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
					struct net *net,
					struct sockaddr *sa, int salen,
					int flags);
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
static void svc_rdma_release_rqst(struct svc_rqst *);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static int svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);

static struct svc_xprt_ops svc_rdma_ops = {
	.xpo_create = svc_rdma_create,
	.xpo_recvfrom = svc_rdma_recvfrom,
	.xpo_sendto = svc_rdma_sendto,
	.xpo_release_rqst = svc_rdma_release_rqst,
	.xpo_detach = svc_rdma_detach,
	.xpo_free = svc_rdma_free,
	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
	.xpo_has_wspace = svc_rdma_has_wspace,
	.xpo_accept = svc_rdma_accept,
	.xpo_secure_port = svc_rdma_secure_port,
	.xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt,
};

struct svc_xprt_class svc_rdma_class = {
	.xcl_name = "rdma",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_rdma_ops,
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
	.xcl_ident = XPRT_TRANSPORT_RDMA,
};

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
					   struct sockaddr *, int, int);
static void svc_rdma_bc_detach(struct svc_xprt *);
static void svc_rdma_bc_free(struct svc_xprt *);

static struct svc_xprt_ops svc_rdma_bc_ops = {
	.xpo_create = svc_rdma_bc_create,
	.xpo_detach = svc_rdma_bc_detach,
	.xpo_free = svc_rdma_bc_free,
	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
	.xpo_secure_port = svc_rdma_secure_port,
};

struct svc_xprt_class svc_rdma_bc_class = {
	.xcl_name = "rdma-bc",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_rdma_bc_ops,
	.xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
};

static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
					   struct net *net,
					   struct sockaddr *sa, int salen,
					   int flags)
{
	struct svcxprt_rdma *cma_xprt;
	struct svc_xprt *xprt;

	cma_xprt = rdma_create_xprt(serv, 0);
	if (!cma_xprt)
		return ERR_PTR(-ENOMEM);
	xprt = &cma_xprt->sc_xprt;

	svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
	serv->sv_bc_xprt = xprt;

	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
	return xprt;
}

static void svc_rdma_bc_detach(struct svc_xprt *xprt)
{
	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
}

static void svc_rdma_bc_free(struct svc_xprt *xprt)
{
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);

	dprintk("svcrdma: %s(%p)\n", __func__, xprt);
	if (xprt)
		kfree(rdma);
}
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
					   gfp_t flags)
{
	struct svc_rdma_op_ctxt *ctxt;

	ctxt = kmalloc(sizeof(*ctxt), flags);
	if (ctxt) {
		ctxt->xprt = xprt;
		INIT_LIST_HEAD(&ctxt->free);
		INIT_LIST_HEAD(&ctxt->dto_q);
	}
	return ctxt;
}

static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
{
	unsigned int i;

	/* Each RPC/RDMA credit can consume a number of send
	 * and receive WQEs. One ctxt is allocated for each.
	 */
	i = xprt->sc_sq_depth + xprt->sc_rq_depth;

	while (i--) {
		struct svc_rdma_op_ctxt *ctxt;

		ctxt = alloc_ctxt(xprt, GFP_KERNEL);
		if (!ctxt) {
			dprintk("svcrdma: No memory for RDMA ctxt\n");
			return false;
		}
		list_add(&ctxt->free, &xprt->sc_ctxts);
	}
	return true;
}
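
/*
 * svc_rdma_get_context - Take an op context off the transport's free list.
 *
 * Falls back to a GFP_NOIO allocation if the pre-allocated pool has been
 * exhausted, and warns if even that fails.
 */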
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_op_ctxt *ctxt = NULL;

	spin_lock_bh(&xprt->sc_ctxt_lock);
	xprt->sc_ctxt_used++;
	if (list_empty(&xprt->sc_ctxts))
		goto out_empty;
	ctxt = list_first_entry(&xprt->sc_ctxts,
				struct svc_rdma_op_ctxt, free);
	list_del_init(&ctxt->free);
	spin_unlock_bh(&xprt->sc_ctxt_lock);

out:
	ctxt->count = 0;
	ctxt->mapped_sges = 0;
	ctxt->frmr = NULL;
	return ctxt;

out_empty:
	/* Either pre-allocation missed the mark, or send
	 * queue accounting is broken.
	 */
	spin_unlock_bh(&xprt->sc_ctxt_lock);
	ctxt = alloc_ctxt(xprt, GFP_NOIO);
	if (ctxt)
		goto out;

	spin_lock_bh(&xprt->sc_ctxt_lock);
	xprt->sc_ctxt_used--;
	spin_unlock_bh(&xprt->sc_ctxt_lock);
	WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
	return NULL;
}

void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
{
	struct svcxprt_rdma *xprt = ctxt->xprt;
	struct ib_device *device = xprt->sc_cm_id->device;
	u32 lkey = xprt->sc_pd->local_dma_lkey;
	unsigned int i, count;

	for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
		/*
		 * Unmap the DMA addr in the SGE if the lkey matches
		 * the local_dma_lkey, otherwise, ignore it since it is
		 * an FRMR lkey and will be unmapped later when the
		 * last WR that uses it completes.
		 */
		if (ctxt->sge[i].lkey == lkey) {
			count++;
			ib_dma_unmap_page(device,
					  ctxt->sge[i].addr,
					  ctxt->sge[i].length,
					  ctxt->direction);
		}
	}
	ctxt->mapped_sges = 0;
	atomic_sub(count, &xprt->sc_dma_used);
}
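
/*
 * svc_rdma_put_context - Return an op context to the transport's free
 * list. If @free_pages is set, the pages attached to the context are
 * released as well.
 */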
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
{
	struct svcxprt_rdma *xprt = ctxt->xprt;
	int i;

	if (free_pages)
		for (i = 0; i < ctxt->count; i++)
			put_page(ctxt->pages[i]);

	spin_lock_bh(&xprt->sc_ctxt_lock);
	xprt->sc_ctxt_used--;
	list_add(&ctxt->free, &xprt->sc_ctxts);
	spin_unlock_bh(&xprt->sc_ctxt_lock);
}

static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
{
	while (!list_empty(&xprt->sc_ctxts)) {
		struct svc_rdma_op_ctxt *ctxt;

		ctxt = list_first_entry(&xprt->sc_ctxts,
					struct svc_rdma_op_ctxt, free);
		list_del(&ctxt->free);
		kfree(ctxt);
	}
}

static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
{
	struct svc_rdma_req_map *map;

	map = kmalloc(sizeof(*map), flags);
	if (map)
		INIT_LIST_HEAD(&map->free);
	return map;
}

static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
{
	unsigned int i;

	/* One for each receive buffer on this connection. */
	i = xprt->sc_max_requests;

	while (i--) {
		struct svc_rdma_req_map *map;

		map = alloc_req_map(GFP_KERNEL);
		if (!map) {
			dprintk("svcrdma: No memory for request map\n");
			return false;
		}
		list_add(&map->free, &xprt->sc_maps);
	}
	return true;
}
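
/*
 * svc_rdma_get_req_map - Take a request map off the transport's free
 * list, falling back to a GFP_NOIO allocation if the pre-allocated
 * pool has run dry.
 */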
struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_req_map *map = NULL;

	spin_lock(&xprt->sc_map_lock);
	if (list_empty(&xprt->sc_maps))
		goto out_empty;
	map = list_first_entry(&xprt->sc_maps,
			       struct svc_rdma_req_map, free);
	list_del_init(&map->free);
	spin_unlock(&xprt->sc_map_lock);

out:
	map->count = 0;
	return map;

out_empty:
	spin_unlock(&xprt->sc_map_lock);

	/* Pre-allocation amount was incorrect */
	map = alloc_req_map(GFP_NOIO);
	if (map)
		goto out;

	WARN_ONCE(1, "svcrdma: empty request map list?\n");
	return NULL;
}

void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
			  struct svc_rdma_req_map *map)
{
	spin_lock(&xprt->sc_map_lock);
	list_add(&map->free, &xprt->sc_maps);
	spin_unlock(&xprt->sc_map_lock);
}

static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
{
	while (!list_empty(&xprt->sc_maps)) {
		struct svc_rdma_req_map *map;

		map = list_first_entry(&xprt->sc_maps,
				       struct svc_rdma_req_map, free);
		list_del(&map->free);
		kfree(map);
	}
}

/* QP event handler */
static void qp_event_handler(struct ib_event *event, void *context)
{
	struct svc_xprt *xprt = context;

	switch (event->event) {
	/* These are considered benign events */
	case IB_EVENT_PATH_MIG:
	case IB_EVENT_COMM_EST:
	case IB_EVENT_SQ_DRAINED:
	case IB_EVENT_QP_LAST_WQE_REACHED:
		dprintk("svcrdma: QP event %s (%d) received for QP=%p\n",
			ib_event_msg(event->event), event->event,
			event->element.qp);
		break;
	/* These are considered fatal events */
	case IB_EVENT_PATH_MIG_ERR:
	case IB_EVENT_QP_FATAL:
	case IB_EVENT_QP_REQ_ERR:
	case IB_EVENT_QP_ACCESS_ERR:
	case IB_EVENT_DEVICE_FATAL:
	default:
		dprintk("svcrdma: QP ERROR event %s (%d) received for QP=%p, "
			"closing transport\n",
			ib_event_msg(event->event), event->event,
			event->element.qp);
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		break;
	}
}

/**
 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: completion queue
 * @wc: completed WR
 *
 */
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *xprt = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_op_ctxt *ctxt;

	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
	ctxt->wc_status = wc->status;
	svc_rdma_unmap_dma(ctxt);

	if (wc->status != IB_WC_SUCCESS)
		goto flushed;

	/* All wc fields are now known to be valid */
	ctxt->byte_len = wc->byte_len;
	spin_lock(&xprt->sc_rq_dto_lock);
	list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
	spin_unlock(&xprt->sc_rq_dto_lock);

	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
	if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
		goto out;
	svc_xprt_enqueue(&xprt->sc_xprt);
	goto out;

flushed:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_warn("svcrdma: receive: %s (%u/0x%x)\n",
			ib_wc_status_msg(wc->status),
			wc->status, wc->vendor_err);
	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
	svc_rdma_put_context(ctxt, 1);

out:
	svc_xprt_put(&xprt->sc_xprt);
}

static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
				    struct ib_wc *wc,
				    const char *opname)
{
	if (wc->status != IB_WC_SUCCESS)
		goto err;

out:
	atomic_dec(&xprt->sc_sq_count);
	wake_up(&xprt->sc_send_wait);
	return;

err:
	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("svcrdma: %s: %s (%u/0x%x)\n",
		       opname, ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
	goto out;
}

static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
					const char *opname)
{
	struct svcxprt_rdma *xprt = cq->cq_context;

	svc_rdma_send_wc_common(xprt, wc, opname);
	svc_xprt_put(&xprt->sc_xprt);
}

/**
 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq: completion queue
 * @wc: completed WR
 *
 */
void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_op_ctxt *ctxt;

	svc_rdma_send_wc_common_put(cq, wc, "send");

	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
}

/**
 * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC
 * @cq: completion queue
 * @wc: completed WR
 *
 */
void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_op_ctxt *ctxt;

	svc_rdma_send_wc_common_put(cq, wc, "write");

	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
}

/**
 * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
 * @cq: completion queue
 * @wc: completed WR
 *
 */
void svc_rdma_wc_reg(struct ib_cq *cq, struct ib_wc *wc)
{
	svc_rdma_send_wc_common_put(cq, wc, "fastreg");
}

/**
 * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC
 * @cq: completion queue
 * @wc: completed WR
 *
 */
void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *xprt = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_op_ctxt *ctxt;

	svc_rdma_send_wc_common(xprt, wc, "read");

	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_frmr(xprt, ctxt->frmr);

	if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
		struct svc_rdma_op_ctxt *read_hdr;

		read_hdr = ctxt->read_hdr;
		spin_lock(&xprt->sc_rq_dto_lock);
		list_add_tail(&read_hdr->dto_q,
			      &xprt->sc_read_complete_q);
		spin_unlock(&xprt->sc_rq_dto_lock);

		set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
		svc_xprt_enqueue(&xprt->sc_xprt);
	}

	svc_rdma_put_context(ctxt, 0);
	svc_xprt_put(&xprt->sc_xprt);
}

/**
 * svc_rdma_wc_inv - Invoked by RDMA provider for each polled LOCAL_INV WC
 * @cq: completion queue
 * @wc: completed WR
 *
 */
void svc_rdma_wc_inv(struct ib_cq *cq, struct ib_wc *wc)
{
	svc_rdma_send_wc_common_put(cq, wc, "localInv");
}
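
/*
 * rdma_create_xprt - Allocate and initialize a new svcxprt_rdma. The
 * listener argument marks the transport as a listening endpoint.
 */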
static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
					     int listener)
{
	struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);

	if (!cma_xprt)
		return NULL;
	svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
	INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
	INIT_LIST_HEAD(&cma_xprt->sc_maps);
	init_waitqueue_head(&cma_xprt->sc_send_wait);

	spin_lock_init(&cma_xprt->sc_lock);
	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
	spin_lock_init(&cma_xprt->sc_ctxt_lock);
	spin_lock_init(&cma_xprt->sc_map_lock);

	if (listener)
		set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);

	return cma_xprt;
}
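
/*
 * svc_rdma_post_recv - Allocate page-sized buffers, DMA-map them, and
 * post a Receive WR that covers sc_max_req_size bytes. The transport
 * reference taken here is dropped by the Receive completion handler.
 */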
int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
{
	struct ib_recv_wr recv_wr, *bad_recv_wr;
	struct svc_rdma_op_ctxt *ctxt;
	struct page *page;
	dma_addr_t pa;
	int sge_no;
	int buflen;
	int ret;

	ctxt = svc_rdma_get_context(xprt);
	buflen = 0;
	ctxt->direction = DMA_FROM_DEVICE;
	ctxt->cqe.done = svc_rdma_wc_receive;
	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
		if (sge_no >= xprt->sc_max_sge) {
			pr_err("svcrdma: Too many sges (%d)\n", sge_no);
			goto err_put_ctxt;
		}
		page = alloc_page(flags);
		if (!page)
			goto err_put_ctxt;
		ctxt->pages[sge_no] = page;
		pa = ib_dma_map_page(xprt->sc_cm_id->device,
				     page, 0, PAGE_SIZE,
				     DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
			goto err_put_ctxt;
		svc_rdma_count_mappings(xprt, ctxt);
		ctxt->sge[sge_no].addr = pa;
		ctxt->sge[sge_no].length = PAGE_SIZE;
		ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
		ctxt->count = sge_no + 1;
		buflen += PAGE_SIZE;
	}
	recv_wr.next = NULL;
	recv_wr.sg_list = &ctxt->sge[0];
	recv_wr.num_sge = ctxt->count;
	recv_wr.wr_cqe = &ctxt->cqe;

	svc_xprt_get(&xprt->sc_xprt);
	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
	if (ret) {
		svc_rdma_unmap_dma(ctxt);
		svc_rdma_put_context(ctxt, 1);
		svc_xprt_put(&xprt->sc_xprt);
	}
	return ret;

err_put_ctxt:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return -ENOMEM;
}
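
/*
 * svc_rdma_repost_recv - Replenish a Receive buffer. A posting failure
 * here is fatal for the connection, so the transport is marked closed.
 */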
int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
{
	int ret = 0;

	ret = svc_rdma_post_recv(xprt, flags);
	if (ret) {
		pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
		       ret);
		pr_err("svcrdma: closing transport %p.\n", xprt);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		ret = -ENOTCONN;
	}
	return ret;
}

static void
svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
			       struct rdma_conn_param *param)
{
	const struct rpcrdma_connect_private *pmsg = param->private_data;

	if (pmsg &&
	    pmsg->cp_magic == rpcrdma_cmp_magic &&
	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
		newxprt->sc_snd_w_inv = pmsg->cp_flags &
					RPCRDMA_CMP_F_SND_W_INV_OK;

		dprintk("svcrdma: client send_size %u, recv_size %u "
			"remote inv %ssupported\n",
			rpcrdma_decode_buffer_size(pmsg->cp_send_size),
			rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
			newxprt->sc_snd_w_inv ? "" : "un");
	}
}

/*
 * This function handles the CONNECT_REQUEST event on a listening
 * endpoint. It is passed the cma_id for the _new_ connection. The context in
 * this cma_id is inherited from the listening cma_id and is the svc_xprt
 * structure for the listening endpoint.
 *
 * This function creates a new xprt for the new connection and enqueues it on
 * the accept queue for the listening xprt. When the listen thread is kicked,
 * it will call the recvfrom method on the listen xprt which will accept the
 * new connection.
 */
static void handle_connect_req(struct rdma_cm_id *new_cma_id,
			       struct rdma_conn_param *param)
{
	struct svcxprt_rdma *listen_xprt = new_cma_id->context;
	struct svcxprt_rdma *newxprt;
	struct sockaddr *sa;

	/* Create a new transport */
	newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
	if (!newxprt) {
		dprintk("svcrdma: failed to create new transport\n");
		return;
	}
	newxprt->sc_cm_id = new_cma_id;
	new_cma_id->context = newxprt;
	dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
		newxprt, newxprt->sc_cm_id, listen_xprt);

	svc_rdma_parse_connect_private(newxprt, param);

	/* Save client advertised inbound read limit for use later in accept. */
	newxprt->sc_ord = param->initiator_depth;

	/* Set the local and remote addresses in the transport */
	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
	svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
	svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));

	/*
	 * Enqueue the new transport on the accept queue of the listening
	 * transport
	 */
	spin_lock_bh(&listen_xprt->sc_lock);
	list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
	spin_unlock_bh(&listen_xprt->sc_lock);

	set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
	svc_xprt_enqueue(&listen_xprt->sc_xprt);
}

/*
 * Handles events generated on the listening endpoint. These events are
 * either incoming connect requests or adapter removal events.
 */
static int rdma_listen_handler(struct rdma_cm_id *cma_id,
			       struct rdma_cm_event *event)
{
	struct svcxprt_rdma *xprt = cma_id->context;
	int ret = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
			"event = %s (%d)\n", cma_id, cma_id->context,
			rdma_event_msg(event->event), event->event);
		handle_connect_req(cma_id, &event->param.conn);
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		/* Accept complete */
		dprintk("svcrdma: Connection completed on LISTEN xprt=%p, "
			"cm_id=%p\n", xprt, cma_id);
		break;

	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
			xprt, cma_id);
		if (xprt)
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		break;

	default:
		dprintk("svcrdma: Unexpected event on listening endpoint %p, "
			"event = %s (%d)\n", cma_id,
			rdma_event_msg(event->event), event->event);
		break;
	}

	return ret;
}
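
/*
 * rdma_cma_handler - CM event handler for connected (non-listening)
 * endpoints. An established connection takes a transport reference and
 * is enqueued for service; disconnects and device removals mark the
 * transport closed and drop the cm_id's reference on it.
 */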
static int rdma_cma_handler(struct rdma_cm_id *cma_id,
			    struct rdma_cm_event *event)
{
	struct svc_xprt *xprt = cma_id->context;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);

	switch (event->event) {
	case RDMA_CM_EVENT_ESTABLISHED:
		/* Accept complete */
		svc_xprt_get(xprt);
		dprintk("svcrdma: Connection completed on DTO xprt=%p, "
			"cm_id=%p\n", xprt, cma_id);
		clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
		svc_xprt_enqueue(xprt);
		break;

	case RDMA_CM_EVENT_DISCONNECTED:
		dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
			xprt, cma_id);
		if (xprt) {
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_enqueue(xprt);
			svc_xprt_put(xprt);
		}
		break;

	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
			"event = %s (%d)\n", cma_id, xprt,
			rdma_event_msg(event->event), event->event);
		if (xprt) {
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_enqueue(xprt);
			svc_xprt_put(xprt);
		}
		break;

	default:
		dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
			"event = %s (%d)\n", cma_id,
			rdma_event_msg(event->event), event->event);
		break;
	}

	return 0;
}

/*
 * Create a listening RDMA service endpoint.
 */
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
					struct net *net,
					struct sockaddr *sa, int salen,
					int flags)
{
	struct rdma_cm_id *listen_id;
	struct svcxprt_rdma *cma_xprt;
	int ret;

	dprintk("svcrdma: Creating RDMA socket\n");
	if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
		dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
		return ERR_PTR(-EAFNOSUPPORT);
	}
	cma_xprt = rdma_create_xprt(serv, 1);
	if (!cma_xprt)
		return ERR_PTR(-ENOMEM);

	listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
				   RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(listen_id)) {
		ret = PTR_ERR(listen_id);
		dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
		goto err0;
	}

	/* Allow both IPv4 and IPv6 sockets to bind a single port
	 * at the same time.
	 */
#if IS_ENABLED(CONFIG_IPV6)
	ret = rdma_set_afonly(listen_id, 1);
	if (ret) {
		dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret);
		goto err1;
	}
#endif
	ret = rdma_bind_addr(listen_id, sa);
	if (ret) {
		dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
		goto err1;
	}
	cma_xprt->sc_cm_id = listen_id;

	ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
	if (ret) {
		dprintk("svcrdma: rdma_listen failed = %d\n", ret);
		goto err1;
	}

	/*
	 * We need to use the address from the cm_id in case the
	 * caller specified 0 for the port number.
	 */
	sa = (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr;
	svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);

	return &cma_xprt->sc_xprt;

err1:
	rdma_destroy_id(listen_id);
err0:
	kfree(cma_xprt);
	return ERR_PTR(ret);
}
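
/*
 * rdma_alloc_frmr - Allocate a fast registration MR and its scatterlist
 * for the transport's FRMR free queue.
 */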
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
{
	struct ib_mr *mr;
	struct scatterlist *sg;
	struct svc_rdma_fastreg_mr *frmr;
	u32 num_sg;

	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
	if (!frmr)
		goto err;

	num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len);
	mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg);
	if (IS_ERR(mr))
		goto err_free_frmr;

	sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
	if (!sg)
		goto err_free_mr;

	sg_init_table(sg, RPCSVC_MAXPAGES);

	frmr->mr = mr;
	frmr->sg = sg;
	INIT_LIST_HEAD(&frmr->frmr_list);
	return frmr;

err_free_mr:
	ib_dereg_mr(mr);
err_free_frmr:
	kfree(frmr);
err:
	return ERR_PTR(-ENOMEM);
}

static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
{
	struct svc_rdma_fastreg_mr *frmr;

	while (!list_empty(&xprt->sc_frmr_q)) {
		frmr = list_entry(xprt->sc_frmr_q.next,
				  struct svc_rdma_fastreg_mr, frmr_list);
		list_del_init(&frmr->frmr_list);
		kfree(frmr->sg);
		ib_dereg_mr(frmr->mr);
		kfree(frmr);
	}
}
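
/*
 * svc_rdma_get_frmr - Take an FRMR off the transport's free queue, or
 * allocate a fresh one if the queue is empty.
 */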
struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_fastreg_mr *frmr = NULL;

	spin_lock_bh(&rdma->sc_frmr_q_lock);
	if (!list_empty(&rdma->sc_frmr_q)) {
		frmr = list_entry(rdma->sc_frmr_q.next,
				  struct svc_rdma_fastreg_mr, frmr_list);
		list_del_init(&frmr->frmr_list);
		frmr->sg_nents = 0;
	}
	spin_unlock_bh(&rdma->sc_frmr_q_lock);
	if (frmr)
		return frmr;

	return rdma_alloc_frmr(rdma);
}

void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
		       struct svc_rdma_fastreg_mr *frmr)
{
	if (frmr) {
		ib_dma_unmap_sg(rdma->sc_cm_id->device,
				frmr->sg, frmr->sg_nents, frmr->direction);
		atomic_dec(&rdma->sc_dma_used);
		spin_lock_bh(&rdma->sc_frmr_q_lock);
		WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
		spin_unlock_bh(&rdma->sc_frmr_q_lock);
	}
}

/*
 * This is the xpo_recvfrom function for listening endpoints. Its
 * purpose is to accept incoming connections. The CMA callback handler
 * has already created a new transport and attached it to the new CMA
 * ID.
 *
 * There is a queue of pending connections hung on the listening
 * transport. This queue contains the new svc_xprt structure. This
 * function takes svc_xprt structures off the accept_q and completes
 * the connection.
 */
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
{
	struct svcxprt_rdma *listen_rdma;
	struct svcxprt_rdma *newxprt = NULL;
	struct rdma_conn_param conn_param;
	struct rpcrdma_connect_private pmsg;
	struct ib_qp_init_attr qp_attr;
	struct ib_device *dev;
	unsigned int i;
	int ret = 0;

	listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
	clear_bit(XPT_CONN, &xprt->xpt_flags);
	/* Get the next entry off the accept list */
	spin_lock_bh(&listen_rdma->sc_lock);
	if (!list_empty(&listen_rdma->sc_accept_q)) {
		newxprt = list_entry(listen_rdma->sc_accept_q.next,
				     struct svcxprt_rdma, sc_accept_q);
		list_del_init(&newxprt->sc_accept_q);
	}
	if (!list_empty(&listen_rdma->sc_accept_q))
		set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags);
	spin_unlock_bh(&listen_rdma->sc_lock);
	if (!newxprt)
		return NULL;

	dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
		newxprt, newxprt->sc_cm_id);

	dev = newxprt->sc_cm_id->device;

	/* Qualify the transport resource defaults with the
	 * capabilities of this particular device.
	 */
	newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
				  (size_t)RPCSVC_MAXPAGES);
	newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
				       RPCSVC_MAXPAGES);
	newxprt->sc_max_req_size = svcrdma_max_req_size;
	newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
					 svcrdma_max_requests);
	newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
					    svcrdma_max_bc_requests);
	newxprt->sc_rq_depth = newxprt->sc_max_requests +
			       newxprt->sc_max_bc_requests;
	newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;

	if (!svc_rdma_prealloc_ctxts(newxprt))
		goto errout;
	if (!svc_rdma_prealloc_maps(newxprt))
		goto errout;

	/*
	 * Limit ORD based on client limit, local device limit, and
	 * configured svcrdma limit.
	 */
	newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
	newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);

	newxprt->sc_pd = ib_alloc_pd(dev, 0);
	if (IS_ERR(newxprt->sc_pd)) {
		dprintk("svcrdma: error creating PD for connect request\n");
		goto errout;
	}
	newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
					0, IB_POLL_SOFTIRQ);
	if (IS_ERR(newxprt->sc_sq_cq)) {
		dprintk("svcrdma: error creating SQ CQ for connect request\n");
		goto errout;
	}
	newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
					0, IB_POLL_SOFTIRQ);
	if (IS_ERR(newxprt->sc_rq_cq)) {
		dprintk("svcrdma: error creating RQ CQ for connect request\n");
		goto errout;
	}

	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = &newxprt->sc_xprt;
	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
	qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
	qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
	qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = newxprt->sc_sq_cq;
	qp_attr.recv_cq = newxprt->sc_rq_cq;
	dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n"
		"    cm_id->device=%p, sc_pd->device=%p\n"
		"    cap.max_send_wr = %d\n"
		"    cap.max_recv_wr = %d\n"
		"    cap.max_send_sge = %d\n"
		"    cap.max_recv_sge = %d\n",
		newxprt->sc_cm_id, newxprt->sc_pd,
		dev, newxprt->sc_pd->device,
		qp_attr.cap.max_send_wr,
		qp_attr.cap.max_recv_wr,
		qp_attr.cap.max_send_sge,
		qp_attr.cap.max_recv_sge);

	ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
	if (ret) {
		dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
		goto errout;
	}
	newxprt->sc_qp = newxprt->sc_cm_id->qp;

	/*
	 * Use the most secure set of MR resources based on the
	 * transport type and available memory management features in
	 * the device. Here's the table implemented below:
	 *
	 *		Fast	Global	DMA	Remote WR
	 *		Reg	LKEY	MR	Access
	 *		Sup'd	Sup'd	Needed	Needed
	 *
	 * IWARP	N	N	Y	Y
	 *		N	Y	Y	Y
	 *		Y	N	Y	N
	 *		Y	Y	N	-
	 *
	 * IB		N	N	Y	N
	 *		N	Y	N	-
	 *		Y	N	Y	N
	 *		Y	Y	N	-
	 *
	 * NB:	iWARP requires remote write access for the data sink
	 *	of an RDMA_READ. IB does not.
	 */
	newxprt->sc_reader = rdma_read_chunk_lcl;
	if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
		newxprt->sc_frmr_pg_list_len =
			dev->attrs.max_fast_reg_page_list_len;
		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
		newxprt->sc_reader = rdma_read_chunk_frmr;
	} else
		newxprt->sc_snd_w_inv = false;

	/*
	 * Determine if a DMA MR is required and if so, what privs are required
	 */
	if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
	    !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
		goto errout;

	if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;

	/* Post receive buffers */
	for (i = 0; i < newxprt->sc_max_requests; i++) {
		ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
		if (ret) {
			dprintk("svcrdma: failure posting receive buffers\n");
			goto errout;
		}
	}

	/* Swap out the handler */
	newxprt->sc_cm_id->event_handler = rdma_cma_handler;

	/* Construct RDMA-CM private message */
	pmsg.cp_magic = rpcrdma_cmp_magic;
	pmsg.cp_version = RPCRDMA_CMP_VERSION;
	pmsg.cp_flags = 0;
	pmsg.cp_send_size = pmsg.cp_recv_size =
		rpcrdma_encode_buffer_size(newxprt->sc_max_req_size);

	/* Accept Connection */
	set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 0;
	conn_param.initiator_depth = newxprt->sc_ord;
	conn_param.private_data = &pmsg;
	conn_param.private_data_len = sizeof(pmsg);
	ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
	if (ret) {
		dprintk("svcrdma: failed to accept new connection, ret=%d\n",
			ret);
		goto errout;
	}

	dprintk("svcrdma: new connection %p accepted with the following "
		"attributes:\n"
		"    local_ip     : %pI4\n"
		"    local_port   : %d\n"
		"    remote_ip    : %pI4\n"
		"    remote_port  : %d\n"
		"    max_sge      : %d\n"
		"    max_sge_rd   : %d\n"
		"    sq_depth     : %d\n"
		"    max_requests : %d\n"
		"    ord          : %d\n",
		newxprt,
		&((struct sockaddr_in *)&newxprt->sc_cm_id->
			route.addr.src_addr)->sin_addr.s_addr,
		ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
			route.addr.src_addr)->sin_port),
		&((struct sockaddr_in *)&newxprt->sc_cm_id->
			route.addr.dst_addr)->sin_addr.s_addr,
		ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
			route.addr.dst_addr)->sin_port),
		newxprt->sc_max_sge,
		newxprt->sc_max_sge_rd,
		newxprt->sc_sq_depth,
		newxprt->sc_max_requests,
		newxprt->sc_ord);

	return &newxprt->sc_xprt;

errout:
	dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
	/* Take a reference in case the DTO handler runs */
	svc_xprt_get(&newxprt->sc_xprt);
	if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
		ib_destroy_qp(newxprt->sc_qp);
	rdma_destroy_id(newxprt->sc_cm_id);
	/* This call to put will destroy the transport */
	svc_xprt_put(&newxprt->sc_xprt);
	return NULL;
}

static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
{
}

/*
 * When connected, an svc_xprt has at least two references:
 *
 * - A reference held by the cm_id between the ESTABLISHED and
 *   DISCONNECTED events. If the remote peer disconnected first, this
 *   reference could be gone.
 *
 * - A reference held by the svc_recv code that called this function
 *   as part of close processing.
 *
 * At a minimum, one reference should still be held.
 */
static void svc_rdma_detach(struct svc_xprt *xprt)
{
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);

	dprintk("svc: svc_rdma_detach(%p)\n", xprt);

	/* Disconnect and flush posted WQE */
	rdma_disconnect(rdma->sc_cm_id);
}
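
/*
 * __svc_rdma_free - Tear down the transport from workqueue context:
 * drain the QP, release queued but unprocessed contexts, and free all
 * verbs resources before destroying the CM ID.
 */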
static void __svc_rdma_free(struct work_struct *work)
{
	struct svcxprt_rdma *rdma =
		container_of(work, struct svcxprt_rdma, sc_work);
	struct svc_xprt *xprt = &rdma->sc_xprt;

	dprintk("svcrdma: %s(%p)\n", __func__, rdma);

	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
		ib_drain_qp(rdma->sc_qp);

	/* We should only be called from kref_put */
	if (atomic_read(&xprt->xpt_ref.refcount) != 0)
		pr_err("svcrdma: sc_xprt still in use? (%d)\n",
		       atomic_read(&xprt->xpt_ref.refcount));

	/*
	 * Destroy queued, but not processed read completions. Note
	 * that this cleanup has to be done before destroying the
	 * cm_id because the device ptr is needed to unmap the dma in
	 * svc_rdma_put_context.
	 */
	while (!list_empty(&rdma->sc_read_complete_q)) {
		struct svc_rdma_op_ctxt *ctxt;

		ctxt = list_entry(rdma->sc_read_complete_q.next,
				  struct svc_rdma_op_ctxt,
				  dto_q);
		list_del_init(&ctxt->dto_q);
		svc_rdma_put_context(ctxt, 1);
	}

	/* Destroy queued, but not processed recv completions */
	while (!list_empty(&rdma->sc_rq_dto_q)) {
		struct svc_rdma_op_ctxt *ctxt;

		ctxt = list_entry(rdma->sc_rq_dto_q.next,
				  struct svc_rdma_op_ctxt,
				  dto_q);
		list_del_init(&ctxt->dto_q);
		svc_rdma_put_context(ctxt, 1);
	}

	/* Warn if we leaked a resource or under-referenced */
	if (rdma->sc_ctxt_used != 0)
		pr_err("svcrdma: ctxt still in use? (%d)\n",
		       rdma->sc_ctxt_used);
	if (atomic_read(&rdma->sc_dma_used) != 0)
		pr_err("svcrdma: dma still in use? (%d)\n",
		       atomic_read(&rdma->sc_dma_used));

	/* Final put of backchannel client transport */
	if (xprt->xpt_bc_xprt) {
		xprt_put(xprt->xpt_bc_xprt);
		xprt->xpt_bc_xprt = NULL;
	}

	rdma_dealloc_frmr_q(rdma);
	svc_rdma_destroy_ctxts(rdma);
	svc_rdma_destroy_maps(rdma);

	/* Destroy the QP if present (not a listener) */
	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
		ib_destroy_qp(rdma->sc_qp);

	if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
		ib_free_cq(rdma->sc_sq_cq);

	if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
		ib_free_cq(rdma->sc_rq_cq);

	if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
		ib_dealloc_pd(rdma->sc_pd);

	/* Destroy the CM ID */
	rdma_destroy_id(rdma->sc_cm_id);

	kfree(rdma);
}

static void svc_rdma_free(struct svc_xprt *xprt)
{
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);

	INIT_WORK(&rdma->sc_work, __svc_rdma_free);
	queue_work(svc_rdma_wq, &rdma->sc_work);
}

static int svc_rdma_has_wspace(struct svc_xprt *xprt)
{
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);

	/*
	 * If there are already waiters on the SQ,
	 * return false.
	 */
	if (waitqueue_active(&rdma->sc_send_wait))
		return 0;

	/* Otherwise return true. */
	return 1;
}

static int svc_rdma_secure_port(struct svc_rqst *rqstp)
{
	return 1;
}

static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
{
}
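
/*
 * svc_rdma_send - Post a chain of Send WRs on the transport's send
 * queue. The caller is throttled when the SQ is full: the posting
 * thread sleeps on sc_send_wait until completions free up SQ slots.
 * A transport reference is taken for each WR posted.
 */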
int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
{
	struct ib_send_wr *bad_wr, *n_wr;
	int wr_count;
	int i;
	int ret;

	if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
		return -ENOTCONN;

	wr_count = 1;
	for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
		wr_count++;

	/* If the SQ is full, wait until an SQ entry is available */
	while (1) {
		spin_lock_bh(&xprt->sc_lock);
		if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
			spin_unlock_bh(&xprt->sc_lock);
			atomic_inc(&rdma_stat_sq_starve);

			/* Wait until SQ WR available if SQ still full */
			wait_event(xprt->sc_send_wait,
				   atomic_read(&xprt->sc_sq_count) <
				   xprt->sc_sq_depth);
			if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
				return -ENOTCONN;
			continue;
		}
		/* Take a transport ref for each WR posted */
		for (i = 0; i < wr_count; i++)
			svc_xprt_get(&xprt->sc_xprt);

		/* Bump used SQ WR count and post */
		atomic_add(wr_count, &xprt->sc_sq_count);
		ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
		if (ret) {
			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
			atomic_sub(wr_count, &xprt->sc_sq_count);
			for (i = 0; i < wr_count; i++)
				svc_xprt_put(&xprt->sc_xprt);
			dprintk("svcrdma: failed to post SQ WR rc=%d, "
				"sc_sq_count=%d, sc_sq_depth=%d\n",
				ret, atomic_read(&xprt->sc_sq_count),
				xprt->sc_sq_depth);
		}
		spin_unlock_bh(&xprt->sc_lock);
		if (ret)
			wake_up(&xprt->sc_send_wait);
		break;
	}
	return ret;
}