kcmsock.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112
  1. /*
  2. * Kernel Connection Multiplexor
  3. *
  4. * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2
  8. * as published by the Free Software Foundation.
  9. */
  10. #include <linux/bpf.h>
  11. #include <linux/errno.h>
  12. #include <linux/errqueue.h>
  13. #include <linux/file.h>
  14. #include <linux/in.h>
  15. #include <linux/kernel.h>
  16. #include <linux/module.h>
  17. #include <linux/net.h>
  18. #include <linux/netdevice.h>
  19. #include <linux/poll.h>
  20. #include <linux/rculist.h>
  21. #include <linux/skbuff.h>
  22. #include <linux/socket.h>
  23. #include <linux/uaccess.h>
  24. #include <linux/workqueue.h>
  25. #include <linux/syscalls.h>
  26. #include <net/kcm.h>
  27. #include <net/netns/generic.h>
  28. #include <net/sock.h>
  29. #include <uapi/linux/kcm.h>
  30. unsigned int kcm_net_id;
  31. static struct kmem_cache *kcm_psockp __read_mostly;
  32. static struct kmem_cache *kcm_muxp __read_mostly;
  33. static struct workqueue_struct *kcm_wq;
  /* View a generic socket pointer as a KCM socket.
   *
   * This is a plain pointer cast, so it is only valid for sockets that
   * really are KCM sockets (presumably struct kcm_sock starts with its
   * struct sock member -- confirm in net/kcm.h).
   */
  static inline struct kcm_sock *kcm_sk(const struct sock *sk)
  {
      struct kcm_sock *kcm = (struct kcm_sock *)sk;

      return kcm;
  }
  38. static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
  39. {
  40. return (struct kcm_tx_msg *)skb->cb;
  41. }
  42. static void report_csk_error(struct sock *csk, int err)
  43. {
  44. csk->sk_err = EPIPE;
  45. csk->sk_error_report(csk);
  46. }
  47. static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
  48. bool wakeup_kcm)
  49. {
  50. struct sock *csk = psock->sk;
  51. struct kcm_mux *mux = psock->mux;
  52. /* Unrecoverable error in transmit */
  53. spin_lock_bh(&mux->lock);
  54. if (psock->tx_stopped) {
  55. spin_unlock_bh(&mux->lock);
  56. return;
  57. }
  58. psock->tx_stopped = 1;
  59. KCM_STATS_INCR(psock->stats.tx_aborts);
  60. if (!psock->tx_kcm) {
  61. /* Take off psocks_avail list */
  62. list_del(&psock->psock_avail_list);
  63. } else if (wakeup_kcm) {
  64. /* In this case psock is being aborted while outside of
  65. * write_msgs and psock is reserved. Schedule tx_work
  66. * to handle the failure there. Need to commit tx_stopped
  67. * before queuing work.
  68. */
  69. smp_mb();
  70. queue_work(kcm_wq, &psock->tx_kcm->tx_work);
  71. }
  72. spin_unlock_bh(&mux->lock);
  73. /* Report error on lower socket */
  74. report_csk_error(csk, err);
  75. }
  76. /* RX mux lock held. */
  77. static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
  78. struct kcm_psock *psock)
  79. {
  80. STRP_STATS_ADD(mux->stats.rx_bytes,
  81. psock->strp.stats.rx_bytes -
  82. psock->saved_rx_bytes);
  83. mux->stats.rx_msgs +=
  84. psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
  85. psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
  86. psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
  87. }
  88. static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
  89. struct kcm_psock *psock)
  90. {
  91. KCM_STATS_ADD(mux->stats.tx_bytes,
  92. psock->stats.tx_bytes - psock->saved_tx_bytes);
  93. mux->stats.tx_msgs +=
  94. psock->stats.tx_msgs - psock->saved_tx_msgs;
  95. psock->saved_tx_msgs = psock->stats.tx_msgs;
  96. psock->saved_tx_bytes = psock->stats.tx_bytes;
  97. }
  98. static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
  99. /* KCM is ready to receive messages on its queue-- either the KCM is new or
  100. * has become unblocked after being blocked on full socket buffer. Queue any
  101. * pending ready messages on a psock. RX mux lock held.
  102. */
  103. static void kcm_rcv_ready(struct kcm_sock *kcm)
  104. {
  105. struct kcm_mux *mux = kcm->mux;
  106. struct kcm_psock *psock;
  107. struct sk_buff *skb;
  108. if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled))
  109. return;
  110. while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) {
  111. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  112. /* Assuming buffer limit has been reached */
  113. skb_queue_head(&mux->rx_hold_queue, skb);
  114. WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
  115. return;
  116. }
  117. }
  118. while (!list_empty(&mux->psocks_ready)) {
  119. psock = list_first_entry(&mux->psocks_ready, struct kcm_psock,
  120. psock_ready_list);
  121. if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) {
  122. /* Assuming buffer limit has been reached */
  123. WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
  124. return;
  125. }
  126. /* Consumed the ready message on the psock. Schedule rx_work to
  127. * get more messages.
  128. */
  129. list_del(&psock->psock_ready_list);
  130. psock->ready_rx_msg = NULL;
  131. /* Commit clearing of ready_rx_msg for queuing work */
  132. smp_mb();
  133. strp_unpause(&psock->strp);
  134. strp_check_rcv(&psock->strp);
  135. }
  136. /* Buffer limit is okay now, add to ready list */
  137. list_add_tail(&kcm->wait_rx_list,
  138. &kcm->mux->kcm_rx_waiters);
  139. kcm->rx_wait = true;
  140. }
  141. static void kcm_rfree(struct sk_buff *skb)
  142. {
  143. struct sock *sk = skb->sk;
  144. struct kcm_sock *kcm = kcm_sk(sk);
  145. struct kcm_mux *mux = kcm->mux;
  146. unsigned int len = skb->truesize;
  147. sk_mem_uncharge(sk, len);
  148. atomic_sub(len, &sk->sk_rmem_alloc);
  149. /* For reading rx_wait and rx_psock without holding lock */
  150. smp_mb__after_atomic();
  151. if (!kcm->rx_wait && !kcm->rx_psock &&
  152. sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
  153. spin_lock_bh(&mux->rx_lock);
  154. kcm_rcv_ready(kcm);
  155. spin_unlock_bh(&mux->rx_lock);
  156. }
  157. }
  158. static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
  159. {
  160. struct sk_buff_head *list = &sk->sk_receive_queue;
  161. if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
  162. return -ENOMEM;
  163. if (!sk_rmem_schedule(sk, skb, skb->truesize))
  164. return -ENOBUFS;
  165. skb->dev = NULL;
  166. skb_orphan(skb);
  167. skb->sk = sk;
  168. skb->destructor = kcm_rfree;
  169. atomic_add(skb->truesize, &sk->sk_rmem_alloc);
  170. sk_mem_charge(sk, skb->truesize);
  171. skb_queue_tail(list, skb);
  172. if (!sock_flag(sk, SOCK_DEAD))
  173. sk->sk_data_ready(sk);
  174. return 0;
  175. }
  176. /* Requeue received messages for a kcm socket to other kcm sockets. This is
  177. * called with a kcm socket is receive disabled.
  178. * RX mux lock held.
  179. */
  180. static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
  181. {
  182. struct sk_buff *skb;
  183. struct kcm_sock *kcm;
  184. while ((skb = __skb_dequeue(head))) {
  185. /* Reset destructor to avoid calling kcm_rcv_ready */
  186. skb->destructor = sock_rfree;
  187. skb_orphan(skb);
  188. try_again:
  189. if (list_empty(&mux->kcm_rx_waiters)) {
  190. skb_queue_tail(&mux->rx_hold_queue, skb);
  191. continue;
  192. }
  193. kcm = list_first_entry(&mux->kcm_rx_waiters,
  194. struct kcm_sock, wait_rx_list);
  195. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  196. /* Should mean socket buffer full */
  197. list_del(&kcm->wait_rx_list);
  198. kcm->rx_wait = false;
  199. /* Commit rx_wait to read in kcm_free */
  200. smp_wmb();
  201. goto try_again;
  202. }
  203. }
  204. }
  205. /* Lower sock lock held */
  206. static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
  207. struct sk_buff *head)
  208. {
  209. struct kcm_mux *mux = psock->mux;
  210. struct kcm_sock *kcm;
  211. WARN_ON(psock->ready_rx_msg);
  212. if (psock->rx_kcm)
  213. return psock->rx_kcm;
  214. spin_lock_bh(&mux->rx_lock);
  215. if (psock->rx_kcm) {
  216. spin_unlock_bh(&mux->rx_lock);
  217. return psock->rx_kcm;
  218. }
  219. kcm_update_rx_mux_stats(mux, psock);
  220. if (list_empty(&mux->kcm_rx_waiters)) {
  221. psock->ready_rx_msg = head;
  222. strp_pause(&psock->strp);
  223. list_add_tail(&psock->psock_ready_list,
  224. &mux->psocks_ready);
  225. spin_unlock_bh(&mux->rx_lock);
  226. return NULL;
  227. }
  228. kcm = list_first_entry(&mux->kcm_rx_waiters,
  229. struct kcm_sock, wait_rx_list);
  230. list_del(&kcm->wait_rx_list);
  231. kcm->rx_wait = false;
  232. psock->rx_kcm = kcm;
  233. kcm->rx_psock = psock;
  234. spin_unlock_bh(&mux->rx_lock);
  235. return kcm;
  236. }
  237. static void kcm_done(struct kcm_sock *kcm);
  238. static void kcm_done_work(struct work_struct *w)
  239. {
  240. kcm_done(container_of(w, struct kcm_sock, done_work));
  241. }
  242. /* Lower sock held */
  243. static void unreserve_rx_kcm(struct kcm_psock *psock,
  244. bool rcv_ready)
  245. {
  246. struct kcm_sock *kcm = psock->rx_kcm;
  247. struct kcm_mux *mux = psock->mux;
  248. if (!kcm)
  249. return;
  250. spin_lock_bh(&mux->rx_lock);
  251. psock->rx_kcm = NULL;
  252. kcm->rx_psock = NULL;
  253. /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
  254. * kcm_rfree
  255. */
  256. smp_mb();
  257. if (unlikely(kcm->done)) {
  258. spin_unlock_bh(&mux->rx_lock);
  259. /* Need to run kcm_done in a task since we need to qcquire
  260. * callback locks which may already be held here.
  261. */
  262. INIT_WORK(&kcm->done_work, kcm_done_work);
  263. schedule_work(&kcm->done_work);
  264. return;
  265. }
  266. if (unlikely(kcm->rx_disabled)) {
  267. requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  268. } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) {
  269. /* Check for degenerative race with rx_wait that all
  270. * data was dequeued (accounted for in kcm_rfree).
  271. */
  272. kcm_rcv_ready(kcm);
  273. }
  274. spin_unlock_bh(&mux->rx_lock);
  275. }
  276. /* Lower sock lock held */
  277. static void psock_data_ready(struct sock *sk)
  278. {
  279. struct kcm_psock *psock;
  280. read_lock_bh(&sk->sk_callback_lock);
  281. psock = (struct kcm_psock *)sk->sk_user_data;
  282. if (likely(psock))
  283. strp_data_ready(&psock->strp);
  284. read_unlock_bh(&sk->sk_callback_lock);
  285. }
  286. /* Called with lower sock held */
  287. static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
  288. {
  289. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  290. struct kcm_sock *kcm;
  291. try_queue:
  292. kcm = reserve_rx_kcm(psock, skb);
  293. if (!kcm) {
  294. /* Unable to reserve a KCM, message is held in psock and strp
  295. * is paused.
  296. */
  297. return;
  298. }
  299. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  300. /* Should mean socket buffer full */
  301. unreserve_rx_kcm(psock, false);
  302. goto try_queue;
  303. }
  304. }
  305. static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
  306. {
  307. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  308. struct bpf_prog *prog = psock->bpf_prog;
  309. return (*prog->bpf_func)(skb, prog->insnsi);
  310. }
  311. static int kcm_read_sock_done(struct strparser *strp, int err)
  312. {
  313. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  314. unreserve_rx_kcm(psock, true);
  315. return err;
  316. }
  317. static void psock_state_change(struct sock *sk)
  318. {
  319. /* TCP only does a POLLIN for a half close. Do a POLLHUP here
  320. * since application will normally not poll with POLLIN
  321. * on the TCP sockets.
  322. */
  323. report_csk_error(sk, EPIPE);
  324. }
  325. static void psock_write_space(struct sock *sk)
  326. {
  327. struct kcm_psock *psock;
  328. struct kcm_mux *mux;
  329. struct kcm_sock *kcm;
  330. read_lock_bh(&sk->sk_callback_lock);
  331. psock = (struct kcm_psock *)sk->sk_user_data;
  332. if (unlikely(!psock))
  333. goto out;
  334. mux = psock->mux;
  335. spin_lock_bh(&mux->lock);
  336. /* Check if the socket is reserved so someone is waiting for sending. */
  337. kcm = psock->tx_kcm;
  338. if (kcm && !unlikely(kcm->tx_stopped))
  339. queue_work(kcm_wq, &kcm->tx_work);
  340. spin_unlock_bh(&mux->lock);
  341. out:
  342. read_unlock_bh(&sk->sk_callback_lock);
  343. }
  344. static void unreserve_psock(struct kcm_sock *kcm);
  345. /* kcm sock is locked. */
  346. static struct kcm_psock *reserve_psock(struct kcm_sock *kcm)
  347. {
  348. struct kcm_mux *mux = kcm->mux;
  349. struct kcm_psock *psock;
  350. psock = kcm->tx_psock;
  351. smp_rmb(); /* Must read tx_psock before tx_wait */
  352. if (psock) {
  353. WARN_ON(kcm->tx_wait);
  354. if (unlikely(psock->tx_stopped))
  355. unreserve_psock(kcm);
  356. else
  357. return kcm->tx_psock;
  358. }
  359. spin_lock_bh(&mux->lock);
  360. /* Check again under lock to see if psock was reserved for this
  361. * psock via psock_unreserve.
  362. */
  363. psock = kcm->tx_psock;
  364. if (unlikely(psock)) {
  365. WARN_ON(kcm->tx_wait);
  366. spin_unlock_bh(&mux->lock);
  367. return kcm->tx_psock;
  368. }
  369. if (!list_empty(&mux->psocks_avail)) {
  370. psock = list_first_entry(&mux->psocks_avail,
  371. struct kcm_psock,
  372. psock_avail_list);
  373. list_del(&psock->psock_avail_list);
  374. if (kcm->tx_wait) {
  375. list_del(&kcm->wait_psock_list);
  376. kcm->tx_wait = false;
  377. }
  378. kcm->tx_psock = psock;
  379. psock->tx_kcm = kcm;
  380. KCM_STATS_INCR(psock->stats.reserved);
  381. } else if (!kcm->tx_wait) {
  382. list_add_tail(&kcm->wait_psock_list,
  383. &mux->kcm_tx_waiters);
  384. kcm->tx_wait = true;
  385. }
  386. spin_unlock_bh(&mux->lock);
  387. return psock;
  388. }
  389. /* mux lock held */
  390. static void psock_now_avail(struct kcm_psock *psock)
  391. {
  392. struct kcm_mux *mux = psock->mux;
  393. struct kcm_sock *kcm;
  394. if (list_empty(&mux->kcm_tx_waiters)) {
  395. list_add_tail(&psock->psock_avail_list,
  396. &mux->psocks_avail);
  397. } else {
  398. kcm = list_first_entry(&mux->kcm_tx_waiters,
  399. struct kcm_sock,
  400. wait_psock_list);
  401. list_del(&kcm->wait_psock_list);
  402. kcm->tx_wait = false;
  403. psock->tx_kcm = kcm;
  404. /* Commit before changing tx_psock since that is read in
  405. * reserve_psock before queuing work.
  406. */
  407. smp_mb();
  408. kcm->tx_psock = psock;
  409. KCM_STATS_INCR(psock->stats.reserved);
  410. queue_work(kcm_wq, &kcm->tx_work);
  411. }
  412. }
  413. /* kcm sock is locked. */
  414. static void unreserve_psock(struct kcm_sock *kcm)
  415. {
  416. struct kcm_psock *psock;
  417. struct kcm_mux *mux = kcm->mux;
  418. spin_lock_bh(&mux->lock);
  419. psock = kcm->tx_psock;
  420. if (WARN_ON(!psock)) {
  421. spin_unlock_bh(&mux->lock);
  422. return;
  423. }
  424. smp_rmb(); /* Read tx_psock before tx_wait */
  425. kcm_update_tx_mux_stats(mux, psock);
  426. WARN_ON(kcm->tx_wait);
  427. kcm->tx_psock = NULL;
  428. psock->tx_kcm = NULL;
  429. KCM_STATS_INCR(psock->stats.unreserved);
  430. if (unlikely(psock->tx_stopped)) {
  431. if (psock->done) {
  432. /* Deferred free */
  433. list_del(&psock->psock_list);
  434. mux->psocks_cnt--;
  435. sock_put(psock->sk);
  436. fput(psock->sk->sk_socket->file);
  437. kmem_cache_free(kcm_psockp, psock);
  438. }
  439. /* Don't put back on available list */
  440. spin_unlock_bh(&mux->lock);
  441. return;
  442. }
  443. psock_now_avail(psock);
  444. spin_unlock_bh(&mux->lock);
  445. }
  446. static void kcm_report_tx_retry(struct kcm_sock *kcm)
  447. {
  448. struct kcm_mux *mux = kcm->mux;
  449. spin_lock_bh(&mux->lock);
  450. KCM_STATS_INCR(mux->stats.tx_retries);
  451. spin_unlock_bh(&mux->lock);
  452. }
  453. /* Write any messages ready on the kcm socket. Called with kcm sock lock
  454. * held. Return bytes actually sent or error.
  455. */
  456. static int kcm_write_msgs(struct kcm_sock *kcm)
  457. {
  458. struct sock *sk = &kcm->sk;
  459. struct kcm_psock *psock;
  460. struct sk_buff *skb, *head;
  461. struct kcm_tx_msg *txm;
  462. unsigned short fragidx, frag_offset;
  463. unsigned int sent, total_sent = 0;
  464. int ret = 0;
  465. kcm->tx_wait_more = false;
  466. psock = kcm->tx_psock;
  467. if (unlikely(psock && psock->tx_stopped)) {
  468. /* A reserved psock was aborted asynchronously. Unreserve
  469. * it and we'll retry the message.
  470. */
  471. unreserve_psock(kcm);
  472. kcm_report_tx_retry(kcm);
  473. if (skb_queue_empty(&sk->sk_write_queue))
  474. return 0;
  475. kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
  476. } else if (skb_queue_empty(&sk->sk_write_queue)) {
  477. return 0;
  478. }
  479. head = skb_peek(&sk->sk_write_queue);
  480. txm = kcm_tx_msg(head);
  481. if (txm->sent) {
  482. /* Send of first skbuff in queue already in progress */
  483. if (WARN_ON(!psock)) {
  484. ret = -EINVAL;
  485. goto out;
  486. }
  487. sent = txm->sent;
  488. frag_offset = txm->frag_offset;
  489. fragidx = txm->fragidx;
  490. skb = txm->frag_skb;
  491. goto do_frag;
  492. }
  493. try_again:
  494. psock = reserve_psock(kcm);
  495. if (!psock)
  496. goto out;
  497. do {
  498. skb = head;
  499. txm = kcm_tx_msg(head);
  500. sent = 0;
  501. do_frag_list:
  502. if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
  503. ret = -EINVAL;
  504. goto out;
  505. }
  506. for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
  507. fragidx++) {
  508. skb_frag_t *frag;
  509. frag_offset = 0;
  510. do_frag:
  511. frag = &skb_shinfo(skb)->frags[fragidx];
  512. if (WARN_ON(!frag->size)) {
  513. ret = -EINVAL;
  514. goto out;
  515. }
  516. ret = kernel_sendpage(psock->sk->sk_socket,
  517. frag->page.p,
  518. frag->page_offset + frag_offset,
  519. frag->size - frag_offset,
  520. MSG_DONTWAIT);
  521. if (ret <= 0) {
  522. if (ret == -EAGAIN) {
  523. /* Save state to try again when there's
  524. * write space on the socket
  525. */
  526. txm->sent = sent;
  527. txm->frag_offset = frag_offset;
  528. txm->fragidx = fragidx;
  529. txm->frag_skb = skb;
  530. ret = 0;
  531. goto out;
  532. }
  533. /* Hard failure in sending message, abort this
  534. * psock since it has lost framing
  535. * synchonization and retry sending the
  536. * message from the beginning.
  537. */
  538. kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
  539. true);
  540. unreserve_psock(kcm);
  541. txm->sent = 0;
  542. kcm_report_tx_retry(kcm);
  543. ret = 0;
  544. goto try_again;
  545. }
  546. sent += ret;
  547. frag_offset += ret;
  548. KCM_STATS_ADD(psock->stats.tx_bytes, ret);
  549. if (frag_offset < frag->size) {
  550. /* Not finished with this frag */
  551. goto do_frag;
  552. }
  553. }
  554. if (skb == head) {
  555. if (skb_has_frag_list(skb)) {
  556. skb = skb_shinfo(skb)->frag_list;
  557. goto do_frag_list;
  558. }
  559. } else if (skb->next) {
  560. skb = skb->next;
  561. goto do_frag_list;
  562. }
  563. /* Successfully sent the whole packet, account for it. */
  564. skb_dequeue(&sk->sk_write_queue);
  565. kfree_skb(head);
  566. sk->sk_wmem_queued -= sent;
  567. total_sent += sent;
  568. KCM_STATS_INCR(psock->stats.tx_msgs);
  569. } while ((head = skb_peek(&sk->sk_write_queue)));
  570. out:
  571. if (!head) {
  572. /* Done with all queued messages. */
  573. WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
  574. unreserve_psock(kcm);
  575. }
  576. /* Check if write space is available */
  577. sk->sk_write_space(sk);
  578. return total_sent ? : ret;
  579. }
  580. static void kcm_tx_work(struct work_struct *w)
  581. {
  582. struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work);
  583. struct sock *sk = &kcm->sk;
  584. int err;
  585. lock_sock(sk);
  586. /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
  587. * aborts
  588. */
  589. err = kcm_write_msgs(kcm);
  590. if (err < 0) {
  591. /* Hard failure in write, report error on KCM socket */
  592. pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
  593. report_csk_error(&kcm->sk, -err);
  594. goto out;
  595. }
  596. /* Primarily for SOCK_SEQPACKET sockets */
  597. if (likely(sk->sk_socket) &&
  598. test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
  599. clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  600. sk->sk_write_space(sk);
  601. }
  602. out:
  603. release_sock(sk);
  604. }
  605. static void kcm_push(struct kcm_sock *kcm)
  606. {
  607. if (kcm->tx_wait_more)
  608. kcm_write_msgs(kcm);
  609. }
  610. static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
  611. int offset, size_t size, int flags)
  612. {
  613. struct sock *sk = sock->sk;
  614. struct kcm_sock *kcm = kcm_sk(sk);
  615. struct sk_buff *skb = NULL, *head = NULL;
  616. long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
  617. bool eor;
  618. int err = 0;
  619. int i;
  620. if (flags & MSG_SENDPAGE_NOTLAST)
  621. flags |= MSG_MORE;
  622. /* No MSG_EOR from splice, only look at MSG_MORE */
  623. eor = !(flags & MSG_MORE);
  624. lock_sock(sk);
  625. sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  626. err = -EPIPE;
  627. if (sk->sk_err)
  628. goto out_error;
  629. if (kcm->seq_skb) {
  630. /* Previously opened message */
  631. head = kcm->seq_skb;
  632. skb = kcm_tx_msg(head)->last_skb;
  633. i = skb_shinfo(skb)->nr_frags;
  634. if (skb_can_coalesce(skb, i, page, offset)) {
  635. skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
  636. skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
  637. goto coalesced;
  638. }
  639. if (i >= MAX_SKB_FRAGS) {
  640. struct sk_buff *tskb;
  641. tskb = alloc_skb(0, sk->sk_allocation);
  642. while (!tskb) {
  643. kcm_push(kcm);
  644. err = sk_stream_wait_memory(sk, &timeo);
  645. if (err)
  646. goto out_error;
  647. }
  648. if (head == skb)
  649. skb_shinfo(head)->frag_list = tskb;
  650. else
  651. skb->next = tskb;
  652. skb = tskb;
  653. skb->ip_summed = CHECKSUM_UNNECESSARY;
  654. i = 0;
  655. }
  656. } else {
  657. /* Call the sk_stream functions to manage the sndbuf mem. */
  658. if (!sk_stream_memory_free(sk)) {
  659. kcm_push(kcm);
  660. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  661. err = sk_stream_wait_memory(sk, &timeo);
  662. if (err)
  663. goto out_error;
  664. }
  665. head = alloc_skb(0, sk->sk_allocation);
  666. while (!head) {
  667. kcm_push(kcm);
  668. err = sk_stream_wait_memory(sk, &timeo);
  669. if (err)
  670. goto out_error;
  671. }
  672. skb = head;
  673. i = 0;
  674. }
  675. get_page(page);
  676. skb_fill_page_desc(skb, i, page, offset, size);
  677. skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
  678. coalesced:
  679. skb->len += size;
  680. skb->data_len += size;
  681. skb->truesize += size;
  682. sk->sk_wmem_queued += size;
  683. sk_mem_charge(sk, size);
  684. if (head != skb) {
  685. head->len += size;
  686. head->data_len += size;
  687. head->truesize += size;
  688. }
  689. if (eor) {
  690. bool not_busy = skb_queue_empty(&sk->sk_write_queue);
  691. /* Message complete, queue it on send buffer */
  692. __skb_queue_tail(&sk->sk_write_queue, head);
  693. kcm->seq_skb = NULL;
  694. KCM_STATS_INCR(kcm->stats.tx_msgs);
  695. if (flags & MSG_BATCH) {
  696. kcm->tx_wait_more = true;
  697. } else if (kcm->tx_wait_more || not_busy) {
  698. err = kcm_write_msgs(kcm);
  699. if (err < 0) {
  700. /* We got a hard error in write_msgs but have
  701. * already queued this message. Report an error
  702. * in the socket, but don't affect return value
  703. * from sendmsg
  704. */
  705. pr_warn("KCM: Hard failure on kcm_write_msgs\n");
  706. report_csk_error(&kcm->sk, -err);
  707. }
  708. }
  709. } else {
  710. /* Message not complete, save state */
  711. kcm->seq_skb = head;
  712. kcm_tx_msg(head)->last_skb = skb;
  713. }
  714. KCM_STATS_ADD(kcm->stats.tx_bytes, size);
  715. release_sock(sk);
  716. return size;
  717. out_error:
  718. kcm_push(kcm);
  719. err = sk_stream_error(sk, flags, err);
  720. /* make sure we wake any epoll edge trigger waiter */
  721. if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
  722. sk->sk_write_space(sk);
  723. release_sock(sk);
  724. return err;
  725. }
  726. static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
  727. {
  728. struct sock *sk = sock->sk;
  729. struct kcm_sock *kcm = kcm_sk(sk);
  730. struct sk_buff *skb = NULL, *head = NULL;
  731. size_t copy, copied = 0;
  732. long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
  733. int eor = (sock->type == SOCK_DGRAM) ?
  734. !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
  735. int err = -EPIPE;
  736. lock_sock(sk);
  737. /* Per tcp_sendmsg this should be in poll */
  738. sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  739. if (sk->sk_err)
  740. goto out_error;
  741. if (kcm->seq_skb) {
  742. /* Previously opened message */
  743. head = kcm->seq_skb;
  744. skb = kcm_tx_msg(head)->last_skb;
  745. goto start;
  746. }
  747. /* Call the sk_stream functions to manage the sndbuf mem. */
  748. if (!sk_stream_memory_free(sk)) {
  749. kcm_push(kcm);
  750. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  751. err = sk_stream_wait_memory(sk, &timeo);
  752. if (err)
  753. goto out_error;
  754. }
  755. if (msg_data_left(msg)) {
  756. /* New message, alloc head skb */
  757. head = alloc_skb(0, sk->sk_allocation);
  758. while (!head) {
  759. kcm_push(kcm);
  760. err = sk_stream_wait_memory(sk, &timeo);
  761. if (err)
  762. goto out_error;
  763. head = alloc_skb(0, sk->sk_allocation);
  764. }
  765. skb = head;
  766. /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
  767. * csum_and_copy_from_iter from skb_do_copy_data_nocache.
  768. */
  769. skb->ip_summed = CHECKSUM_UNNECESSARY;
  770. }
  771. start:
  772. while (msg_data_left(msg)) {
  773. bool merge = true;
  774. int i = skb_shinfo(skb)->nr_frags;
  775. struct page_frag *pfrag = sk_page_frag(sk);
  776. if (!sk_page_frag_refill(sk, pfrag))
  777. goto wait_for_memory;
  778. if (!skb_can_coalesce(skb, i, pfrag->page,
  779. pfrag->offset)) {
  780. if (i == MAX_SKB_FRAGS) {
  781. struct sk_buff *tskb;
  782. tskb = alloc_skb(0, sk->sk_allocation);
  783. if (!tskb)
  784. goto wait_for_memory;
  785. if (head == skb)
  786. skb_shinfo(head)->frag_list = tskb;
  787. else
  788. skb->next = tskb;
  789. skb = tskb;
  790. skb->ip_summed = CHECKSUM_UNNECESSARY;
  791. continue;
  792. }
  793. merge = false;
  794. }
  795. copy = min_t(int, msg_data_left(msg),
  796. pfrag->size - pfrag->offset);
  797. if (!sk_wmem_schedule(sk, copy))
  798. goto wait_for_memory;
  799. err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
  800. pfrag->page,
  801. pfrag->offset,
  802. copy);
  803. if (err)
  804. goto out_error;
  805. /* Update the skb. */
  806. if (merge) {
  807. skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
  808. } else {
  809. skb_fill_page_desc(skb, i, pfrag->page,
  810. pfrag->offset, copy);
  811. get_page(pfrag->page);
  812. }
  813. pfrag->offset += copy;
  814. copied += copy;
  815. if (head != skb) {
  816. head->len += copy;
  817. head->data_len += copy;
  818. }
  819. continue;
  820. wait_for_memory:
  821. kcm_push(kcm);
  822. err = sk_stream_wait_memory(sk, &timeo);
  823. if (err)
  824. goto out_error;
  825. }
  826. if (eor) {
  827. bool not_busy = skb_queue_empty(&sk->sk_write_queue);
  828. if (head) {
  829. /* Message complete, queue it on send buffer */
  830. __skb_queue_tail(&sk->sk_write_queue, head);
  831. kcm->seq_skb = NULL;
  832. KCM_STATS_INCR(kcm->stats.tx_msgs);
  833. }
  834. if (msg->msg_flags & MSG_BATCH) {
  835. kcm->tx_wait_more = true;
  836. } else if (kcm->tx_wait_more || not_busy) {
  837. err = kcm_write_msgs(kcm);
  838. if (err < 0) {
  839. /* We got a hard error in write_msgs but have
  840. * already queued this message. Report an error
  841. * in the socket, but don't affect return value
  842. * from sendmsg
  843. */
  844. pr_warn("KCM: Hard failure on kcm_write_msgs\n");
  845. report_csk_error(&kcm->sk, -err);
  846. }
  847. }
  848. } else {
  849. /* Message not complete, save state */
  850. partial_message:
  851. if (head) {
  852. kcm->seq_skb = head;
  853. kcm_tx_msg(head)->last_skb = skb;
  854. }
  855. }
  856. KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
  857. release_sock(sk);
  858. return copied;
  859. out_error:
  860. kcm_push(kcm);
  861. if (copied && sock->type == SOCK_SEQPACKET) {
  862. /* Wrote some bytes before encountering an
  863. * error, return partial success.
  864. */
  865. goto partial_message;
  866. }
  867. if (head != kcm->seq_skb)
  868. kfree_skb(head);
  869. err = sk_stream_error(sk, msg->msg_flags, err);
  870. /* make sure we wake any epoll edge trigger waiter */
  871. if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
  872. sk->sk_write_space(sk);
  873. release_sock(sk);
  874. return err;
  875. }
  876. static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
  877. long timeo, int *err)
  878. {
  879. struct sk_buff *skb;
  880. while (!(skb = skb_peek(&sk->sk_receive_queue))) {
  881. if (sk->sk_err) {
  882. *err = sock_error(sk);
  883. return NULL;
  884. }
  885. if (sock_flag(sk, SOCK_DONE))
  886. return NULL;
  887. if ((flags & MSG_DONTWAIT) || !timeo) {
  888. *err = -EAGAIN;
  889. return NULL;
  890. }
  891. sk_wait_data(sk, &timeo, NULL);
  892. /* Handle signals */
  893. if (signal_pending(current)) {
  894. *err = sock_intr_errno(timeo);
  895. return NULL;
  896. }
  897. }
  898. return skb;
  899. }
  900. static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
  901. size_t len, int flags)
  902. {
  903. struct sock *sk = sock->sk;
  904. struct kcm_sock *kcm = kcm_sk(sk);
  905. int err = 0;
  906. long timeo;
  907. struct strp_rx_msg *rxm;
  908. int copied = 0;
  909. struct sk_buff *skb;
  910. timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  911. lock_sock(sk);
  912. skb = kcm_wait_data(sk, flags, timeo, &err);
  913. if (!skb)
  914. goto out;
  915. /* Okay, have a message on the receive queue */
  916. rxm = strp_rx_msg(skb);
  917. if (len > rxm->full_len)
  918. len = rxm->full_len;
  919. err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
  920. if (err < 0)
  921. goto out;
  922. copied = len;
  923. if (likely(!(flags & MSG_PEEK))) {
  924. KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  925. if (copied < rxm->full_len) {
  926. if (sock->type == SOCK_DGRAM) {
  927. /* Truncated message */
  928. msg->msg_flags |= MSG_TRUNC;
  929. goto msg_finished;
  930. }
  931. rxm->offset += copied;
  932. rxm->full_len -= copied;
  933. } else {
  934. msg_finished:
  935. /* Finished with message */
  936. msg->msg_flags |= MSG_EOR;
  937. KCM_STATS_INCR(kcm->stats.rx_msgs);
  938. skb_unlink(skb, &sk->sk_receive_queue);
  939. kfree_skb(skb);
  940. }
  941. }
  942. out:
  943. release_sock(sk);
  944. return copied ? : err;
  945. }
  946. static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
  947. struct pipe_inode_info *pipe, size_t len,
  948. unsigned int flags)
  949. {
  950. struct sock *sk = sock->sk;
  951. struct kcm_sock *kcm = kcm_sk(sk);
  952. long timeo;
  953. struct strp_rx_msg *rxm;
  954. int err = 0;
  955. ssize_t copied;
  956. struct sk_buff *skb;
  957. /* Only support splice for SOCKSEQPACKET */
  958. timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  959. lock_sock(sk);
  960. skb = kcm_wait_data(sk, flags, timeo, &err);
  961. if (!skb)
  962. goto err_out;
  963. /* Okay, have a message on the receive queue */
  964. rxm = strp_rx_msg(skb);
  965. if (len > rxm->full_len)
  966. len = rxm->full_len;
  967. copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
  968. if (copied < 0) {
  969. err = copied;
  970. goto err_out;
  971. }
  972. KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  973. rxm->offset += copied;
  974. rxm->full_len -= copied;
  975. /* We have no way to return MSG_EOR. If all the bytes have been
  976. * read we still leave the message in the receive socket buffer.
  977. * A subsequent recvmsg needs to be done to return MSG_EOR and
  978. * finish reading the message.
  979. */
  980. release_sock(sk);
  981. return copied;
  982. err_out:
  983. release_sock(sk);
  984. return err;
  985. }
  986. /* kcm sock lock held */
  987. static void kcm_recv_disable(struct kcm_sock *kcm)
  988. {
  989. struct kcm_mux *mux = kcm->mux;
  990. if (kcm->rx_disabled)
  991. return;
  992. spin_lock_bh(&mux->rx_lock);
  993. kcm->rx_disabled = 1;
  994. /* If a psock is reserved we'll do cleanup in unreserve */
  995. if (!kcm->rx_psock) {
  996. if (kcm->rx_wait) {
  997. list_del(&kcm->wait_rx_list);
  998. kcm->rx_wait = false;
  999. }
  1000. requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  1001. }
  1002. spin_unlock_bh(&mux->rx_lock);
  1003. }
  1004. /* kcm sock lock held */
  1005. static void kcm_recv_enable(struct kcm_sock *kcm)
  1006. {
  1007. struct kcm_mux *mux = kcm->mux;
  1008. if (!kcm->rx_disabled)
  1009. return;
  1010. spin_lock_bh(&mux->rx_lock);
  1011. kcm->rx_disabled = 0;
  1012. kcm_rcv_ready(kcm);
  1013. spin_unlock_bh(&mux->rx_lock);
  1014. }
  1015. static int kcm_setsockopt(struct socket *sock, int level, int optname,
  1016. char __user *optval, unsigned int optlen)
  1017. {
  1018. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1019. int val, valbool;
  1020. int err = 0;
  1021. if (level != SOL_KCM)
  1022. return -ENOPROTOOPT;
  1023. if (optlen < sizeof(int))
  1024. return -EINVAL;
  1025. if (get_user(val, (int __user *)optval))
  1026. return -EINVAL;
  1027. valbool = val ? 1 : 0;
  1028. switch (optname) {
  1029. case KCM_RECV_DISABLE:
  1030. lock_sock(&kcm->sk);
  1031. if (valbool)
  1032. kcm_recv_disable(kcm);
  1033. else
  1034. kcm_recv_enable(kcm);
  1035. release_sock(&kcm->sk);
  1036. break;
  1037. default:
  1038. err = -ENOPROTOOPT;
  1039. }
  1040. return err;
  1041. }
  1042. static int kcm_getsockopt(struct socket *sock, int level, int optname,
  1043. char __user *optval, int __user *optlen)
  1044. {
  1045. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1046. int val, len;
  1047. if (level != SOL_KCM)
  1048. return -ENOPROTOOPT;
  1049. if (get_user(len, optlen))
  1050. return -EFAULT;
  1051. len = min_t(unsigned int, len, sizeof(int));
  1052. if (len < 0)
  1053. return -EINVAL;
  1054. switch (optname) {
  1055. case KCM_RECV_DISABLE:
  1056. val = kcm->rx_disabled;
  1057. break;
  1058. default:
  1059. return -ENOPROTOOPT;
  1060. }
  1061. if (put_user(len, optlen))
  1062. return -EFAULT;
  1063. if (copy_to_user(optval, &val, len))
  1064. return -EFAULT;
  1065. return 0;
  1066. }
  1067. static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
  1068. {
  1069. struct kcm_sock *tkcm;
  1070. struct list_head *head;
  1071. int index = 0;
  1072. /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
  1073. * we set sk_state, otherwise epoll_wait always returns right away with
  1074. * POLLHUP
  1075. */
  1076. kcm->sk.sk_state = TCP_ESTABLISHED;
  1077. /* Add to mux's kcm sockets list */
  1078. kcm->mux = mux;
  1079. spin_lock_bh(&mux->lock);
  1080. head = &mux->kcm_socks;
  1081. list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) {
  1082. if (tkcm->index != index)
  1083. break;
  1084. head = &tkcm->kcm_sock_list;
  1085. index++;
  1086. }
  1087. list_add(&kcm->kcm_sock_list, head);
  1088. kcm->index = index;
  1089. mux->kcm_socks_cnt++;
  1090. spin_unlock_bh(&mux->lock);
  1091. INIT_WORK(&kcm->tx_work, kcm_tx_work);
  1092. spin_lock_bh(&mux->rx_lock);
  1093. kcm_rcv_ready(kcm);
  1094. spin_unlock_bh(&mux->rx_lock);
  1095. }
  1096. static int kcm_attach(struct socket *sock, struct socket *csock,
  1097. struct bpf_prog *prog)
  1098. {
  1099. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1100. struct kcm_mux *mux = kcm->mux;
  1101. struct sock *csk;
  1102. struct kcm_psock *psock = NULL, *tpsock;
  1103. struct list_head *head;
  1104. int index = 0;
  1105. struct strp_callbacks cb;
  1106. int err = 0;
  1107. csk = csock->sk;
  1108. if (!csk)
  1109. return -EINVAL;
  1110. lock_sock(csk);
  1111. /* Only allow TCP sockets to be attached for now */
  1112. if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
  1113. csk->sk_protocol != IPPROTO_TCP) {
  1114. err = -EOPNOTSUPP;
  1115. goto out;
  1116. }
  1117. /* Don't allow listeners or closed sockets */
  1118. if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
  1119. err = -EOPNOTSUPP;
  1120. goto out;
  1121. }
  1122. psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
  1123. if (!psock) {
  1124. err = -ENOMEM;
  1125. goto out;
  1126. }
  1127. psock->mux = mux;
  1128. psock->sk = csk;
  1129. psock->bpf_prog = prog;
  1130. cb.rcv_msg = kcm_rcv_strparser;
  1131. cb.abort_parser = NULL;
  1132. cb.parse_msg = kcm_parse_func_strparser;
  1133. cb.read_sock_done = kcm_read_sock_done;
  1134. err = strp_init(&psock->strp, csk, &cb);
  1135. if (err) {
  1136. kmem_cache_free(kcm_psockp, psock);
  1137. goto out;
  1138. }
  1139. write_lock_bh(&csk->sk_callback_lock);
  1140. /* Check if sk_user_data is aready by KCM or someone else.
  1141. * Must be done under lock to prevent race conditions.
  1142. */
  1143. if (csk->sk_user_data) {
  1144. write_unlock_bh(&csk->sk_callback_lock);
  1145. strp_stop(&psock->strp);
  1146. strp_done(&psock->strp);
  1147. kmem_cache_free(kcm_psockp, psock);
  1148. err = -EALREADY;
  1149. goto out;
  1150. }
  1151. psock->save_data_ready = csk->sk_data_ready;
  1152. psock->save_write_space = csk->sk_write_space;
  1153. psock->save_state_change = csk->sk_state_change;
  1154. csk->sk_user_data = psock;
  1155. csk->sk_data_ready = psock_data_ready;
  1156. csk->sk_write_space = psock_write_space;
  1157. csk->sk_state_change = psock_state_change;
  1158. write_unlock_bh(&csk->sk_callback_lock);
  1159. sock_hold(csk);
  1160. /* Finished initialization, now add the psock to the MUX. */
  1161. spin_lock_bh(&mux->lock);
  1162. head = &mux->psocks;
  1163. list_for_each_entry(tpsock, &mux->psocks, psock_list) {
  1164. if (tpsock->index != index)
  1165. break;
  1166. head = &tpsock->psock_list;
  1167. index++;
  1168. }
  1169. list_add(&psock->psock_list, head);
  1170. psock->index = index;
  1171. KCM_STATS_INCR(mux->stats.psock_attach);
  1172. mux->psocks_cnt++;
  1173. psock_now_avail(psock);
  1174. spin_unlock_bh(&mux->lock);
  1175. /* Schedule RX work in case there are already bytes queued */
  1176. strp_check_rcv(&psock->strp);
  1177. out:
  1178. release_sock(csk);
  1179. return err;
  1180. }
  1181. static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
  1182. {
  1183. struct socket *csock;
  1184. struct bpf_prog *prog;
  1185. int err;
  1186. csock = sockfd_lookup(info->fd, &err);
  1187. if (!csock)
  1188. return -ENOENT;
  1189. prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER);
  1190. if (IS_ERR(prog)) {
  1191. err = PTR_ERR(prog);
  1192. goto out;
  1193. }
  1194. err = kcm_attach(sock, csock, prog);
  1195. if (err) {
  1196. bpf_prog_put(prog);
  1197. goto out;
  1198. }
  1199. /* Keep reference on file also */
  1200. return 0;
  1201. out:
  1202. fput(csock->file);
  1203. return err;
  1204. }
  1205. static void kcm_unattach(struct kcm_psock *psock)
  1206. {
  1207. struct sock *csk = psock->sk;
  1208. struct kcm_mux *mux = psock->mux;
  1209. lock_sock(csk);
  1210. /* Stop getting callbacks from TCP socket. After this there should
  1211. * be no way to reserve a kcm for this psock.
  1212. */
  1213. write_lock_bh(&csk->sk_callback_lock);
  1214. csk->sk_user_data = NULL;
  1215. csk->sk_data_ready = psock->save_data_ready;
  1216. csk->sk_write_space = psock->save_write_space;
  1217. csk->sk_state_change = psock->save_state_change;
  1218. strp_stop(&psock->strp);
  1219. if (WARN_ON(psock->rx_kcm)) {
  1220. write_unlock_bh(&csk->sk_callback_lock);
  1221. release_sock(csk);
  1222. return;
  1223. }
  1224. spin_lock_bh(&mux->rx_lock);
  1225. /* Stop receiver activities. After this point psock should not be
  1226. * able to get onto ready list either through callbacks or work.
  1227. */
  1228. if (psock->ready_rx_msg) {
  1229. list_del(&psock->psock_ready_list);
  1230. kfree_skb(psock->ready_rx_msg);
  1231. psock->ready_rx_msg = NULL;
  1232. KCM_STATS_INCR(mux->stats.rx_ready_drops);
  1233. }
  1234. spin_unlock_bh(&mux->rx_lock);
  1235. write_unlock_bh(&csk->sk_callback_lock);
  1236. /* Call strp_done without sock lock */
  1237. release_sock(csk);
  1238. strp_done(&psock->strp);
  1239. lock_sock(csk);
  1240. bpf_prog_put(psock->bpf_prog);
  1241. spin_lock_bh(&mux->lock);
  1242. aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
  1243. save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
  1244. KCM_STATS_INCR(mux->stats.psock_unattach);
  1245. if (psock->tx_kcm) {
  1246. /* psock was reserved. Just mark it finished and we will clean
  1247. * up in the kcm paths, we need kcm lock which can not be
  1248. * acquired here.
  1249. */
  1250. KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
  1251. spin_unlock_bh(&mux->lock);
  1252. /* We are unattaching a socket that is reserved. Abort the
  1253. * socket since we may be out of sync in sending on it. We need
  1254. * to do this without the mux lock.
  1255. */
  1256. kcm_abort_tx_psock(psock, EPIPE, false);
  1257. spin_lock_bh(&mux->lock);
  1258. if (!psock->tx_kcm) {
  1259. /* psock now unreserved in window mux was unlocked */
  1260. goto no_reserved;
  1261. }
  1262. psock->done = 1;
  1263. /* Commit done before queuing work to process it */
  1264. smp_mb();
  1265. /* Queue tx work to make sure psock->done is handled */
  1266. queue_work(kcm_wq, &psock->tx_kcm->tx_work);
  1267. spin_unlock_bh(&mux->lock);
  1268. } else {
  1269. no_reserved:
  1270. if (!psock->tx_stopped)
  1271. list_del(&psock->psock_avail_list);
  1272. list_del(&psock->psock_list);
  1273. mux->psocks_cnt--;
  1274. spin_unlock_bh(&mux->lock);
  1275. sock_put(csk);
  1276. fput(csk->sk_socket->file);
  1277. kmem_cache_free(kcm_psockp, psock);
  1278. }
  1279. release_sock(csk);
  1280. }
  1281. static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
  1282. {
  1283. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1284. struct kcm_mux *mux = kcm->mux;
  1285. struct kcm_psock *psock;
  1286. struct socket *csock;
  1287. struct sock *csk;
  1288. int err;
  1289. csock = sockfd_lookup(info->fd, &err);
  1290. if (!csock)
  1291. return -ENOENT;
  1292. csk = csock->sk;
  1293. if (!csk) {
  1294. err = -EINVAL;
  1295. goto out;
  1296. }
  1297. err = -ENOENT;
  1298. spin_lock_bh(&mux->lock);
  1299. list_for_each_entry(psock, &mux->psocks, psock_list) {
  1300. if (psock->sk != csk)
  1301. continue;
  1302. /* Found the matching psock */
  1303. if (psock->unattaching || WARN_ON(psock->done)) {
  1304. err = -EALREADY;
  1305. break;
  1306. }
  1307. psock->unattaching = 1;
  1308. spin_unlock_bh(&mux->lock);
  1309. /* Lower socket lock should already be held */
  1310. kcm_unattach(psock);
  1311. err = 0;
  1312. goto out;
  1313. }
  1314. spin_unlock_bh(&mux->lock);
  1315. out:
  1316. fput(csock->file);
  1317. return err;
  1318. }
  1319. static struct proto kcm_proto = {
  1320. .name = "KCM",
  1321. .owner = THIS_MODULE,
  1322. .obj_size = sizeof(struct kcm_sock),
  1323. };
  1324. /* Clone a kcm socket. */
  1325. static struct file *kcm_clone(struct socket *osock)
  1326. {
  1327. struct socket *newsock;
  1328. struct sock *newsk;
  1329. struct file *file;
  1330. newsock = sock_alloc();
  1331. if (!newsock)
  1332. return ERR_PTR(-ENFILE);
  1333. newsock->type = osock->type;
  1334. newsock->ops = osock->ops;
  1335. __module_get(newsock->ops->owner);
  1336. newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
  1337. &kcm_proto, false);
  1338. if (!newsk) {
  1339. sock_release(newsock);
  1340. return ERR_PTR(-ENOMEM);
  1341. }
  1342. sock_init_data(newsock, newsk);
  1343. init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
  1344. file = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
  1345. if (IS_ERR(file))
  1346. sock_release(newsock);
  1347. return file;
  1348. }
  1349. static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
  1350. {
  1351. int err;
  1352. switch (cmd) {
  1353. case SIOCKCMATTACH: {
  1354. struct kcm_attach info;
  1355. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1356. return -EFAULT;
  1357. err = kcm_attach_ioctl(sock, &info);
  1358. break;
  1359. }
  1360. case SIOCKCMUNATTACH: {
  1361. struct kcm_unattach info;
  1362. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1363. return -EFAULT;
  1364. err = kcm_unattach_ioctl(sock, &info);
  1365. break;
  1366. }
  1367. case SIOCKCMCLONE: {
  1368. struct kcm_clone info;
  1369. struct file *file;
  1370. info.fd = get_unused_fd_flags(0);
  1371. if (unlikely(info.fd < 0))
  1372. return info.fd;
  1373. file = kcm_clone(sock);
  1374. if (IS_ERR(file)) {
  1375. put_unused_fd(info.fd);
  1376. return PTR_ERR(file);
  1377. }
  1378. if (copy_to_user((void __user *)arg, &info,
  1379. sizeof(info))) {
  1380. put_unused_fd(info.fd);
  1381. fput(file);
  1382. return -EFAULT;
  1383. }
  1384. fd_install(info.fd, file);
  1385. err = 0;
  1386. break;
  1387. }
  1388. default:
  1389. err = -ENOIOCTLCMD;
  1390. break;
  1391. }
  1392. return err;
  1393. }
  1394. static void free_mux(struct rcu_head *rcu)
  1395. {
  1396. struct kcm_mux *mux = container_of(rcu,
  1397. struct kcm_mux, rcu);
  1398. kmem_cache_free(kcm_muxp, mux);
  1399. }
  1400. static void release_mux(struct kcm_mux *mux)
  1401. {
  1402. struct kcm_net *knet = mux->knet;
  1403. struct kcm_psock *psock, *tmp_psock;
  1404. /* Release psocks */
  1405. list_for_each_entry_safe(psock, tmp_psock,
  1406. &mux->psocks, psock_list) {
  1407. if (!WARN_ON(psock->unattaching))
  1408. kcm_unattach(psock);
  1409. }
  1410. if (WARN_ON(mux->psocks_cnt))
  1411. return;
  1412. __skb_queue_purge(&mux->rx_hold_queue);
  1413. mutex_lock(&knet->mutex);
  1414. aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
  1415. aggregate_psock_stats(&mux->aggregate_psock_stats,
  1416. &knet->aggregate_psock_stats);
  1417. aggregate_strp_stats(&mux->aggregate_strp_stats,
  1418. &knet->aggregate_strp_stats);
  1419. list_del_rcu(&mux->kcm_mux_list);
  1420. knet->count--;
  1421. mutex_unlock(&knet->mutex);
  1422. call_rcu(&mux->rcu, free_mux);
  1423. }
  1424. static void kcm_done(struct kcm_sock *kcm)
  1425. {
  1426. struct kcm_mux *mux = kcm->mux;
  1427. struct sock *sk = &kcm->sk;
  1428. int socks_cnt;
  1429. spin_lock_bh(&mux->rx_lock);
  1430. if (kcm->rx_psock) {
  1431. /* Cleanup in unreserve_rx_kcm */
  1432. WARN_ON(kcm->done);
  1433. kcm->rx_disabled = 1;
  1434. kcm->done = 1;
  1435. spin_unlock_bh(&mux->rx_lock);
  1436. return;
  1437. }
  1438. if (kcm->rx_wait) {
  1439. list_del(&kcm->wait_rx_list);
  1440. kcm->rx_wait = false;
  1441. }
  1442. /* Move any pending receive messages to other kcm sockets */
  1443. requeue_rx_msgs(mux, &sk->sk_receive_queue);
  1444. spin_unlock_bh(&mux->rx_lock);
  1445. if (WARN_ON(sk_rmem_alloc_get(sk)))
  1446. return;
  1447. /* Detach from MUX */
  1448. spin_lock_bh(&mux->lock);
  1449. list_del(&kcm->kcm_sock_list);
  1450. mux->kcm_socks_cnt--;
  1451. socks_cnt = mux->kcm_socks_cnt;
  1452. spin_unlock_bh(&mux->lock);
  1453. if (!socks_cnt) {
  1454. /* We are done with the mux now. */
  1455. release_mux(mux);
  1456. }
  1457. WARN_ON(kcm->rx_wait);
  1458. sock_put(&kcm->sk);
  1459. }
  1460. /* Called by kcm_release to close a KCM socket.
  1461. * If this is the last KCM socket on the MUX, destroy the MUX.
  1462. */
  1463. static int kcm_release(struct socket *sock)
  1464. {
  1465. struct sock *sk = sock->sk;
  1466. struct kcm_sock *kcm;
  1467. struct kcm_mux *mux;
  1468. struct kcm_psock *psock;
  1469. if (!sk)
  1470. return 0;
  1471. kcm = kcm_sk(sk);
  1472. mux = kcm->mux;
  1473. sock_orphan(sk);
  1474. kfree_skb(kcm->seq_skb);
  1475. lock_sock(sk);
  1476. /* Purge queue under lock to avoid race condition with tx_work trying
  1477. * to act when queue is nonempty. If tx_work runs after this point
  1478. * it will just return.
  1479. */
  1480. __skb_queue_purge(&sk->sk_write_queue);
  1481. /* Set tx_stopped. This is checked when psock is bound to a kcm and we
  1482. * get a writespace callback. This prevents further work being queued
  1483. * from the callback (unbinding the psock occurs after canceling work.
  1484. */
  1485. kcm->tx_stopped = 1;
  1486. release_sock(sk);
  1487. spin_lock_bh(&mux->lock);
  1488. if (kcm->tx_wait) {
  1489. /* Take of tx_wait list, after this point there should be no way
  1490. * that a psock will be assigned to this kcm.
  1491. */
  1492. list_del(&kcm->wait_psock_list);
  1493. kcm->tx_wait = false;
  1494. }
  1495. spin_unlock_bh(&mux->lock);
  1496. /* Cancel work. After this point there should be no outside references
  1497. * to the kcm socket.
  1498. */
  1499. cancel_work_sync(&kcm->tx_work);
  1500. lock_sock(sk);
  1501. psock = kcm->tx_psock;
  1502. if (psock) {
  1503. /* A psock was reserved, so we need to kill it since it
  1504. * may already have some bytes queued from a message. We
  1505. * need to do this after removing kcm from tx_wait list.
  1506. */
  1507. kcm_abort_tx_psock(psock, EPIPE, false);
  1508. unreserve_psock(kcm);
  1509. }
  1510. release_sock(sk);
  1511. WARN_ON(kcm->tx_wait);
  1512. WARN_ON(kcm->tx_psock);
  1513. sock->sk = NULL;
  1514. kcm_done(kcm);
  1515. return 0;
  1516. }
  1517. static const struct proto_ops kcm_dgram_ops = {
  1518. .family = PF_KCM,
  1519. .owner = THIS_MODULE,
  1520. .release = kcm_release,
  1521. .bind = sock_no_bind,
  1522. .connect = sock_no_connect,
  1523. .socketpair = sock_no_socketpair,
  1524. .accept = sock_no_accept,
  1525. .getname = sock_no_getname,
  1526. .poll = datagram_poll,
  1527. .ioctl = kcm_ioctl,
  1528. .listen = sock_no_listen,
  1529. .shutdown = sock_no_shutdown,
  1530. .setsockopt = kcm_setsockopt,
  1531. .getsockopt = kcm_getsockopt,
  1532. .sendmsg = kcm_sendmsg,
  1533. .recvmsg = kcm_recvmsg,
  1534. .mmap = sock_no_mmap,
  1535. .sendpage = kcm_sendpage,
  1536. };
  1537. static const struct proto_ops kcm_seqpacket_ops = {
  1538. .family = PF_KCM,
  1539. .owner = THIS_MODULE,
  1540. .release = kcm_release,
  1541. .bind = sock_no_bind,
  1542. .connect = sock_no_connect,
  1543. .socketpair = sock_no_socketpair,
  1544. .accept = sock_no_accept,
  1545. .getname = sock_no_getname,
  1546. .poll = datagram_poll,
  1547. .ioctl = kcm_ioctl,
  1548. .listen = sock_no_listen,
  1549. .shutdown = sock_no_shutdown,
  1550. .setsockopt = kcm_setsockopt,
  1551. .getsockopt = kcm_getsockopt,
  1552. .sendmsg = kcm_sendmsg,
  1553. .recvmsg = kcm_recvmsg,
  1554. .mmap = sock_no_mmap,
  1555. .sendpage = kcm_sendpage,
  1556. .splice_read = kcm_splice_read,
  1557. };
  1558. /* Create proto operation for kcm sockets */
  1559. static int kcm_create(struct net *net, struct socket *sock,
  1560. int protocol, int kern)
  1561. {
  1562. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1563. struct sock *sk;
  1564. struct kcm_mux *mux;
  1565. switch (sock->type) {
  1566. case SOCK_DGRAM:
  1567. sock->ops = &kcm_dgram_ops;
  1568. break;
  1569. case SOCK_SEQPACKET:
  1570. sock->ops = &kcm_seqpacket_ops;
  1571. break;
  1572. default:
  1573. return -ESOCKTNOSUPPORT;
  1574. }
  1575. if (protocol != KCMPROTO_CONNECTED)
  1576. return -EPROTONOSUPPORT;
  1577. sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern);
  1578. if (!sk)
  1579. return -ENOMEM;
  1580. /* Allocate a kcm mux, shared between KCM sockets */
  1581. mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL);
  1582. if (!mux) {
  1583. sk_free(sk);
  1584. return -ENOMEM;
  1585. }
  1586. spin_lock_init(&mux->lock);
  1587. spin_lock_init(&mux->rx_lock);
  1588. INIT_LIST_HEAD(&mux->kcm_socks);
  1589. INIT_LIST_HEAD(&mux->kcm_rx_waiters);
  1590. INIT_LIST_HEAD(&mux->kcm_tx_waiters);
  1591. INIT_LIST_HEAD(&mux->psocks);
  1592. INIT_LIST_HEAD(&mux->psocks_ready);
  1593. INIT_LIST_HEAD(&mux->psocks_avail);
  1594. mux->knet = knet;
  1595. /* Add new MUX to list */
  1596. mutex_lock(&knet->mutex);
  1597. list_add_rcu(&mux->kcm_mux_list, &knet->mux_list);
  1598. knet->count++;
  1599. mutex_unlock(&knet->mutex);
  1600. skb_queue_head_init(&mux->rx_hold_queue);
  1601. /* Init KCM socket */
  1602. sock_init_data(sock, sk);
  1603. init_kcm_sock(kcm_sk(sk), mux);
  1604. return 0;
  1605. }
  1606. static struct net_proto_family kcm_family_ops = {
  1607. .family = PF_KCM,
  1608. .create = kcm_create,
  1609. .owner = THIS_MODULE,
  1610. };
  1611. static __net_init int kcm_init_net(struct net *net)
  1612. {
  1613. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1614. INIT_LIST_HEAD_RCU(&knet->mux_list);
  1615. mutex_init(&knet->mutex);
  1616. return 0;
  1617. }
  1618. static __net_exit void kcm_exit_net(struct net *net)
  1619. {
  1620. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1621. /* All KCM sockets should be closed at this point, which should mean
  1622. * that all multiplexors and psocks have been destroyed.
  1623. */
  1624. WARN_ON(!list_empty(&knet->mux_list));
  1625. }
  1626. static struct pernet_operations kcm_net_ops = {
  1627. .init = kcm_init_net,
  1628. .exit = kcm_exit_net,
  1629. .id = &kcm_net_id,
  1630. .size = sizeof(struct kcm_net),
  1631. };
  1632. static int __init kcm_init(void)
  1633. {
  1634. int err = -ENOMEM;
  1635. kcm_muxp = kmem_cache_create("kcm_mux_cache",
  1636. sizeof(struct kcm_mux), 0,
  1637. SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  1638. if (!kcm_muxp)
  1639. goto fail;
  1640. kcm_psockp = kmem_cache_create("kcm_psock_cache",
  1641. sizeof(struct kcm_psock), 0,
  1642. SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  1643. if (!kcm_psockp)
  1644. goto fail;
  1645. kcm_wq = create_singlethread_workqueue("kkcmd");
  1646. if (!kcm_wq)
  1647. goto fail;
  1648. err = proto_register(&kcm_proto, 1);
  1649. if (err)
  1650. goto fail;
  1651. err = sock_register(&kcm_family_ops);
  1652. if (err)
  1653. goto sock_register_fail;
  1654. err = register_pernet_device(&kcm_net_ops);
  1655. if (err)
  1656. goto net_ops_fail;
  1657. err = kcm_proc_init();
  1658. if (err)
  1659. goto proc_init_fail;
  1660. return 0;
  1661. proc_init_fail:
  1662. unregister_pernet_device(&kcm_net_ops);
  1663. net_ops_fail:
  1664. sock_unregister(PF_KCM);
  1665. sock_register_fail:
  1666. proto_unregister(&kcm_proto);
  1667. fail:
  1668. kmem_cache_destroy(kcm_muxp);
  1669. kmem_cache_destroy(kcm_psockp);
  1670. if (kcm_wq)
  1671. destroy_workqueue(kcm_wq);
  1672. return err;
  1673. }
  1674. static void __exit kcm_exit(void)
  1675. {
  1676. kcm_proc_exit();
  1677. unregister_pernet_device(&kcm_net_ops);
  1678. sock_unregister(PF_KCM);
  1679. proto_unregister(&kcm_proto);
  1680. destroy_workqueue(kcm_wq);
  1681. kmem_cache_destroy(kcm_muxp);
  1682. kmem_cache_destroy(kcm_psockp);
  1683. }
  1684. module_init(kcm_init);
  1685. module_exit(kcm_exit);
  1686. MODULE_LICENSE("GPL");
  1687. MODULE_ALIAS_NETPROTO(PF_KCM);