mksck_kernel.c

/*
 * Linux 2.6.32 and later Kernel module for VMware MVP Hypervisor Support
 *
 * Copyright (C) 2010-2013 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; see the file COPYING. If not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#line 5

/**
 * @file
 *
 * @brief The monitor/kernel socket interface kernel extension.
 */

#define __KERNEL_SYSCALLS__

#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/fcntl.h>
#include <linux/syscalls.h>
#include <linux/kmod.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/miscdevice.h>
#include <linux/poll.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include <net/sock.h>
#include <asm/memory.h>
#include <asm/system.h>
#include <linux/uaccess.h>

#include "mvp.h"
#include "actions.h"
#include "mvpkm_kernel.h"
#include "mksck_kernel.h"
#include "mksck_sockaddr.h"
#include "mutex_kernel.h"
void NORETURN
FatalError(char const *file,
	   int line,
	   FECode feCode,
	   int bugno,
	   char const *fmt,
	   ...)
{
	static DEFINE_MUTEX(fatalErrorMutex);

	/*
	 * Lock around printing the error details so that messages from multiple
	 * threads are not interleaved.
	 */
	mutex_lock(&fatalErrorMutex);
	FATALERROR_COMMON(printk, vprintk, file, line, feCode, bugno, fmt);
	dump_stack();
	/* done printing */
	mutex_unlock(&fatalErrorMutex);

	/*
	 * do_exit below exits the current thread but does not crash the kernel.
	 * Hence, the stack dump will actually be readable from other user
	 * threads.
	 */
	do_exit(1);
}
/*
 * The project uses a new address family: AF_MKSCK. Optimally, this address
 * family would be registered/assigned its own constant.
 *
 * Instead, we ASSUME that DECnet is not needed, so we re-use its constant.
 */
static struct proto mksckProto = {
	.name     = "AF_MKSCK",
	.owner    = THIS_MODULE,
	.obj_size = sizeof(struct sock),
};
static int
MksckCreate(struct net *net,
	    struct socket *sock,
	    int protocol,
	    int kern);

static struct net_proto_family mksckFamilyOps = {
	.family = AF_MKSCK,
	.owner  = THIS_MODULE,
	.create = MksckCreate,
};

static int MksckFault(struct vm_area_struct *vma, struct vm_fault *vmf);

/**
 * @brief Linux vma operations for receive windows established via Mksck mmap.
 */
static struct vm_operations_struct mksckVMOps = {
	.fault = MksckFault
};
/*
 * List of hosts and guests we know about.
 */
static spinlock_t mksckPageListLock;
static MksckPage *mksckPages[MKSCK_MAX_SHARES];

/*
 * The following functions form the AF_MKSCK DGRAM operations.
 */
static int MksckRelease(struct socket *sock);
static int MksckBacklogRcv(struct sock *sk, struct sk_buff *skb);
static void MksckSkDestruct(struct sock *sk);
static int
MksckBind(struct socket *sock,
	  struct sockaddr *addr,
	  int addrLen);
static int MksckBindGeneric(struct sock *sk, Mksck_Address addr);
static int
MksckDgramRecvMsg(struct kiocb *kiocb,
		  struct socket *sock,
		  struct msghdr *msg,
		  size_t len,
		  int flags);
static int
MksckDgramSendMsg(struct kiocb *kiocb,
		  struct socket *sock,
		  struct msghdr *msg,
		  size_t len);
static int
MksckGetName(struct socket *sock,
	     struct sockaddr *addr,
	     int *addrLen,
	     int peer);
static unsigned int
MksckPoll(struct file *filp,
	  struct socket *sock,
	  poll_table *wait);
static int
MksckDgramConnect(struct socket *sock,
		  struct sockaddr *addr,
		  int addrLen,
		  int flags);
static int
MksckMMap(struct file *file,
	  struct socket *sock,
	  struct vm_area_struct *vma);
static void MksckPageRelease(struct MksckPage *mksckPage);

static const struct proto_ops mksckDgramOps = {
	.family     = AF_MKSCK,
	.owner      = THIS_MODULE,
	.release    = MksckRelease,
	.bind       = MksckBind,
	.connect    = MksckDgramConnect,
	.socketpair = sock_no_socketpair,
	.accept     = sock_no_accept,
	.getname    = MksckGetName,
	.poll       = MksckPoll,
	.ioctl      = sock_no_ioctl,
	.listen     = sock_no_listen,
	.shutdown   = sock_no_shutdown, /* MksckShutdown, */
	.setsockopt = sock_no_setsockopt,
	.getsockopt = sock_no_getsockopt,
	.sendmsg    = MksckDgramSendMsg,
	.recvmsg    = MksckDgramRecvMsg,
	.mmap       = MksckMMap,
	.sendpage   = sock_no_sendpage,
};
/**
 * @brief Initialize the MKSCK protocol
 *
 * @return 0 on success, -errno on failure
 */
int
Mksck_Init(void)
{
	int err;

	spin_lock_init(&mksckPageListLock);

	err = proto_register(&mksckProto, 1);
	if (err != 0) {
		pr_err("Mksck_Init: Cannot register AF_MKSCK protocol" \
		       ", errno = %d.\n", err);
		return err;
	}

	err = sock_register(&mksckFamilyOps);
	if (err < 0) {
		pr_err("Mksck_Init: Could not register address family" \
		       " AF_MKSCK (errno = %d).\n", err);
		return err;
	}

	return 0;
}
/**
 * @brief De-register the MKSCK protocol
 */
void
Mksck_Exit(void)
{
	sock_unregister(mksckFamilyOps.family);
	proto_unregister(&mksckProto);
}
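
/*
 * Illustration only, not part of the module: since AF_MKSCK reuses the
 * DECnet address family constant (see the comment above mksckProto), a
 * host userland client would open a datagram socket roughly as follows,
 * assuming the shared headers define AF_MKSCK accordingly:
 *
 *    int fd = socket(AF_MKSCK, SOCK_DGRAM, 0);
 *    if (fd < 0)
 *       perror("socket(AF_MKSCK)");  // EPERM unless root or vmware euid
 *
 * MksckCreate() below enforces the euid check that makes the EPERM case
 * possible.
 */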
/**
 * @brief Create a new MKSCK socket
 *
 * @param net network namespace (2.6.24 or above)
 * @param sock user socket structure
 * @param protocol protocol to be used
 * @param kern called from kernel mode
 *
 * @return 0 on success, -errno on failure
 */
static int
MksckCreate(struct net *net,
	    struct socket *sock,
	    int protocol,
	    int kern)
{
	struct sock *sk;
	uid_t currentUid = current_euid();

	if (!(currentUid == 0 ||
	      currentUid == Mvpkm_vmwareUid)) {
		pr_warn("MksckCreate: rejected from process %s " \
			"tgid=%d, pid=%d euid:%d.\n",
			current->comm,
			task_tgid_vnr(current),
			task_pid_vnr(current),
			currentUid);
		return -EPERM;
	}

	if (!sock)
		return -EINVAL;

	if (protocol)
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_DGRAM:
		sock->ops = &mksckDgramOps;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sock->state = SS_UNCONNECTED;

	sk = sk_alloc(net, mksckFamilyOps.family, GFP_KERNEL, &mksckProto);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);
	sk->sk_type        = SOCK_DGRAM;
	sk->sk_destruct    = MksckSkDestruct;
	sk->sk_backlog_rcv = MksckBacklogRcv;
	/*
	 * On socket lock...
	 *
	 * A bound socket will have an associated private area, the Mksck
	 * structure part of MksckPage. That area is pointed to by
	 * sk->sk_protinfo. In addition, a connected socket will have the
	 * peer field in its associated area set to point to the associated
	 * private area of the peer socket. A mechanism is needed to ensure
	 * that these private areas are not freed while they are being
	 * accessed within the scope of a function. A simple lock would not
	 * suffice as the interface functions (like MksckDgramRecvMsg())
	 * may block. Hence a reference count mechanism is employed. When
	 * the mentioned references (sk->sk_protinfo and mksck->peer) to
	 * the respective private areas are set a refcount is incremented,
	 * and decremented when the references are deleted.
	 *
	 * The refcounts of areas pointed to by sk->sk_protinfo and
	 * mksck->peer will be decremented under the lock of the socket.
	 * Hence these private areas cannot disappear as long as the socket
	 * lock is held.
	 *
	 * The interface functions will have one of the following
	 * structures:
	 *
	 *    simpleFn(sk)
	 *    {
	 *       lock_sock(sk);
	 *       if ((mksck = sk->sk_protinfo)) {
	 *          <non-blocking use of mksck>
	 *       }
	 *       release_sock(sk);
	 *    }
	 *
	 *    complexFn(sk)
	 *    {
	 *       lock_sock(sk);
	 *       if ((mksck = sk->sk_protinfo))
	 *          IncRefc(mksck);
	 *       release_sock(sk);
	 *
	 *       if (mksck) {
	 *          <use of mksck in a potentially blocking manner>
	 *          DecRefc(mksck);
	 *       }
	 *    }
	 */
	sk->sk_protinfo = NULL;
	sock_reset_flag(sk, SOCK_DONE);
	return 0;
}
/**
 * @brief Delete a MKSCK socket
 *
 * @param sock user socket structure
 *
 * @return 0 on success, -errno on failure
 */
static int
MksckRelease(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (sk) {
		lock_sock(sk);
		sock_orphan(sk);
		release_sock(sk);
		sock_put(sk);
	}
	sock->sk = NULL;
	sock->state = SS_FREE;
	return 0;
}

static int
MksckBacklogRcv(struct sock *sk,
		struct sk_buff *skb)
{
	/*
	 * We should never get these as we never queue an skb.
	 */
	pr_err("MksckBacklogRcv: should never get here\n");
	return -EIO;
}
/**
 * @brief Callback at socket destruction
 *
 * @param sk pointer to kernel socket structure
 */
static void
MksckSkDestruct(struct sock *sk)
{
	Mksck *mksck;

	lock_sock(sk);
	mksck = sk->sk_protinfo;
	if (mksck != NULL) {
		sk->sk_protinfo = NULL;
		Mksck_CloseCommon(mksck);
	}
	if (sk->sk_user_data != NULL) {
		sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
		sk->sk_user_data = NULL;
	}
	release_sock(sk);
}
/**
 * @brief Set the local address of a MKSCK socket
 *
 * @param sk kernel socket structure
 * @param addr the new address of the socket
 *
 * @return 0 on success, -errno on failure
 *
 * If addr.port is undefined, a new random port is assigned.
 * If addr.vmId is undefined, the vmId computed from the tgid is used.
 * Hence the vmId of a socket does not always identify the host.
 *
 * The socket is assumed to be locked.
 * This function is called both explicitly (MksckBind) and implicitly (Send).
 */
static int
MksckBindGeneric(struct sock *sk,
		 Mksck_Address addr)
{
	int err;
	Mksck *mksck;
	struct MksckPage *mksckPage;

	if (sk->sk_protinfo != NULL)
		return -EISCONN;

	/*
	 * Locate the page for the given host and increment its reference
	 * count so it can't get freed off while we are working on it.
	 */
	if (addr.vmId == MKSCK_VMID_UNDEF) {
		mksckPage = MksckPage_GetFromTgidIncRefc();
	} else {
		pr_err("MksckBind: host bind called on vmid 0x%X\n", addr.vmId);
		mksckPage = MksckPage_GetFromVmIdIncRefc(addr.vmId);
	}

	if (mksckPage == NULL) {
		pr_err("MksckBind: no mksckPage for vm 0x%X\n", addr.vmId);
		return -ENETUNREACH;
	}
	addr.vmId = mksckPage->vmId;

	/*
	 * Before we can find an unused socket port on the page we have to
	 * lock the page for exclusive access so another thread can't
	 * allocate the same port.
	 */
	err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
	if (err < 0)
		goto outDec;

	addr.port = MksckPage_GetFreePort(mksckPage, addr.port);
	if (addr.port == MKSCK_PORT_UNDEF) {
		err = -EINVAL;
		goto outUnlockDec;
	}

	/*
	 * At this point we have the mksckPage locked for exclusive access
	 * and its reference count incremented. Also, addr is completely
	 * filled in with vmId and port that we want to bind.
	 *
	 * Find an available mksck struct on the shared page and initialize it.
	 */
	mksck = MksckPage_AllocSocket(mksckPage, addr);
	if (mksck == NULL) {
		err = -EMFILE;
		goto outUnlockDec;
	}

	/*
	 * Stable, release mutex. Leave mksckPage->refCount incremented so
	 * mksckPage can't be freed until socket is closed.
	 */
	Mutex_Unlock(&mksckPage->mutex, MutexModeEX);

	/*
	 * This is why we start mksck->refCount at 1. When sk_protinfo gets
	 * cleared, we decrement mksck->refCount.
	 */
	sk->sk_protinfo = mksck;

	PRINTK("MksckBind: socket bound to %08X\n",
	       mksck->addr.addr);
	return 0;

outUnlockDec:
	Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
outDec:
	MksckPage_DecRefc(mksckPage);
	return err;
}
/**
 * @brief Test if the socket is already bound to a local address and,
 *        if not, bind it to an unused address.
 *
 * @param sk kernel socket structure
 * @return 0 on success, -errno on failure
 *
 * The socket is assumed to be locked.
 */
static inline int
MksckTryBind(struct sock *sk)
{
	int err = 0;

	if (!sk->sk_protinfo) {
		static const Mksck_Address addr = { .addr = MKSCK_ADDR_UNDEF };

		err = MksckBindGeneric(sk, addr);
	}
	return err;
}
/**
 * @brief Set the address of a MKSCK socket (user call)
 *
 * @param sock user socket structure
 * @param addr the new address of the socket
 * @param addrLen length of the address
 *
 * @return 0 on success, -errno on failure
 */
static int
MksckBind(struct socket *sock,
	  struct sockaddr *addr,
	  int addrLen)
{
	int err;
	struct sock *sk = sock->sk;
	struct sockaddr_mk *addrMk = (struct sockaddr_mk *)addr;

	if (addrLen != sizeof(*addrMk))
		return -EINVAL;
	if (addrMk->mk_family != AF_MKSCK)
		return -EAFNOSUPPORT;

	/*
	 * Obtain the socket lock and call the generic Bind function.
	 */
	lock_sock(sk);
	err = MksckBindGeneric(sk, addrMk->mk_addr);
	release_sock(sk);

	return err;
}
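
/*
 * A hedged userland sketch of binding, assuming the sockaddr_mk layout
 * from mksck_sockaddr.h with mk_family and mk_addr (vmId/port) members.
 * Leaving the address undefined lets MksckBindGeneric() derive the vmId
 * from the caller's tgid and pick a free port:
 *
 *    struct sockaddr_mk a = { .mk_family = AF_MKSCK };
 *    a.mk_addr.addr = MKSCK_ADDR_UNDEF;
 *    if (bind(fd, (struct sockaddr *)&a, sizeof(a)) < 0)
 *       perror("bind(AF_MKSCK)");
 */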
/**
 * @brief Lock the peer socket by locating it, incrementing its refc
 * @param addr the address of the peer socket
 * @param[out] peerMksckR set to the locked peer socket pointer
 *             upon successful lookup
 * @return 0 on success, -errno on failure
 */
static int
LockPeer(Mksck_Address addr, Mksck **peerMksckR)
{
	int err = 0;
	struct MksckPage *peerMksckPage =
		MksckPage_GetFromVmIdIncRefc(addr.vmId);
	Mksck *peerMksck;

	/*
	 * Find corresponding destination shared page and increment its
	 * reference count so it can't be freed while we are sending to the
	 * socket. Make sure that the address is indeed an address of a
	 * monitor/guest socket.
	 */
	if (peerMksckPage == NULL) {
		pr_info("LockPeer: vmId %x is not in use!\n", addr.vmId);
		return -ENETUNREACH;
	}
	if (!peerMksckPage->isGuest) {
		MksckPage_DecRefc(peerMksckPage);
		pr_err("LockPeer: vmId %x does not belong to a guest!\n",
		       addr.vmId);
		return -ENETUNREACH;
	}

	err = Mutex_Lock(&peerMksckPage->mutex, MutexModeSH);
	if (err < 0) {
		MksckPage_DecRefc(peerMksckPage);
		return err;
	}

	/*
	 * Find corresponding destination socket on that shared page and
	 * increment its reference count so it can't be freed while we are
	 * trying to send to it.
	 */
	peerMksck = MksckPage_GetFromAddr(peerMksckPage, addr);
	if (peerMksck) {
		ATOMIC_ADDV(peerMksck->refCount, 1);
		*peerMksckR = peerMksck;
	} else {
		pr_err("LockPeer: addr %x is not a defined socket!\n",
		       addr.addr);
		err = -ENETUNREACH;
	}

	Mutex_Unlock(&peerMksckPage->mutex, MutexModeSH);
	MksckPage_DecRefc(peerMksckPage);
	return err;
}
/**
 * @brief Set the peer address of a MKSCK socket
 *
 * @param sock user socket structure
 * @param addr the peer address to connect the socket to
 * @param addrLen length of the address
 * @param flags flags
 *
 * @return 0 on success, -errno on failure
 */
static int
MksckDgramConnect(struct socket *sock,
		  struct sockaddr *addr,
		  int addrLen,
		  int flags)
{
	struct sock *sk = sock->sk;
	Mksck *mksck;
	struct sockaddr_mk *peerAddrMk = (struct sockaddr_mk *)addr;
	int err = 0;

	if (addrLen != sizeof(*peerAddrMk)) {
		pr_info("MksckConnect: wrong address length!\n");
		return -EINVAL;
	}

	if (peerAddrMk->mk_family != AF_MKSCK) {
		pr_info("MksckConnect: wrong address family!\n");
		return -EAFNOSUPPORT;
	}

	lock_sock(sk);

	err = MksckTryBind(sk);
	if (err)
		goto releaseSock;
	mksck = sk->sk_protinfo;

	/*
	 * First sever any past peer connections...
	 */
	Mksck_DisconnectPeer(mksck);
	sock->state = SS_UNCONNECTED;

	/*
	 * ... and build new connections.
	 */
	if (peerAddrMk->mk_addr.addr != MKSCK_ADDR_UNDEF) {
		sock->state = SS_CONNECTED;
		mksck->peerAddr = peerAddrMk->mk_addr;

		err = LockPeer(mksck->peerAddr, &mksck->peer);
		PRINTK("MksckConnect: socket %x is connected" \
		       " to %x!\n", mksck->addr.addr, mksck->peerAddr.addr);
	}

releaseSock:
	release_sock(sk);
	return err;
}
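
/*
 * Userland sketch of connecting (peer vmId/port values are assumed
 * known): once connected, plain send() needs no destination address,
 * and connecting to MKSCK_ADDR_UNDEF merely severs the current peer
 * link, as the code above shows:
 *
 *    struct sockaddr_mk peer = { .mk_family = AF_MKSCK };
 *    peer.mk_addr.vmId = peerVmId;   // hypothetical peer identifiers
 *    peer.mk_addr.port = peerPort;
 *    if (connect(fd, (struct sockaddr *)&peer, sizeof(peer)) < 0)
 *       perror("connect(AF_MKSCK)");
 */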
/**
 * @brief Return the local or peer address of a MKSCK socket
 *
 * @param sock user socket structure
 * @param addr buffer receiving the address
 * @param addrLen length of the returned address
 * @param peer 1 if the peer address is sought
 *
 * @return 0 on success, -errno on failure
 */
static int
MksckGetName(struct socket *sock,
	     struct sockaddr *addr,
	     int *addrLen,
	     int peer)
{
	int err;
	Mksck *mksck;
	struct sock *sk = sock->sk;

	/*
	 * MAX_SOCK_ADDR is the size of *addr, but it's not exported.
	 * ASSERT_ON_COMPILE(sizeof(struct sockaddr_mk) <= MAX_SOCK_ADDR);
	 */
	lock_sock(sk);
	mksck = sk->sk_protinfo;

	if (mksck == NULL) {
		if (peer) {
			err = -ENOTCONN;
		} else {
			((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
			((struct sockaddr_mk *)addr)->mk_addr.addr =
				MKSCK_ADDR_UNDEF;
			*addrLen = sizeof(struct sockaddr_mk);
			err = 0;
		}
	} else if (!peer) {
		((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
		((struct sockaddr_mk *)addr)->mk_addr   = mksck->addr;
		*addrLen = sizeof(struct sockaddr_mk);
		err = 0;
	} else if (mksck->peerAddr.addr == MKSCK_ADDR_UNDEF) {
		err = -ENOTCONN;
	} else {
		((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
		((struct sockaddr_mk *)addr)->mk_addr   = mksck->peerAddr;
		*addrLen = sizeof(struct sockaddr_mk);
		err = 0;
	}

	release_sock(sk);
	return err;
}
/**
 * @brief VMX polling a received packet from VMM.
 *
 * @param filp kernel file pointer to poll for
 * @param sock user socket structure
 * @param wait kernel polling table where to poll if not null
 *
 * @return poll mask state given from socket state.
 */
static unsigned int
MksckPoll(struct file *filp,
	  struct socket *sock,
	  poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;
	Mksck *mksck = NULL;
	uint32 read;
	int err;

	lock_sock(sk);
	err = MksckTryBind(sk);
	if (err) {
		release_sock(sk);
		return err;
	}
	mksck = sk->sk_protinfo;

	/*
	 * To avoid mksck disappearing right after the release_sock the
	 * refcount needs to be incremented. For more details read the
	 * block comment on locking in MksckCreate.
	 */
	ATOMIC_ADDV(mksck->refCount, 1);
	release_sock(sk);

	/*
	 * Wait to make sure this is the only thread trying to access socket.
	 */
	err = Mutex_Lock(&mksck->mutex, MutexModeEX);
	if (err < 0) {
		/*
		 * We might get in this situation if we are signaled
		 * (select() may handle this, so leave)
		 */
		PRINTK("MksckPoll: try to abort\n");
		return mask;
	}

	/*
	 * See if packet in ring.
	 */
	read = mksck->read;
	if (read != mksck->write) {
		mask |= POLLIN | POLLRDNORM; /* readable, socket is unlocked */
		/*
		 * Note that if we implement support for POLLOUT, we SHOULD
		 * change this Mutex_Unlock to Mutex_UnlPoll, because we
		 * cannot tell which condition the user actually intends to
		 * sleep on.
		 */
		Mutex_Unlock(&mksck->mutex, MutexModeEX);
	} else {
		Mutex_UnlPoll(&mksck->mutex, MutexModeEX,
			      MKSCK_CVAR_FILL, filp, wait);
	}

	/*
	 * Note that locking rules differ a little inside MksckPoll, since we
	 * are not only given a pointer to the struct socket but also a pointer
	 * to a struct file. This means that during the whole operation of this
	 * function and during any pending wait (registered with poll_wait()),
	 * the file itself is reference counted up, and we should rely on that
	 * 'upper' reference counting to prevent tearing the Mksck down.
	 * That holds true since we don't re-bind sockets.
	 */
	Mksck_DecRefc(mksck);
	return mask;
}
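
/*
 * From userland the standard poll(2) pattern applies; MksckPoll() above
 * reports POLLIN | POLLRDNORM as soon as a datagram sits in the ring:
 *
 *    struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *    if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *       ;   // a recvmsg() will now find a packet without sleeping
 */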
/**
 * @brief Manage a set of Mksck_PageDesc from a message or a stored array.
 *
 * @param pd set of Mksck_PageDesc
 * @param pages Mksck_PageDesc pages count for this management operation
 * @param incr ternary used to indicate if we want to reference (+1),
 *             dereference (-1), or count (0) 4k pages
 *
 * @return length of bytes processed.
 */
static size_t
MksckPageDescManage(Mksck_PageDesc *pd,
		    uint32 pages,
		    int incr)
{
	size_t payloadLen = 0;
	uint32 i;

	for (i = 0; i < pages && pd[i].mpn != INVALID_MPN; ++i) {
		uint32 j;

		for (j = 0; j < 1 << pd[i].order; ++j) {
			struct page *page;
			MPN currMPN = pd[i].mpn + j;

			/*
			 * The monitor tried to send an invalid MPN, bad.
			 */
			if (!pfn_valid(currMPN)) {
				pr_warn("MksckPageDescManage: Invalid MPN %x\n",
					currMPN);
			} else {
				page = pfn_to_page(currMPN);
				if (incr == 1)
					get_page(page);
				if (incr == -1)
					put_page(page);
			}
			payloadLen += PAGE_SIZE;
		}
	}
	return payloadLen;
}

/**
 * @brief Management values to be used as third parameter of MksckPageDescManage
 */
#define MANAGE_INCREMENT  1
#define MANAGE_DECREMENT -1
#define MANAGE_COUNT      0
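
/*
 * With MANAGE_COUNT the loop leaves the page refcounts untouched and
 * simply reports how many bytes the descriptors cover:
 *
 *    size_t mappableBytes = MksckPageDescManage(pd, pages, MANAGE_COUNT);
 *
 * MksckPageDescIoctl() below uses exactly this to compute the mappable
 * area size returned by MKSCK_DETACH.
 */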
/**
 * @brief Map a set of Mksck_PageDesc from a message or a stored array.
 *
 * @param pd set of Mksck_PageDesc
 * @param pages pages count for this mapping
 * @param iov vectored user virtual addresses of the recv commands
 * @param iovCount size for iov parameter
 * @param vma virtual memory area used for the mapping; note that
 *            this is mandatory when MksckPageDescMap is used on an
 *            indirect PageDesc context (i.e. whenever iov is computed
 *            by ourselves rather than by the kernel).
 *
 * Since find_vma() and vm_insert_page() are used, this function must
 * be called with current's mmap_sem locked, or inside an MMap operation.
 *
 * @return length of bytes mapped.
 */
static size_t
MksckPageDescMap(Mksck_PageDesc *pd,
		 uint32 pages,
		 struct iovec *iov,
		 int iovCount,
		 struct vm_area_struct *vma)
{
	size_t payloadLen = 0;
	uint32 i;

	for (i = 0; i < pages && pd[i].mpn != INVALID_MPN; ++i) {
		uint32 j;

		for (j = 0; j < 1 << pd[i].order; ++j) {
			HUVA huva = 0;
			struct page *page;
			MPN currMPN = pd[i].mpn + j;

			while (iovCount > 0 && iov->iov_len == 0) {
				iovCount--;
				iov++;
			}
			if (iovCount == 0) {
				pr_warn("MksckPageDescMap: Invalid " \
					"iov length\n");
				goto map_done;
			}
			huva = (HUVA)iov->iov_base;

			/*
			 * iovecs for receiving the typed component of the
			 * message should have page aligned base and size
			 * sufficient for page descriptor mappings.
			 */
			if (huva & (PAGE_SIZE - 1) ||
			    iov->iov_len < PAGE_SIZE) {
				pr_warn("MksckPageDescMap: Invalid huva %x " \
					"or iov_len %d\n", huva, iov->iov_len);
				goto map_done;
			}

			/*
			 * Might be in a new vma...
			 */
			if (vma == NULL || huva < vma->vm_start ||
			    huva >= vma->vm_end) {
				vma = find_vma(current->mm, huva);

				/*
				 * Couldn't find a matching vma for huva.
				 */
				if (vma == NULL ||
				    huva < vma->vm_start ||
				    vma->vm_ops != &mksckVMOps) {
					pr_warn("MksckPageDescMap: " \
						"Invalid vma\n");
					goto map_done;
				}
			}

			/*
			 * The monitor tried to send an invalid MPN, bad.
			 */
			if (!pfn_valid(currMPN)) {
				pr_warn("MksckPageDescMap: Invalid MPN %x\n",
					currMPN);
			} else {
				int rc;

				page = pfn_to_page(currMPN);

				/*
				 * Map into the receive window.
				 */
				rc = vm_insert_page(vma, huva, page);
				if (rc) {
					pr_warn("MksckPageDescMap: Failed to " \
						"insert %x at %x, error %d\n",
						currMPN, huva, rc);
					goto map_done;
				}
				ASSERT(iov->iov_len >= PAGE_SIZE);
				iov->iov_base += PAGE_SIZE;
				iov->iov_len  -= PAGE_SIZE;
			}
			payloadLen += PAGE_SIZE;
		}
	}
map_done:
	return payloadLen;
}
/**
 * @brief Check whether the provided msghdr still has room for a receive
 *        operation.
 *
 * @param msg user buffer
 * @return 1 if the msghdr has iovec room left to receive a mapping,
 *         0 otherwise.
 */
static int
MsgHdrHasAvailableRoom(struct msghdr *msg)
{
	struct iovec *vec = msg->msg_iov;
	uint32 count = msg->msg_iovlen;

	while (count > 0 && vec->iov_len == 0) {
		count--;
		vec++;
	}
	return (count != 0);
}
/**
 * Whenever a typed message is received from the monitor, we may choose to
 * store all the page descriptor content in a linked list of descriptors,
 * through the following context structure.
 */
struct MksckPageDescInfo {
	struct MksckPageDescInfo *next;
	uint32 flags;
	uint32 pages;
	uint32 mapCounts;
	Mksck_PageDesc descs[0];
};
static void MksckPageDescSkDestruct(struct sock *sk);
static int
MksckPageDescMMap(struct file *file,
		  struct socket *sock,
		  struct vm_area_struct *vma);
static int
MksckPageDescIoctl(struct socket *sock,
		   unsigned int cmd,
		   unsigned long arg);

/**
 * @brief Delete a page descriptor container socket
 *
 * @param sock user socket structure
 * @return 0 on success, -errno on failure
 */
static int
MksckPageDescRelease(struct socket *sock)
{
	/* This is generic socket release */
	struct sock *sk = sock->sk;

	if (sk) {
		lock_sock(sk);
		sock_orphan(sk);
		release_sock(sk);
		sock_put(sk);
	}
	sock->sk = NULL;
	sock->state = SS_FREE;
	return 0;
}
/**
 * Whenever a typed message is received from the monitor, we may choose to
 * store all the page descriptor content for a future mapping. A context
 * usable by host userland is needed, which means a file descriptor, and as
 * a secure implementation we define a strict set of operations used only
 * for that purpose. This set of operations is reduced to leaving the
 * default "PageDesc(s) accumulating" mode (inside ioctl), mapping the
 * context, and generic socket destruction.
 */
static const struct proto_ops mksckPageDescOps = {
	.family     = AF_MKSCK,
	.owner      = THIS_MODULE,
	.release    = MksckPageDescRelease,
	.bind       = sock_no_bind,
	.connect    = sock_no_connect,
	.socketpair = sock_no_socketpair,
	.accept     = sock_no_accept,
	.getname    = sock_no_getname,
	.poll       = sock_no_poll,
	.ioctl      = MksckPageDescIoctl,
	.listen     = sock_no_listen,
	.shutdown   = sock_no_shutdown,
	.setsockopt = sock_no_setsockopt,
	.getsockopt = sock_no_getsockopt,
	.sendmsg    = sock_no_sendmsg,
	.recvmsg    = sock_no_recvmsg,
	.mmap       = MksckPageDescMMap,
	.sendpage   = sock_no_sendpage,
};
/**
 * @brief Create, or accumulate into, a PageDesc context backed by a file
 *        descriptor.
 *
 * @param sock user socket structure
 * @param msg user buffer to receive the file descriptor as ancillary data
 * @param pd source descriptor part of a message
 * @param pages pages count for this mapping
 *
 * @return error if negative, 0 otherwise
 */
static int
MksckPageDescToFd(struct socket *sock,
		  struct msghdr *msg,
		  Mksck_PageDesc *pd,
		  uint32 pages)
{
	int retval;
	int newfd;
	struct socket *newsock;
	struct sock *newsk;
	struct sock *sk = sock->sk;
	struct MksckPageDescInfo **pmpdi, *mpdi;

	lock_sock(sk);

	/*
	 * The relation between any mk socket and the PageDesc context is as
	 * follows:
	 *
	 * From the mk socket to the PageDesc context:
	 *  - sk->sk_user_data is a WEAK LINK, containing only a file
	 *    descriptor numerical value such that accumulating is keyed
	 *    on it.
	 *
	 * From the PageDesc context to the mk socket:
	 *  - sk->sk_protinfo contains a MksckPageDescInfo struct.
	 *  - sk->sk_user_data is a REF-COUNTED sock_hold() pointer LINK;
	 *    it is rarely dereferenced and is usually only used to check
	 *    that the right socket pair is used. Full dereferencing is
	 *    used only to break the described links.
	 */
	if (sk->sk_user_data) {
		struct MksckPageDescInfo *mpdi2;

		/* Continue any previous on-going mapping, i.e. accumulate */
		newfd = *((int *)sk->sk_user_data);

		/* Promote the weak link */
		newsock = sockfd_lookup(newfd, &retval);
		if (!newsock) {
			retval = -EINVAL;
			goto endProcessingReleaseSock;
		}
		newsk = newsock->sk;
		lock_sock(newsk);
		sockfd_put(newsock);

		if (((struct sock *)newsk->sk_user_data) != sk) {
			/*
			 * One way of getting into this situation would be for
			 * userland to dup the file descriptor just received,
			 * close the original number, and open a new mk socket
			 * in the very same spot. The userland code has many
			 * ways of interacting with the kernel without this
			 * code being notified.
			 */
			retval = -EINVAL;
			release_sock(newsk);
			goto endProcessingReleaseSock;
		}

		mpdi = kmalloc(sizeof(struct MksckPageDescInfo) +
			       pages*sizeof(Mksck_PageDesc), GFP_KERNEL);
		if (!mpdi) {
			retval = -ENOMEM;
			release_sock(newsk);
			goto endProcessingReleaseSock;
		}

		/*
		 * There is no mandatory need for us to notify userland of the
		 * progress in "appending" to the file descriptor, but it would
		 * feel strange if userland had no means to tell whether the
		 * received message was simply thrown away. So, to be
		 * consistent, we fill the ancillary message both when
		 * "creating" and when "appending to" file descriptors.
		 */
		retval = put_cmsg(msg, SOL_DECNET, 0, sizeof(int), &newfd);
		if (retval < 0)
			goto endProcessingKFreeReleaseSock;
		release_sock(sk);

		mpdi2 = (struct MksckPageDescInfo *)newsk->sk_protinfo;
		while (mpdi2->next)
			mpdi2 = mpdi2->next;
		pmpdi = &(mpdi2->next);
	} else {
		/*
		 * Create a new socket, new context and a new file descriptor.
		 */
		retval = sock_create(sk->sk_family, sock->type, 0, &newsock);
		if (retval < 0)
			goto endProcessingReleaseSock;
		newsk = newsock->sk;
		lock_sock(newsk);
		newsk->sk_destruct  = &MksckPageDescSkDestruct;
		newsk->sk_user_data = sk;
		sock_hold(sk); /* Keep a reference to parent mk socket. */
		newsock->ops = &mksckPageDescOps;

		mpdi = kmalloc(sizeof(struct MksckPageDescInfo) +
			       pages*sizeof(Mksck_PageDesc), GFP_KERNEL);
		if (!mpdi) {
			retval = -ENOMEM;
			goto endProcessingFreeNewSock;
		}

		sk->sk_user_data = sock_kmalloc(sk, sizeof(int), GFP_KERNEL);
		if (sk->sk_user_data == NULL) {
			retval = -ENOMEM;
			goto endProcessingKFreeAndNewSock;
		}

		/*
		 * Mapping to a file descriptor may fail if a thread is closing
		 * in parallel of sock_map_fd/sock_alloc_fd, or kernel memory
		 * is full.
		 */
		newfd = sock_map_fd(newsock, O_CLOEXEC);
		if (newfd < 0) {
			retval = newfd;
			sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
			sk->sk_user_data = NULL;
			goto endProcessingKFreeAndNewSock;
		}

		/*
		 * Notify userland of the new file descriptor, like AF_UNIX
		 * ancillary data.
		 */
		retval = put_cmsg(msg, SOL_DECNET, 0, sizeof(int), &newfd);
		if (retval < 0) {
			sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
			sk->sk_user_data = NULL;
			kfree(mpdi);
			release_sock(newsk);
			sockfd_put(newsock);
			sock_release(newsock);
			put_unused_fd(newfd);
			goto endProcessingReleaseSock;
		}
		*(int *)sk->sk_user_data = newfd;
		release_sock(sk);

		pmpdi = (struct MksckPageDescInfo **)(&(newsk->sk_protinfo));
	}
	mpdi->next      = NULL;
	mpdi->flags     = 0;
	mpdi->mapCounts = 0;
	mpdi->pages     = pages;
	memcpy(mpdi->descs, pd, pages*sizeof(Mksck_PageDesc));
	*pmpdi = mpdi; /* link */
	release_sock(newsk);

	/*
	 * Increment all reference counters for the pages.
	 */
	MksckPageDescManage(pd, pages, MANAGE_INCREMENT);
	return 0;

endProcessingKFreeAndNewSock:
	kfree(mpdi);
endProcessingFreeNewSock:
	release_sock(newsk);
	sock_release(newsock);
	release_sock(sk);
	return retval;

endProcessingKFreeReleaseSock:
	kfree(mpdi);
	release_sock(newsk);
endProcessingReleaseSock:
	release_sock(sk);
	return retval;
}
/**
 * @brief Callback at socket destruction
 *
 * @param sk pointer to kernel socket structure
 */
static void
MksckPageDescSkDestruct(struct sock *sk)
{
	struct sock *mkSk = NULL;
	struct MksckPageDescInfo *mpdi;

	lock_sock(sk);
	mpdi = sk->sk_protinfo;
	while (mpdi) {
		struct MksckPageDescInfo *next = mpdi->next;

		MksckPageDescManage(mpdi->descs, mpdi->pages,
				    MANAGE_DECREMENT);
		kfree(mpdi);
		mpdi = next;
	}
	if (sk->sk_user_data) {
		mkSk = (struct sock *)sk->sk_user_data;
		sk->sk_user_data = NULL;
	}
	sk->sk_protinfo = NULL;
	release_sock(sk);

	/*
	 * Clean the mksck socket that we are holding.
	 */
	if (mkSk) {
		lock_sock(mkSk);
		sock_kfree_s(mkSk, mkSk->sk_user_data, sizeof(int));
		mkSk->sk_user_data = NULL;
		release_sock(mkSk);
		sock_put(mkSk); /* reverse of sock_hold() */
	}
}
/**
 * @brief The mmap operation of the PageDesc context file descriptor.
 *
 * The mmap command is used to mmap any detached (i.e. no longer
 * accumulating) PageDesc context, full of the content from its parent
 * communication mk socket. Mapping may be allowed only a specified number
 * of times, after which the PageDesc context becomes useless (a security
 * restriction).
 *
 * Also note that mapping from an offset different from zero is considered
 * an invalid userland operation.
 *
 * @param file user file structure
 * @param sock user socket structure
 * @param vma virtual memory area structure
 *
 * @return error code, 0 on success
 */
static int
MksckPageDescMMap(struct file *file,
		  struct socket *sock,
		  struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct MksckPageDescInfo *mpdi;
	struct iovec iov;
	unsigned long vm_flags;
	int freed = 0;

	iov.iov_base = (void *)vma->vm_start;
	iov.iov_len  = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	mpdi = sk->sk_protinfo;

	/*
	 * vma->vm_pgoff is checked, since offsetting the map is not supported.
	 */
	if (!mpdi || sk->sk_user_data || vma->vm_pgoff) {
		release_sock(sk);
		pr_info("MMAP failed for virt %lx size %lx\n",
			vma->vm_start, vma->vm_end - vma->vm_start);
		return -EINVAL;
	}

	vm_flags = mpdi->flags;
	if ((vma->vm_flags & ~vm_flags) & (VM_READ|VM_WRITE)) {
		release_sock(sk);
		return -EACCES;
	}

	while (mpdi) {
		struct MksckPageDescInfo *next = mpdi->next;

		MksckPageDescMap(mpdi->descs, mpdi->pages, &iov, 1, vma);
		if (mpdi->mapCounts && !--mpdi->mapCounts) {
			MksckPageDescManage(mpdi->descs, mpdi->pages,
					    MANAGE_DECREMENT);
			kfree(mpdi);
			freed = 1;
		}
		mpdi = next;
	}
	if (freed)
		sk->sk_protinfo = NULL;
	vma->vm_ops = &mksckVMOps;
	release_sock(sk);
	return 0;
}
/**
 * @brief The ioctl operation of the PageDesc context file descriptor.
 *
 * The ioctl MKSCK_DETACH command is used to detach the PageDesc context
 * from its parent communication mk socket. Once done, the context
 * is able to remap the transferred PageDesc(s) of typed messages
 * accumulated into the context.
 *
 * @param sock user socket structure
 * @param cmd select which cmd function needs to be performed
 * @param arg argument for command
 *
 * @return error code, 0 on success
 */
static int
MksckPageDescIoctl(struct socket *sock,
		   unsigned int cmd,
		   unsigned long arg)
{
	struct sock *mksck = NULL;
	struct sock *sk = sock->sk;
	struct MksckPageDescInfo *mpdi;
	unsigned long ul[2];
	int retval = 0;

	switch (cmd) {
	/*
	 * ioctl MKSCK_DETACH (in and out):
	 * Detach, compute size and define allowed protection access rights
	 *
	 * [in]:  unsigned long flags, similar to prot argument of mmap()
	 *        unsigned long number of available further mappings,
	 *        with 0 meaning unlimited number of mappings
	 * [out]: unsigned long size of the available mappable area
	 */
	case MKSCK_DETACH:
		lock_sock(sk);
		mpdi = sk->sk_protinfo;
		/*
		 * Read unsigned long argument that contains the mmap
		 * alike flags.
		 */
		if (copy_from_user(ul, (void *)arg, sizeof(ul))) {
			retval = -EFAULT;
		/*
		 * Check that the file descriptor has a parent
		 * and some context there.
		 */
		} else if (!mpdi || !sk->sk_user_data) {
			retval = -EINVAL;
		} else {
			/*
			 * Compute mapping protection bits from argument
			 * and size of the mapping, that is also given
			 * back to userland as unsigned long.
			 */
			uint32 flags = calc_vm_prot_bits(ul[0]);

			ul[0] = 0;
			while (mpdi) {
				struct MksckPageDescInfo *next = mpdi->next;

				ul[0] += MksckPageDescManage(mpdi->descs,
							     mpdi->pages,
							     MANAGE_COUNT);
				mpdi->mapCounts = ul[1];
				mpdi = next;
			}
			if (copy_to_user((void *)arg, ul, sizeof(ul[0]))) {
				retval = -EFAULT;
			} else {
				mpdi = sk->sk_protinfo;
				mpdi->flags = flags;
				mksck = (struct sock *)sk->sk_user_data;
				sk->sk_user_data = NULL;
			}
		}
		release_sock(sk);

		/*
		 * Clean the mksck socket that we are holding.
		 */
		sk = mksck;
		if (sk) {
			lock_sock(sk);
			sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
			sk->sk_user_data = NULL;
			release_sock(sk);
			sock_put(sk);
		}
		break;
	default:
		retval = -EINVAL;
		break;
	}
	return retval;
}
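
/*
 * Hedged userland sketch of the detach-then-map sequence (pagefd is the
 * descriptor received as ancillary data; names are illustrative). The
 * two unsigned longs match the [in] layout documented above, and the
 * mmap offset must be zero:
 *
 *    unsigned long ul[2] = { PROT_READ, 1 };   // prot bits, 1 mapping
 *    if (ioctl(pagefd, MKSCK_DETACH, ul) == 0) {
 *       void *base = mmap(NULL, ul[0], PROT_READ, MAP_SHARED,
 *                         pagefd, 0);          // ul[0] = mappable size
 *       ...
 *    }
 */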
  1315. /**
  1316. * @brief VMX receiving a packet from VMM.
  1317. *
  1318. * @param kiocb kernel io control block (unused)
  1319. * @param sock user socket structure
  1320. * @param msg user buffer to receive the packet
  1321. * @param len size of the user buffer
  1322. * @param flags flags
  1323. *
  1324. * @return -errno on failure, else length of untyped portion + total number
  1325. * of bytes mapped for typed portion.
  1326. */
  1327. static int
  1328. MksckDgramRecvMsg(struct kiocb *kiocb,
  1329. struct socket *sock,
  1330. struct msghdr *msg,
  1331. size_t len,
  1332. int flags)
  1333. {
  1334. int err = 0;
  1335. struct sock *sk = sock->sk;
  1336. Mksck *mksck;
  1337. Mksck_Datagram *dg;
  1338. struct sockaddr_mk *fromAddr;
  1339. uint32 read;
  1340. struct iovec *iov;
  1341. size_t payloadLen, untypedLen;
  1342. uint32 iovCount;
  1343. if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
  1344. return -EOPNOTSUPP;
  1345. if ((msg->msg_name != NULL) && (msg->msg_namelen < sizeof(*fromAddr)))
  1346. return -EINVAL;
  1347. lock_sock(sk);
  1348. err = MksckTryBind(sk);
  1349. if (err) {
  1350. release_sock(sk);
  1351. return err;
  1352. }
  1353. mksck = sk->sk_protinfo;
  1354. /*
  1355. * To avoid mksck disappearing right after the release_sock the
  1356. * refcount needs to be incremented. For more details read the
  1357. * block comment on locking in MksckCreate.
  1358. */
  1359. ATOMIC_ADDV(mksck->refCount, 1);
  1360. release_sock(sk);
  1361. /*
  1362. * Get pointer to next packet in ring to be dequeued.
  1363. */
  1364. while (1) {
  1365. /*
  1366. * Wait to make sure this is the only thread trying to access
  1367. * the socket.
  1368. */
  1369. err = Mutex_Lock(&mksck->mutex, MutexModeEX);
  1370. if (err < 0)
  1371. goto decRefc;
  1372. /*
  1373. * See if packet in ring.
  1374. */
  1375. read = mksck->read;
  1376. if (read != mksck->write)
  1377. break;
  1378. /*
  1379. * Nothing there, if user wants us not to block then just
  1380. * return EAGAIN.
  1381. */
  1382. if (flags & MSG_DONTWAIT) {
  1383. Mutex_Unlock(&mksck->mutex, MutexModeEX);
  1384. err = -EAGAIN;
  1385. goto decRefc;
  1386. }
  1387. /*
  1388. * Nothing there, unlock socket and wait for data.
  1389. */
  1390. mksck->foundEmpty++;
  1391. err = Mutex_UnlSleep(&mksck->mutex, MutexModeEX,
  1392. MKSCK_CVAR_FILL);
  1393. if (err < 0) {
  1394. PRINTK("MksckDgramRecvMsg: aborted\n");
  1395. goto decRefc;
  1396. }
  1397. }
  1398. /*
  1399. * Point to packet in ring.
  1400. */
  1401. dg = (void *)&mksck->buff[read];
  1402. /*
  1403. * Provide the address of the sender.
  1404. */
  1405. if (msg->msg_name != NULL) {
  1406. fromAddr = (void *)msg->msg_name;
  1407. fromAddr->mk_addr = dg->fromAddr;
  1408. fromAddr->mk_family = AF_MKSCK;
  1409. msg->msg_namelen = sizeof(*fromAddr);
  1410. } else {
  1411. msg->msg_namelen = 0;
  1412. }
  1413. /*
  1414. * Copy data from ring buffer to caller's buffer and remove packet from
  1415. * ring buffer.
  1416. */
  1417. iov = msg->msg_iov;
  1418. iovCount = msg->msg_iovlen;
  1419. untypedLen = dg->len - dg->pages * sizeof(Mksck_PageDesc) - dg->pad;
  1420. payloadLen = untypedLen;
  1421. /*
  1422. * Handle the untyped portion of the message.
  1423. */
  1424. if (untypedLen <= len) {
  1425. err = memcpy_toiovec(iov, dg->data, untypedLen);
  1426. if (err < 0) {
  1427. pr_warn("MksckDgramRecvMsg: Failed to " \
  1428. "memcpy_to_iovec untyped message component " \
  1429. "(buf len %d datagram len %d (untyped %d))\n",
  1430. len, dg->len, untypedLen);
  1431. }
  1432. } else {
  1433. err = -EINVAL;
  1434. }

	/*
	 * Map in the typed descriptor.
	 */
	if (err >= 0 && dg->pages > 0) {
		Mksck_PageDesc *pd =
			(Mksck_PageDesc *)(dg->data + untypedLen + dg->pad);

		/*
		 * There are 3 ways of receiving typed messages from the
		 * monitor:
		 * - The typed message is mapped directly into a VMA.
		 *   To indicate this the userland sets msg_controllen == 0.
		 * - The typed message is mapped directly into a VMA and a
		 *   file descriptor is created for further mappings on the
		 *   host (in the same userland address space or an alternate
		 *   userland address space). In this case msg_controllen
		 *   should be set to sizeof(fd).
		 * - The typed message is not mapped directly into a VMA, but
		 *   a file descriptor is created for later mapping on the
		 *   host. In this case msg_controllen should be set to
		 *   sizeof(fd) and the supplied iovec shall not specify a
		 *   receive window.
		 */
		if (msg->msg_controllen > 0)
			err = MksckPageDescToFd(sock, msg, pd, dg->pages);

		if ((msg->msg_controllen <= 0) ||
		    (err != 0) ||
		    (MsgHdrHasAvailableRoom(msg) != 0)) {
			/*
			 * Lock for a change of mapping.
			 */
			down_write(&current->mm->mmap_sem);
			payloadLen += MksckPageDescMap(pd, dg->pages,
						       iov, iovCount, NULL);
			up_write(&current->mm->mmap_sem);
		}
	}

	/*
	 * Now that the packet is removed, it is safe to unlock the socket so
	 * another thread can do a recv(). We also want to wake someone
	 * waiting for room to insert a new packet.
	 */
	if ((err >= 0) && Mksck_IncReadIndex(mksck, read, dg))
		Mutex_UnlWake(&mksck->mutex, MutexModeEX,
			      MKSCK_CVAR_ROOM, true);
	else
		Mutex_Unlock(&mksck->mutex, MutexModeEX);

	/*
	 * If memcpy error, return error status.
	 * Otherwise, return number of bytes copied.
	 */
	if (err >= 0)
		err = payloadLen;

decRefc:
	Mksck_DecRefc(mksck);
	return err;
}
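
/*
 * Illustrative userland sketch (an assumption, not part of this module) of
 * the three typed-message receive modes described in MksckDgramRecvMsg
 * above. Only the msg_controllen == 0 vs. sizeof(int) distinction is implied
 * by the code; the exact control-message layout returned for the fd is
 * hypothetical.
 *
 *	char buf[256];
 *	int fd = -1;
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = {
 *		.msg_iov        = &iov,
 *		.msg_iovlen     = 1,
 *		.msg_control    = &fd,
 *		.msg_controllen = sizeof(fd),	// 0 => map into a VMA only
 *	};
 *	ssize_t n = recvmsg(sockFd, &msg, 0);	// sockFd: an AF_MKSCK socket
 */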

/**
 * @brief VMX sending a packet to VMM.
 *
 * @param kiocb kernel io control block
 * @param sock  user socket structure
 * @param msg   packet to be transmitted
 * @param len   length of the packet
 *
 * @return length of the sent msg on success, -errno on failure
 */
static int
MksckDgramSendMsg(struct kiocb *kiocb,
		  struct socket *sock,
		  struct msghdr *msg,
		  size_t len)
{
	int err = 0;
	struct sock *sk = sock->sk;
	Mksck *peerMksck;
	Mksck_Datagram *dg;
	uint32 needed;
	uint32 write;
	Mksck_Address fromAddr;

	if (msg->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	if (len > MKSCK_XFER_MAX)
		return -EMSGSIZE;

	/*
	 * In the next locked section the peerMksck pointer needs to be set
	 * and its refcount needs to be incremented.
	 */
	lock_sock(sk);

	do {
		Mksck *mksck;
		Mksck_Address peerAddr = {
			.addr = (msg->msg_name ?
				 ((struct sockaddr_mk *)msg->msg_name)->mk_addr.addr :
				 MKSCK_ADDR_UNDEF)
		};

		err = MksckTryBind(sk);
		if (err)
			break;
		mksck = sk->sk_protinfo;

		fromAddr = mksck->addr;

		/*
		 * If the socket is connected, use that address (no sendto for
		 * connected sockets). Else, use the provided address if any.
		 */
		peerMksck = mksck->peer;
		if (peerMksck) {
			if (peerAddr.addr != MKSCK_ADDR_UNDEF &&
			    peerAddr.addr != mksck->peerAddr.addr) {
				err = -EISCONN;
				break;
			}

			/*
			 * To avoid mksckPeer disappearing right after the
			 * release_sock the refcount needs to be incremented.
			 * For more details read the block comment on locking
			 * in MksckCreate.
			 */
			ATOMIC_ADDV(peerMksck->refCount, 1);
		} else if (peerAddr.addr == MKSCK_ADDR_UNDEF) {
			err = -ENOTCONN;
		} else {
			/*
			 * LockPeer also increments the refc on the peer.
			 */
			err = LockPeer(peerAddr, &peerMksck);
		}
	} while (0);

	release_sock(sk);

	if (err)
		return err;

	/*
	 * Get pointer to sufficient empty space in ring buffer.
	 */
	needed = MKSCK_DGSIZE(len);

	while (1) {
		/*
		 * Wait to make sure this is the only thread trying to write
		 * to ring.
		 */
		err = Mutex_Lock(&peerMksck->mutex, MutexModeEX);
		if (err < 0)
			goto decRefc;

		/*
		 * Check if socket can receive data.
		 */
		if (peerMksck->shutDown & MKSCK_SHUT_RD) {
			err = -ENOTCONN;
			goto unlockDecRefc;
		}

		/*
		 * See if there is room for the packet.
		 */
		write = Mksck_FindSendRoom(peerMksck, needed);
		if (write != MKSCK_FINDSENDROOM_FULL)
			break;

		/*
		 * No room, unlock socket and maybe wait for room.
		 */
		if (msg->msg_flags & MSG_DONTWAIT) {
			err = -EAGAIN;
			goto unlockDecRefc;
		}
		peerMksck->foundFull++;
		err = Mutex_UnlSleep(&peerMksck->mutex, MutexModeEX,
				     MKSCK_CVAR_ROOM);
		if (err < 0) {
			PRINTK("MksckDgramSendMsg: aborted\n");
			goto decRefc;
		}
	}

	/*
	 * Point to room in ring and fill in message.
	 */
	dg = (void *)&peerMksck->buff[write];

	dg->fromAddr = fromAddr;
	dg->len = len;
	err = memcpy_fromiovec(dg->data, msg->msg_iov, len);
	if (err != 0)
		goto unlockDecRefc;

	/*
	 * Increment past message.
	 */
	Mksck_IncWriteIndex(peerMksck, write, needed);

	/*
	 * Unlock the socket and wake someone trying to receive, i.e., we
	 * filled in a message.
	 */
	Mutex_UnlWake(&peerMksck->mutex, MutexModeEX, MKSCK_CVAR_FILL, false);

	/*
	 * Maybe the guest is in a general 'wait for interrupt' wait or
	 * grinding away executing guest instructions.
	 *
	 * If it has a receive callback armed for the socket and is waiting
	 * for a message, just wake it up. Else send an IPI to the CPU
	 * running the guest so it will interrupt whatever it is doing and
	 * read the message.
	 *
	 * Holding the mksckPage->mutex prevents mksckPage->vmHKVA from
	 * being cleared under us.
	 */
	if (peerMksck->rcvCBEntryMVA != 0) {
		MksckPage *peerMksckPage = Mksck_ToSharedPage(peerMksck);

		err = Mutex_Lock(&peerMksckPage->mutex, MutexModeSH);
		if (err == 0) {
			uint32 sockIdx = peerMksck->index;
			struct MvpkmVM *vm =
				(struct MvpkmVM *)peerMksckPage->vmHKVA;

			/*
			 * The destruction of vm and wsp is blocked by the
			 * mksckPage->mutex.
			 */
			if (vm) {
				WorldSwitchPage *wsp = vm->wsp;

				ASSERT(sockIdx <
				       8 * sizeof(peerMksckPage->wakeVMMRecv));
				ATOMIC_ORV(peerMksckPage->wakeVMMRecv,
					   1U << sockIdx);

				if (wsp)
					Mvpkm_WakeGuest(vm, ACTION_MKSCK);
			}
			Mutex_Unlock(&peerMksckPage->mutex, MutexModeSH);
		}
	}

	/*
	 * If all are happy tell the caller the number of transferred bytes.
	 */
	if (!err)
		err = len;

	/*
	 * Now that we are done with the target socket, allow it to be freed.
	 */
decRefc:
	Mksck_DecRefc(peerMksck);
	return err;

unlockDecRefc:
	Mutex_Unlock(&peerMksck->mutex, MutexModeEX);
	goto decRefc;
}
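
/*
 * Minimal userland sketch (an assumption, not part of this module) of
 * sending a datagram to a peer mksck address with the sockaddr_mk layout
 * used above; 'peerAddrValue' is a hypothetical Mksck_Address value.
 *
 *	struct sockaddr_mk to = { .mk_family = AF_MKSCK };
 *	to.mk_addr.addr = peerAddrValue;
 *	if (sendto(sockFd, data, len, 0,
 *		   (struct sockaddr *)&to, sizeof(to)) < 0)
 *		perror("sendto");	// e.g. EMSGSIZE if len > MKSCK_XFER_MAX
 */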

/**
 * @brief Page fault handler for receive windows. Since the host process
 *        should not be faulting in this region and only be accessing
 *        memory that has been established via a typed message transfer,
 *        we always signal the fault back to the process.
 */
static int
MksckFault(struct vm_area_struct *vma,
	   struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

/**
 * @brief Establish a region in the host process suitable for use as a
 *        receive window.
 *
 * @param file file reference (ignored).
 * @param sock user socket structure.
 * @param vma  Linux virtual memory area defining the region.
 *
 * @return 0 on success, otherwise error code.
 */
static int
MksckMMap(struct file *file,
	  struct socket *sock,
	  struct vm_area_struct *vma)
{
	/*
	 * All the hard work is done in MksckDgramRecvMsg. Here we simply mark
	 * the vma as belonging to Mksck.
	 */
	vma->vm_ops = &mksckVMOps;
	return 0;
}

/**
 * @brief This gets called after returning from the monitor.
 *        Since the monitor doesn't directly wake VMX threads when it sends
 *        something to VMX (for efficiency), this routine checks for the
 *        omitted wakes and does them.
 * @param mksckPage some shared page that the monitor writes packets to,
 *        i.e., a host shared page
 */
void
Mksck_WakeBlockedSockets(MksckPage *mksckPage)
{
	Mksck *mksck;
	uint32 i, wakeHostRecv;

	wakeHostRecv = mksckPage->wakeHostRecv;
	if (wakeHostRecv != 0) {
		mksckPage->wakeHostRecv = 0;

		for (i = 0; wakeHostRecv != 0; i++) {
			if (wakeHostRecv & 1) {
				mksck = &mksckPage->sockets[i];
				Mutex_CondSig(&mksck->mutex,
					      MKSCK_CVAR_FILL, true);
			}
			wakeHostRecv >>= 1;
		}
	}
}
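
/*
 * Worked example of the bitmask walk above (illustrative): wakeHostRecv ==
 * 0x5 (binary 0101) signals sockets[0] and sockets[2]; bit i corresponds to
 * mksckPage->sockets[i], and the loop shifts the mask right once per index
 * until no set bits remain.
 */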

/**
 * @brief allocate and initialize a shared page.
 * @return pointer to shared page.<br>
 *         NULL on error
 */
MksckPage *
MksckPageAlloc(void)
{
	uint32 jj;

	/*
	 * Ask for pages in the virtual kernel space. There is no
	 * requirement to be physically contiguous.
	 */
	MksckPage *mksckPage = vmalloc(MKSCKPAGE_SIZE);

	if (mksckPage) {
		/*
		 * Initialize its contents. Start refCount at 1 and decrement
		 * it when the worldswitch or VM page gets freed.
		 */
		memset(mksckPage, 0, MKSCKPAGE_SIZE);
		ATOMIC_SETV(mksckPage->refCount, 1);
		mksckPage->portStore = MKSCK_PORT_HIGH;

		Mutex_Init(&mksckPage->mutex);
		for (jj = 0; jj < MKSCK_SOCKETS_PER_PAGE; jj++)
			Mutex_Init(&mksckPage->sockets[jj].mutex);
	}
	return mksckPage;
}

/**
 * @brief Release the allocated pages.
 * @param mksckPage the address of the mksckPage to be released
 */
static void
MksckPageRelease(MksckPage *mksckPage)
{
	int ii;

	for (ii = 0; ii < MKSCK_SOCKETS_PER_PAGE; ii++)
		Mutex_Destroy(&mksckPage->sockets[ii].mutex);
	Mutex_Destroy(&mksckPage->mutex);
	vfree(mksckPage);
}

/**
 * @brief Using the tgid, locate the vmId of this process.
 *        Assumes that mksckPageListLock is held.
 * @return the vmId if the page is already allocated,
 *         the first vacant vmId if not yet allocated,<br>
 *         MKSCK_VMID_UNDEF if no slot is vacant
 */
static inline Mksck_VmId
GetHostVmId(void)
{
	uint32 jj;
	Mksck_VmId vmId, vmIdFirstVacant = MKSCK_VMID_UNDEF;
	MksckPage *mksckPage;
	uint32 tgid = task_tgid_vnr(current);

	/*
	 * Assign a unique vmId to the shared page. Start the search from
	 * the vmId that is the result of hashing tgid to 15 bits. As a
	 * used page with a given vmId can occupy only a given slot in the
	 * mksckPages array, it is enough to search through the
	 * MKSCK_MAX_SHARES slots for a vacancy.
	 */
	for (jj = 0, vmId = MKSCK_TGID2VMID(tgid);
	     jj < MKSCK_MAX_SHARES;
	     jj++, vmId++) {
		if (vmId > MKSCK_VMID_HIGH)
			vmId = 0;
		mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
		if (mksckPage) {
			if (mksckPage->tgid == tgid &&
			    !mksckPage->isGuest)
				return mksckPage->vmId;
		} else if (vmIdFirstVacant == MKSCK_VMID_UNDEF) {
			vmIdFirstVacant = vmId;
		}
	}
	return vmIdFirstVacant;
}
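
/*
 * Illustrative probe sequence (assuming MKSCK_TGID2VMID hashes a tgid into
 * the vmId space): if tgid T hashes to vmId N and slot N holds a page for a
 * different tgid, the loop tries N+1, N+2, ... wrapping to 0 past
 * MKSCK_VMID_HIGH, visiting at most MKSCK_MAX_SHARES slots, and remembers
 * the first vacant slot in case T has no page yet.
 */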

/**
 * @brief Locate the first empty slot.
 *        Assumes that mksckPageListLock is held.
 * @return the first vacant vmId,<br>
 *         MKSCK_VMID_UNDEF if no slot is vacant
 */
static inline Mksck_VmId
GetNewGuestVmId(void)
{
	Mksck_VmId vmId;

	for (vmId = 0; vmId < MKSCK_MAX_SHARES; vmId++) {
		if (!mksckPages[MKSCK_VMID2IDX(vmId)])
			return vmId;
	}
	return MKSCK_VMID_UNDEF;
}

/**
 * @brief Find the shared page for a given idx. The page referred to by the
 *        idx should exist and be locked by the caller.
 * @param idx index of the page in the array
 * @return pointer to shared page
 */
MksckPage *
MksckPage_GetFromIdx(uint32 idx)
{
	MksckPage *mksckPage;

	ASSERT(idx < MKSCK_MAX_SHARES);
	mksckPage = mksckPages[idx];
	ASSERT(mksckPage);
	ASSERT(ATOMIC_GETO(mksckPage->refCount));
	return mksckPage;
}

/**
 * @brief find shared page for a given vmId.
 *        The vmId should exist and be locked by the caller.
 * @param vmId vmId to look for, either a host vmId or a guest vmId
 * @return pointer to shared page
 */
MksckPage *
MksckPage_GetFromVmId(Mksck_VmId vmId)
{
	MksckPage *mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];

	ASSERT(mksckPage);
	ASSERT(mksckPage->vmId == vmId);
	ASSERT(ATOMIC_GETO(mksckPage->refCount));
	return mksckPage;
}

/**
 * @brief find shared page for a given vmId
 * @param vmId vmId to look for, either a host vmId or a guest vmId
 * @return NULL: no such shared page exists<br>
 *         else: pointer to shared page.
 *               Call Mksck_DecRefc() when done with pointer
 */
MksckPage *
MksckPage_GetFromVmIdIncRefc(Mksck_VmId vmId)
{
	MksckPage *mksckPage;

	spin_lock(&mksckPageListLock);
	mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];

	if (!mksckPage || (mksckPage->vmId != vmId)) {
		pr_info("MksckPage_GetFromVmIdIncRefc: vmId %04X not found\n",
			vmId);
		mksckPage = NULL;
	} else {
		ATOMIC_ADDV(mksckPage->refCount, 1);
	}
	spin_unlock(&mksckPageListLock);
	return mksckPage;
}

/**
 * @brief find or allocate shared page using tgid
 * @return NULL: no such shared page exists<br>
 *         else: pointer to shared page.
 *               Call Mksck_DecRefc() when done with pointer
 */
MksckPage *
MksckPage_GetFromTgidIncRefc(void)
{
	MksckPage *mksckPage;
	Mksck_VmId vmId;

	while (1) {
		spin_lock(&mksckPageListLock);

		vmId = GetHostVmId();

		if (vmId == MKSCK_VMID_UNDEF) {
			/*
			 * No vmId has been allocated yet and there is no
			 * free slot.
			 */
			spin_unlock(&mksckPageListLock);
			return NULL;
		}

		mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
		if (mksckPage != NULL) {
			/*
			 * There is a vmId already allocated, increment the
			 * ref count on it.
			 */
			ATOMIC_ADDV(mksckPage->refCount, 1);
			spin_unlock(&mksckPageListLock);
			return mksckPage;
		}

		/*
		 * Have to release spinlock to allocate a new page.
		 */
		spin_unlock(&mksckPageListLock);
		mksckPage = MksckPageAlloc();
		if (mksckPage == NULL)
			return NULL;

		/*
		 * Re-lock and make sure no one else allocated while unlocked.
		 * If someone else did allocate, free ours and use theirs.
		 */
		spin_lock(&mksckPageListLock);
		vmId = GetHostVmId();
		if ((vmId != MKSCK_VMID_UNDEF) &&
		    (mksckPages[MKSCK_VMID2IDX(vmId)] == NULL))
			break;

		spin_unlock(&mksckPageListLock);
		MksckPageRelease(mksckPage);
	}

	/*
	 * This is a successful new allocation. Insert it into the table
	 * and initialize the fields.
	 */
	mksckPages[MKSCK_VMID2IDX(vmId)] = mksckPage;
	mksckPage->vmId = vmId;
	mksckPage->isGuest = false;
	mksckPage->vmHKVA = 0;
	mksckPage->tgid = task_tgid_vnr(current);
	pr_warn("New host mksck page is allocated: idx %x, vmId %x, tgid %d\n",
		MKSCK_VMID2IDX(vmId), vmId, mksckPage->tgid);

	spin_unlock(&mksckPageListLock);
	return mksckPage;
}

/**
 * @brief Initialize the VMX provided wsp. Allocate communication page.
 * @param vm which virtual machine we're running
 * @return 0 if all OK, error value otherwise
 */
int
Mksck_WspInitialize(struct MvpkmVM *vm)
{
	WorldSwitchPage *wsp = vm->wsp;
	int err;
	Mksck_VmId vmId;
	MksckPage *mksckPage;

	if (wsp->guestId)
		return -EBUSY;

	mksckPage = MksckPageAlloc();
	if (!mksckPage)
		return -ENOMEM;

	spin_lock(&mksckPageListLock);

	vmId = GetNewGuestVmId();

	if (vmId == MKSCK_VMID_UNDEF) {
		err = -EMFILE;
		MksckPageRelease(mksckPage);
		pr_err("Mksck_WspInitialize: Cannot allocate vmId\n");
	} else {
		/*
		 * Now that the mksckPage is all initialized, let others
		 * see it.
		 */
		mksckPages[MKSCK_VMID2IDX(vmId)] = mksckPage;
		mksckPage->vmId = vmId;
		mksckPage->isGuest = true;
		mksckPage->vmHKVA = (HKVA)vm;
		/* mksckPage->tgid is undefined when isGuest is true */

		wsp->guestId = vmId;

		pr_warn("New guest mksck page is allocated: idx %x, vmId %x\n",
			MKSCK_VMID2IDX(vmId), vmId);
		err = 0;
	}

	/*
	 * All stable, i.e., mksckPages[] written, ok to unlock now.
	 * (Unlock on both paths; the error path must not hold the lock.)
	 */
	spin_unlock(&mksckPageListLock);
	return err;
}

/**
 * @brief Release the wsp. Clean up after the monitor. Free the
 *        associated communication page.
 * @param wsp which worldswitch page (VCPU)
 */
void
Mksck_WspRelease(WorldSwitchPage *wsp)
{
	int ii;
	int err;
	MksckPage *mksckPage = MksckPage_GetFromVmId(wsp->guestId);

	/*
	 * The worldswitch page for a particular VCPU is about to be freed,
	 * so we know the monitor will never execute again. But the monitor
	 * most likely left some sockets open. Those may have outbound
	 * connections to host sockets that we must close.
	 *
	 * Loop through all possibly open sockets.
	 */
	uint32 isOpened = wsp->isOpened;
	Mksck *mksck = mksckPage->sockets;

	while (isOpened) {
		if (isOpened & 1) {
			ASSERT(ATOMIC_GETO(mksck->refCount) != 0);

			/*
			 * The socket may be connected to a peer (host)
			 * socket, so we have to decrement that target
			 * socket's reference count.
			 * Unfortunately, Mksck_DisconnectPeer(mksck) cannot
			 * be called as mksck->peer is an mva, not an hkva.
			 * Translate the address first.
			 */
			if (mksck->peer) {
				MksckPage *mksckPagePeer =
					MksckPage_GetFromVmId(mksck->peerAddr.vmId);
				ASSERT(mksckPagePeer);
				mksck->peer =
					MksckPage_GetFromAddr(mksckPagePeer,
							      mksck->peerAddr);
				ASSERT(mksck->peer);
				/* mksck->peer is now a hkva */
			}
			Mksck_CloseCommon(mksck);
		}
		isOpened >>= 1;
		mksck++;
	}

	/*
	 * A host socket may be in the process of sending to the guest. It
	 * will attempt to wake up the guest using mksckPage->vmHKVA and
	 * mksckPage->vmHKVA->wsp. To ensure that the vm and wsp structures
	 * are not disappearing from under the sending thread we lock the
	 * page here.
	 */
	err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
	ASSERT(!err);
	mksckPage->vmHKVA = 0;
	Mutex_Unlock(&mksckPage->mutex, MutexModeEX);

	/*
	 * Decrement refcount set by MksckPageAlloc() call in
	 * Mksck_WspInitialize().
	 */
	MksckPage_DecRefc(mksckPage);

	/*
	 * Decrement refcount set by VMM:Mksck_Init() referring to the local
	 * variable guestMksckPage.
	 */
	if (wsp->guestPageMapped) {
		wsp->guestPageMapped = false;
		MksckPage_DecRefc(mksckPage);
	}

	/*
	 * Another task is to decrement the reference count on the mksck
	 * pages the monitor accessed. Those pages are listed in the
	 * wsp->isPageMapped list. They were locked by the monitor
	 * calling WSCALL_GET_PAGE_FROM_VMID.
	 */
	for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
		if (wsp->isPageMapped[ii]) {
			MksckPage *mksckPageOther = MksckPage_GetFromIdx(ii);

			wsp->isPageMapped[ii] = false;
			MksckPage_DecRefc(mksckPageOther);
		}
	}
}

/**
 * @brief disconnect from peer by decrementing
 *        peer socket's reference count and clearing the pointer.
 * @param mksck local socket to check for connection
 */
void
Mksck_DisconnectPeer(Mksck *mksck)
{
	Mksck *peerMksck = mksck->peer;

	if (peerMksck != NULL) {
		mksck->peer = NULL;
		mksck->peerAddr.addr = MKSCK_ADDR_UNDEF;

		Mksck_DecRefc(peerMksck);
	}
}

/**
 * @brief decrement shared page reference count, free page if it goes zero.
 *        Also do a DMB first to make sure all activity on the struct is
 *        finished before decrementing the ref count.
 * @param mksckPage shared page
 */
void
MksckPage_DecRefc(struct MksckPage *mksckPage)
{
	uint32 oldRefc;

	DMB();
	do {
		while ((oldRefc = ATOMIC_GETO(mksckPage->refCount)) == 1) {
			/*
			 * Find corresponding entry in list of known shared
			 * pages and clear it so we can't open any new sockets
			 * on this shared page, thus preventing its refCount
			 * from being incremented.
			 */
			spin_lock(&mksckPageListLock);
			if (ATOMIC_SETIF(mksckPage->refCount, 0, 1)) {
				uint32 ii = MKSCK_VMID2IDX(mksckPage->vmId);

				ASSERT(ii < MKSCK_MAX_SHARES);
				ASSERT(mksckPages[ii] == mksckPage);
				mksckPages[ii] = NULL;
				spin_unlock(&mksckPageListLock);
				pr_warn("%s mksck page is released: " \
					"idx %x, vmId %x, tgid %d\n",
					mksckPage->isGuest ? "Guest" : "Host",
					ii, mksckPage->vmId, mksckPage->tgid);
				MksckPageRelease(mksckPage);
				return;
			}
			spin_unlock(&mksckPageListLock);
		}
		ASSERT(oldRefc != 0);
	} while (!ATOMIC_SETIF(mksckPage->refCount, oldRefc - 1, oldRefc));
}
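
/*
 * The loop above is a lock-free refcount drop. Assuming ATOMIC_SETIF(v, new,
 * old) is a compare-and-swap returning nonzero on success, it behaves like
 * this sketch (illustrative, not part of the module):
 *
 *	for (;;) {
 *		old = v;
 *		if (old == 1) {
 *			// last reference: take the list lock so no one can
 *			// revive the page, then CAS 1 -> 0 and free it
 *		} else if (CAS(&v, old, old - 1)) {
 *			break;	// dropped one of several references
 *		}
 *	}
 */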

/**
 * @brief Lookup if the provided mpn belongs to one of the Mksck pages.
 *        Map if found.
 * @return 0 if all OK, error value otherwise
 */
int
MksckPage_LookupAndInsertPage(struct vm_area_struct *vma,
			      unsigned long address,
			      MPN mpn)
{
	int ii, jj;
	struct MksckPage **mksckPagePtr = mksckPages;

	spin_lock(&mksckPageListLock);
	for (jj = MKSCK_MAX_SHARES; jj--; mksckPagePtr++) {
		if (*mksckPagePtr) {
			for (ii = 0; ii < MKSCKPAGE_TOTAL; ii++) {
				HKVA tmp = ((HKVA)*mksckPagePtr) +
					   ii * PAGE_SIZE;

				if (vmalloc_to_pfn((void *)tmp) == mpn &&
				    vm_insert_page(vma, address,
						   pfn_to_page(mpn)) == 0) {
					spin_unlock(&mksckPageListLock);
					return 0;
				}
			}
		}
	}
	spin_unlock(&mksckPageListLock);
	return -1;
}
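
/*
 * Note on the per-page scan above: MksckPageAlloc() uses vmalloc(), so a
 * shared page is only virtually contiguous; each PAGE_SIZE chunk must be
 * translated individually with vmalloc_to_pfn() before it can be compared
 * against the faulting mpn.
 */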

/**
 * @brief Print information on the allocated shared pages
 *
 *        This function reports (among many other things) on the use of locks
 *        on the mksck page (page lock and individual socket locks). To avoid
 *        the Heisenberg effect it avoids using locks unless there is a
 *        danger of dereferencing freed memory. In particular, holding
 *        mksckPageListLock ensures that the mksck page is not freed while it
 *        is read. But under very rare conditions this function may report
 *        inconsistent or garbage data.
 */
static int
MksckPageInfoShow(struct seq_file *m,
		  void *private)
{
	int ii, jj;
	uint32 isPageMapped = 0;
	int err;
	struct MvpkmVM *vm;

	/*
	 * Lock is needed to atomize the test and dereference of
	 * mksckPages[ii].
	 */
	spin_lock(&mksckPageListLock);
	for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
		struct MksckPage *mksckPage = mksckPages[ii];
		if (mksckPage != NULL && mksckPage->isGuest) {
			/*
			 * After the refcount is incremented mksckPage will
			 * not be freed and it can continue to be dereferenced
			 * after the unlock of mksckPageListLock.
			 */
			ATOMIC_ADDV(mksckPage->refCount, 1);
			spin_unlock(&mksckPageListLock);

			/*
			 * Need the page lock to dereference
			 * mksckPage->vmHKVA.
			 */
			err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
			vm = (struct MvpkmVM *)mksckPage->vmHKVA;
			if (err == 0 && vm && vm->wsp) {
				for (jj = 0; jj < MKSCK_MAX_SHARES; jj++) {
					if (vm->wsp->isPageMapped[jj])
						isPageMapped |= 1<<jj;
				}
			}
			Mutex_Unlock(&mksckPage->mutex, MutexModeEX);

			/*
			 * Decrement the page refcount and relock the
			 * mksckPageListLock for the next for loop.
			 */
			MksckPage_DecRefc(mksckPage);
			spin_lock(&mksckPageListLock);
			break;
		}
	}

	/*
	 * mksckPageListLock is still locked, mksckPages[ii] can be
	 * dereferenced.
	 */
	for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
		struct MksckPage *mksckPage = mksckPages[ii];

		if (mksckPage != NULL) {
			uint32 lState = ATOMIC_GETO(mksckPage->mutex.state);
			uint32 isOpened = 0; /* Guest has an implicit ref. */

			seq_printf(m, "MksckPage[%02d]: { vmId = %4x(%c), " \
				   "refC = %2d%s", ii, mksckPage->vmId,
				   mksckPage->isGuest ? 'G' : 'H',
				   ATOMIC_GETO(mksckPage->refCount),
				   (isPageMapped&(1<<ii) ? "*" : ""));

			if (lState)
				seq_printf(m, ", lock=%x locked by line %d, " \
					   "unlocked by %d",
					   lState, mksckPage->mutex.line,
					   mksckPage->mutex.lineUnl);

			if (!mksckPage->isGuest) {
				struct task_struct *target;

				seq_printf(m, ", tgid = %d", mksckPage->tgid);

				rcu_read_lock();
				target = pid_task(find_vpid(mksckPage->tgid),
						  PIDTYPE_PID);
				seq_printf(m, "(%s)",
					   (target ? target->comm :
					    "no such process"));
				rcu_read_unlock();
			} else {
				ATOMIC_ADDV(mksckPage->refCount, 1);
				spin_unlock(&mksckPageListLock);

				err = Mutex_Lock(&mksckPage->mutex,
						 MutexModeEX);
				vm = (struct MvpkmVM *)mksckPage->vmHKVA;
				if (err == 0 && vm && vm->wsp)
					isOpened = vm->wsp->isOpened;
				Mutex_Unlock(&mksckPage->mutex, MutexModeEX);

				MksckPage_DecRefc(mksckPage);
				spin_lock(&mksckPageListLock);

				/*
				 * As the mksckPageListLock was unlocked,
				 * nothing prevented the MksckPage_DecRefc
				 * from actually freeing the page. Let's
				 * verify that the page is still there.
				 */
				if (mksckPage != mksckPages[ii]) {
					seq_puts(m, " released }\n");
					continue;
				}
			}

			seq_puts(m, ", sockets[] = {");
			for (jj = 0;
			     jj < mksckPage->numAllocSocks;
			     jj++, isOpened >>= 1) {
				Mksck *mksck = mksckPage->sockets + jj;

				if (ATOMIC_GETO(mksck->refCount)) {
					uint32 blocked;
					char *shutdRO =
						(mksck->shutDown & MKSCK_SHUT_RD ?
						 " SHUTD_RD" : "");
					char *shutdRW =
						(mksck->shutDown & MKSCK_SHUT_WR ?
						 " SHUTD_WR" : "");

					lState = ATOMIC_GETO(mksck->mutex.state);
					seq_printf(m, "\n " \
						   "{ addr = %8x, " \
						   "refC = %2d%s%s%s",
						   mksck->addr.addr,
						   ATOMIC_GETO(mksck->refCount),
						   (isOpened & 1 ? "*" : ""),
						   shutdRO,
						   shutdRW);

					if (mksck->peer)
						seq_printf(m,
							   ", peerAddr = %8x",
							   mksck->peerAddr.addr);
					if (lState)
						seq_printf(m,
							   ", lock=%x locked " \
							   "by line %d, " \
							   "unlocked by %d",
							   lState,
							   mksck->mutex.line,
							   mksck->mutex.lineUnl);
					blocked =
						ATOMIC_GETO(mksck->mutex.blocked);
					if (blocked)
						seq_printf(m, ", blocked=%d",
							   blocked);
					seq_puts(m, " }");
				}
			}
			seq_puts(m, " } }\n");
		}
	}
	spin_unlock(&mksckPageListLock);
	return 0;
}

static int
MksckPageInfoOpen(struct inode *inode,
		  struct file *file)
{
	return single_open(file, MksckPageInfoShow, inode->i_private);
}

static const struct file_operations mksckPageInfoFops = {
	.open    = MksckPageInfoOpen,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

void
MksckPageInfo_Init(struct dentry *parent)
{
	debugfs_create_file("mksckPage", S_IROTH, parent,
			    NULL, &mksckPageInfoFops);
}
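
/*
 * Usage sketch (assuming debugfs is mounted at /sys/kernel/debug and
 * 'parent' is this module's debugfs directory): the world-readable
 * (S_IROTH) file created above can be dumped with
 *
 *	cat /sys/kernel/debug/<parent>/mksckPage
 *
 * which invokes MksckPageInfoShow() through the seq_file single_open
 * interface.
 */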