12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676 |
- /*
- * Linux 2.6.32 and later Kernel module for VMware MVP Hypervisor Support
- *
- * Copyright (C) 2010-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; see the file COPYING. If not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
- #line 5
- /**
- * @file
- *
- * @brief The monitor/kernel socket interface kernel extension.
- */
- #define __KERNEL_SYSCALLS__
- #include <linux/version.h>
- #include <linux/kernel.h>
- #include <linux/module.h>
- #include <linux/init.h>
- #include <linux/slab.h>
- #include <linux/fs.h>
- #include <linux/errno.h>
- #include <linux/types.h>
- #include <linux/proc_fs.h>
- #include <linux/fcntl.h>
- #include <linux/syscalls.h>
- #include <linux/kmod.h>
- #include <linux/socket.h>
- #include <linux/net.h>
- #include <linux/skbuff.h>
- #include <linux/miscdevice.h>
- #include <linux/poll.h>
- #include <linux/rcupdate.h>
- #include <linux/smp.h>
- #include <linux/spinlock.h>
- #include <linux/mm.h>
- #include <linux/mman.h>
- #include <linux/file.h>
- #include <linux/vmalloc.h>
- #include <linux/debugfs.h>
- #include <linux/seq_file.h>
- #include <net/sock.h>
- #include <asm/memory.h>
- #include <asm/system.h>
- #include <linux/uaccess.h>
- #include "mvp.h"
- #include "actions.h"
- #include "mvpkm_kernel.h"
- #include "mksck_kernel.h"
- #include "mksck_sockaddr.h"
- #include "mutex_kernel.h"
/**
 * @brief Report an unrecoverable error and terminate the calling thread.
 *
 * @param file   name of the source file where the error was detected
 * @param line   line number where the error was detected
 * @param feCode fatal error classification code
 * @param bugno  bug number forwarded to FATALERROR_COMMON for reporting
 * @param fmt    printf-style format string with additional details
 *
 * Never returns: ends with do_exit(1), which kills only the current
 * thread, not the whole kernel.
 */
void NORETURN
FatalError(char const *file,
	   int line,
	   FECode feCode,
	   int bugno,
	   char const *fmt,
	   ...)
{
	/* Serializes all fatal-error printing across threads. */
	static DEFINE_MUTEX(fatalErrorMutex);

	/*
	 * Lock around printing the error details so that messages from multiple
	 * threads are not interleaved.
	 */
	mutex_lock(&fatalErrorMutex);
	FATALERROR_COMMON(printk, vprintk, file, line, feCode, bugno, fmt);
	dump_stack();
	/* done printing */
	mutex_unlock(&fatalErrorMutex);

	/*
	 * do_exit below exits the current thread but does not crash the kernel.
	 * Hence, the stack dump will actually be readable from other user
	 * threads.
	 */
	do_exit(1);
}
/*
 * The project uses a new address family: AF_MKSCK. Optimally, this address
 * family would be registered/assigned its own constant.
 *
 * Instead, we ASSUME that DECnet is not needed, so we re-use its constant.
 */
/* Protocol descriptor registered with the kernel in Mksck_Init(). */
static struct proto mksckProto = {
	.name     = "AF_MKSCK",
	.owner    = THIS_MODULE,
	/* sk_alloc() allocates a plain struct sock; private state lives
	 * in the shared MksckPage, referenced via sk->sk_protinfo. */
	.obj_size = sizeof(struct sock),
};
- static int
- MksckCreate(struct net *net,
- struct socket *sock,
- int protocol,
- int kern);
/* Address-family descriptor; MksckCreate() builds each new socket. */
static struct net_proto_family mksckFamilyOps = {
	.family = AF_MKSCK,
	.owner  = THIS_MODULE,
	.create = MksckCreate,
};
- static int MksckFault(struct vm_area_struct *vma, struct vm_fault *vmf);
/**
 * @brief Linux vma operations for receive windows established via Mksck mmap.
 *
 * Only .fault is provided; pages are inserted on demand by MksckFault.
 */
static struct vm_operations_struct mksckVMOps = {
	.fault = MksckFault
};
- /*
- * List of hosts and guests we know about.
- */
- static spinlock_t mksckPageListLock;
- static MksckPage *mksckPages[MKSCK_MAX_SHARES];
- /*
- * The following functions form the AF_MKSCK DGRAM operations.
- */
- static int MksckRelease(struct socket *sock);
- static int MksckBacklogRcv(struct sock *sk, struct sk_buff *skb);
- static void MksckSkDestruct(struct sock *sk);
- static int
- MksckBind(struct socket *sock,
- struct sockaddr *addr,
- int addrLen);
- static int MksckBindGeneric(struct sock *sk, Mksck_Address addr);
- static int
- MksckDgramRecvMsg(struct kiocb *kiocb,
- struct socket *sock,
- struct msghdr *msg,
- size_t len,
- int flags);
- static int
- MksckDgramSendMsg(struct kiocb *kiocb,
- struct socket *sock,
- struct msghdr *msg,
- size_t len);
- static int
- MksckGetName(struct socket *sock,
- struct sockaddr *addr,
- int *addrLen,
- int peer);
- static unsigned int
- MksckPoll(struct file *filp,
- struct socket *sock,
- poll_table *wait);
- static int
- MksckDgramConnect(struct socket *sock,
- struct sockaddr *addr,
- int addrLen,
- int flags);
- static int
- MksckMMap(struct file *file,
- struct socket *sock,
- struct vm_area_struct *vma);
- static void MksckPageRelease(struct MksckPage *mksckPage);
/**
 * @brief AF_MKSCK datagram socket operations.
 *
 * Unsupported entry points are routed to the kernel's sock_no_* stubs,
 * which return the conventional -EOPNOTSUPP style errors.
 */
static const struct proto_ops mksckDgramOps = {
	.family     = AF_MKSCK,
	.owner      = THIS_MODULE,
	.release    = MksckRelease,
	.bind       = MksckBind,
	.connect    = MksckDgramConnect,
	.socketpair = sock_no_socketpair,
	.accept     = sock_no_accept,
	.getname    = MksckGetName,
	.poll       = MksckPoll,
	.ioctl      = sock_no_ioctl,
	.listen     = sock_no_listen,
	.shutdown   = sock_no_shutdown, /* MksckShutdown, */
	.setsockopt = sock_no_setsockopt,
	.getsockopt = sock_no_getsockopt,
	.sendmsg    = MksckDgramSendMsg,
	.recvmsg    = MksckDgramRecvMsg,
	.mmap       = MksckMMap,
	.sendpage   = sock_no_sendpage,
};
- /**
- * @brief Initialize the MKSCK protocol
- *
- * @return 0 on success, -errno on failure
- */
- int
- Mksck_Init(void)
- {
- int err;
- spin_lock_init(&mksckPageListLock);
- err = proto_register(&mksckProto, 1);
- if (err != 0) {
- pr_err("Mksck_Init: Cannot register AF_MKSCK protocol" \
- ", errno = %d.\n", err);
- return err;
- }
- err = sock_register(&mksckFamilyOps);
- if (err < 0) {
- pr_err("Mksck_Init: Could not register address family" \
- " AF_MKSCK (errno = %d).\n", err);
- return err;
- }
- return 0;
- }
/**
 * @brief De-register the MKSCK protocol.
 *
 * Mirror of Mksck_Init(): the address family is removed first so no new
 * sockets can be created, then the protocol itself is unregistered.
 */
void
Mksck_Exit(void)
{
	sock_unregister(mksckFamilyOps.family);
	proto_unregister(&mksckProto);
}
/**
 * @brief Create a new MKSCK socket
 *
 * @param net network namespace (2.6.24 or above)
 * @param sock user socket structure
 * @param protocol protocol to be used
 * @param kern called from kernel mode
 *
 * @return 0 on success, -errno on failure
 *
 * Only root (euid 0) or the configured VMware uid may create MKSCK
 * sockets; only SOCK_DGRAM with the default protocol is supported.
 */
static int
MksckCreate(struct net *net,
	    struct socket *sock,
	    int protocol,
	    int kern)
{
	struct sock *sk;
	uid_t currentUid = current_euid();

	/* Access control: root or the dedicated VMware uid only. */
	if (!(currentUid == 0 ||
	      currentUid == Mvpkm_vmwareUid)) {
		pr_warn("MksckCreate: rejected from process %s " \
			"tgid=%d, pid=%d euid:%d.\n",
			current->comm,
			task_tgid_vnr(current),
			task_pid_vnr(current),
			currentUid);
		return -EPERM;
	}

	if (!sock)
		return -EINVAL;

	/* Only the default protocol (0) is recognized. */
	if (protocol)
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_DGRAM:
		sock->ops = &mksckDgramOps;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sock->state = SS_UNCONNECTED;

	sk = sk_alloc(net, mksckFamilyOps.family, GFP_KERNEL, &mksckProto);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	sk->sk_type        = SOCK_DGRAM;
	sk->sk_destruct    = MksckSkDestruct;
	sk->sk_backlog_rcv = MksckBacklogRcv;

	/*
	 * On socket lock...
	 *
	 * A bound socket will have an associated private area, the Mksck
	 * structure part of MksckPage. That area is pointed to by
	 * sk->sk_protinfo. In addition, a connected socket will have the
	 * peer field in its associated area set to point to the associated
	 * private area of the peer socket. A mechanism is needed to ensure
	 * that these private areas area not freed while they are being
	 * accessed within the scope of a function. A simple lock would not
	 * suffice as the interface functions (like MksckDgramRecvMsg())
	 * may block. Hence a reference count mechanism is employed. When
	 * the mentioned references (sk->sk_protinfo and mksck->peer) to
	 * the respective private areas are set a refcount is incremented,
	 * and decremented when the references are deleted.
	 *
	 * The refcounts of areas pointed to by sk->sk_protinfo and
	 * mksck->peer will be decremented under the lock of the socket.
	 * Hence these private areas cannot disappear as long as the socket
	 * lock is held.
	 *
	 * The interface functions will have one of the following
	 * structures:
	 *
	 *    simpleFn(sk)
	 *    {
	 *       lock_sock(sk);
	 *       if ((mksck = sk->sk_protinfo)) {
	 *          <non-blocking use of mksck>
	 *       }
	 *       release_sock(sk);
	 *    }
	 *
	 *    complexFn(sk)
	 *    {
	 *       lock_sock(sk);
	 *       if ((mksck = sk->sk_protinfo))
	 *          IncRefc(mksck);
	 *       release_sock(sk);
	 *
	 *       if (mksck) {
	 *          <use of mksck in a potentially blocking manner>
	 *          DecRefc(mksck);
	 *       }
	 *    }
	 */
	sk->sk_protinfo = NULL;
	sock_reset_flag(sk, SOCK_DONE);

	return 0;
}
/**
 * @brief Delete a MKSCK socket
 *
 * @param sock user socket structure
 *
 * @return 0 on success, -errno on failure
 *
 * Orphans the struct sock under its own lock and drops the reference;
 * the actual Mksck cleanup happens in MksckSkDestruct when the last
 * reference goes away.
 */
static int
MksckRelease(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (sk) {
		lock_sock(sk);
		sock_orphan(sk);
		release_sock(sk);
		sock_put(sk);
	}

	sock->sk    = NULL;
	sock->state = SS_FREE;

	return 0;
}
- static int
- MksckBacklogRcv(struct sock *sk,
- struct sk_buff *skb)
- {
- /*
- * We should never get these as we never queue an skb.
- */
- pr_err("MksckBacklogRcv: should never get here\n");
- return -EIO;
- }
/**
 * @brief Callback at socket destruction
 *
 * @param sk pointer to kernel socket structure
 *
 * Drops the reference from sk->sk_protinfo to the bound Mksck (if any)
 * and frees the per-socket user data allocated with sock_kmalloc.
 */
static void
MksckSkDestruct(struct sock *sk)
{
	Mksck *mksck;

	lock_sock(sk);
	mksck = sk->sk_protinfo;

	if (mksck != NULL) {
		/* Clearing sk_protinfo gives up the refcount taken at bind. */
		sk->sk_protinfo = NULL;
		Mksck_CloseCommon(mksck);
	}

	if (sk->sk_user_data != NULL) {
		sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
		sk->sk_user_data = NULL;
	}

	release_sock(sk);
}
/**
 * @brief Set the local address of a MKSCK socket
 *
 * @param sk kernel socket structure
 * @param addr the new address of the socket
 *
 * @return 0 on success, -errno on failure
 *
 * If addr.port is undefined a new random port is assigned.
 * If addr.vmId is undefined then the vmId computed from the tgid is used.
 * Hence the vmId of a socket does not determine the host all the time.
 *
 * Assumed that the socket is locked.
 * This function is called by explicit set (MksckBind) and implicit (Send).
 */
static int
MksckBindGeneric(struct sock *sk,
		 Mksck_Address addr)
{
	int err;
	Mksck *mksck;
	struct MksckPage *mksckPage;

	/* Already bound: sk_protinfo is only set by a successful bind. */
	if (sk->sk_protinfo != NULL)
		return -EISCONN;

	/*
	 * Locate the page for the given host and increment its reference
	 * count so it can't get freed off while we are working on it.
	 */
	if (addr.vmId == MKSCK_VMID_UNDEF) {
		mksckPage = MksckPage_GetFromTgidIncRefc();
	} else {
		pr_err("MksckBind: host bind called on vmid 0x%X\n", addr.vmId);
		mksckPage = MksckPage_GetFromVmIdIncRefc(addr.vmId);
	}

	if (mksckPage == NULL) {
		pr_err("MksckBind: no mksckPage for vm 0x%X\n", addr.vmId);
		return -ENETUNREACH;
	}
	addr.vmId = mksckPage->vmId;

	/*
	 * Before we can find an unused socket port on the page we have to
	 * lock the page for exclusive access so another thread can't
	 * allocate the same port.
	 */
	err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
	if (err < 0)
		goto outDec;

	addr.port = MksckPage_GetFreePort(mksckPage, addr.port);
	if (addr.port == MKSCK_PORT_UNDEF) {
		err = -EINVAL;
		goto outUnlockDec;
	}

	/*
	 * At this point we have the mksckPage locked for exclusive access
	 * and its reference count incremented. Also, addr is completely
	 * filled in with vmId and port that we want to bind.
	 *
	 * Find an available mksck struct on the shared page and initialize it.
	 */
	mksck = MksckPage_AllocSocket(mksckPage, addr);
	if (mksck == NULL) {
		err = -EMFILE;
		goto outUnlockDec;
	}

	/*
	 * Stable, release mutex. Leave mksckPage->refCount incremented so
	 * mksckPage can't be freed until socket is closed.
	 */
	Mutex_Unlock(&mksckPage->mutex, MutexModeEX);

	/*
	 * This is why we start mksck->refCount at 1. When sk_protinfo gets
	 * cleared, we decrement mksck->refCount.
	 */
	sk->sk_protinfo = mksck;

	PRINTK("MksckBind: socket bound to %08X\n",
	       mksck->addr.addr);

	return 0;

outUnlockDec:
	Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
outDec:
	MksckPage_DecRefc(mksckPage);
	return err;
}
- /**
- * @brief Test if the socket is already bound to a local address and,
- * if not, bind it to an unused address.
- *
- * @param sk kernel socket structure
- * @return 0 on success, -errno on failure
- *
- * Assumed that the socket is locked.
- */
- static inline int
- MksckTryBind(struct sock *sk)
- {
- int err = 0;
- if (!sk->sk_protinfo) {
- static const Mksck_Address addr = { .addr = MKSCK_ADDR_UNDEF };
- err = MksckBindGeneric(sk, addr);
- }
- return err;
- }
- /**
- * @brief Set the address of a MKSCK socket (user call)
- *
- * @param sock user socket structure
- * @param addr the new address of the socket
- * @param addrLen length of the address
- *
- * @return 0 on success, -errno on failure
- */
- static int
- MksckBind(struct socket *sock,
- struct sockaddr *addr,
- int addrLen)
- {
- int err;
- struct sock *sk = sock->sk;
- struct sockaddr_mk *addrMk = (struct sockaddr_mk *)addr;
- if (addrLen != sizeof(*addrMk))
- return -EINVAL;
- if (addrMk->mk_family != AF_MKSCK)
- return -EAFNOSUPPORT;
- /*
- * Obtain the socket lock and call the generic Bind function.
- */
- lock_sock(sk);
- err = MksckBindGeneric(sk, addrMk->mk_addr);
- release_sock(sk);
- return err;
- }
/**
 * @brief Lock the peer socket by locating it, incrementing its refc
 * @param addr the address of the peer socket
 * @param[out] peerMksckR set to the locked peer socket pointer
 *                        upon successful lookup; untouched on failure
 * @return 0 on success, -errno on failure
 */
static int
LockPeer(Mksck_Address addr, Mksck **peerMksckR)
{
	int err = 0;
	struct MksckPage *peerMksckPage =
		MksckPage_GetFromVmIdIncRefc(addr.vmId);
	Mksck *peerMksck;

	/*
	 * Find corresponding destination shared page and increment its
	 * reference count so it can't be freed while we are sending to the
	 * socket. Make sure that the address is indeed an address of a
	 * monitor/guest socket.
	 */
	if (peerMksckPage == NULL) {
		pr_info("LockPeer: vmId %x is not in use!\n", addr.vmId);
		return -ENETUNREACH;
	}

	/* Only monitor/guest pages are valid peers. */
	if (!peerMksckPage->isGuest) {
		MksckPage_DecRefc(peerMksckPage);
		pr_err("LockPeer: vmId %x does not belong to a guest!\n",
		       addr.vmId);
		return -ENETUNREACH;
	}

	err = Mutex_Lock(&peerMksckPage->mutex, MutexModeSH);
	if (err < 0) {
		MksckPage_DecRefc(peerMksckPage);
		return err;
	}

	/*
	 * Find corresponding destination socket on that shared page and
	 * increment its reference count so it can't be freed while we are
	 * trying to send to it.
	 */
	peerMksck = MksckPage_GetFromAddr(peerMksckPage, addr);

	if (peerMksck) {
		ATOMIC_ADDV(peerMksck->refCount, 1);
		*peerMksckR = peerMksck;
	} else {
		pr_err("LockPeer: addr %x is not a defined socket!\n",
		       addr.addr);
		err = -ENETUNREACH;
	}

	/*
	 * The page refcount is dropped here; the socket refcount taken
	 * above is what keeps the peer alive for the caller.
	 */
	Mutex_Unlock(&peerMksckPage->mutex, MutexModeSH);
	MksckPage_DecRefc(peerMksckPage);
	return err;
}
- /**
- * @brief Set the peer address of a MKSCK socket
- *
- * @param sock user socket structure
- * @param addr the new address of the socket
- * @param addrLen length of the address
- * @param flags flags
- *
- * @return 0 on success, -errno on failure
- */
- static int
- MksckDgramConnect(struct socket *sock,
- struct sockaddr *addr,
- int addrLen,
- int flags)
- {
- struct sock *sk = sock->sk;
- Mksck *mksck;
- struct sockaddr_mk *peerAddrMk = (struct sockaddr_mk *)addr;
- int err = 0;
- if (addrLen != sizeof(*peerAddrMk)) {
- pr_info("MksckConnect: wrong address length!\n");
- return -EINVAL;
- }
- if (peerAddrMk->mk_family != AF_MKSCK) {
- pr_info("MksckConnect: wrong address family!\n");
- return -EAFNOSUPPORT;
- }
- lock_sock(sk);
- err = MksckTryBind(sk);
- if (err)
- goto releaseSock;
- mksck = sk->sk_protinfo;
- /*
- * First sever any past peer connections...
- */
- Mksck_DisconnectPeer(mksck);
- sock->state = SS_UNCONNECTED;
- /*
- * ... and build new connections.
- */
- if (peerAddrMk->mk_addr.addr != MKSCK_ADDR_UNDEF) {
- sock->state = SS_CONNECTED;
- mksck->peerAddr = peerAddrMk->mk_addr;
- err = LockPeer(mksck->peerAddr, &mksck->peer);
- PRINTK("MksckConnect: socket %x is connected" \
- " to %x!\n", mksck->addr.addr, mksck->peerAddr.addr);
- }
- releaseSock:
- release_sock(sk);
- return err;
- }
/**
 * @brief returns the address of a MKSCK socket/peer address
 *
 * @param sock user socket structure
 * @param addr the new address of the socket
 * @param addrLen length of the address
 * @param peer 1 if the peer address is sought
 *
 * @return 0 on success, -errno on failure
 *
 * An unbound socket reports MKSCK_ADDR_UNDEF as its own address; asking
 * for the peer of an unbound or unconnected socket yields -ENOTCONN.
 */
static int
MksckGetName(struct socket *sock,
	     struct sockaddr *addr,
	     int *addrLen,
	     int peer)
{
	int err;
	Mksck *mksck;
	struct sock *sk = sock->sk;

	/*
	 * MAX_SOCK_ADDR is size of *addr, but it's not exported.
	 * ASSERT_ON_COMPILE(sizeof(struct sockaddr_mk) <= MAX_SOCK_ADDR);
	 */
	lock_sock(sk);
	mksck = sk->sk_protinfo;

	if (mksck == NULL) {
		if (peer) {
			err = -ENOTCONN;
		} else {
			/* Unbound socket: report the undefined address. */
			((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
			((struct sockaddr_mk *)addr)->mk_addr.addr =
				MKSCK_ADDR_UNDEF;
			*addrLen = sizeof(struct sockaddr_mk);
			err = 0;
		}
	} else if (!peer) {
		((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
		((struct sockaddr_mk *)addr)->mk_addr   = mksck->addr;
		*addrLen = sizeof(struct sockaddr_mk);
		err = 0;
	} else if (mksck->peerAddr.addr == MKSCK_ADDR_UNDEF) {
		err = -ENOTCONN;
	} else {
		((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
		((struct sockaddr_mk *)addr)->mk_addr   = mksck->peerAddr;
		*addrLen = sizeof(struct sockaddr_mk);
		err = 0;
	}

	release_sock(sk);
	return err;
}
- /**
- * @brief VMX polling a received packet from VMM.
- *
- * @param filp kernel file pointer to poll for
- * @param sock user socket structure
- * @param wait kernel polling table where to poll if not null
- *
- * @return poll mask state given from socket state.
- */
- static unsigned int MksckPoll(struct file *filp,
- struct socket *sock,
- poll_table *wait)
- {
- struct sock *sk = sock->sk;
- unsigned int mask = 0;
- Mksck *mksck = NULL;
- uint32 read;
- int err;
- lock_sock(sk);
- err = MksckTryBind(sk);
- if (err) {
- release_sock(sk);
- return err;
- }
- mksck = sk->sk_protinfo;
- /*
- * To avoid mksck disappearing right after the release_sock the
- * refcount needs to be incremented. For more details read the
- * block comment on locking in MksckCreate.
- */
- ATOMIC_ADDV(mksck->refCount, 1);
- release_sock(sk);
- /*
- * Wait to make sure this is the only thread trying to access socket.
- */
- err = Mutex_Lock(&mksck->mutex, MutexModeEX);
- if (err < 0) {
- /*
- * We might get in this situation if we are signaled
- * (select() may handle this, so leave)
- */
- PRINTK("MksckPoll: try to abort\n");
- return mask;
- }
- /*
- * See if packet in ring.
- */
- read = mksck->read;
- if (read != mksck->write) {
- mask |= POLLIN | POLLRDNORM; /* readable, socket is unlocked */
- /*
- * Note that if we implement support for POLLOUT, we SHOULD
- * change this Mutex_Unlock by Mutex_UnlPoll, because there is
- * no obvious knowledge about the sleepy reason that is
- * intended by user
- */
- Mutex_Unlock(&mksck->mutex, MutexModeEX);
- } else {
- Mutex_UnlPoll(&mksck->mutex, MutexModeEX,
- MKSCK_CVAR_FILL, filp, wait);
- }
- /*
- * Note that locking rules differ a little inside MksckPoll, since we
- * are not only given a pointer to the struct socket but also a pointer
- * to a struct file. This means that during the whole operation of this
- * function and during any pending wait (registered with poll_wait()),
- * the file itself is reference counted up, and we should rely on that
- * 'upper' reference counting to prevent from tearing the Mksck down.
- * That holds true since we don't re-bind sockets.
- */
- Mksck_DecRefc(mksck);
- return mask;
- }
- /**
- * @brief Manage a set of Mksck_PageDesc from a message or a stored array.
- *
- * @param pd set of Mksck_PageDesc
- * @param pages Mksck_PageDesc pages count for this management operation
- * @param incr ternary used to indicate if we want to reference (+1), or
- * dereference (-1), or count (0) 4k pages
- *
- * @return length of bytes processed.
- */
- static size_t
- MksckPageDescManage(Mksck_PageDesc *pd,
- uint32 pages,
- int incr)
- {
- size_t payloadLen = 0;
- uint32 i;
- for (i = 0; i < pages && pd[i].mpn != INVALID_MPN; ++i) {
- uint32 j;
- for (j = 0; j < 1 << pd[i].order; ++j) {
- struct page *page;
- MPN currMPN = pd[i].mpn + j;
- /*
- * The monitor tried to send an invalid MPN, bad.
- */
- if (!pfn_valid(currMPN)) {
- pr_warn("MksckPageDescManage: Invalid MPN %x\n",
- currMPN);
- } else {
- page = pfn_to_page(currMPN);
- if (incr == 1)
- get_page(page);
- if (incr == -1)
- put_page(page);
- }
- payloadLen += PAGE_SIZE;
- }
- }
- return payloadLen;
- }
/**
 * @brief Management values to be used as third parameter of MksckPageDescManage
 */
#define MANAGE_INCREMENT 1   /* take a reference (get_page) on each page */
#define MANAGE_DECREMENT -1  /* drop a reference (put_page) on each page */
#define MANAGE_COUNT 0       /* only count payload bytes, no ref change */
/**
 * @brief Map a set of Mksck_PageDesc from a message or a stored array.
 *
 * @param pd set of Mksck_PageDesc
 * @param pages pages count for this mapping
 * @param iov vectored user virtual addresses of the recv commands
 * @param iovCount size for iov parameter
 * @param vma virtual memory area used for the mapping, note that
 *        this is mandatorily required MksckPageDescMap is used
 *        on an indirect PageDesc context (i.e whenever iov is
 *        not computed by the kernel but by ourselves).
 *
 * Since find_vma() and vm_insert_page() are used, this function must
 * be called with current's mmap_sem locked, or inside an MMap operation.
 *
 * @return length of bytes mapped.
 */
static size_t
MksckPageDescMap(Mksck_PageDesc *pd,
		 uint32 pages,
		 struct iovec *iov,
		 int iovCount,
		 struct vm_area_struct *vma)
{
	size_t payloadLen = 0;
	uint32 i;

	/* Walk descriptors until the count or an INVALID_MPN terminator. */
	for (i = 0; i < pages && pd[i].mpn != INVALID_MPN; ++i) {
		uint32 j;

		for (j = 0; j < 1 << pd[i].order; ++j) {
			HUVA huva = 0;
			struct page *page;
			MPN currMPN = pd[i].mpn + j;

			/* Skip exhausted iovec entries. */
			while (iovCount > 0 && iov->iov_len == 0) {
				iovCount--;
				iov++;
			}

			if (iovCount == 0) {
				pr_warn("MksckPageDescMap: Invalid " \
					"iov length\n");
				goto map_done;
			}

			huva = (HUVA)iov->iov_base;

			/*
			 * iovecs for receiving the typed component of the
			 * message should have page aligned base and size
			 * sufficient for page descriptor mappings.
			 */
			if (huva & (PAGE_SIZE - 1) ||
			    iov->iov_len < PAGE_SIZE) {
				pr_warn("MksckPageDescMap: Invalid huva %x " \
					"or iov_len %d\n", huva, iov->iov_len);
				goto map_done;
			}

			/*
			 * Might be in a new vma...
			 */
			if (vma == NULL || huva < vma->vm_start ||
			    huva >= vma->vm_end) {
				vma = find_vma(current->mm, huva);

				/*
				 * Couldn't find a matching vma for huva.
				 * Only vmas created by MksckMMap (those
				 * with mksckVMOps) are acceptable targets.
				 */
				if (vma == NULL ||
				    huva < vma->vm_start ||
				    vma->vm_ops != &mksckVMOps) {
					pr_warn("MksckPageDescMap: " \
						"Invalid vma\n");
					goto map_done;
				}
			}

			/*
			 * The monitor tried to send an invalid MPN, bad.
			 */
			if (!pfn_valid(currMPN)) {
				pr_warn("MksckPageDescMap: Invalid MPN %x\n",
					currMPN);
			} else {
				int rc;

				page = pfn_to_page(currMPN);

				/*
				 * Map into the receive window.
				 */
				rc = vm_insert_page(vma, huva, page);
				if (rc) {
					pr_warn("MksckPageDescMap: Failed to " \
						"insert %x at %x, error %d\n",
						currMPN, huva, rc);
					goto map_done;
				}

				ASSERT(iov->iov_len >= PAGE_SIZE);
				iov->iov_base += PAGE_SIZE;
				iov->iov_len  -= PAGE_SIZE;
			}

			payloadLen += PAGE_SIZE;
		}
	}

map_done:
	return payloadLen;
}
- /**
- * @brief Check if the provided MsgHdr has still room for a receive operation.
- *
- * @param msg user buffer
- * @return 1 if MsgHdr has IO space room in order to receive a mapping, 0 otherwise.
- */
- static int
- MsgHdrHasAvailableRoom(struct msghdr *msg)
- {
- struct iovec *vec = msg->msg_iov;
- uint32 count = msg->msg_iovlen;
- while (count > 0 && vec->iov_len == 0) {
- count--;
- vec++;
- }
- return (count != 0);
- }
- /**
- * Whenever a typed message is received from the monitor, we may choose to store
- * all the page descriptor content in a linked state of descriptors, through the
- * following information context
- */
- struct MksckPageDescInfo {
- struct MksckPageDescInfo *next;
- uint32 flags;
- uint32 pages;
- uint32 mapCounts;
- Mksck_PageDesc descs[0];
- };
/* Forward declarations for the PageDesc-context socket operations. */
static void MksckPageDescSkDestruct(struct sock *sk);
static int
MksckPageDescMMap(struct file *file,
		  struct socket *sock,
		  struct vm_area_struct *vma);
static int
MksckPageDescIoctl(struct socket *sock,
		   unsigned int cmd,
		   unsigned long arg);
- /**
- * @brief Delete a page descriptor container socket
- *
- * @param sock user socket structure
- * @return 0 on success, -errno on failure
- */
- static int
- MksckPageDescRelease(struct socket *sock)
- {
- /* This is generic socket release */
- struct sock *sk = sock->sk;
- if (sk) {
- lock_sock(sk);
- sock_orphan(sk);
- release_sock(sk);
- sock_put(sk);
- }
- sock->sk = NULL;
- sock->state = SS_FREE;
- return 0;
- }
/**
 * Whenever a typed message is received from the monitor, we may choose to store
 * all the page descriptor content for a future mapping. One shall put a context
 * usable by host userland, that means trough a file descriptor, and as a secure
 * implementation we choose to define a strict set of operations that are used
 * only for that purpose. This set of operation is reduced to leaving the
 * default "PageDesc(s) accumulating" mode (inside ioctl), mapping the context,
 * and generic socket destruction.
 *
 * Every other operation is stubbed out with the matching sock_no_* helper.
 */
static const struct proto_ops mksckPageDescOps = {
	.family = AF_MKSCK,
	.owner = THIS_MODULE,
	.release = MksckPageDescRelease,	/* functional */
	.bind = sock_no_bind,
	.connect = sock_no_connect,
	.socketpair = sock_no_socketpair,
	.accept = sock_no_accept,
	.getname = sock_no_getname,
	.poll = sock_no_poll,
	.ioctl = MksckPageDescIoctl,		/* functional: MKSCK_DETACH */
	.listen = sock_no_listen,
	.shutdown = sock_no_shutdown,
	.setsockopt = sock_no_setsockopt,
	.getsockopt = sock_no_getsockopt,
	.sendmsg = sock_no_sendmsg,
	.recvmsg = sock_no_recvmsg,
	.mmap = MksckPageDescMMap,		/* functional */
	.sendpage = sock_no_sendpage,
};
/**
 * @brief Create or accumulate to a PageDesc context, backed as a descriptor.
 *
 * @param sock user socket structure
 * @param msg user buffer to receive the file descriptor as ancillary data
 * @param pd source descriptor part of a message
 * @param pages pages count for this mapping
 *
 * @return error if negative, 0 otherwise
 *
 * Locking: sk is locked on entry to the body and released on every return
 * path; the context socket (newsk) is locked across the list splice and
 * released just before the page refcounts are taken.
 */
static int
MksckPageDescToFd(struct socket *sock,
		  struct msghdr *msg,
		  Mksck_PageDesc *pd,
		  uint32 pages)
{
	int retval;
	int newfd;
	struct socket *newsock;
	struct sock *newsk;
	struct sock *sk = sock->sk;
	struct MksckPageDescInfo **pmpdi, *mpdi;

	lock_sock(sk);

	/*
	 * Relation between any mk socket and the PageDesc context is as follow:
	 *
	 * From the mk socket to the PageDesc context:
	 * - sk->sk_user_data is a WEAK LINK, containing only a file descriptor
	 *   numerical value such that accumulating is keyed on it.
	 *
	 * From the PageDesc context to the mk socket:
	 * - sk->sk_protinfo contains a MksckPageDescInfo struct.
	 * - sk->sk_user_data is a pointer REF-COUNTED sock_hold() LINK, also it
	 *   is rarely dereferenced but usually used to check that
	 *   the right socket pair is used. Full dereferencing is
	 *   used only to break the described links.
	 */
	if (sk->sk_user_data) {
		struct MksckPageDescInfo *mpdi2;

		/* Continue any previous on-going mapping, i.e accumulate. */
		newfd = *((int *)sk->sk_user_data);

		/* Promote the weak link into a real socket reference. */
		newsock = sockfd_lookup(newfd, &retval);
		if (!newsock) {
			retval = -EINVAL;
			goto endProcessingReleaseSock;
		}
		newsk = newsock->sk;
		lock_sock(newsk);
		/*
		 * NOTE(review): the sockfd_lookup() reference is dropped here
		 * while newsk is still locked and in use; this presumably
		 * relies on the fd table keeping the socket alive — confirm.
		 */
		sockfd_put(newsock);

		if (((struct sock *)newsk->sk_user_data) != sk) {
			/*
			 * One way of going into this situation would be for
			 * userland to dup the file descriptor just received,
			 * close the original number, and open a new mk socket
			 * in the very same spot. The userland code have
			 * a lot of way of interacting with the kernel without
			 * this code to be notified.
			 */
			retval = -EINVAL;
			release_sock(newsk);
			goto endProcessingReleaseSock;
		}
		mpdi = kmalloc(sizeof(struct MksckPageDescInfo) +
			       pages*sizeof(Mksck_PageDesc), GFP_KERNEL);
		if (!mpdi) {
			retval = -ENOMEM;
			release_sock(newsk);
			goto endProcessingReleaseSock;
		}

		/*
		 * There is no mandatory needs for us to notify userland from
		 * the progress in "appending" to the file descriptor, but it
		 * would feel strange if the userland would have no mean to
		 * tell if the received message was just not thrown away. So, in
		 * order to be consistent one fill the ancillary message while
		 * "creating" and "appending to" file descriptors.
		 */
		retval = put_cmsg(msg, SOL_DECNET, 0, sizeof(int), &newfd);
		if (retval < 0)
			goto endProcessingKFreeReleaseSock;
		release_sock(sk);

		/* Walk to the tail of the accumulation list. */
		mpdi2 = (struct MksckPageDescInfo *)newsk->sk_protinfo;
		while (mpdi2->next)
			mpdi2 = mpdi2->next;
		pmpdi = &(mpdi2->next);
	} else {
		/*
		 * Create a new socket, new context and a new file descriptor.
		 */
		retval = sock_create(sk->sk_family, sock->type, 0, &newsock);
		if (retval < 0)
			goto endProcessingReleaseSock;
		newsk = newsock->sk;
		lock_sock(newsk);
		newsk->sk_destruct = &MksckPageDescSkDestruct;
		newsk->sk_user_data = sk;
		sock_hold(sk); /* Keep a reference to parent mk socket. */
		newsock->ops = &mksckPageDescOps;
		mpdi = kmalloc(sizeof(struct MksckPageDescInfo) +
			       pages*sizeof(Mksck_PageDesc), GFP_KERNEL);
		if (!mpdi) {
			retval = -ENOMEM;
			goto endProcessingFreeNewSock;
		}
		/* The weak link: an int holding the fd number. */
		sk->sk_user_data = sock_kmalloc(sk, sizeof(int), GFP_KERNEL);
		if (sk->sk_user_data == NULL) {
			retval = -ENOMEM;
			goto endProcessingKFreeAndNewSock;
		}

		/*
		 * Mapping to a file descriptor may fail if a thread is closing
		 * in parallel of sock_map_fd/sock_alloc_fd, or kernel memory
		 * is full.
		 */
		newfd = sock_map_fd(newsock, O_CLOEXEC);
		if (newfd < 0) {
			retval = newfd;
			sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
			sk->sk_user_data = NULL;
			goto endProcessingKFreeAndNewSock;
		}

		/*
		 * Notify userland from a new file descriptor, alike AF_UNIX
		 * ancillary.
		 */
		retval = put_cmsg(msg, SOL_DECNET, 0, sizeof(int), &newfd);
		if (retval < 0) {
			/*
			 * NOTE(review): newfd is already installed by
			 * sock_map_fd() at this point; the sockfd_put/
			 * put_unused_fd teardown below looks suspicious for
			 * an installed fd — confirm against the kernel
			 * version this targets.
			 */
			sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
			sk->sk_user_data = NULL;
			kfree(mpdi);
			release_sock(newsk);
			sockfd_put(newsock);
			sock_release(newsock);
			put_unused_fd(newfd);
			goto endProcessingReleaseSock;
		}
		*(int *)sk->sk_user_data = newfd;
		release_sock(sk);
		pmpdi = (struct MksckPageDescInfo **)(&(newsk->sk_protinfo));
	}

	/* Common tail: sk is unlocked, newsk is still locked. */
	mpdi->next = NULL;
	mpdi->flags = 0;
	mpdi->mapCounts = 0;
	mpdi->pages = pages;
	memcpy(mpdi->descs, pd, pages*sizeof(Mksck_PageDesc));
	*pmpdi = mpdi; /* link */
	release_sock(newsk);

	/*
	 * Increment all reference counters for the pages.
	 */
	MksckPageDescManage(pd, pages, MANAGE_INCREMENT);
	return 0;

	/* Error unwind ladders; each label undoes one more acquisition. */
endProcessingKFreeAndNewSock:
	kfree(mpdi);
endProcessingFreeNewSock:
	release_sock(newsk);
	sock_release(newsock);
	release_sock(sk);
	return retval;

endProcessingKFreeReleaseSock:
	kfree(mpdi);
	release_sock(newsk);
endProcessingReleaseSock:
	release_sock(sk);
	return retval;
}
/**
 * @brief Callback at socket destruction.
 *
 * Drops one page reference per stored descriptor (undoing the
 * MANAGE_INCREMENT taken in MksckPageDescToFd), frees the accumulation
 * list, and severs the ref-counted link back to the parent mk socket.
 *
 * @param sk pointer to kernel socket structure
 */
static void
MksckPageDescSkDestruct(struct sock *sk)
{
	struct sock *mkSk = NULL;
	struct MksckPageDescInfo *mpdi;

	lock_sock(sk);
	mpdi = sk->sk_protinfo;
	while (mpdi) {
		struct MksckPageDescInfo *next = mpdi->next;

		MksckPageDescManage(mpdi->descs, mpdi->pages,
				    MANAGE_DECREMENT);
		kfree(mpdi);
		mpdi = next;
	}
	/* Detach the parent pointer under the lock; release it afterwards. */
	if (sk->sk_user_data) {
		mkSk = (struct sock *)sk->sk_user_data;
		sk->sk_user_data = NULL;
	}
	sk->sk_protinfo = NULL;
	release_sock(sk);

	/*
	 * Clean the mksck socket that we are holding: free its weak-link fd
	 * cell and drop the sock_hold() taken at context creation.
	 */
	if (mkSk) {
		lock_sock(mkSk);
		sock_kfree_s(mkSk, mkSk->sk_user_data, sizeof(int));
		mkSk->sk_user_data = NULL;
		release_sock(mkSk);
		sock_put(mkSk); /* reverse of sock_hold() */
	}
}
/**
 * @brief The mmap operation of the PageDesc context file descriptor.
 *
 * The mmap command is used to mmap any detached (i.e. no more accumulating)
 * PageDesc context, full of the content from its parent communication mk
 * socket. Mapping may be done a specified number of times, so that the
 * PageDesc context could become useless (as a security restriction).
 *
 * Also note that mapping from an offset different from zero is considered
 * as a userland invalid operation.
 *
 * @param file user file structure
 * @param sock user socket structure
 * @param vma virtual memory area structure
 *
 * @return error code, 0 on success
 */
static int
MksckPageDescMMap(struct file *file,
		  struct socket *sock,
		  struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct MksckPageDescInfo *mpdi;
	struct iovec iov;
	unsigned long vm_flags;
	int freed = 0;

	/* Present the whole vma as a single one-entry iovec to the mapper. */
	iov.iov_base = (void *)vma->vm_start;
	iov.iov_len = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	mpdi = sk->sk_protinfo;

	/*
	 * vma->vm_pgoff is checked, since offsetting the map is not supported.
	 * A still-attached context (sk_user_data set) cannot be mapped either.
	 */
	if (!mpdi || sk->sk_user_data || vma->vm_pgoff) {
		release_sock(sk);
		pr_info("MMAP failed for virt %lx size %lx\n",
			vma->vm_start, vma->vm_end - vma->vm_start);
		return -EINVAL;
	}

	/* Access rights were frozen at MKSCK_DETACH time (head node only). */
	vm_flags = mpdi->flags;
	if ((vma->vm_flags & ~vm_flags) & (VM_READ|VM_WRITE)) {
		release_sock(sk);
		return -EACCES;
	}

	while (mpdi) {
		struct MksckPageDescInfo *next = mpdi->next;

		MksckPageDescMap(mpdi->descs, mpdi->pages, &iov, 1, vma);

		/* mapCounts == 0 means unlimited; otherwise expire the node. */
		if (mpdi->mapCounts && !--mpdi->mapCounts) {
			MksckPageDescManage(mpdi->descs, mpdi->pages,
					    MANAGE_DECREMENT);
			kfree(mpdi);
			freed = 1;
		}
		mpdi = next;
	}
	/*
	 * NOTE(review): if only some nodes expired this would leave freed
	 * entries reachable (or leak survivors); in practice MKSCK_DETACH
	 * sets the same mapCounts on every node so they all expire on the
	 * same pass — confirm no other writer of mapCounts exists.
	 */
	if (freed)
		sk->sk_protinfo = NULL;
	vma->vm_ops = &mksckVMOps;
	release_sock(sk);
	return 0;
}
/**
 * @brief The ioctl operation of the PageDesc context file descriptor.
 *
 * The ioctl MKSCK_DETACH command is used to detach the PageDesc context
 * from its parent communication mk socket. Once done, the context
 * is able to remap the transferred PageDesc(s) of typed messages accumulated
 * into the context.
 *
 * @param sock user socket structure
 * @param cmd select which cmd function needs to be performed
 * @param arg argument for command
 *
 * @return error code, 0 on success
 */
static int
MksckPageDescIoctl(struct socket *sock,
		   unsigned int cmd,
		   unsigned long arg)
{
	struct sock *mksck = NULL;
	struct sock *sk = sock->sk;
	struct MksckPageDescInfo *mpdi;
	unsigned long ul[2];
	int retval = 0;

	switch (cmd) {
	/*
	 * ioctl MKSCK_DETACH (in and out):
	 * Detach, compute size and define allowed protection access rights
	 *
	 * [in]:  unsigned long flags, similar to prot argument of mmap()
	 *        unsigned long number of available further mappings
	 *        with 0 meaning unlimited number of mappings
	 * [out]: unsigned long size of the available mappable area
	 */
	case MKSCK_DETACH:
		lock_sock(sk);
		mpdi = sk->sk_protinfo;

		/*
		 * Read unsigned long argument that contains the mmap
		 * alike flags.
		 */
		if (copy_from_user(ul, (void *)arg, sizeof(ul))) {
			retval = -EFAULT;

		/*
		 * Check that the file descriptor has a parent
		 * and some context there.
		 */
		} else if (!mpdi || !sk->sk_user_data) {
			retval = -EINVAL;
		} else {
			/*
			 * Compute mapping protection bits from argument
			 * and size of the mapping, that is also given
			 * back to userland as unsigned long.
			 */
			uint32 flags = calc_vm_prot_bits(ul[0]);

			ul[0] = 0;
			while (mpdi) {
				struct MksckPageDescInfo *next = mpdi->next;

				/* MANAGE_COUNT only totals the byte length. */
				ul[0] += MksckPageDescManage(mpdi->descs,
							     mpdi->pages,
							     MANAGE_COUNT);
				/* Same map budget on every node (see MMap). */
				mpdi->mapCounts = ul[1];
				mpdi = next;
			}
			/*
			 * Only ul[0] (the total size) is copied back;
			 * the second element is input-only.
			 */
			if (copy_to_user((void *)arg, ul, sizeof(ul[0]))) {
				retval = -EFAULT;
			} else {
				/* Flags live on the head node only;
				 * MMap reads them from there. */
				mpdi = sk->sk_protinfo;
				mpdi->flags = flags;
				mksck = (struct sock *)sk->sk_user_data;
				sk->sk_user_data = NULL;
			}
		}
		release_sock(sk);

		/*
		 * Clean the mksck socket that we are holding: free its
		 * weak-link fd cell and drop the context's reference.
		 */
		sk = mksck;
		if (sk) {
			lock_sock(sk);
			sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
			sk->sk_user_data = NULL;
			release_sock(sk);
			sock_put(sk);
		}
		break;
	default:
		retval = -EINVAL;
		break;
	}
	return retval;
}
- /**
- * @brief VMX receiving a packet from VMM.
- *
- * @param kiocb kernel io control block (unused)
- * @param sock user socket structure
- * @param msg user buffer to receive the packet
- * @param len size of the user buffer
- * @param flags flags
- *
- * @return -errno on failure, else length of untyped portion + total number
- * of bytes mapped for typed portion.
- */
- static int
- MksckDgramRecvMsg(struct kiocb *kiocb,
- struct socket *sock,
- struct msghdr *msg,
- size_t len,
- int flags)
- {
- int err = 0;
- struct sock *sk = sock->sk;
- Mksck *mksck;
- Mksck_Datagram *dg;
- struct sockaddr_mk *fromAddr;
- uint32 read;
- struct iovec *iov;
- size_t payloadLen, untypedLen;
- uint32 iovCount;
- if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
- return -EOPNOTSUPP;
- if ((msg->msg_name != NULL) && (msg->msg_namelen < sizeof(*fromAddr)))
- return -EINVAL;
- lock_sock(sk);
- err = MksckTryBind(sk);
- if (err) {
- release_sock(sk);
- return err;
- }
- mksck = sk->sk_protinfo;
- /*
- * To avoid mksck disappearing right after the release_sock the
- * refcount needs to be incremented. For more details read the
- * block comment on locking in MksckCreate.
- */
- ATOMIC_ADDV(mksck->refCount, 1);
- release_sock(sk);
- /*
- * Get pointer to next packet in ring to be dequeued.
- */
- while (1) {
- /*
- * Wait to make sure this is the only thread trying to access
- * the socket.
- */
- err = Mutex_Lock(&mksck->mutex, MutexModeEX);
- if (err < 0)
- goto decRefc;
- /*
- * See if packet in ring.
- */
- read = mksck->read;
- if (read != mksck->write)
- break;
- /*
- * Nothing there, if user wants us not to block then just
- * return EAGAIN.
- */
- if (flags & MSG_DONTWAIT) {
- Mutex_Unlock(&mksck->mutex, MutexModeEX);
- err = -EAGAIN;
- goto decRefc;
- }
- /*
- * Nothing there, unlock socket and wait for data.
- */
- mksck->foundEmpty++;
- err = Mutex_UnlSleep(&mksck->mutex, MutexModeEX,
- MKSCK_CVAR_FILL);
- if (err < 0) {
- PRINTK("MksckDgramRecvMsg: aborted\n");
- goto decRefc;
- }
- }
- /*
- * Point to packet in ring.
- */
- dg = (void *)&mksck->buff[read];
- /*
- * Provide the address of the sender.
- */
- if (msg->msg_name != NULL) {
- fromAddr = (void *)msg->msg_name;
- fromAddr->mk_addr = dg->fromAddr;
- fromAddr->mk_family = AF_MKSCK;
- msg->msg_namelen = sizeof(*fromAddr);
- } else {
- msg->msg_namelen = 0;
- }
- /*
- * Copy data from ring buffer to caller's buffer and remove packet from
- * ring buffer.
- */
- iov = msg->msg_iov;
- iovCount = msg->msg_iovlen;
- untypedLen = dg->len - dg->pages * sizeof(Mksck_PageDesc) - dg->pad;
- payloadLen = untypedLen;
- /*
- * Handle the untyped portion of the message.
- */
- if (untypedLen <= len) {
- err = memcpy_toiovec(iov, dg->data, untypedLen);
- if (err < 0) {
- pr_warn("MksckDgramRecvMsg: Failed to " \
- "memcpy_to_iovec untyped message component " \
- "(buf len %d datagram len %d (untyped %d))\n",
- len, dg->len, untypedLen);
- }
- } else {
- err = -EINVAL;
- }
- /*
- * Map in the typed descriptor.
- */
- if (err >= 0 && dg->pages > 0) {
- Mksck_PageDesc *pd =
- (Mksck_PageDesc *)(dg->data + untypedLen + dg->pad);
- /*
- * There are 3 ways of receiving typed messages from the monitor
- * - The typed message is mapped directly into a VMA.
- * To indicate this the userland sets msg_controllen == 0.
- * - The typed message is mapped directly into a VMA and a
- * file descriptor created for further mappings on the host
- * (in same userland address space or an alternate userland
- * address space). In this case msg_controllen should be set
- * to sizeof(fd).
- * - The typed message is not mapped directly into a VMA, but
- * a file descriptor is created for later mapping on the
- * host. In this case msg_controllen should be set to
- * sizeof(fd) and the supplied iovec shall not specify a
- * receive window.
- */
- if (msg->msg_controllen > 0)
- err = MksckPageDescToFd(sock, msg, pd, dg->pages);
- if ((msg->msg_controllen <= 0) ||
- (err != 0) ||
- (MsgHdrHasAvailableRoom(msg) != 0)) {
- /*
- * Lock for a change of mapping.
- */
- down_write(¤t->mm->mmap_sem);
- payloadLen += MksckPageDescMap(pd, dg->pages,
- iov, iovCount, NULL);
- up_write(¤t->mm->mmap_sem);
- }
- }
- /*
- * Now that packet is removed, it is safe to unlock socket so another
- * thread can do a recv(). We also want to wake someone waiting for
- * room to insert a new packet.
- */
- if ((err >= 0) && Mksck_IncReadIndex(mksck, read, dg))
- Mutex_UnlWake(&mksck->mutex, MutexModeEX,
- MKSCK_CVAR_ROOM, true);
- else
- Mutex_Unlock(&mksck->mutex, MutexModeEX);
- /*
- * If memcpy error, return error status.
- * Otherwise, return number of bytes copied.
- */
- if (err >= 0)
- err = payloadLen;
- decRefc:
- Mksck_DecRefc(mksck);
- return err;
- }
/**
 * @brief VMX sending a packet to VMM.
 *
 * @param kiocb kernel io control block
 * @param sock user socket structure
 * @param msg packet to be transmitted
 * @param len length of the packet
 *
 * @return length of the sent msg on success, -errno on failure
 */
static int
MksckDgramSendMsg(struct kiocb *kiocb,
		  struct socket *sock,
		  struct msghdr *msg,
		  size_t len)
{
	int err = 0;
	struct sock *sk = sock->sk;
	Mksck *peerMksck;
	Mksck_Datagram *dg;
	uint32 needed;
	uint32 write;
	Mksck_Address fromAddr;

	if (msg->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	if (len > MKSCK_XFER_MAX)
		return -EMSGSIZE;

	/*
	 * In the next locked section peerMksck pointer needs to be set and
	 * its refcount needs to be incremented.
	 */
	lock_sock(sk);

	do {
		Mksck *mksck;
		Mksck_Address peerAddr = {
			.addr =
			(msg->msg_name ?
			 ((struct sockaddr_mk *)msg->msg_name)->mk_addr.addr :
			 MKSCK_ADDR_UNDEF)
		};

		err = MksckTryBind(sk);
		if (err)
			break;
		mksck = sk->sk_protinfo;
		fromAddr = mksck->addr;

		/*
		 * If the socket is connected, use that address (no sendto for
		 * connected sockets). Else, use the provided address if any.
		 */
		peerMksck = mksck->peer;
		if (peerMksck) {
			if (peerAddr.addr != MKSCK_ADDR_UNDEF &&
			    peerAddr.addr != mksck->peerAddr.addr) {
				err = -EISCONN;
				break;
			}

			/*
			 * To avoid mksckPeer disappearing right after the
			 * release_sock the refcount needs to be incremented.
			 * For more details read the block comment on locking
			 * in MksckCreate.
			 */
			ATOMIC_ADDV(peerMksck->refCount, 1);
		} else if (peerAddr.addr == MKSCK_ADDR_UNDEF) {
			err = -ENOTCONN;
		} else {
			/*
			 * LockPeer also increments the refc on the peer.
			 */
			err = LockPeer(peerAddr, &peerMksck);
		}
	} while (0);

	release_sock(sk);

	if (err)
		return err;

	/*
	 * Get pointer to sufficient empty space in ring buffer.
	 */
	needed = MKSCK_DGSIZE(len);

	while (1) {
		/*
		 * Wait to make sure this is the only thread trying to write
		 * to ring.
		 */
		err = Mutex_Lock(&peerMksck->mutex, MutexModeEX);
		if (err < 0)
			goto decRefc;

		/*
		 * Check if socket can receive data.
		 */
		if (peerMksck->shutDown & MKSCK_SHUT_RD) {
			err = -ENOTCONN;
			goto unlockDecRefc;
		}

		/*
		 * See if there is room for the packet.
		 */
		write = Mksck_FindSendRoom(peerMksck, needed);
		if (write != MKSCK_FINDSENDROOM_FULL)
			break;

		/*
		 * No room, unlock socket and maybe wait for room.
		 */
		if (msg->msg_flags & MSG_DONTWAIT) {
			err = -EAGAIN;
			goto unlockDecRefc;
		}

		peerMksck->foundFull++;
		err = Mutex_UnlSleep(&peerMksck->mutex, MutexModeEX,
				     MKSCK_CVAR_ROOM);
		if (err < 0) {
			PRINTK("MksckDgramSendMsg: aborted\n");
			goto decRefc;
		}
	}

	/*
	 * Point to room in ring and fill in message.
	 */
	dg = (void *)&peerMksck->buff[write];
	dg->fromAddr = fromAddr;
	dg->len = len;

	err = memcpy_fromiovec(dg->data, msg->msg_iov, len);
	if (err != 0)
		goto unlockDecRefc;

	/*
	 * Increment past message.
	 */
	Mksck_IncWriteIndex(peerMksck, write, needed);

	/*
	 * Unlock socket and wake someone trying to receive, ie, we filled
	 * in a message.
	 */
	Mutex_UnlWake(&peerMksck->mutex, MutexModeEX, MKSCK_CVAR_FILL, false);

	/*
	 * Maybe guest is in a general 'wait for interrupt' wait or
	 * grinding away executing guest instructions.
	 *
	 * If it has a receive callback armed for the socket and is
	 * waiting a message, just wake it up. Else send an IPI to the CPU
	 * running the guest so it will interrupt whatever it is doing and
	 * read the message.
	 *
	 * Holding the mksckPage->mutex prevents mksckPage->vmHKVA from
	 * clearing on us.
	 */
	if (peerMksck->rcvCBEntryMVA != 0) {
		MksckPage *peerMksckPage = Mksck_ToSharedPage(peerMksck);

		err = Mutex_Lock(&peerMksckPage->mutex, MutexModeSH);
		if (err == 0) {
			uint32 sockIdx = peerMksck->index;
			struct MvpkmVM *vm =
				(struct MvpkmVM *)peerMksckPage->vmHKVA;

			/*
			 * The destruction of vm and wsp is blocked by the
			 * mksckPage->mutex.
			 */
			if (vm) {
				WorldSwitchPage *wsp = vm->wsp;

				ASSERT(sockIdx <
				       8 * sizeof(peerMksckPage->wakeVMMRecv));
				ATOMIC_ORV(peerMksckPage->wakeVMMRecv,
					   1U << sockIdx);

				if (wsp)
					Mvpkm_WakeGuest(vm, ACTION_MKSCK);
			}
			Mutex_Unlock(&peerMksckPage->mutex, MutexModeSH);
		}
	}

	/*
	 * If all are happy tell the caller the number of transferred bytes.
	 * (err can only be non-zero here if the wake-path Mutex_Lock failed.)
	 */
	if (!err)
		err = len;

	/*
	 * Now that we are done with target socket, allow it to be freed.
	 */
decRefc:
	Mksck_DecRefc(peerMksck);
	return err;

unlockDecRefc:
	Mutex_Unlock(&peerMksck->mutex, MutexModeEX);
	goto decRefc;
}
- /**
- * @brief Page fault handler for receive windows. Since the host process
- * should not be faulting in this region and only be accessing
- * memory that has been established via a typed message transfer,
- * we always signal the fault back to the process.
- */
- static int
- MksckFault(struct vm_area_struct *vma,
- struct vm_fault *vmf)
- {
- return VM_FAULT_SIGBUS;
- }
- /**
- * @brief Establish a region in the host process suitable for use as a
- * receive window.
- *
- * @param file file reference (ignored).
- * @param sock user socket structure.
- * @param vma Linux virtual memory area defining the region.
- *
- * @return 0 on success, otherwise error code.
- */
- static int
- MksckMMap(struct file *file,
- struct socket *sock,
- struct vm_area_struct *vma)
- {
- /*
- * All the hard work is done in MksckDgramRecvMsg. Here we simply mark
- * the vma as belonging to Mksck.
- */
- vma->vm_ops = &mksckVMOps;
- return 0;
- }
- /**
- * @brief This gets called after returning from the monitor.
- * Since the monitor doesn't directly wake VMX threads when it sends
- * something to VMX (for efficiency), this routine checks for the
- * omitted wakes and does them.
- * @param mksckPage some shared page that the monitor writes packets to, ie
- * an host shared page
- */
- void
- Mksck_WakeBlockedSockets(MksckPage *mksckPage)
- {
- Mksck *mksck;
- uint32 i, wakeHostRecv;
- wakeHostRecv = mksckPage->wakeHostRecv;
- if (wakeHostRecv != 0) {
- mksckPage->wakeHostRecv = 0;
- for (i = 0; wakeHostRecv != 0; i++) {
- if (wakeHostRecv & 1) {
- mksck = &mksckPage->sockets[i];
- Mutex_CondSig(&mksck->mutex,
- MKSCK_CVAR_FILL, true);
- }
- wakeHostRecv >>= 1;
- }
- }
- }
- /**
- * @brief allocate and initialize a shared page.
- * @return pointer to shared page.<br>
- * NULL on error
- */
- MksckPage *
- MksckPageAlloc(void)
- {
- uint32 jj;
- /*
- * Ask for pages in the virtual kernel space. There is no
- * requirement to be physically contiguous.
- */
- MksckPage *mksckPage = vmalloc(MKSCKPAGE_SIZE);
- if (mksckPage) {
- /*
- * Initialize its contents. Start refCount at 1 and decrement
- * it when the worldswitch or VM page gets freed.
- */
- memset(mksckPage, 0, MKSCKPAGE_SIZE);
- ATOMIC_SETV(mksckPage->refCount, 1);
- mksckPage->portStore = MKSCK_PORT_HIGH;
- Mutex_Init(&mksckPage->mutex);
- for (jj = 0; jj < MKSCK_SOCKETS_PER_PAGE; jj++)
- Mutex_Init(&mksckPage->sockets[jj].mutex);
- }
- return mksckPage;
- }
- /**
- * @brief Release the allocated pages.
- * @param mksckPage the address of the mksckPage to be released
- */
- static void
- MksckPageRelease(MksckPage *mksckPage)
- {
- int ii;
- for (ii = 0; ii < MKSCK_SOCKETS_PER_PAGE; ii++)
- Mutex_Destroy(&mksckPage->sockets[ii].mutex);
- Mutex_Destroy(&mksckPage->mutex);
- vfree(mksckPage);
- }
/**
 * @brief Using the tgid locate the vmid of this process.
 *        Assumed that mksckPageListLock is held
 * @return the vmId if page is already allocated,
 *         the first vacant vmid if not yet allocated.<br>
 *         MKSCK_PORT_UNDEF if no slot is vacant
 */
static inline Mksck_VmId
GetHostVmId(void)
{
	uint32 jj;
	Mksck_VmId vmId, vmIdFirstVacant = MKSCK_VMID_UNDEF;
	MksckPage *mksckPage;
	uint32 tgid = task_tgid_vnr(current);

	/*
	 * Assign an unique vmId to the shared page. Start the search from
	 * the vmId that is the result of hashing tgid to 15 bits. As a
	 * used page with a given vmId can occupy only a given slot in the
	 * mksckPages array, it is enough to search through the
	 * MKSCK_MAX_SHARES slots for a vacancy.
	 */
	for (jj = 0, vmId = MKSCK_TGID2VMID(tgid);
	     jj < MKSCK_MAX_SHARES;
	     jj++, vmId++) {
		/* Wrap the candidate vmId past the highest legal value. */
		if (vmId > MKSCK_VMID_HIGH)
			vmId = 0;
		mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
		if (mksckPage) {
			/*
			 * Occupied slot: it is ours only if the tgid matches
			 * and the page is a host (non-guest) page.
			 */
			if (mksckPage->tgid == tgid &&
			    !mksckPage->isGuest)
				return mksckPage->vmId;
		} else if (vmIdFirstVacant == MKSCK_VMID_UNDEF) {
			/* Remember the first hole in case nothing matches. */
			vmIdFirstVacant = vmId;
		}
	}
	return vmIdFirstVacant;
}
- /**
- * @brief Locate the first empty slot
- * Assumed that mksckPageListLock is held
- * @return the first vacant vmid.<br>
- * MKSCK_PORT_UNDEF if no slot is vacant
- */
- static inline Mksck_VmId
- GetNewGuestVmId(void)
- {
- Mksck_VmId vmId;
- for (vmId = 0; vmId < MKSCK_MAX_SHARES; vmId++) {
- if (!mksckPages[MKSCK_VMID2IDX(vmId)])
- return vmId;
- }
- return MKSCK_VMID_UNDEF;
- }
- /**
- * @brief Find shared page for a given idx. The page referred to be the
- * idx should exist and be locked by the caller.
- * @param idx index of the page in the array
- * @return pointer to shared page
- */
- MksckPage *
- MksckPage_GetFromIdx(uint32 idx)
- {
- MksckPage *mksckPage = mksckPages[idx];
- ASSERT(mksckPage);
- ASSERT(idx < MKSCK_MAX_SHARES);
- ASSERT(ATOMIC_GETO(mksckPage->refCount));
- return mksckPage;
- }
- /**
- * @brief find shared page for a given vmId
- * The vmid should exist and be locked by the caller.
- * @param vmId vmId to look for, either an host vmId or a guest vmId
- * @return pointer to shared page
- */
- MksckPage *
- MksckPage_GetFromVmId(Mksck_VmId vmId)
- {
- MksckPage *mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
- ASSERT(mksckPage);
- ASSERT(mksckPage->vmId == vmId);
- ASSERT(ATOMIC_GETO(mksckPage->refCount));
- return mksckPage;
- }
- /**
- * @brief find shared page for a given vmId
- * @param vmId vmId to look for, either an host vmId or a guest vmId
- * @return NULL: no such shared page exists<br>
- * else: pointer to shared page.
- * Call Mksck_DecRefc() when done with pointer
- */
- MksckPage *
- MksckPage_GetFromVmIdIncRefc(Mksck_VmId vmId)
- {
- MksckPage *mksckPage;
- spin_lock(&mksckPageListLock);
- mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
- if (!mksckPage || (mksckPage->vmId != vmId)) {
- pr_info("MksckPage_GetFromVmIdIncRefc: vmId %04X not found\n",
- vmId);
- mksckPage = NULL;
- } else {
- ATOMIC_ADDV(mksckPage->refCount, 1);
- }
- spin_unlock(&mksckPageListLock);
- return mksckPage;
- }
/**
 * @brief find or allocate shared page using tgid
 * @return NULL: no such shared page exists<br>
 *         else: pointer to shared page.
 *               Call Mksck_DecRefc() when done with pointer
 *
 * The allocation cannot happen under the spinlock, so the code drops the
 * lock, allocates, re-locks and re-checks; it loops until the re-check
 * holds or the table is full.
 */
MksckPage *
MksckPage_GetFromTgidIncRefc(void)
{
	MksckPage *mksckPage;
	Mksck_VmId vmId;

	while (1) {
		spin_lock(&mksckPageListLock);

		vmId = GetHostVmId();

		if (vmId == MKSCK_VMID_UNDEF) {
			/*
			 * No vmId has been allocated yet and there is no
			 * free slot.
			 */
			spin_unlock(&mksckPageListLock);
			return NULL;
		}

		mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
		if (mksckPage != NULL) {
			/*
			 * There is a vmid already allocated, increment the
			 * ref count on it.
			 */
			ATOMIC_ADDV(mksckPage->refCount, 1);
			spin_unlock(&mksckPageListLock);
			return mksckPage;
		}

		/*
		 * Have to release spinlock to allocate a new page.
		 */
		spin_unlock(&mksckPageListLock);
		mksckPage = MksckPageAlloc();
		if (mksckPage == NULL)
			return NULL;

		/*
		 * Re-lock and make sure no one else allocated while unlocked.
		 * If someone else did allocate, free ours off and use theirs.
		 */
		spin_lock(&mksckPageListLock);
		vmId = GetHostVmId();
		if ((vmId != MKSCK_VMID_UNDEF) &&
		    (mksckPages[MKSCK_VMID2IDX(vmId)] == NULL))
			break;
		spin_unlock(&mksckPageListLock);
		MksckPageRelease(mksckPage);
	}

	/*
	 * This is a successful new allocation. insert it into the table
	 * and initialize the fields. (Reached with the spinlock held.)
	 */
	mksckPages[MKSCK_VMID2IDX(vmId)] = mksckPage;
	mksckPage->vmId = vmId;
	mksckPage->isGuest = false;
	mksckPage->vmHKVA = 0;
	mksckPage->tgid = task_tgid_vnr(current);
	pr_warn("New host mksck page is allocated: idx %x, vmId %x, tgid %d\n",
		MKSCK_VMID2IDX(vmId), vmId, mksckPage->tgid);

	spin_unlock(&mksckPageListLock);
	return mksckPage;
}
- /**
- * @brief Initialize the VMX provided wsp. Allocate communication page.
- * @param vm which virtual machine we're running
- * @return 0 if all OK, error value otherwise
- */
- int
- Mksck_WspInitialize(struct MvpkmVM *vm)
- {
- WorldSwitchPage *wsp = vm->wsp;
- int err;
- Mksck_VmId vmId;
- MksckPage *mksckPage;
- if (wsp->guestId)
- return -EBUSY;
- mksckPage = MksckPageAlloc();
- if (!mksckPage)
- return -ENOMEM;
- spin_lock(&mksckPageListLock);
- vmId = GetNewGuestVmId();
- if (vmId == MKSCK_VMID_UNDEF) {
- err = -EMFILE;
- MksckPageRelease(mksckPage);
- pr_err("Mksck_WspInitialize: Cannot allocate vmId\n");
- } else {
- /*
- * Now that the mksckPage is all initialized, let others see it.
- */
- mksckPages[MKSCK_VMID2IDX(vmId)] = mksckPage;
- mksckPage->vmId = vmId;
- mksckPage->isGuest = true;
- mksckPage->vmHKVA = (HKVA)vm;
- /* mksckPage->tgid is undefined when isGuest is true */
- wsp->guestId = vmId;
- pr_warn("New guest mksck page is allocated: idx %x, vmId %x\n",
- MKSCK_VMID2IDX(vmId), vmId);
- err = 0;
- /*
- * All stable, ie, mksckPages[] written, ok to unlock now.
- */
- spin_unlock(&mksckPageListLock);
- }
- return err;
- }
/**
 * @brief Release the wsp. Clean up after the monitor. Free the
 *        associated communication page.
 * @param wsp which worldswitch page (VCPU)
 *
 * Closes every socket the monitor left open, detaches the shared page
 * from the vm structure, then drops all reference counts the monitor
 * acquired during its lifetime.
 */
void
Mksck_WspRelease(WorldSwitchPage *wsp)
{
	int ii;
	int err;
	/*
	 * NOTE(review): the lookup result is dereferenced without a NULL
	 * check — presumably wsp->guestId is always valid once
	 * Mksck_WspInitialize() succeeded; confirm.
	 */
	MksckPage *mksckPage = MksckPage_GetFromVmId(wsp->guestId);
	/*
	 * The worldswitch page for a particular VCPU is about to be freed
	 * off, so we know the monitor will never execute again. But the
	 * monitor most likely left some sockets open. Those may have
	 * outbound connections to host sockets that we must close.
	 *
	 * Loop through all possibly open sockets (one bit per socket in
	 * wsp->isOpened, walked lowest bit first).
	 */
	uint32 isOpened = wsp->isOpened;
	Mksck *mksck = mksckPage->sockets;

	while (isOpened) {
		if (isOpened & 1) {
			ASSERT(ATOMIC_GETO(mksck->refCount) != 0);
			/*
			 * The socket may be connected to a peer (host) socket,
			 * so we have to decrement that target socket's
			 * reference count.
			 * Unfortunately, Mksck_DisconnectPeer(mksck) cannot
			 * be called as mksck->peer is an mva not an hkva.
			 * Translate the address first.
			 */
			if (mksck->peer) {
				MksckPage *mksckPagePeer =
					MksckPage_GetFromVmId(mksck->peerAddr.vmId);
				ASSERT(mksckPagePeer);
				mksck->peer =
					MksckPage_GetFromAddr(mksckPagePeer,
							      mksck->peerAddr);
				ASSERT(mksck->peer);
				/* mksck->peer is now a hkva */
			}
			Mksck_CloseCommon(mksck);
		}
		isOpened >>= 1;
		mksck++;
	}

	/*
	 * A host socket may be in the process of sending to the guest. It
	 * will attempt to wake up the guest using mksckPage->vmHKVA and
	 * mksckPage->vmHKVA->wsp. To assure that the vm and wsp structures
	 * are not disappearing from under the sending thread we lock the
	 * page here before clearing the back-pointer.
	 */
	err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
	ASSERT(!err);
	mksckPage->vmHKVA = 0;
	Mutex_Unlock(&mksckPage->mutex, MutexModeEX);

	/*
	 * Decrement refcount set by MksckPageAlloc() call in
	 * Mksck_WspInitialize().
	 */
	MksckPage_DecRefc(mksckPage);

	/*
	 * Decrement refcount set by VMM:Mksck_Init() referring to the local
	 * variable guestMksckPage.
	 */
	if (wsp->guestPageMapped) {
		wsp->guestPageMapped = false;
		MksckPage_DecRefc(mksckPage);
	}

	/*
	 * Another task is to decrement the reference count on the mksck
	 * pages the monitor accessed. Those pages are listed in the
	 * wsp->isPageMapped list. They were locked by the monitor
	 * calling WSCALL_GET_PAGE_FROM_VMID.
	 */
	for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
		if (wsp->isPageMapped[ii]) {
			MksckPage *mksckPageOther = MksckPage_GetFromIdx(ii);
			wsp->isPageMapped[ii] = false;
			MksckPage_DecRefc(mksckPageOther);
		}
	}
}
- /**
- * @brief disconnect from peer by decrementing
- * peer socket's reference count and clearing the pointer.
- * @param mksck local socket to check for connection
- */
- void
- Mksck_DisconnectPeer(Mksck *mksck)
- {
- Mksck *peerMksck = mksck->peer;
- if (peerMksck != NULL) {
- mksck->peer = NULL;
- mksck->peerAddr.addr = MKSCK_ADDR_UNDEF;
- Mksck_DecRefc(peerMksck);
- }
- }
/**
 * @brief decrement shared page reference count, free page if it goes zero.
 *        also do a dmb first to make sure all activity on the struct is
 *        finished before decrementing the ref count.
 * @param mksckPage shared page
 *
 * The 1 -> 0 transition is special-cased: before the count may drop to
 * zero, the page must be unhooked from mksckPages[] under
 * mksckPageListLock so no new lookup can take a fresh reference. The
 * unhook and the final decrement are made atomic via ATOMIC_SETIF;
 * if either race is lost the whole check is retried.
 */
void
MksckPage_DecRefc(struct MksckPage *mksckPage)
{
	uint32 oldRefc;

	DMB();	/* order all prior accesses before dropping the count */
	do {
		while ((oldRefc = ATOMIC_GETO(mksckPage->refCount)) == 1) {
			/*
			 * Find corresponding entry in list of known shared
			 * pages and clear it so we can't open any new sockets
			 * on this shared page, thus preventing its refCount
			 * from being incremented.
			 */
			spin_lock(&mksckPageListLock);
			if (ATOMIC_SETIF(mksckPage->refCount, 0, 1)) {
				/* We performed the final 1 -> 0 drop. */
				uint32 ii = MKSCK_VMID2IDX(mksckPage->vmId);
				ASSERT(ii < MKSCK_MAX_SHARES);
				ASSERT(mksckPages[ii] == mksckPage);
				mksckPages[ii] = NULL;
				spin_unlock(&mksckPageListLock);
				pr_warn("%s mksck page is released: idx %x, " \
					"vmId %x, tgid %d\n",
					mksckPage->isGuest ? "Guest" : "Host",
					ii, mksckPage->vmId, mksckPage->tgid);
				MksckPageRelease(mksckPage);
				return;
			}
			/* Lost the race; re-read the count and retry. */
			spin_unlock(&mksckPageListLock);
		}
		/* A zero count here would mean a use-after-free. */
		ASSERT(oldRefc != 0);
	} while (!ATOMIC_SETIF(mksckPage->refCount, oldRefc - 1, oldRefc));
}
- /**
- * @brief Lookup if the provided mpn belongs to one of the Mksck pages.
- * Map if found.
- * @return 0 if all OK, error value otherwise
- */
- int
- MksckPage_LookupAndInsertPage(struct vm_area_struct *vma,
- unsigned long address,
- MPN mpn)
- {
- int ii, jj;
- struct MksckPage **mksckPagePtr = mksckPages;
- spin_lock(&mksckPageListLock);
- for (jj = MKSCK_MAX_SHARES; jj--; mksckPagePtr++) {
- if (*mksckPagePtr) {
- for (ii = 0; ii < MKSCKPAGE_TOTAL; ii++) {
- HKVA tmp = ((HKVA)*mksckPagePtr) +
- ii * PAGE_SIZE;
- if (vmalloc_to_pfn((void *)tmp) == mpn &&
- vm_insert_page(vma, address,
- pfn_to_page(mpn)) == 0) {
- spin_unlock(&mksckPageListLock);
- return 0;
- }
- }
- }
- }
- spin_unlock(&mksckPageListLock);
- return -1;
- }
/**
 * @brief Print information on the allocated shared pages
 *
 * This function reports (among many other things) on the use of locks
 * on the mksck page (page lock and individual socket locks). To avoid
 * the Hiesenberg effect it avoids using locks unless there is a
 * danger of dereferencing freed memory. In particular, holding
 * mksckPageListLock ensures that the mksck page is not freed while it
 * is read. But under very rare conditions this function may report
 * inconsistent or garbage data.
 *
 * @param m       seq_file output sink
 * @param private unused (seq_file boilerplate)
 * @return 0 always
 */
static int
MksckPageInfoShow(struct seq_file *m,
		  void *private)
{
	int ii, jj;
	uint32 isPageMapped = 0;
	int err;
	struct MvpkmVM *vm;

	/*
	 * Lock is needed to atomize the test and dereference of mksckPages[ii].
	 */
	spin_lock(&mksckPageListLock);

	/*
	 * First pass: collect the isPageMapped bitmask from a guest page's
	 * wsp. NOTE(review): the `break` means only the first guest page
	 * found contributes to isPageMapped — confirm this is intended.
	 */
	for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
		struct MksckPage *mksckPage = mksckPages[ii];
		if (mksckPage != NULL && mksckPage->isGuest) {
			/*
			 * After the refcount is incremented mksckPage will
			 * not be freed and it can continued to be dereferenced
			 * after the unlock of mksckPageListLock.
			 */
			ATOMIC_ADDV(mksckPage->refCount, 1);
			spin_unlock(&mksckPageListLock);
			/*
			 * Need the page lock to dereference mksckPage->vmHKVA.
			 */
			err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
			vm = (struct MvpkmVM *)mksckPage->vmHKVA;
			if (err == 0 && vm && vm->wsp) {
				for (jj = 0; jj < MKSCK_MAX_SHARES; jj++) {
					if (vm->wsp->isPageMapped[jj])
						isPageMapped |= 1<<jj;
				}
			}
			Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
			/*
			 * Decrement the page refcount and relock the
			 * mksckPageListLock for the next for loop.
			 */
			MksckPage_DecRefc(mksckPage);
			spin_lock(&mksckPageListLock);
			break;
		}
	}

	/*
	 * mksckPageListLock is still locked, mksckPages[ii] can be
	 * dereferenced. Second pass: print one line per allocated page.
	 */
	for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
		struct MksckPage *mksckPage = mksckPages[ii];
		if (mksckPage != NULL) {
			uint32 lState = ATOMIC_GETO(mksckPage->mutex.state);
			uint32 isOpened = 0; /* Guest has an implicit ref. */

			seq_printf(m, "MksckPage[%02d]: { vmId = %4x(%c), " \
				   "refC = %2d%s", ii, mksckPage->vmId,
				   mksckPage->isGuest ? 'G' : 'H',
				   ATOMIC_GETO(mksckPage->refCount),
				   (isPageMapped&(1<<ii) ? "*" : ""));

			if (lState)
				seq_printf(m, ", lock=%x locked by line %d, " \
					   "unlocked by %d",
					   lState, mksckPage->mutex.line,
					   mksckPage->mutex.lineUnl);

			if (!mksckPage->isGuest) {
				/* Host page: report the owning process. */
				struct task_struct *target;
				seq_printf(m, ", tgid = %d", mksckPage->tgid);
				rcu_read_lock();
				target = pid_task(find_vpid(mksckPage->tgid),
						  PIDTYPE_PID);
				seq_printf(m, "(%s)",
					   (target ? target->comm :
					    "no such process"));
				rcu_read_unlock();
			} else {
				/*
				 * Guest page: fetch wsp->isOpened, which
				 * requires dropping the list lock and
				 * taking the page mutex (same dance as
				 * the first pass).
				 */
				ATOMIC_ADDV(mksckPage->refCount, 1);
				spin_unlock(&mksckPageListLock);
				err = Mutex_Lock(&mksckPage->mutex,
						 MutexModeEX);
				vm = (struct MvpkmVM *)mksckPage->vmHKVA;
				if (err == 0 && vm && vm->wsp)
					isOpened = vm->wsp->isOpened;
				Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
				MksckPage_DecRefc(mksckPage);
				spin_lock(&mksckPageListLock);
				/*
				 * As the mksckPageListLock was unlocked,
				 * nothing prevented the MksckPage_DecRefc from
				 * actually freeing the page. Lets verify that
				 * the page is still there.
				 */
				if (mksckPage != mksckPages[ii]) {
					seq_puts(m, " released }\n");
					continue;
				}
			}

			/* Dump per-socket state for this page. */
			seq_puts(m, ", sockets[] = {");
			for (jj = 0;
			     jj < mksckPage->numAllocSocks;
			     jj++, isOpened >>= 1) {
				Mksck *mksck = mksckPage->sockets + jj;
				if (ATOMIC_GETO(mksck->refCount)) {
					uint32 blocked;
					char *shutdRO =
						(mksck->shutDown & MKSCK_SHUT_RD ?
						 " SHUTD_RD" : "");
					char *shutdRW =
						(mksck->shutDown & MKSCK_SHUT_WR ?
						 " SHUTD_WR" : "");
					lState =
						ATOMIC_GETO(mksck->mutex.state);
					seq_printf(m, "\n " \
						   "{ addr = %8x, " \
						   "refC = %2d%s%s%s",
						   mksck->addr.addr,
						   ATOMIC_GETO(mksck->refCount),
						   (isOpened & 1 ? "*" : ""),
						   shutdRO,
						   shutdRW);
					if (mksck->peer)
						seq_printf(m,
							   ", peerAddr = %8x",
							   mksck->peerAddr.addr);
					if (lState)
						seq_printf(m,
							   ", lock=%x locked " \
							   "by line %d, " \
							   "unlocked by %d",
							   lState,
							   mksck->mutex.line,
							   mksck->mutex.lineUnl);
					blocked =
						ATOMIC_GETO(mksck->mutex.blocked);
					if (blocked)
						seq_printf(m, ", blocked=%d",
							   blocked);
					seq_puts(m, " }");
				}
			}
			seq_puts(m, " } }\n");
		}
	}
	spin_unlock(&mksckPageListLock);
	return 0;
}
- static int
- MksckPageInfoOpen(struct inode *inode,
- struct file *file)
- {
- return single_open(file, MksckPageInfoShow, inode->i_private);
- }
/* debugfs file operations for the mksckPage info node (seq_file based). */
static const struct file_operations mksckPageInfoFops = {
	.open = MksckPageInfoOpen,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
/**
 * @brief Create the "mksckPage" debugfs entry under the given directory.
 * @param parent parent debugfs directory
 *
 * NOTE(review): mode S_IROTH grants read permission to "other" only
 * (not owner/group); S_IRUGO may have been intended — confirm.
 */
void
MksckPageInfo_Init(struct dentry *parent)
{
	debugfs_create_file("mksckPage", S_IROTH, parent,
			    NULL, &mksckPageInfoFops);
}
|