/* hyperv_transport.c */
/*
 * Hyper-V transport for vsock
 *
 * Hyper-V Sockets supplies a byte-stream based communication mechanism
 * between the host and the VM. This driver implements the necessary
 * support in the VM by introducing the new vsock transport.
 *
 * Copyright (c) 2017, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
  20. #include <linux/module.h>
  21. #include <linux/vmalloc.h>
  22. #include <linux/hyperv.h>
  23. #include <net/sock.h>
  24. #include <net/af_vsock.h>
/* The host side's design of the feature requires 6 exact 4KB pages for
 * recv/send rings respectively -- this is suboptimal considering memory
 * consumption, however unluckily we have to live with it, before the
 * host comes up with a better design in the future.
 */
#define PAGE_SIZE_4K		4096
#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)

/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE		(1024 * 16)

/* How long to wait for graceful shutdown of a connection */
#define HVS_CLOSE_TIMEOUT (8 * HZ)

/* Per-packet header carried on the VMBus ring before the payload bytes. */
struct vmpipe_proto_header {
	u32 pkt_type;	/* always written as 1 for data; see hvs_send_data() */
	u32 data_size;	/* payload length in bytes; 0 means FIN */
};

/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
 * data from the ringbuffer into the userspace buffer.
 */
struct hvs_recv_buf {
	/* The header before the payload data */
	struct vmpipe_proto_header hdr;

	/* The payload */
	u8 data[HVS_MTU_SIZE];
};

/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
 * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
 * buffer, because tests show there is no significant performance difference.
 *
 * Note: the buffer can be eliminated in the future when we add new VMBus
 * ringbuffer APIs that allow us to directly copy data from userspace buffer
 * to VMBus ringbuffer.
 */
#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))

struct hvs_send_buf {
	/* The header before the payload data */
	struct vmpipe_proto_header hdr;

	/* The payload */
	u8 data[HVS_SEND_BUF_SIZE];
};

/* Total on-ring overhead of one packet: VMBus descriptor + pipe header. */
#define HVS_HEADER_LEN	(sizeof(struct vmpacket_descriptor) + \
			 sizeof(struct vmpipe_proto_header))

/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
 * __hv_pkt_iter_next().
 */
#define VMBUS_PKT_TRAILER_SIZE	(sizeof(u64))

/* Ring-buffer footprint of a packet with 'payload_len' bytes of payload;
 * the payload is padded to an 8-byte boundary.
 */
#define HVS_PKT_LEN(payload_len)	(HVS_HEADER_LEN + \
					 ALIGN((payload_len), 8) + \
					 VMBUS_PKT_TRAILER_SIZE)
/* Overlays a service GUID so the first 4 bytes can be read/written as the
 * vsock port number (see the big comment below on the GUID format).
 */
union hvs_service_id {
	uuid_le	srv_id;

	struct {
		unsigned int svm_port;
		unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
	};
};

/* Per-socket state (accessed via vsk->trans) */
struct hvsock {
	struct vsock_sock *vsk;

	uuid_le vm_srv_id;
	uuid_le host_srv_id;

	struct vmbus_channel *chan;
	struct vmpacket_descriptor *recv_desc;

	/* The length of the payload not delivered to userland yet */
	u32 recv_data_len;
	/* The offset of the payload */
	u32 recv_data_off;

	/* Have we sent the zero-length packet (FIN)? */
	bool fin_sent;
};
/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
 * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
 * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
 * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
 * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
 * as the local cid.
 *
 * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
 * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
 * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
 * the below sockaddr:
 *
 * struct SOCKADDR_HV
 * {
 *    ADDRESS_FAMILY Family;
 *    USHORT Reserved;
 *    GUID VmId;
 *    GUID ServiceId;
 * };
 * Note: VmID is not used by Linux VM and actually it isn't transmitted via
 * VMBus, because here it's obvious the host and the VM can easily identify
 * each other. Though the VmID is useful on the host, especially in the case
 * of Windows container, Linux VM doesn't need it at all.
 *
 * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
 * the available GUID space of SOCKADDR_HV so that we can create a mapping
 * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
 * Hyper-V Sockets apps on the host and in Linux VM is:
 *
 ****************************************************************************
 * The only valid Service GUIDs, from the perspectives of both the host and *
 * Linux VM, that can be connected by the other end, must conform to this   *
 * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
 ****************************************************************************
 *
 * When we write apps on the host to connect(), the GUID ServiceID is used.
 * When we write apps in Linux VM to connect(), we only need to specify the
 * port and the driver will form the GUID and use that to request the host.
 *
 */
/* 00000000-facb-11e6-bd58-64006a7986d3 */
/* Template service GUID: the first 32 bits (shown as zero) are replaced by
 * the vsock port; the remaining bytes are fixed and identify hv_sock.
 */
static const uuid_le srv_id_template =
	UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
		0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
  139. static bool is_valid_srv_id(const uuid_le *id)
  140. {
  141. return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
  142. }
/* Extract the vsock port stored in the first 4 bytes of a service GUID.
 * The type-punning cast is fine here: the kernel builds with
 * -fno-strict-aliasing, and union hvs_service_id documents the layout.
 */
static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
{
	return *((unsigned int *)svr_id);
}
  147. static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
  148. {
  149. unsigned int port = get_port_by_srv_id(svr_id);
  150. vsock_addr_init(addr, VMADDR_CID_ANY, port);
  151. }
/* Ask the host to interrupt us once a full maximum-size packet would fit
 * in the outgoing ring.  The barrier orders the pending-size store before
 * any subsequent ring-buffer reads/writes.
 */
static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
{
	set_channel_pending_send_size(chan,
				      HVS_PKT_LEN(HVS_SEND_BUF_SIZE));

	virt_mb();
}
/* True if the inbound ring holds at least one complete packet (even a
 * zero-payload FIN packet counts as readable).
 */
static bool hvs_channel_readable(struct vmbus_channel *chan)
{
	u32 readable = hv_get_bytes_to_read(&chan->inbound);

	/* 0-size payload means FIN */
	return readable >= HVS_PKT_LEN(0);
}
/* Classify the inbound ring: 1 = payload available, 0 = FIN only,
 * -1 = nothing readable.
 */
static int hvs_channel_readable_payload(struct vmbus_channel *chan)
{
	u32 readable = hv_get_bytes_to_read(&chan->inbound);

	if (readable > HVS_PKT_LEN(0)) {
		/* At least we have 1 byte to read. We don't need to return
		 * the exact readable bytes: see vsock_stream_recvmsg() ->
		 * vsock_stream_has_data().
		 */
		return 1;
	}

	if (readable == HVS_PKT_LEN(0)) {
		/* 0-size payload means FIN */
		return 0;
	}

	/* No payload or FIN */
	return -1;
}
/* Number of payload bytes that can currently be written, after reserving
 * room for one 1-byte packet (the ring must never become 100% full) and
 * one zero-payload FIN packet.  Result is rounded down to an 8-byte
 * multiple to match the ring's payload alignment.
 */
static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
{
	u32 writeable = hv_get_bytes_to_write(&chan->outbound);
	size_t ret;

	/* The ringbuffer mustn't be 100% full, and we should reserve a
	 * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
	 * and hvs_shutdown().
	 */
	if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
		return 0;

	ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);

	return round_down(ret, 8);
}
/* Stamp the pipe header (pkt_type 1 = data) and push header + @to_write
 * payload bytes onto the channel as one in-band packet.  @to_write == 0
 * produces the FIN packet.  Returns vmbus_sendpacket()'s status.
 */
static int hvs_send_data(struct vmbus_channel *chan,
			 struct hvs_send_buf *send_buf, size_t to_write)
{
	send_buf->hdr.pkt_type = 1;
	send_buf->hdr.data_size = to_write;
	return vmbus_sendpacket(chan, &send_buf->hdr,
				sizeof(send_buf->hdr) + to_write,
				0, VM_PKT_DATA_INBAND, 0);
}
/* VMBus channel interrupt callback: wake readers when a packet arrived and
 * writers when the outbound ring has any free space.  @ctx is the struct
 * sock registered in hvs_open_connection().
 */
static void hvs_channel_cb(void *ctx)
{
	struct sock *sk = (struct sock *)ctx;
	struct vsock_sock *vsk = vsock_sk(sk);
	struct hvsock *hvs = vsk->trans;
	struct vmbus_channel *chan = hvs->chan;

	if (hvs_channel_readable(chan))
		sk->sk_data_ready(sk);

	if (hv_get_bytes_to_write(&chan->outbound) > 0)
		sk->sk_write_space(sk);
}
/* Common close path; caller must hold the socket lock.
 *
 * Marks the socket done and the peer fully shut down; moves to TCP_CLOSING
 * only when no unread data remains (so a reader can still drain the ring).
 * If the delayed-close timeout is pending, this also removes the socket and
 * drops the timeout's reference — unless @cancel_timeout is true and the
 * work could not be cancelled (it is already running and will clean up
 * itself).
 */
static void hvs_do_close_lock_held(struct vsock_sock *vsk,
				   bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);
	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;
		vsock_remove_sock(vsk);

		/* Release the reference taken while scheduling the timeout */
		sock_put(sk);
	}
}
/* Channel-rescind callback: the host closed the connection.  Runs the
 * common close path and drops the channel's socket reference taken in
 * hvs_open_connection().
 */
static void hvs_close_connection(struct vmbus_channel *chan)
{
	struct sock *sk = get_per_channel_state(chan);

	lock_sock(sk);
	hvs_do_close_lock_held(vsock_sk(sk), true);
	release_sock(sk);

	/* Release the refcnt for the channel that's opened in
	 * hvs_open_connection().
	 */
	sock_put(sk);
}
/* Handle a new VMBus channel offer for hv_sock.
 *
 * Two cases, distinguished by user_def[0] in the offer:
 *  - host-initiated connect (conn_from_host != 0): the bound socket must be
 *    a listener; create a child socket and enqueue it for accept();
 *  - guest-initiated connect: the bound socket must be in TCP_SYN_SENT
 *    (set by the vsock core around hvs_connect()).
 *
 * On success the channel holds a socket reference that is dropped by
 * hvs_close_connection() when the host rescinds the channel.
 */
static void hvs_open_connection(struct vmbus_channel *chan)
{
	uuid_le *if_instance, *if_type;
	unsigned char conn_from_host;

	struct sockaddr_vm addr;
	struct sock *sk, *new = NULL;
	struct vsock_sock *vnew = NULL;
	struct hvsock *hvs = NULL;
	struct hvsock *hvs_new = NULL;
	int ret;

	if_type = &chan->offermsg.offer.if_type;
	if_instance = &chan->offermsg.offer.if_instance;
	conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];

	/* The host or the VM should only listen on a port in
	 * [0, MAX_LISTEN_PORT]
	 */
	if (!is_valid_srv_id(if_type))
		return;

	/* For host-initiated connections the local port comes from if_type;
	 * for guest-initiated ones, from if_instance.
	 */
	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
	sk = vsock_find_bound_socket(&addr);
	if (!sk)
		return;

	lock_sock(sk);
	if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
	    (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
		goto out;

	if (conn_from_host) {
		if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
			goto out;

		new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
				     sk->sk_type, 0);
		if (!new)
			goto out;

		new->sk_state = TCP_SYN_SENT;
		vnew = vsock_sk(new);
		hvs_addr_init(&vnew->local_addr, if_type);

		/* Remote peer is always the host */
		vsock_addr_init(&vnew->remote_addr,
				VMADDR_CID_HOST, VMADDR_PORT_ANY);
		vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance);
		hvs_new = vnew->trans;
		hvs_new->chan = chan;
	} else {
		hvs = vsock_sk(sk)->trans;
		hvs->chan = chan;
	}

	set_channel_read_mode(chan, HV_CALL_DIRECT);
	ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
			 RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
			 hvs_channel_cb, conn_from_host ? new : sk);
	if (ret != 0) {
		if (conn_from_host) {
			hvs_new->chan = NULL;
			sock_put(new);
		} else {
			hvs->chan = NULL;
		}
		goto out;
	}

	set_per_channel_state(chan, conn_from_host ? new : sk);

	/* This reference will be dropped by hvs_close_connection(). */
	sock_hold(conn_from_host ? new : sk);
	vmbus_set_chn_rescind_callback(chan, hvs_close_connection);

	/* Set the pending send size to max packet size to always get
	 * notifications from the host when there is enough writable space.
	 * The host is optimized to send notifications only when the pending
	 * size boundary is crossed, and not always.
	 */
	hvs_set_channel_pending_send_size(chan);

	if (conn_from_host) {
		new->sk_state = TCP_ESTABLISHED;
		sk->sk_ack_backlog++;

		/* NOTE(review): local_addr was already initialized above with
		 * the same if_type; this second call looks redundant — confirm
		 * against upstream before removing.
		 */
		hvs_addr_init(&vnew->local_addr, if_type);
		hvs_new->vm_srv_id = *if_type;
		hvs_new->host_srv_id = *if_instance;

		vsock_insert_connected(vnew);
		vsock_enqueue_accept(sk, new);
	} else {
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;

		vsock_insert_connected(vsock_sk(sk));
	}

	sk->sk_state_change(sk);

out:
	/* Release refcnt obtained when we called vsock_find_bound_socket() */
	sock_put(sk);

	release_sock(sk);
}
/* The guest has no fixed CID of its own; report the wildcard (see the
 * comment above srv_id_template on CID usage with Hyper-V sockets).
 */
static u32 hvs_get_local_cid(void)
{
	return VMADDR_CID_ANY;
}
/* Allocate and attach the per-socket transport state.  @psk (the parent
 * socket) is unused by this transport.  Returns 0 or -ENOMEM.
 */
static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
{
	struct hvsock *hvs;

	hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
	if (!hvs)
		return -ENOMEM;

	vsk->trans = hvs;
	hvs->vsk = vsk;

	return 0;
}
/* Initiate a guest-to-host connection: build the VM and host service GUIDs
 * by stamping the local/remote ports into the GUID template, then ask the
 * host for a channel.  Completion arrives via hvs_open_connection() when
 * the host offers the channel.
 */
static int hvs_connect(struct vsock_sock *vsk)
{
	union hvs_service_id vm, host;
	struct hvsock *h = vsk->trans;

	vm.srv_id = srv_id_template;
	vm.svm_port = vsk->local_addr.svm_port;
	h->vm_srv_id = vm.srv_id;

	host.srv_id = srv_id_template;
	host.svm_port = vsk->remote_addr.svm_port;
	h->host_srv_id = host.srv_id;

	return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
}
/* Send the zero-length FIN packet to the host, at most once per socket.
 * Caller must hold the socket lock.  Passing a bare vmpipe_proto_header
 * cast to hvs_send_buf is safe: with a 0-byte payload, hvs_send_data()
 * only reads/writes the header.  @mode is currently unused.
 */
static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
{
	struct vmpipe_proto_header hdr;

	if (hvs->fin_sent || !hvs->chan)
		return;

	/* It can't fail: see hvs_channel_writable_bytes(). */
	(void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
	hvs->fin_sent = true;
}
  362. static int hvs_shutdown(struct vsock_sock *vsk, int mode)
  363. {
  364. if (!(mode & SEND_SHUTDOWN))
  365. return 0;
  366. hvs_shutdown_lock_held(vsk->trans, mode);
  367. return 0;
  368. }
/* Delayed-work handler: force-close a lingering connection once the
 * graceful-shutdown timeout (HVS_CLOSE_TIMEOUT) expires.  Drops the
 * reference taken when the work was scheduled in hvs_close_lock_held().
 */
static void hvs_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);
	if (!sock_flag(sk, SOCK_DONE))
		hvs_do_close_lock_held(vsk, false);

	vsk->close_work_scheduled = false;
	release_sock(sk);
	sock_put(sk);
}
/* Returns true, if it is safe to remove socket; false otherwise */
static bool hvs_close_lock_held(struct vsock_sock *vsk)
{
	struct sock *sk = sk_vsock(vsk);

	/* Sockets that never reached an active state can be removed now. */
	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Make sure the FIN has gone out before waiting for the peer. */
	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);

	if (sock_flag(sk, SOCK_DONE))
		return true;

	/* This reference will be dropped by the delayed close routine */
	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
	return false;
}
/* vsock_transport.release: start a (possibly delayed) close.  The nested
 * lock class is needed because the vsock core may already hold the parent
 * listener's lock when releasing a child socket.
 */
static void hvs_release(struct vsock_sock *vsk)
{
	struct sock *sk = sk_vsock(vsk);
	bool remove_sock;

	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	remove_sock = hvs_close_lock_held(vsk);
	release_sock(sk);
	if (remove_sock)
		vsock_remove_sock(vsk);
}
  410. static void hvs_destruct(struct vsock_sock *vsk)
  411. {
  412. struct hvsock *hvs = vsk->trans;
  413. struct vmbus_channel *chan = hvs->chan;
  414. if (chan)
  415. vmbus_hvsock_device_unregister(chan);
  416. kfree(hvs);
  417. }
/* Hyper-V Sockets is stream-only: every datagram operation is rejected. */
static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}

static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
			     size_t len, int flags)
{
	return -EOPNOTSUPP;
}

static int hvs_dgram_enqueue(struct vsock_sock *vsk,
			     struct sockaddr_vm *remote, struct msghdr *msg,
			     size_t dgram_len)
{
	return -EOPNOTSUPP;
}

static bool hvs_dgram_allow(u32 cid, u32 port)
{
	return false;
}
/* Parse the pipe header of the packet at hvs->recv_desc and reset the
 * payload cursor (recv_data_len/recv_data_off).  A zero-length payload is
 * the host's FIN.  Returns -EIO on a malformed (oversized) packet.
 * Caller must ensure hvs->recv_desc is non-NULL.
 */
static int hvs_update_recv_data(struct hvsock *hvs)
{
	struct hvs_recv_buf *recv_buf;
	u32 payload_len;

	recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
	payload_len = recv_buf->hdr.data_size;

	if (payload_len > HVS_MTU_SIZE)
		return -EIO;

	if (payload_len == 0)
		hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;

	hvs->recv_data_len = payload_len;
	hvs->recv_data_off = 0;

	return 0;
}
  451. static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
  452. size_t len, int flags)
  453. {
  454. struct hvsock *hvs = vsk->trans;
  455. bool need_refill = !hvs->recv_desc;
  456. struct hvs_recv_buf *recv_buf;
  457. u32 to_read;
  458. int ret;
  459. if (flags & MSG_PEEK)
  460. return -EOPNOTSUPP;
  461. if (need_refill) {
  462. hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
  463. ret = hvs_update_recv_data(hvs);
  464. if (ret)
  465. return ret;
  466. }
  467. recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
  468. to_read = min_t(u32, len, hvs->recv_data_len);
  469. ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
  470. if (ret != 0)
  471. return ret;
  472. hvs->recv_data_len -= to_read;
  473. if (hvs->recv_data_len == 0) {
  474. hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
  475. if (hvs->recv_desc) {
  476. ret = hvs_update_recv_data(hvs);
  477. if (ret)
  478. return ret;
  479. }
  480. } else {
  481. hvs->recv_data_off += to_read;
  482. }
  483. return to_read;
  484. }
/* vsock_transport.stream_enqueue: copy at most one HVS_SEND_BUF_SIZE chunk
 * from @msg through a temporary kernel buffer onto the channel.  May write
 * fewer bytes than @len; the vsock core loops to send the rest.  Returns
 * bytes written or a negative errno.
 */
static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
				  size_t len)
{
	struct hvsock *hvs = vsk->trans;
	struct vmbus_channel *chan = hvs->chan;
	struct hvs_send_buf *send_buf;
	ssize_t to_write, max_writable, ret;

	/* The send buffer is sized to exactly one 4K page; see
	 * HVS_SEND_BUF_SIZE.
	 */
	BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);

	send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
	if (!send_buf)
		return -ENOMEM;

	/* NOTE(review): if max_writable is 0 here, to_write becomes 0 and a
	 * zero-length (FIN-like) packet would be sent; presumably the core
	 * only calls this when stream_has_space() > 0 — confirm.
	 */
	max_writable = hvs_channel_writable_bytes(chan);

	to_write = min_t(ssize_t, len, max_writable);
	to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
	ret = memcpy_from_msg(send_buf->data, msg, to_write);
	if (ret < 0)
		goto out;

	ret = hvs_send_data(hvs->chan, send_buf, to_write);
	if (ret < 0)
		goto out;

	ret = to_write;
out:
	kfree(send_buf);
	return ret;
}
/* vsock_transport.stream_has_data: report whether unread payload exists,
 * either left over in the current packet or waiting in the inbound ring.
 * A FIN-only ring records the peer's send shutdown and reports no data.
 * The exact byte count is not needed by the core; 1/0 suffices.
 */
static s64 hvs_stream_has_data(struct vsock_sock *vsk)
{
	struct hvsock *hvs = vsk->trans;
	s64 ret;

	if (hvs->recv_data_len > 0)
		return 1;

	switch (hvs_channel_readable_payload(hvs->chan)) {
	case 1:
		ret = 1;
		break;
	case 0:
		vsk->peer_shutdown |= SEND_SHUTDOWN;
		ret = 0;
		break;
	default: /* -1 */
		ret = 0;
		break;
	}

	return ret;
}
  530. static s64 hvs_stream_has_space(struct vsock_sock *vsk)
  531. {
  532. struct hvsock *hvs = vsk->trans;
  533. return hvs_channel_writable_bytes(hvs->chan);
  534. }
/* vsock_transport.stream_rcvhiwat: receive high-watermark reported to the
 * core.  NOTE(review): value is one byte above the MTU; presumably so a
 * single full-size packet never hits the watermark — confirm.
 */
static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
{
	return HVS_MTU_SIZE + 1;
}
  539. static bool hvs_stream_is_active(struct vsock_sock *vsk)
  540. {
  541. struct hvsock *hvs = vsk->trans;
  542. return hvs->chan != NULL;
  543. }
  544. static bool hvs_stream_allow(u32 cid, u32 port)
  545. {
  546. if (cid == VMADDR_CID_HOST)
  547. return true;
  548. return false;
  549. }
/* poll(POLLIN): readable when a complete packet (incl. FIN) is in the ring. */
static
int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
{
	struct hvsock *hvs = vsk->trans;

	*readable = hvs_channel_readable(hvs->chan);
	return 0;
}

/* poll(POLLOUT): writable when at least one payload byte fits. */
static
int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
{
	*writable = hvs_stream_has_space(vsk) > 0;

	return 0;
}
/* The vsock core's recv/send notify hooks need no transport-specific
 * bookkeeping here: flow control is handled entirely by the VMBus ring
 * and the host's pending-send-size notifications.  All are no-ops.
 */
static
int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
			 struct vsock_transport_recv_notify_data *d)
{
	return 0;
}

static
int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
			      struct vsock_transport_recv_notify_data *d)
{
	return 0;
}

static
int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
				struct vsock_transport_recv_notify_data *d)
{
	return 0;
}

static
int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
				 ssize_t copied, bool data_read,
				 struct vsock_transport_recv_notify_data *d)
{
	return 0;
}

static
int hvs_notify_send_init(struct vsock_sock *vsk,
			 struct vsock_transport_send_notify_data *d)
{
	return 0;
}

static
int hvs_notify_send_pre_block(struct vsock_sock *vsk,
			      struct vsock_transport_send_notify_data *d)
{
	return 0;
}

static
int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
				struct vsock_transport_send_notify_data *d)
{
	return 0;
}

static
int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
				 struct vsock_transport_send_notify_data *d)
{
	return 0;
}
/* Buffer sizes are fixed by the host-mandated ring sizes, so the setters
 * are no-ops and the getters report the option as unsupported.
 * NOTE(review): returning -ENOPROTOOPT from a u64 getter yields a huge
 * positive value at the socket-option layer; appears intentional — confirm
 * against the vsock core's handling.
 */
static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}

static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}

static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Ignored. */
}

static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}

static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}

static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
{
	return -ENOPROTOOPT;
}
/* The vsock transport operations implemented by this driver; registered
 * with the vsock core in hvs_init().
 */
static struct vsock_transport hvs_transport = {
	.get_local_cid            = hvs_get_local_cid,

	.init                     = hvs_sock_init,
	.destruct                 = hvs_destruct,
	.release                  = hvs_release,
	.connect                  = hvs_connect,
	.shutdown                 = hvs_shutdown,

	.dgram_bind               = hvs_dgram_bind,
	.dgram_dequeue            = hvs_dgram_dequeue,
	.dgram_enqueue            = hvs_dgram_enqueue,
	.dgram_allow              = hvs_dgram_allow,

	.stream_dequeue           = hvs_stream_dequeue,
	.stream_enqueue           = hvs_stream_enqueue,
	.stream_has_data          = hvs_stream_has_data,
	.stream_has_space         = hvs_stream_has_space,
	.stream_rcvhiwat          = hvs_stream_rcvhiwat,
	.stream_is_active         = hvs_stream_is_active,
	.stream_allow             = hvs_stream_allow,

	.notify_poll_in           = hvs_notify_poll_in,
	.notify_poll_out          = hvs_notify_poll_out,
	.notify_recv_init         = hvs_notify_recv_init,
	.notify_recv_pre_block    = hvs_notify_recv_pre_block,
	.notify_recv_pre_dequeue  = hvs_notify_recv_pre_dequeue,
	.notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
	.notify_send_init         = hvs_notify_send_init,
	.notify_send_pre_block    = hvs_notify_send_pre_block,
	.notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
	.notify_send_post_enqueue = hvs_notify_send_post_enqueue,

	.set_buffer_size          = hvs_set_buffer_size,
	.set_min_buffer_size      = hvs_set_min_buffer_size,
	.set_max_buffer_size      = hvs_set_max_buffer_size,
	.get_buffer_size          = hvs_get_buffer_size,
	.get_min_buffer_size      = hvs_get_min_buffer_size,
	.get_max_buffer_size      = hvs_get_max_buffer_size,
};
/* hv_driver.probe: a new hv_sock channel device appeared; hand it to the
 * connection state machine.
 */
static int hvs_probe(struct hv_device *hdev,
		     const struct hv_vmbus_device_id *dev_id)
{
	struct vmbus_channel *chan = hdev->channel;

	hvs_open_connection(chan);

	/* Always return success to suppress the unnecessary error message
	 * in vmbus_probe(): on error the host will rescind the device in
	 * 30 seconds and we can do cleanup at that time in
	 * vmbus_onoffer_rescind().
	 */
	return 0;
}
  683. static int hvs_remove(struct hv_device *hdev)
  684. {
  685. struct vmbus_channel *chan = hdev->channel;
  686. vmbus_close(chan);
  687. return 0;
  688. }
/* This isn't really used. See vmbus_match() and vmbus_probe() */
static const struct hv_vmbus_device_id id_table[] = {
	{},
};

/* VMBus driver definition; .hvsock routes all hv_sock channel offers here. */
static struct hv_driver hvs_drv = {
	.name		= "hv_sock",
	.hvsock		= true,
	.id_table	= id_table,
	.probe		= hvs_probe,
	.remove		= hvs_remove,
};
/* Module init: hv_sock needs the Windows 10 (or later) VMBus protocol.
 * Registers the VMBus driver first, then the vsock transport; unwinds the
 * driver registration if the transport registration fails.
 */
static int __init hvs_init(void)
{
	int ret;

	if (vmbus_proto_version < VERSION_WIN10)
		return -ENODEV;

	ret = vmbus_driver_register(&hvs_drv);
	if (ret != 0)
		return ret;

	ret = vsock_core_init(&hvs_transport);
	if (ret) {
		vmbus_driver_unregister(&hvs_drv);
		return ret;
	}

	return 0;
}
/* Module exit: tear down in reverse order of hvs_init(). */
static void __exit hvs_exit(void)
{
	vsock_core_exit();
	vmbus_driver_unregister(&hvs_drv);
}

/* Note: the module can't be unloaded if there is any live hv_sock socket. */
module_init(hvs_init);
module_exit(hvs_exit);

MODULE_DESCRIPTION("Hyper-V Sockets");
MODULE_VERSION("1.0.0");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_VSOCK);