Diffstat (limited to 'net/vmw_vsock')
-rw-r--r--  net/vmw_vsock/af_vsock.c           38
-rw-r--r--  net/vmw_vsock/hyperv_transport.c   93
-rw-r--r--  net/vmw_vsock/virtio_transport.c  134
3 files changed, 181 insertions(+), 84 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 169112f8aa1e..ab47bf3ab66e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -274,7 +274,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected);
void vsock_remove_bound(struct vsock_sock *vsk)
{
spin_lock_bh(&vsock_table_lock);
- __vsock_remove_bound(vsk);
+ if (__vsock_in_bound_table(vsk))
+ __vsock_remove_bound(vsk);
spin_unlock_bh(&vsock_table_lock);
}
EXPORT_SYMBOL_GPL(vsock_remove_bound);
@@ -282,7 +283,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound);
void vsock_remove_connected(struct vsock_sock *vsk)
{
spin_lock_bh(&vsock_table_lock);
- __vsock_remove_connected(vsk);
+ if (__vsock_in_connected_table(vsk))
+ __vsock_remove_connected(vsk);
spin_unlock_bh(&vsock_table_lock);
}
EXPORT_SYMBOL_GPL(vsock_remove_connected);
@@ -318,35 +320,10 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
}
EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
-static bool vsock_in_bound_table(struct vsock_sock *vsk)
-{
- bool ret;
-
- spin_lock_bh(&vsock_table_lock);
- ret = __vsock_in_bound_table(vsk);
- spin_unlock_bh(&vsock_table_lock);
-
- return ret;
-}
-
-static bool vsock_in_connected_table(struct vsock_sock *vsk)
-{
- bool ret;
-
- spin_lock_bh(&vsock_table_lock);
- ret = __vsock_in_connected_table(vsk);
- spin_unlock_bh(&vsock_table_lock);
-
- return ret;
-}
-
void vsock_remove_sock(struct vsock_sock *vsk)
{
- if (vsock_in_bound_table(vsk))
- vsock_remove_bound(vsk);
-
- if (vsock_in_connected_table(vsk))
- vsock_remove_connected(vsk);
+ vsock_remove_bound(vsk);
+ vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
@@ -477,8 +454,7 @@ static void vsock_pending_work(struct work_struct *work)
* incoming packets can't find this socket, and to reduce the reference
* count.
*/
- if (vsock_in_connected_table(vsk))
- vsock_remove_connected(vsk);
+ vsock_remove_connected(vsk);
sk->sk_state = TCP_CLOSE;
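
The af_vsock.c hunks above close a check-then-act window: the removed helpers
took vsock_table_lock to test table membership, dropped it, and then re-took
it to remove the entry, so two concurrent callers could both see the socket in
the table and both run the removal path. A minimal sketch (illustrative, not
part of the patch) contrasting the two patterns:

/* Racy: the answer from the first critical section can be stale by the
 * time the second one runs.
 */
static void remove_bound_racy(struct vsock_sock *vsk)
{
	if (vsock_in_bound_table(vsk))		/* lock taken and dropped */
		vsock_remove_bound(vsk);	/* lock taken again */
}

/* Safe: check and removal share one critical section; a second caller
 * simply finds the socket already gone.
 */
static void remove_bound_safe(struct vsock_sock *vsk)
{
	spin_lock_bh(&vsock_table_lock);
	if (__vsock_in_bound_table(vsk))
		__vsock_remove_bound(vsk);
	spin_unlock_bh(&vsock_table_lock);
}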
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 62dcdf082349..f2084e3f7aa4 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -14,14 +14,14 @@
#include <net/sock.h>
#include <net/af_vsock.h>
-/* The host side's design of the feature requires 6 exact 4KB pages for
- * recv/send rings respectively -- this is suboptimal considering memory
- * consumption, however unluckily we have to live with it, before the
- * host comes up with a better design in the future.
+/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have
+ * stricter requirements on the hv_sock ring buffer size of six 4K pages.
+ * Newer hosts don't have this limitation, but keep the defaults the same
+ * for compatibility.
*/
#define PAGE_SIZE_4K 4096
#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
+#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16)
@@ -46,8 +46,9 @@ struct hvs_recv_buf {
};
/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
- * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
- * buffer, because tests show there is no significant performance difference.
+ * a smaller size, i.e. HVS_SEND_BUF_SIZE, to maximize concurrency between
+ * the guest and the host, since one VMBUS packet is the smallest unit of
+ * processing.
*
* Note: the buffer can be eliminated in the future when we add new VMBus
* ringbuffer APIs that allow us to directly copy data from userspace buffer
@@ -321,8 +322,11 @@ static void hvs_open_connection(struct vmbus_channel *chan)
struct sockaddr_vm addr;
struct sock *sk, *new = NULL;
struct vsock_sock *vnew = NULL;
- struct hvsock *hvs, *hvs_new = NULL;
+ struct hvsock *hvs = NULL;
+ struct hvsock *hvs_new = NULL;
+ int rcvbuf;
int ret;
+ int sndbuf;
if_type = &chan->offermsg.offer.if_type;
if_instance = &chan->offermsg.offer.if_instance;
@@ -364,9 +368,34 @@ static void hvs_open_connection(struct vmbus_channel *chan)
}
set_channel_read_mode(chan, HV_CALL_DIRECT);
- ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
- RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
- hvs_channel_cb, conn_from_host ? new : sk);
+
+ /* Use the socket buffer sizes as hints for the VMBUS ring size. For
+ * server side sockets, 'sk' is the parent socket, so the child sockets
+ * inherit the size from the parent. Keep the minimums at the default
+ * values and align to the page size, as VMBUS requires.
+ * For the maximums, the socket core already limits the buffer size a
+ * user can set, but since the hv_sock VMBUS ring buffer is currently a
+ * physically contiguous allocation, restrict it further.
+ * Older hv_sock host-side code cannot handle a bigger VMBUS ring
+ * buffer, so use the protocol version to limit the change to newer
+ * hosts.
+ */
+ if (vmbus_proto_version < VERSION_WIN10_V5) {
+ sndbuf = RINGBUFFER_HVS_SND_SIZE;
+ rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
+ } else {
+ sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
+ sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
+ sndbuf = ALIGN(sndbuf, PAGE_SIZE);
+ rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
+ rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
+ rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
+ }
+
+ ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
+ conn_from_host ? new : sk);
if (ret != 0) {
if (conn_from_host) {
hvs_new->chan = NULL;
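
To make the sizing concrete, here is the clamp from the hunk above pulled out
into a standalone helper with one worked example; the helper itself is
illustrative, only the constants and the max/min/ALIGN sequence come from the
patch:

/* Socket buffer hint -> VMBUS ring size on VERSION_WIN10_V5+ hosts:
 * raise to at least the 24 KiB default, cap at the 256 KiB maximum,
 * and round up to a whole page.
 */
static int hvs_ring_size(int sockbuf_hint)
{
	int size;

	size = max_t(int, sockbuf_hint, RINGBUFFER_HVS_SND_SIZE);	/* >= 6 pages */
	size = min_t(int, size, RINGBUFFER_HVS_MAX_SIZE);		/* <= 64 pages */
	return ALIGN(size, PAGE_SIZE);					/* page multiple */
}

/* e.g. sk_sndbuf == 100000: max(100000, 24576) = 100000, then
 * min(100000, 262144) = 100000, then ALIGN(100000, 4096) = 102400
 * (25 pages). On pre-V5 hosts the 6-page defaults are used as-is.
 */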
@@ -424,6 +453,7 @@ static u32 hvs_get_local_cid(void)
static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
{
struct hvsock *hvs;
+ struct sock *sk = sk_vsock(vsk);
hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
if (!hvs)
@@ -431,7 +461,8 @@ static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->trans = hvs;
hvs->vsk = vsk;
-
+ sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
+ sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
return 0;
}
@@ -627,7 +658,9 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
struct hvsock *hvs = vsk->trans;
struct vmbus_channel *chan = hvs->chan;
struct hvs_send_buf *send_buf;
- ssize_t to_write, max_writable, ret;
+ ssize_t to_write, max_writable;
+ ssize_t ret = 0;
+ ssize_t bytes_written = 0;
BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
@@ -635,20 +668,34 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
if (!send_buf)
return -ENOMEM;
- max_writable = hvs_channel_writable_bytes(chan);
- to_write = min_t(ssize_t, len, max_writable);
- to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
-
- ret = memcpy_from_msg(send_buf->data, msg, to_write);
- if (ret < 0)
- goto out;
+ /* Reader(s) could be draining data from the channel as we write.
+ * To maximize bandwidth, iterate until the channel is found to be
+ * full.
+ */
+ while (len) {
+ max_writable = hvs_channel_writable_bytes(chan);
+ if (!max_writable)
+ break;
+ to_write = min_t(ssize_t, len, max_writable);
+ to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
+ /* memcpy_from_msg() is safe to call in a loop, as it advances the
+ * offsets within the message iterator.
+ */
+ ret = memcpy_from_msg(send_buf->data, msg, to_write);
+ if (ret < 0)
+ goto out;
- ret = hvs_send_data(hvs->chan, send_buf, to_write);
- if (ret < 0)
- goto out;
+ ret = hvs_send_data(hvs->chan, send_buf, to_write);
+ if (ret < 0)
+ goto out;
- ret = to_write;
+ bytes_written += to_write;
+ len -= to_write;
+ }
out:
+ /* If any data has been sent, return that */
+ if (bytes_written)
+ ret = bytes_written;
kfree(send_buf);
return ret;
}
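
The rewritten loop gives hvs_stream_enqueue() write(2)-like semantics: if any
bytes went out before the channel filled or an error occurred, those bytes are
reported, and an error is returned only when no progress was made at all. A
self-contained sketch of that shape, where send_one_chunk() is a hypothetical
stand-in for the memcpy_from_msg()/hvs_send_data() pair:

#include <stddef.h>
#include <sys/types.h>

/* Returns bytes sent (up to some per-call cap) or a negative errno. */
extern ssize_t send_one_chunk(const char *buf, size_t len);

ssize_t chunked_send(const char *buf, size_t len)
{
	ssize_t ret = 0, done = 0;

	while (len > 0) {
		ret = send_one_chunk(buf + done, len);
		if (ret <= 0)
			break;			/* channel full or error */
		done += ret;
		len -= (size_t)ret;
	}
	/* Partial progress wins over a late error. */
	return done ? done : ret;
}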
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 9c287e3e393c..0815d1357861 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -38,6 +38,7 @@ struct virtio_vsock {
* must be accessed with tx_lock held.
*/
struct mutex tx_lock;
+ bool tx_run;
struct work_struct send_pkt_work;
spinlock_t send_pkt_list_lock;
@@ -53,6 +54,7 @@ struct virtio_vsock {
* must be accessed with rx_lock held.
*/
struct mutex rx_lock;
+ bool rx_run;
int rx_buf_nr;
int rx_buf_max_nr;
@@ -60,24 +62,28 @@ struct virtio_vsock {
* vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
*/
struct mutex event_lock;
+ bool event_run;
struct virtio_vsock_event event_list[8];
u32 guest_cid;
};
-static struct virtio_vsock *virtio_vsock_get(void)
-{
- return the_virtio_vsock;
-}
-
static u32 virtio_transport_get_local_cid(void)
{
- struct virtio_vsock *vsock = virtio_vsock_get();
+ struct virtio_vsock *vsock;
+ u32 ret;
- if (!vsock)
- return VMADDR_CID_ANY;
+ rcu_read_lock();
+ vsock = rcu_dereference(the_virtio_vsock);
+ if (!vsock) {
+ ret = VMADDR_CID_ANY;
+ goto out_rcu;
+ }
- return vsock->guest_cid;
+ ret = vsock->guest_cid;
+out_rcu:
+ rcu_read_unlock();
+ return ret;
}
static void virtio_transport_loopback_work(struct work_struct *work)
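
Every conversion in this file follows the same RCU read-side pattern; a
fragment-style sketch of the contract (use() is a placeholder, not a real
function):

rcu_read_lock();
vsock = rcu_dereference(the_virtio_vsock);	/* may be NULL */
if (vsock)
	use(vsock);		/* stays valid inside this section */
rcu_read_unlock();		/* after this, remove() may free it */

The writer side pairs with it: probe() publishes the pointer with
rcu_assign_pointer(), and remove() clears it and calls synchronize_rcu()
before freeing, so no reader is left holding a dangling pointer.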
@@ -91,6 +97,10 @@ static void virtio_transport_loopback_work(struct work_struct *work)
spin_unlock_bh(&vsock->loopback_list_lock);
mutex_lock(&vsock->rx_lock);
+
+ if (!vsock->rx_run)
+ goto out;
+
while (!list_empty(&pkts)) {
struct virtio_vsock_pkt *pkt;
@@ -99,6 +109,7 @@ static void virtio_transport_loopback_work(struct work_struct *work)
virtio_transport_recv_pkt(pkt);
}
+out:
mutex_unlock(&vsock->rx_lock);
}
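
This rx_run test is the first instance of an idiom repeated in every worker
below (send_pkt, tx, rx, event): the flag is read under the same mutex that
remove() takes to clear it, so once remove() has released the mutex, any
worker that runs afterwards must observe the flag as false and bail out
before touching the virtqueues. Schematically (a sketch, not patch code):

	worker					remove()
	------					--------
						mutex_lock(&vsock->rx_lock);
						vsock->rx_run = false;
						mutex_unlock(&vsock->rx_lock);
	mutex_lock(&vsock->rx_lock);
	if (!vsock->rx_run)		/* observed false: */
		goto out;		/* no device access */
out:
	mutex_unlock(&vsock->rx_lock);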
@@ -127,6 +138,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
mutex_lock(&vsock->tx_lock);
+ if (!vsock->tx_run)
+ goto out;
+
vq = vsock->vqs[VSOCK_VQ_TX];
for (;;) {
@@ -185,6 +199,7 @@ virtio_transport_send_pkt_work(struct work_struct *work)
if (added)
virtqueue_kick(vq);
+out:
mutex_unlock(&vsock->tx_lock);
if (restart_rx)
@@ -197,14 +212,18 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
struct virtio_vsock *vsock;
int len = pkt->len;
- vsock = virtio_vsock_get();
+ rcu_read_lock();
+ vsock = rcu_dereference(the_virtio_vsock);
if (!vsock) {
virtio_transport_free_pkt(pkt);
- return -ENODEV;
+ len = -ENODEV;
+ goto out_rcu;
}
- if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
- return virtio_transport_send_pkt_loopback(vsock, pkt);
+ if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
+ len = virtio_transport_send_pkt_loopback(vsock, pkt);
+ goto out_rcu;
+ }
if (pkt->reply)
atomic_inc(&vsock->queued_replies);
@@ -214,6 +233,9 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
spin_unlock_bh(&vsock->send_pkt_list_lock);
queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
+
+out_rcu:
+ rcu_read_unlock();
return len;
}
@@ -222,12 +244,14 @@ virtio_transport_cancel_pkt(struct vsock_sock *vsk)
{
struct virtio_vsock *vsock;
struct virtio_vsock_pkt *pkt, *n;
- int cnt = 0;
+ int cnt = 0, ret;
LIST_HEAD(freeme);
- vsock = virtio_vsock_get();
+ rcu_read_lock();
+ vsock = rcu_dereference(the_virtio_vsock);
if (!vsock) {
- return -ENODEV;
+ ret = -ENODEV;
+ goto out_rcu;
}
spin_lock_bh(&vsock->send_pkt_list_lock);
@@ -255,7 +279,11 @@ virtio_transport_cancel_pkt(struct vsock_sock *vsk)
queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}
- return 0;
+ ret = 0;
+
+out_rcu:
+ rcu_read_unlock();
+ return ret;
}
static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
@@ -307,6 +335,10 @@ static void virtio_transport_tx_work(struct work_struct *work)
vq = vsock->vqs[VSOCK_VQ_TX];
mutex_lock(&vsock->tx_lock);
+
+ if (!vsock->tx_run)
+ goto out;
+
do {
struct virtio_vsock_pkt *pkt;
unsigned int len;
@@ -317,6 +349,8 @@ static void virtio_transport_tx_work(struct work_struct *work)
added = true;
}
} while (!virtqueue_enable_cb(vq));
+
+out:
mutex_unlock(&vsock->tx_lock);
if (added)
@@ -345,6 +379,9 @@ static void virtio_transport_rx_work(struct work_struct *work)
mutex_lock(&vsock->rx_lock);
+ if (!vsock->rx_run)
+ goto out;
+
do {
virtqueue_disable_cb(vq);
for (;;) {
@@ -454,6 +491,9 @@ static void virtio_transport_event_work(struct work_struct *work)
mutex_lock(&vsock->event_lock);
+ if (!vsock->event_run)
+ goto out;
+
do {
struct virtio_vsock_event *event;
unsigned int len;
@@ -468,7 +508,7 @@ static void virtio_transport_event_work(struct work_struct *work)
} while (!virtqueue_enable_cb(vq));
virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
-
+out:
mutex_unlock(&vsock->event_lock);
}
@@ -565,7 +605,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
return ret;
/* Only one virtio-vsock device per guest is supported */
- if (the_virtio_vsock) {
+ if (rcu_dereference_protected(the_virtio_vsock,
+ lockdep_is_held(&the_virtio_vsock_mutex))) {
ret = -EBUSY;
goto out;
}
@@ -590,8 +631,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->rx_buf_max_nr = 0;
atomic_set(&vsock->queued_replies, 0);
- vdev->priv = vsock;
- the_virtio_vsock = vsock;
mutex_init(&vsock->tx_lock);
mutex_init(&vsock->rx_lock);
mutex_init(&vsock->event_lock);
@@ -605,14 +644,23 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work);
+ mutex_lock(&vsock->tx_lock);
+ vsock->tx_run = true;
+ mutex_unlock(&vsock->tx_lock);
+
mutex_lock(&vsock->rx_lock);
virtio_vsock_rx_fill(vsock);
+ vsock->rx_run = true;
mutex_unlock(&vsock->rx_lock);
mutex_lock(&vsock->event_lock);
virtio_vsock_event_fill(vsock);
+ vsock->event_run = true;
mutex_unlock(&vsock->event_lock);
+ vdev->priv = vsock;
+ rcu_assign_pointer(the_virtio_vsock, vsock);
+
mutex_unlock(&the_virtio_vsock_mutex);
return 0;
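
The reordering in probe() is the point: vdev->priv and the_virtio_vsock are
now assigned only after the rx/tx/event state is fully set up, and
rcu_assign_pointer() provides the release semantics that let a reader who
sees the pointer also see the initialized object. The generic shape, with
global_obj and ready as illustrative names not taken from the patch:

struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);
if (!o)
	return -ENOMEM;
o->ready = true;			/* 1. initialize completely */
rcu_assign_pointer(global_obj, o);	/* 2. publish last; readers
					 *    never see a half-built o */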
@@ -627,15 +675,33 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
struct virtio_vsock *vsock = vdev->priv;
struct virtio_vsock_pkt *pkt;
- flush_work(&vsock->loopback_work);
- flush_work(&vsock->rx_work);
- flush_work(&vsock->tx_work);
- flush_work(&vsock->event_work);
- flush_work(&vsock->send_pkt_work);
+ mutex_lock(&the_virtio_vsock_mutex);
+
+ vdev->priv = NULL;
+ rcu_assign_pointer(the_virtio_vsock, NULL);
+ synchronize_rcu();
/* Reset all connected sockets when the device disappears */
vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ /* Stop all work handlers to make sure no one is accessing the device,
+ * so we can safely call vdev->config->reset().
+ */
+ mutex_lock(&vsock->rx_lock);
+ vsock->rx_run = false;
+ mutex_unlock(&vsock->rx_lock);
+
+ mutex_lock(&vsock->tx_lock);
+ vsock->tx_run = false;
+ mutex_unlock(&vsock->tx_lock);
+
+ mutex_lock(&vsock->event_lock);
+ vsock->event_run = false;
+ mutex_unlock(&vsock->event_lock);
+
+ /* Flush all device writes and interrupts; the device will not use
+ * any more buffers.
+ */
vdev->config->reset(vdev);
mutex_lock(&vsock->rx_lock);
@@ -666,12 +732,20 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
}
spin_unlock_bh(&vsock->loopback_list_lock);
- mutex_lock(&the_virtio_vsock_mutex);
- the_virtio_vsock = NULL;
- mutex_unlock(&the_virtio_vsock_mutex);
-
+ /* Delete virtqueues and flush outstanding callbacks if any */
vdev->config->del_vqs(vdev);
+ /* Other works can be queued before 'config->del_vqs()', so we flush
+ * all works before freeing the vsock object to avoid a use-after-free.
+ */
+ flush_work(&vsock->loopback_work);
+ flush_work(&vsock->rx_work);
+ flush_work(&vsock->tx_work);
+ flush_work(&vsock->event_work);
+ flush_work(&vsock->send_pkt_work);
+
+ mutex_unlock(&the_virtio_vsock_mutex);
+
kfree(vsock);
}
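
Taken together, the remove() path now encodes a strict teardown order; a
condensed sketch of the sequence, with comments summarizing the hunks above:

rcu_assign_pointer(the_virtio_vsock, NULL);	/* 1. no new readers */
synchronize_rcu();				/* 2. in-flight readers done */
/* 3. clear tx_run/rx_run/event_run under their mutexes: workers bail out */
/* 4. vdev->config->reset(vdev): the device stops using the buffers */
/* 5. free queued/pending packets under the respective locks */
/* 6. vdev->config->del_vqs(vdev): no further virtqueue callbacks */
/* 7. flush_work() on all workers: nothing still references vsock */
kfree(vsock);					/* 8. only now safe to free */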