summaryrefslogblamecommitdiffstats
path: root/src/kernel/net-txrx.c
blob: 82f740887094f19b405b3509e10f8d3758e1c612 (plain) (tree)
























                                                               
                         




















                                                                          
                                                          











































































                                                                       
                                                                   







































                                                                              

                                                                     
                              

                           


                                                                     







                                                     













                                              

                                         



























                                                                          



                                    
                                                     









































                                                                             































                                                                               










































                                                                              

                                                    
                                                            

                                                 
                 
                                                           








                                                                                       
                                                           



















                                                                                      
                                                                           









































                                                                           
                                            


































































































































































                                                                                                                     

                                                      




                                                                 





                                                                            
                                      

















                                                                                      
/*
 * This file is part of the Distributed Network Block Device 3
 *
 * Copyright(c) 2019 Frederic Robra <frederic@robra.org>
 * Parts copyright 2011-2012 Johann Latocha <johann@latocha.de>
 *
 * This file may be licensed under the terms of the
 * GNU General Public License Version 2 (the ``GPL'').
 *
 * Software distributed under the License is distributed
 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
 * express or implied. See the GPL for the specific language
 * governing rights and limitations.
 *
 * You should have received a copy of the GPL along with this
 * program. If not, go to http://www.gnu.org/licenses/gpl.html
 * or write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */

#include "net-txrx.h"
#include "utils.h"
#include "clientconfig.h"
#include "mq.h"
#include <linux/signal.h>

#include <net/sock.h>



/*
 * Request operations used to mark requests that do not carry block I/O but a
 * dnbd3 protocol command. dnbd3_send_request() switches on these values.
 */
#define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN
#define DNBD3_REQ_OP_CONNECT REQ_OP_DRV_OUT

/*
 * Mark @req as a "special" request: the operation goes into the low bits of
 * cmd_flags and the dnbd3 command @cmd is stored above REQ_FLAG_BITS.
 * Note: expands to a plain assignment, use it as a statement only.
 */
#define dnbd3_cmd_to_op_special(req, cmd) \
	(req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS)

/*
 * Read back the dnbd3 command stored by dnbd3_cmd_to_op_special().
 */
#define dnbd3_op_special_to_cmd(req) \
	((req)->cmd_flags >> REQ_FLAG_BITS)

/*
 * Mark @req as a connect request; CMD_SELECT_IMAGE is stored above
 * REQ_FLAG_BITS in the same way as for special requests.
 * Note: expands to a plain assignment, use it as a statement only.
 */
#define dnbd3_connect_to_req(req) \
	(req)->cmd_flags = DNBD3_REQ_OP_CONNECT \
			| ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS)

/*
 * Turn @req into a read of RTT_TEST_BLOCK_SIZE bytes starting at sector 0.
 * Only cmd_flags, __data_len and __sector are written.
 */
#define dnbd3_test_block_to_req(req) \
	do { \
		(req)->cmd_flags = REQ_OP_READ; \
		(req)->__data_len = RTT_TEST_BLOCK_SIZE; \
		(req)->__sector = 0; \
	} while (0)


/*
 * SOCKET_TIMEOUT_CLIENT_DATA scaled by HZ.
 * NOTE(review): presumably a request timeout in jiffies - not used in the
 * part of the file visible here, confirm against the users.
 */
#define REQUEST_TIMEOUT \
	(HZ * SOCKET_TIMEOUT_CLIENT_DATA)


/*
 * Prepare the msghdr @h (passed by value name, not by pointer) for the
 * kernel_sendmsg()/kernel_recvmsg() calls in this file: no address, no
 * control data, flags set to MSG_WAITALL | MSG_NOSIGNAL.
 * Only the five fields listed below are written.
 */
#define dnbd3_init_msghdr(h) \
	do { \
		(h).msg_name = NULL; \
		(h).msg_namelen = 0; \
		(h).msg_control = NULL; \
		(h).msg_controllen = 0; \
		(h).msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \
	} while (0)



/*
 * Methods for request and receive commands
 */


/**
 * dnbd3_to_handle - pack tag and cookie into one 64 bit handle
 * @tag: stored in the upper 32 bits of the handle
 * @cookie: stored in the lower 32 bits of the handle
 */
static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie)
{
	uint64_t packed = tag;

	packed <<= 32;
	packed |= cookie;
	return packed;
}

/**
 * dnbd3_tag_from_handle - extract the tag from a handle
 * @handle: the handle, the tag is taken from its upper 32 bits
 */
static inline uint32_t dnbd3_tag_from_handle(uint64_t handle)
{
	const uint64_t upper_half = handle >> 32;

	return (uint32_t) upper_half;
}

/**
 * dnbd3_cookie_from_handle - extract the cookie from a handle
 * @handle: the handle, the cookie is taken from its lower 32 bits
 */
static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle)
{
	const uint64_t lower_half = handle & 0xFFFFFFFFULL;

	return (uint32_t) lower_half;
}

/**
 * dnbd3_send_request - send a request
 * @sock: the socket on which the request is sent
 * @req: the request to send, its operation selects the dnbd3 command
 * @cmd: optional - the dnbd3_cmd from mq, NULL for requests that do not
 *       belong to a blk-mq command
 *
 * Builds the dnbd3 request header for @req (plus the serialized payload for
 * a connect request) and sends it with kernel_sendmsg(). The socket cookie is
 * incremented for every request; if @cmd is given, the cookie is stored in
 * @cmd and combined with the unique blk-mq tag of @req to form the handle,
 * otherwise the handle is the cookie alone.
 *
 * sock->pending points to @req while the function runs and is reset to NULL
 * on every exit path.
 *
 * the tx_lock of the socket must be held
 *
 * Return: the value returned by kernel_sendmsg() (number of bytes sent or a
 * negative error code; a short send is logged but returned unchanged), or
 * -EIO if the operation of @req is not supported or the socket is gone.
 */
int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req,
		struct dnbd3_cmd *cmd)
{
	dnbd3_request_t request;
	struct msghdr msg;
	struct kvec iov[2];
	size_t iov_num = 1;
	size_t lng;
	int result;
	uint32_t tag;
	uint64_t handle;
	serialized_buffer_t payload_buffer;

	sock->pending = req;
	dnbd3_init_msghdr(msg);

	/*
	 * the header is sent to the server as is and not every command
	 * sets every field, so make sure no stack garbage is transmitted
	 */
	memset(&request, 0, sizeof(request));
	request.magic = dnbd3_packet_magic;

	switch (req_op(req)) {
	case REQ_OP_READ:
		request.cmd = CMD_GET_BLOCK;
		request.offset = blk_rq_pos(req) << 9; /* sectors to bytes */
		request.size = blk_rq_bytes(req);
		break;
	case DNBD3_REQ_OP_SPECIAL:
		debug_sock(sock, "request operation special");
		request.cmd = dnbd3_op_special_to_cmd(req);
		request.size = 0;
		break;
	case DNBD3_REQ_OP_CONNECT:
		debug_sock(sock, "request operation connect to %s",
				sock->device->imgname);
		request.cmd = CMD_SELECT_IMAGE;
		serializer_reset_write(&payload_buffer);
		serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION);
		serializer_put_string(&payload_buffer, sock->device->imgname);
		serializer_put_uint16(&payload_buffer, sock->device->rid);
		serializer_put_uint8(&payload_buffer, 0); /* is_server = false */
		iov[1].iov_base = &payload_buffer;
		request.size = serializer_get_written_length(&payload_buffer);
		iov[1].iov_len = request.size;
		iov_num = 2;
		break;
	default:
		/* leave through the common exit so sock->pending is cleared */
		result = -EIO;
		goto out;
	}

	sock->cookie++;
	if (cmd != NULL) {
		cmd->cookie = sock->cookie;
		tag = blk_mq_unique_tag(req);
		handle = dnbd3_to_handle(tag, sock->cookie);
	} else {
		handle = sock->cookie;
	}
	memcpy(&request.handle, &handle, sizeof(handle));

	fixup_request(request);
	iov[0].iov_base = &request;
	iov[0].iov_len = sizeof(request);
	lng = iov_num == 1 ? iov[0].iov_len : iov[0].iov_len + iov[1].iov_len;
	if (unlikely(!sock->sock)) {
		warn_sock(sock, "socket was shutdown while sending");
		result = -EIO;
		goto out;
	}
	result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, lng);
	if (result != lng) {
		error_sock(sock, "connection to server lost");
		/*
		 * do not requeue the command here, doing so triggered
		 * "kernel BUG at block/blk-mq.c:704!"
		 * the command will be requeued by its timeout instead
		 */
		sock->server->failures++;
	}

out:
	sock->pending = NULL;
	return result;
}


/**
 * dnbd3_send_request_cmd - send a dnbd3 cmd
 * @sock: the socket on which the request is sent
 * @dnbd3_cmd: the dnbd3 cmd to send, one of CMD_KEEPALIVE, CMD_GET_SERVERS,
 *             CMD_SELECT_IMAGE or CMD_GET_BLOCK (sent as a test block read)
 *
 * Allocates a temporary request, encodes @dnbd3_cmd into it and hands it to
 * dnbd3_send_request() without a dnbd3_cmd. The temporary request is freed
 * before returning.
 *
 * the tx_lock of the socket must be held
 *
 * Return: the result of dnbd3_send_request(), -EINVAL for an unsupported
 * @dnbd3_cmd, -EIO if the allocation failed or the socket is gone.
 */
int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd)
{
	int result;
	/*
	 * zeroed allocation: only a few fields are set below, the rest of
	 * the request must not contain stale heap data
	 */
	struct request *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req) {
		error_sock(sock, "kmalloc failed");
		return -EIO;
	}

	switch (dnbd3_cmd) {
	case CMD_KEEPALIVE:
	case CMD_GET_SERVERS:
		dnbd3_cmd_to_op_special(req, dnbd3_cmd);
		break;
	case CMD_SELECT_IMAGE:
		dnbd3_connect_to_req(req);
		break;
	case CMD_GET_BLOCK:
		dnbd3_test_block_to_req(req);
		break;
	default:
		warn_sock(sock, "unsupported command %d", dnbd3_cmd);
		result = -EINVAL;
		goto out;
	}

	if (unlikely(!sock->sock)) {
		result = -EIO;
		goto out;
	}
	result = dnbd3_send_request(sock, req, NULL);

out:
	kfree(req);
	return result;
}

/**
 * dnbd3_receive_cmd - receive the header of a reply
 * @sock: the socket from which the reply is received
 * @reply: an unused reply, it will be filled with the reply header of the
 *         server
 *
 * this method should be called directly after the dnbd3_send_request_ method
 *
 * Only the reply header is read; a payload announced in reply->size is left
 * in the socket for the matching dnbd3_receive_cmd_* function.
 *
 * Return: the number of bytes received (the size of the header) on success,
 * the result of kernel_recvmsg() if it is <= 0, -EIO if the header is
 * incomplete, has a wrong magic or carries command 0.
 */
int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
{
	int result;
	struct msghdr msg;
	struct kvec iov;

	dnbd3_init_msghdr(msg);
	iov.iov_base = reply;
	iov.iov_len = sizeof(dnbd3_reply_t);
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0) {
		return result;
	}
	if (result != sizeof(dnbd3_reply_t)) {
		/* do not interpret a partially filled header */
		error_sock(sock, "receive cmd incomplete reply header");
		return -EIO;
	}
	/*
	 * fix up the struct that was just received; fixup_request() in
	 * dnbd3_send_request() is passed the struct itself in the same way
	 */
	fixup_reply(*reply);

	if (reply->magic != dnbd3_packet_magic) {
		error_sock(sock, "receive cmd wrong magic packet");
		return -EIO;
	}

	if (reply->cmd == 0) {
		error_sock(sock, "receive command was 0");
		return -EIO;
	}
	return result;
}

//static int dnbd3_clear_socket(struct dnbd3_sock *sock, dnbd3_reply_t *reply,
//		int remaining)
//{
//	int result = 0;
//	char *buf;
//	struct kvec iov;
//	struct msghdr msg;
//	dnbd3_init_msghdr(msg);
//	warn_sock(sock, "clearing socket %d bytes", remaining);
//	buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL);
//	if (!buf) {
//		error_sock(sock, "kmalloc failed");
//		return -EIO;
//	}
//	iov.iov_base = buf;
//	iov.iov_len = RTT_BLOCK_SIZE;
//	while (remaining > 0) {
//		result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
//				msg.msg_flags);
//		if (result <= 0) {
//			goto error;
//		}
//		remaining -= result;
//	}
//
//	debug_sock(sock, "cleared socket");
//error:
//	if (buf) {
//		kfree(buf);
//	}
//	return result;
//}

/**
 * dnbd3_receive_cmd_get_block_mq - receive a block for mq
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * The handle of @reply is split into the unique blk-mq tag and the cookie
 * that dnbd3_send_request encoded into it. The tag is used to look up the
 * request, the cookie is compared with the one stored in the dnbd3_cmd of
 * that request. The payload is then received segment by segment directly
 * into the pages of the request.
 *
 * On success the command is completed with dnbd3_end_cmd(cmd, 0). If a
 * segment cannot be received completely the command is requeued.
 *
 * Return: the result of the last kernel_recvmsg() on success (0 if the
 * request had no segments), -EIO for an unexpected reply, a cookie mismatch
 * or a short receive, or the negative result of kernel_recvmsg().
 */
int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct dnbd3_cmd *cmd;
	struct msghdr msg;
	struct request *req = NULL;
	struct kvec iov;
	struct req_iterator iter;
	struct bio_vec bvec_inst;
	struct dnbd3_device *dev = sock->device;
	struct bio_vec *bvec = &bvec_inst;
	sigset_t blocked, oldset;
	void *kaddr;
	uint32_t tag, cookie;
	uint16_t hwq;
	/* only consumed by the disabled dnbd3_clear_socket() calls below */
	uint32_t remaining = reply->size;
	int result = 0;
	uint64_t handle;
	dnbd3_init_msghdr(msg);

	/* decode the handle built by dnbd3_send_request */
	memcpy(&handle, &reply->handle, sizeof(handle));
	cookie = dnbd3_cookie_from_handle(handle);
	tag = dnbd3_tag_from_handle(handle);

	/* the hardware queue comes from the server reply, check its range */
	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < dev->tag_set.nr_hw_queues) {
		req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq],
				blk_mq_unique_tag_to_tag(tag));
	}
	if (!req || !blk_mq_request_started(req)) {
		error_sock(sock, "unexpected reply (%d) %p", tag, req);
		if (req) {
			/* a request exists for the tag but was not started */
			cmd = blk_mq_rq_to_pdu(req);
			mutex_lock(&cmd->lock);
			debug_sock(sock, "requeue request");
			dnbd3_requeue_cmd(cmd);
			mutex_unlock(&cmd->lock);
		}
//		dnbd3_clear_socket(sock, reply, remaining);
		return -EIO;
	}
	cmd = blk_mq_rq_to_pdu(req);

	/* cmd->lock is held from here until the payload has been received */
	mutex_lock(&cmd->lock);
	if (cmd->cookie != cookie) {
		/* the reply does not belong to the current send of this cmd */
		error_sock(sock, "double reply on req %p, cookie %u, handle cookie %u",
				req, cmd->cookie, cookie);
		mutex_unlock(&cmd->lock);
//		dnbd3_clear_socket(sock, reply, remaining);
		return -EIO;
	}

	rq_for_each_segment(bvec_inst, req, iter) {
		/* block every signal except SIGKILL while receiving */
		siginitsetinv(&blocked, sigmask(SIGKILL));
		sigprocmask(SIG_SETMASK, &blocked, &oldset);

		/* receive straight into the mapped page of this segment */
		kaddr = kmap(bvec->bv_page) + bvec->bv_offset;
		iov.iov_base = kaddr;
		iov.iov_len = bvec->bv_len;
		result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len,
				msg.msg_flags);
		remaining -= result;
		if (result != bvec->bv_len) {
			/* undo mapping and signal mask before bailing out */
			kunmap(bvec->bv_page);
			sigprocmask(SIG_SETMASK, &oldset, NULL );
			error_sock(sock, "could not receive from net to block layer");
			dnbd3_requeue_cmd(cmd);
			mutex_unlock(&cmd->lock);
			if (result >= 0) {
				/* short receive: report it as an I/O error */
//				dnbd3_clear_socket(sock, reply, remaining);
				return -EIO;
			} else {
				return result;
			}
		}
		kunmap(bvec->bv_page);

		sigprocmask(SIG_SETMASK, &oldset, NULL );
	}
	mutex_unlock(&cmd->lock);
	/* all segments received, complete the command without error */
	dnbd3_end_cmd(cmd, 0);
	return result;
}



/**
 * dnbd3_receive_cmd_get_block_test - receive a test block
 * @sock: the socket from which the block is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * reply->size bytes are read into a temporary buffer and thrown away.
 *
 * Return: RTT_TEST_BLOCK_SIZE on success, -ENOMEM if the temporary buffer
 * could not be allocated, -EIO if a different number of bytes was received,
 * or the negative result of kernel_recvmsg().
 */
int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct msghdr msg;
	struct kvec iov;
	int result;
	char *buf = kmalloc(reply->size, GFP_KERNEL);

	if (!buf) {
		error_sock(sock, "kmalloc failed");
		return -ENOMEM;
	}

	dnbd3_init_msghdr(msg);
	iov.iov_base = buf;
	iov.iov_len = reply->size;
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size,
			msg.msg_flags);
	if (result != RTT_TEST_BLOCK_SIZE) {
		error_sock(sock, "receive test block failed");
		/*
		 * a wrong length must not look like success to the caller,
		 * negative results of kernel_recvmsg are passed on unchanged
		 */
		if (result >= 0) {
			result = -EIO;
		}
	}

	kfree(buf);
	return result;
}

/**
 * dnbd3_receive_cmd_get_servers - receive new servers
 * @sock: the socket from which the servers are received
 * @reply: the reply initialized by dnbd3_receive_cmd, it is overwritten
 *         when surplus payload has to be drained from the socket
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * If the device uses server provided alternatives, up to NUMBER_SERVERS
 * entries are received into dnbd3_device.new_servers and
 * dnbd3_device.new_servers_num is set accordingly. Everything else that
 * belongs to the payload is read and discarded. The device_lock is held for
 * the whole function.
 *
 * Return: the result of the last kernel_recvmsg() (1 if nothing had to be
 * received), or -EIO if the server entries were received incompletely.
 */
int dnbd3_receive_cmd_get_servers(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
{
	struct dnbd3_device *dev = sock->device;
	struct msghdr msg;
	struct kvec iov;
	int result = 1;
	int count, remaining;

	dnbd3_init_msghdr(msg);

	debug_sock(sock, "get servers received");
	mutex_lock(&dev->device_lock);

	if (dev->use_server_provided_alts) {
		dev->new_servers_num = 0;
		count = MIN(NUMBER_SERVERS,
				reply->size / sizeof(dnbd3_server_entry_t));

		if (count != 0) {
			iov.iov_base = dev->new_servers;
			iov.iov_len = count * sizeof(dnbd3_server_entry_t);
			result = kernel_recvmsg(sock->sock, &msg, &iov, 1,
					iov.iov_len, msg.msg_flags);
			if (result <= 0) {
				error_sock(sock, "failed to receive get servers %d",
						result);
				goto unlock;
			}
			if (result != (count * sizeof(dnbd3_server_entry_t))) {
				error_sock(sock, "failed to get servers");
				result = -EIO;
				goto unlock;
			}
			dev->new_servers_num = count;
		}
		/* whatever did not fit into new_servers has to be drained */
		remaining = reply->size
				- (count * sizeof(dnbd3_server_entry_t));
	} else {
		/* server list is not wanted, drain the complete payload */
		remaining = reply->size;
	}

	/*
	 * remove the remaining data from the socket buffer
	 * abuse the reply struct as the receive buffer
	 */
	while (remaining > 0) {
		count = MIN(sizeof(dnbd3_reply_t), remaining);
		iov.iov_base = reply;
		iov.iov_len = count;
		result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count,
				msg.msg_flags);
		if (result <= 0) {
			error_sock(sock, "failed to receive payload from get servers");
			goto unlock;
		}
		remaining -= result;
	}

unlock:
	mutex_unlock(&dev->device_lock);
	return result;
}

/**
 * dnbd3_receive_cmd_latest_rid - receive latest rid
 * @sock: the socket from which the rid is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * dnbd3_device.update_available is set to true if the received RID is
 * greater than the RID of the device, to false otherwise.
 *
 * Return: the number of bytes received on success, -EIO if the announced
 * payload size is wrong or the rid was received incompletely, or the result
 * of kernel_recvmsg() if it is <= 0.
 */
int dnbd3_receive_cmd_latest_rid(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
{
	struct kvec iov;
	uint16_t rid = 0;
	int result;
	struct msghdr msg;
	struct dnbd3_device *dev = sock->device;

	dnbd3_init_msghdr(msg);
	debug_sock(sock, "latest rid received");

	if (reply->size != sizeof(rid)) {
		error_sock(sock, "failed to get latest rid, wrong size");
		return -EIO;
	}
	iov.iov_base = &rid;
	iov.iov_len = sizeof(rid);
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0) {
		error_sock(sock, "failed to receive latest rid");
		return result;
	}
	if (result != sizeof(rid)) {
		/* only part of the rid arrived, do not evaluate it */
		error_sock(sock, "failed to receive latest rid");
		return -EIO;
	}
	rid = net_order_16(rid);
	debug_sock(sock, "latest rid of %s is %d (currently using %d)",
			dev->imgname, (int)rid, (int)dev->rid);
	dev->update_available = rid > dev->rid;
	return result;
}


/**
 * dnbd3_receive_cmd_select_image - receive the answer to a select image
 * @sock: the socket from which the answer is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * The payload carries protocol version, image name, rid and image size.
 * The protocol version is stored in the server of @sock and must not be
 * lower than MIN_SUPPORTED_SERVER.
 * If the device has no rid / reported size yet (value 0) they are taken
 * from the payload, otherwise the payload has to match the device. The image
 * name always has to match dnbd3_device.imgname.
 *
 * Return: the number of payload bytes received on success, -EIO if the
 * payload is too large, incomplete or does not match, or the result of
 * kernel_recvmsg() if it is <= 0.
 */
int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct kvec iov;
	uint16_t rid;
	char *name;
	int result;
	struct msghdr msg;
	serialized_buffer_t payload_buffer;
	uint64_t reported_size;
	struct dnbd3_device *dev = sock->device;

	dnbd3_init_msghdr(msg);
	debug_sock(sock, "select image received");

	/*
	 * reply->size comes from the server and is used as receive length
	 * into a buffer on the stack, so it has to be bounded.
	 * NOTE(review): sizeof(payload_buffer) is the hard limit visible
	 * here; if serialized_buffer_t has bookkeeping members behind its
	 * data area, tighten this to the data capacity of the serializer.
	 */
	if (reply->size > sizeof(payload_buffer)) {
		error_sock(sock, "select image payload too large");
		return -EIO;
	}

	iov.iov_base = &payload_buffer;
	iov.iov_len = reply->size;
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0) {
		error_sock(sock, "failed to receive select image %d", result);
		return result;
	} else if (result != reply->size) {
		error_sock(sock, "could not read CMD_SELECT_IMAGE payload on handshake, size is %d and should be %d",
				result, reply->size);
		return -EIO;
	}

	/* handle/check reply payload */
	serializer_reset_read(&payload_buffer, reply->size);
	sock->server->protocol_version = serializer_get_uint16(&payload_buffer);
	if (sock->server->protocol_version < MIN_SUPPORTED_SERVER) {
		error_sock(sock, "server version is lower than min supported version");
		return -EIO;
	}

	name = serializer_get_string(&payload_buffer);
	/*
	 * defensive: the serializer is not visible here, presumably it
	 * returns NULL for a missing or unterminated string - never pass
	 * that to strcmp
	 */
	if (!name) {
		error_sock(sock, "server did not send a valid image name");
		return -EIO;
	}
	rid = serializer_get_uint16(&payload_buffer);
	if (dev->rid == 0) {
		/* first connection: adopt the rid offered by the server */
		dev->rid = rid;
	}

	if (dev->rid != rid || strcmp(name, dev->imgname) != 0) {
		error_sock(sock, "server offers image '%s', requested '%s'",
				name, dev->imgname);
		return -EIO;
	}

	reported_size = serializer_get_uint64(&payload_buffer);
	if (dev->reported_size == 0) {
		/* first connection: adopt the size and publish the capacity */
		if (reported_size < 4096) {
			error_sock(sock, "reported size by server is < 4096");
			return -EIO;
		}
		dev->reported_size = reported_size;
		set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte */
	} else if (dev->reported_size != reported_size) {
		error_sock(sock, "reported size by server is %llu but should be %llu",
				reported_size, dev->reported_size);
		return -EIO;
	}
	return result;

}