/*
* This file is part of the Distributed Network Block Device 3
*
* Copyright(c) 2019 Frederic Robra <frederic@robra.org>
* Parts copyright 2011-2012 Johann Latocha <johann@latocha.de>
*
* This file may be licensed under the terms of of the
* GNU General Public License Version 2 (the ``GPL'').
*
* Software distributed under the License is distributed
* on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
* express or implied. See the GPL for the specific language
* governing rights and limitations.
*
* You should have received a copy of the GPL along with this
* program. If not, go to http://www.gnu.org/licenses/gpl.html
* or write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
#include "net-txrx.h"
#include "utils.h"
#include "clientconfig.h"
#include "mq.h"
#include <linux/signal.h>
#include <net/sock.h>
/*
 * Request operations used for requests that do not come from the block
 * layer. The two driver-private operations REQ_OP_DRV_IN / REQ_OP_DRV_OUT
 * are reused to mark "special" command requests and the connect request;
 * dnbd3_send_request() switches on them.
 */
#define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN
#define DNBD3_REQ_OP_CONNECT REQ_OP_DRV_OUT
/*
 * Turn req into a special request: the operation goes into the low bits of
 * cmd_flags and the dnbd3 command is stored above REQ_FLAG_BITS.
 */
#define dnbd3_cmd_to_op_special(req, cmd) \
(req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS)
/*
 * Extract the dnbd3 command stored by dnbd3_cmd_to_op_special().
 */
#define dnbd3_op_special_to_cmd(req) \
((req)->cmd_flags >> REQ_FLAG_BITS)
/*
 * Turn req into a connect request carrying CMD_SELECT_IMAGE.
 */
#define dnbd3_connect_to_req(req) \
(req)->cmd_flags = DNBD3_REQ_OP_CONNECT \
| ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS)
/*
 * Turn req into a read of RTT_TEST_BLOCK_SIZE bytes starting at sector 0.
 * The private fields __data_len and __sector are written directly.
 */
#define dnbd3_test_block_to_req(req) \
do { \
(req)->cmd_flags = REQ_OP_READ; \
(req)->__data_len = RTT_TEST_BLOCK_SIZE; \
(req)->__sector = 0; \
} while (0)
/*
 * Request timeout in jiffies.
 * NOTE(review): presumably SOCKET_TIMEOUT_CLIENT_DATA is given in seconds -
 * confirm against its definition.
 */
#define REQUEST_TIMEOUT \
(HZ * SOCKET_TIMEOUT_CLIENT_DATA)
/*
 * Initialise a struct msghdr for kernel_sendmsg()/kernel_recvmsg(): no peer
 * name, no control data, and the flags MSG_WAITALL | MSG_NOSIGNAL.
 */
#define dnbd3_init_msghdr(h) \
do { \
(h).msg_name = NULL; \
(h).msg_namelen = 0; \
(h).msg_control = NULL; \
(h).msg_controllen = 0; \
(h).msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \
} while (0)
/*
* Methods for request and receive commands
*/
/**
 * dnbd3_to_handle - pack a tag and a cookie into one 64 bit handle
 * @tag: stored in the upper 32 bits of the handle
 * @cookie: stored in the lower 32 bits of the handle
 *
 * Return: the combined handle
 */
static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie)
{
	uint64_t packed = tag;

	packed <<= 32;
	packed |= cookie;
	return packed;
}
/**
 * dnbd3_tag_from_handle - extract the tag from a handle
 * @handle: the handle; the tag is its upper 32 bits
 *
 * Return: the tag
 */
static inline uint32_t dnbd3_tag_from_handle(uint64_t handle)
{
	const uint64_t upper_half = handle >> 32;

	return (uint32_t)upper_half;
}
/**
 * dnbd3_cookie_from_handle - extract the cookie from a handle
 * @handle: the handle; the cookie is its lower 32 bits
 *
 * Return: the cookie
 */
static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle)
{
	return (uint32_t)(handle & 0xFFFFFFFFULL);
}
/**
* dnbd3_send_request - send a request
* @sock: the socket where the request is send
* @req: the request to send
* @cmd: optional - the dnbd3_cmd from mq
*
* the tx_lock of the socket must be held
*/
int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req,
struct dnbd3_cmd *cmd)
{
dnbd3_request_t request;
struct msghdr msg;
struct kvec iov[2];
size_t iov_num = 1;
size_t lng;
int result;
uint32_t tag;
uint64_t handle;
serialized_buffer_t payload_buffer;
sock->pending = req;
dnbd3_init_msghdr(msg);
request.magic = dnbd3_packet_magic;
switch (req_op(req)) {
case REQ_OP_READ:
// debug_sock(sock, "request operation read %p", req);
request.cmd = CMD_GET_BLOCK;
request.offset = blk_rq_pos(req) << 9; // * 512
request.size = blk_rq_bytes(req);
break;
case DNBD3_REQ_OP_SPECIAL:
debug_sock(sock, "request operation special");
request.cmd = dnbd3_op_special_to_cmd(req);
request.size = 0;
break;
case DNBD3_REQ_OP_CONNECT:
debug_sock(sock, "request operation connect to %s",
sock->device->imgname);
request.cmd = CMD_SELECT_IMAGE;
serializer_reset_write(&payload_buffer);
serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION);
serializer_put_string(&payload_buffer, sock->device->imgname);
serializer_put_uint16(&payload_buffer, sock->device->rid);
serializer_put_uint8(&payload_buffer, 0); // is_server = false
iov[1].iov_base = &payload_buffer;
request.size = serializer_get_written_length(&payload_buffer);
iov[1].iov_len = request.size;
iov_num = 2;
break;
default:
return -EIO;
}
sock->cookie++;
if (cmd != NULL) {
cmd->cookie = sock->cookie;
tag = blk_mq_unique_tag(req);
handle = dnbd3_to_handle(tag, sock->cookie);
} else {
handle = sock->cookie;
}
memcpy(&request.handle, &handle, sizeof(handle));
fixup_request(request);
iov[0].iov_base = &request;
iov[0].iov_len = sizeof(request);
lng = iov_num == 1 ? iov[0].iov_len : iov[0].iov_len + iov[1].iov_len;
if (unlikely(!sock->sock)) {
warn_sock(sock, "socket was shutdown while sending");
result = -EIO;
goto error;
}
result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, lng);
if (result != lng) {
error_sock(sock, "connection to server lost");
/*
* this triggers:
* kernel BUG at block/blk-mq.c:704!
* command will be reqeued by timeout
*/
// if (cmd) {
// dnbd3_requeue_cmd(cmd);
// }
sock->server->failures++;
goto error;
}
error:
sock->pending = NULL;
return result;
}
/**
 * dnbd3_send_request_cmd - send a dnbd3 cmd
 * @sock: the socket where the request is sent
 * @dnbd3_cmd: the dnbd3 cmd to send
 *
 * The tx_lock of the socket must be held.
 *
 * A temporary request is allocated, set up for @dnbd3_cmd and handed to
 * dnbd3_send_request() without a dnbd3_cmd; it is freed before returning.
 * Supported commands are CMD_KEEPALIVE, CMD_GET_SERVERS, CMD_SELECT_IMAGE
 * and CMD_GET_BLOCK (which requests the RTT test block).
 *
 * Return: the result of dnbd3_send_request(), -ENOMEM if the request could
 * not be allocated, -EINVAL for an unsupported command or -EIO if the socket
 * is gone.
 */
int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd)
{
	int result;
	/*
	 * Zeroed allocation: only a few fields are set below, but the request
	 * is passed to block layer accessors in dnbd3_send_request().
	 */
	struct request *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req) {
		error_sock(sock, "kmalloc failed");
		return -ENOMEM;
	}

	switch (dnbd3_cmd) {
	case CMD_KEEPALIVE:
	case CMD_GET_SERVERS:
		dnbd3_cmd_to_op_special(req, dnbd3_cmd);
		break;
	case CMD_SELECT_IMAGE:
		dnbd3_connect_to_req(req);
		break;
	case CMD_GET_BLOCK:
		dnbd3_test_block_to_req(req);
		break;
	default:
		warn_sock(sock, "unsupported command %d", dnbd3_cmd);
		result = -EINVAL;
		goto out;
	}

	if (unlikely(!sock->sock)) {
		result = -EIO;
		goto out;
	}

	result = dnbd3_send_request(sock, req, NULL);
out:
	kfree(req);
	return result;
}
/**
 * dnbd3_receive_cmd - receive a command
 * @sock: the socket where the reply is received
 * @reply: an unused reply, will be filled with the reply header of the server
 *
 * This method should be called directly after the dnbd3_send_request_ method.
 * Only the reply header is read here; a payload announced by reply->size has
 * to be read by one of the dnbd3_receive_cmd_* methods.
 *
 * Return: the number of bytes received (the size of the reply header) on
 * success, the result of kernel_recvmsg() if it is <= 0, or -EIO if the
 * header is incomplete, has a wrong magic or carries command 0.
 */
int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
{
	int result;
	struct msghdr msg;
	struct kvec iov;

	dnbd3_init_msghdr(msg);
	iov.iov_base = reply;
	iov.iov_len = sizeof(*reply);
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0)
		return result;

	/* never parse a partially filled header */
	if (result != sizeof(*reply)) {
		error_sock(sock, "receive cmd incomplete header");
		return -EIO;
	}

	/*
	 * Fix up the received struct itself, mirroring fixup_request(request)
	 * on the send side. The former argument named an identifier that is
	 * not declared in this function.
	 */
	fixup_reply(*reply);

	if (reply->magic != dnbd3_packet_magic) {
		error_sock(sock, "receive cmd wrong magic packet");
		return -EIO;
	}
	if (reply->cmd == 0) {
		error_sock(sock, "receive command was 0");
		return -EIO;
	}
	return result;
}
//static int dnbd3_clear_socket(struct dnbd3_sock *sock, dnbd3_reply_t *reply,
// int remaining)
//{
// int result = 0;
// char *buf;
// struct kvec iov;
// struct msghdr msg;
// dnbd3_init_msghdr(msg);
// warn_sock(sock, "clearing socket %d bytes", remaining);
// buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL);
// if (!buf) {
// error_sock(sock, "kmalloc failed");
// return -ENOMEM;
// }
// iov.iov_base = buf;
// iov.iov_len = RTT_BLOCK_SIZE;
// while (remaining > 0) {
// result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
// msg.msg_flags);
// if (result <= 0) {
// goto error;
// }
// remaining -= result;
// }
//
// debug_sock(sock, "cleared socket");
//error:
// if (buf) {
// kfree(buf);
// }
// return result;
//}
/**
 * dnbd3_receive_cmd_get_block_mq - receive a block for mq
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * The handle of the reply is decoded into the blk-mq unique tag and the
 * cookie which were encoded by the send request method. The tag is used to
 * look up the request, and the payload is received from the socket directly
 * into the pages of the request's segments.
 *
 * Returns the result of the last kernel_recvmsg() on success (0 if the
 * request had no segments), -EIO if the reply does not match a started
 * request, if the cookie does not match or if a segment was received only
 * partially, or the negative result of kernel_recvmsg() if receiving failed.
 *
 * NOTE(review): reply->size is not compared with the size of the request
 * here - confirm that the caller or the protocol guarantees they match.
 */
int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
dnbd3_reply_t *reply)
{
struct dnbd3_cmd *cmd;
struct msghdr msg;
struct request *req = NULL;
struct kvec iov;
struct req_iterator iter;
struct bio_vec bvec_inst;
struct dnbd3_device *dev = sock->device;
struct bio_vec *bvec = &bvec_inst;
sigset_t blocked, oldset;
void *kaddr;
uint32_t tag, cookie;
uint16_t hwq;
// remaining is only decremented below; its only readers are the
// commented out dnbd3_clear_socket() calls
uint32_t remaining = reply->size;
int result = 0;
uint64_t handle;
dnbd3_init_msghdr(msg);
// split the handle into the cookie and the blk-mq unique tag
memcpy(&handle, &reply->handle, sizeof(handle));
cookie = dnbd3_cookie_from_handle(handle);
tag = dnbd3_tag_from_handle(handle);
// the hardware queue index comes from the network, so check it before
// it is used as an index into the tag set
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < dev->tag_set.nr_hw_queues) {
req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq],
blk_mq_unique_tag_to_tag(tag));
}
// no request for this tag, or the request was not started: the reply is
// unexpected; an existing request is requeued under its cmd lock
if (!req || !blk_mq_request_started(req)) {
error_sock(sock, "unexpected reply (%d) %p", tag, req);
if (req) {
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
debug_sock(sock, "requeue request");
dnbd3_requeue_cmd(cmd);
mutex_unlock(&cmd->lock);
}
// dnbd3_clear_socket(sock, reply, remaining);
return -EIO;
}
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
// the cookie stored at send time must match the one in the reply
if (cmd->cookie != cookie) {
error_sock(sock, "double reply on req %p, cookie %u, handle cookie %u",
req, cmd->cookie, cookie);
mutex_unlock(&cmd->lock);
// dnbd3_clear_socket(sock, reply, remaining);
return -EIO;
}
// receive the payload segment by segment into the request's pages
rq_for_each_segment(bvec_inst, req, iter) {
// block every signal except SIGKILL while receiving
siginitsetinv(&blocked, sigmask(SIGKILL));
sigprocmask(SIG_SETMASK, &blocked, &oldset);
kaddr = kmap(bvec->bv_page) + bvec->bv_offset;
iov.iov_base = kaddr;
iov.iov_len = bvec->bv_len;
result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len,
msg.msg_flags);
remaining -= result;
if (result != bvec->bv_len) {
// failed or short receive: undo the mapping and the signal mask,
// requeue the cmd and report the error
kunmap(bvec->bv_page);
sigprocmask(SIG_SETMASK, &oldset, NULL );
error_sock(sock, "could not receive from net to block layer");
dnbd3_requeue_cmd(cmd);
mutex_unlock(&cmd->lock);
if (result >= 0) {
// dnbd3_clear_socket(sock, reply, remaining);
return -EIO;
} else {
return result;
}
}
kunmap(bvec->bv_page);
sigprocmask(SIG_SETMASK, &oldset, NULL );
}
// all segments received: release the lock, then end the cmd with status 0
mutex_unlock(&cmd->lock);
dnbd3_end_cmd(cmd, 0);
return result;
}
/**
 * dnbd3_receive_cmd_get_block_test - receive a test block
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * This method should be called directly after the dnbd3_receive_cmd method.
 *
 * reply->size bytes are read from the socket into a temporary buffer and
 * thrown away; the read only counts as successful if exactly
 * RTT_TEST_BLOCK_SIZE bytes were received.
 *
 * Return: RTT_TEST_BLOCK_SIZE on success, -ENOMEM if the buffer could not be
 * allocated, the result of kernel_recvmsg() if it is <= 0, or -EIO if a
 * different number of bytes was received.
 */
int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct msghdr msg;
	struct kvec iov;
	int result;
	char *buf = kmalloc(reply->size, GFP_KERNEL);

	if (!buf) {
		error_sock(sock, "kmalloc failed");
		return -ENOMEM;
	}

	dnbd3_init_msghdr(msg);
	iov.iov_base = buf;
	iov.iov_len = reply->size;
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size,
			msg.msg_flags);
	if (result != RTT_TEST_BLOCK_SIZE) {
		error_sock(sock, "receive test block failed");
		/*
		 * A wrong byte count must not look like success to the
		 * caller; results <= 0 are passed on unchanged.
		 */
		if (result > 0)
			result = -EIO;
	}

	kfree(buf);
	return result;
}
/**
 * dnbd3_receive_cmd_get_servers - receive new servers
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * This method should be called directly after the dnbd3_receive_cmd method.
 *
 * With dnbd3_device.use_server_provided_alts set, up to NUMBER_SERVERS
 * entries are received into dnbd3_device.new_servers and
 * dnbd3_device.new_servers_num is set accordingly. Payload which is not
 * stored (surplus entries, or the whole payload if server provided
 * alternatives are not used) is read from the socket and discarded.
 * The device_lock is held for the whole operation.
 *
 * Return: 1 if nothing had to be read, otherwise the result of the last
 * kernel_recvmsg() (<= 0 on failure), or -EIO if the server entries were
 * received only partially.
 */
int dnbd3_receive_cmd_get_servers(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
{
	struct dnbd3_device *dev = sock->device;
	struct msghdr msg;
	struct kvec iov;
	int count, remaining;
	int result = 1;

	dnbd3_init_msghdr(msg);
	debug_sock(sock, "get servers received");

	mutex_lock(&dev->device_lock);

	if (dev->use_server_provided_alts) {
		dev->new_servers_num = 0;
		count = MIN(NUMBER_SERVERS, reply->size / sizeof(dnbd3_server_entry_t));
		if (count != 0) {
			iov.iov_base = dev->new_servers;
			iov.iov_len = count * sizeof(dnbd3_server_entry_t);
			result = kernel_recvmsg(sock->sock, &msg, &iov, 1,
					iov.iov_len, msg.msg_flags);
			if (result <= 0) {
				error_sock(sock, "failed to receive get servers %d",
						result);
				goto unlock;
			}
			if (result != (count * sizeof(dnbd3_server_entry_t))) {
				error_sock(sock, "failed to get servers");
				result = -EIO;
				goto unlock;
			}
			dev->new_servers_num = count;
		}
		/* whatever did not fit into new_servers still sits in the socket */
		remaining = reply->size - (count * sizeof(dnbd3_server_entry_t));
	} else {
		/* alternatives are ignored, the whole payload is discarded */
		remaining = reply->size;
	}

	/*
	 * drain the unused payload from the socket buffer,
	 * abusing the reply struct as the receive buffer
	 */
	while (remaining > 0) {
		count = MIN(sizeof(dnbd3_reply_t), remaining);
		iov.iov_base = reply;
		iov.iov_len = count;
		result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count,
				msg.msg_flags);
		if (result <= 0) {
			error_sock(sock, "failed to receive payload from get servers");
			goto unlock;
		}
		remaining -= result;
	}

unlock:
	mutex_unlock(&dev->device_lock);
	return result;
}
/**
 * dnbd3_receive_cmd_latest_rid - receive latest rid
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * This method should be called directly after the dnbd3_receive_cmd method.
 *
 * The payload must be exactly 2 bytes, the rid in network byte order.
 * dnbd3_device.update_available is set if the received rid is greater than
 * the rid currently in use and cleared otherwise.
 *
 * Return: the number of bytes received on success, the result of
 * kernel_recvmsg() if it is <= 0, or -EIO if the payload has the wrong size
 * or was received only partially.
 */
int dnbd3_receive_cmd_latest_rid(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
{
	struct kvec iov;
	uint16_t rid;
	int result;
	struct msghdr msg;
	struct dnbd3_device *dev = sock->device;

	dnbd3_init_msghdr(msg);
	debug_sock(sock, "latest rid received");

	if (reply->size != 2) {
		error_sock(sock, "failed to get latest rid, wrong size");
		return -EIO;
	}

	iov.iov_base = &rid;
	iov.iov_len = sizeof(rid);
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0) {
		error_sock(sock, "failed to receive latest rid");
		return result;
	}
	/* a partially received rid would be compared half uninitialized */
	if (result != sizeof(rid)) {
		error_sock(sock, "failed to receive latest rid");
		return -EIO;
	}

	rid = net_order_16(rid);
	debug_sock(sock, "latest rid of %s is %d (currently using %d)",
			dev->imgname, (int)rid, (int)dev->rid);
	dev->update_available = rid > dev->rid;
	return result;
}
/**
 * dnbd3_receive_cmd_select_image - select the image
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * This method should be called directly after the dnbd3_receive_cmd method.
 *
 * The payload is deserialized into protocol version, image name, rid and
 * image size. The protocol version is stored in the server of @sock.
 * If the device has no rid / reported size yet (value 0), the values of the
 * server are adopted and the capacity of the disk is set; otherwise the
 * values of the server must match the ones of the device. The image name
 * must always match.
 *
 * Return: the number of payload bytes received on success, the result of
 * kernel_recvmsg() if it is <= 0, or -EIO if the payload is too large,
 * incomplete or malformed, the server is too old, or image name, rid or
 * size do not match.
 */
int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct kvec iov;
	uint16_t rid;
	char *name;
	int result;
	struct msghdr msg;
	serialized_buffer_t payload_buffer;
	uint64_t reported_size;
	struct dnbd3_device *dev = sock->device;

	dnbd3_init_msghdr(msg);
	debug_sock(sock, "select image received");

	/*
	 * reply->size is chosen by the server; never receive more than the
	 * on-stack buffer can hold.
	 * NOTE(review): if serialized_buffer_t keeps bookkeeping fields behind
	 * its data area, tighten this bound to the size of the data area.
	 */
	if (reply->size > sizeof(payload_buffer)) {
		error_sock(sock, "select image payload too large (%u bytes)",
				reply->size);
		return -EIO;
	}

	iov.iov_base = &payload_buffer;
	iov.iov_len = reply->size;
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0) {
		error_sock(sock, "failed to receive select image %d", result);
		return result;
	} else if (result != reply->size) {
		error_sock(sock, "could not read CMD_SELECT_IMAGE payload on handshake, size is %d and should be %d",
				result, reply->size);
		return -EIO;
	}

	/* handle/check reply payload */
	serializer_reset_read(&payload_buffer, reply->size);
	sock->server->protocol_version = serializer_get_uint16(&payload_buffer);
	if (sock->server->protocol_version < MIN_SUPPORTED_SERVER) {
		error_sock(sock, "server version is lower than min supported version");
		return -EIO;
	}

	name = serializer_get_string(&payload_buffer);
	rid = serializer_get_uint16(&payload_buffer);
	/*
	 * NOTE(review): assumes serializer_get_string() may return NULL for a
	 * payload without a terminated string - confirm against its definition.
	 */
	if (!name) {
		error_sock(sock, "server sent malformed select image payload");
		return -EIO;
	}

	/*
	 * A device rid of 0 accepts whatever rid the server offers. Validate
	 * first and adopt the rid afterwards, so a reply for the wrong image
	 * cannot change the rid of the device.
	 */
	if (strcmp(name, dev->imgname) != 0
			|| (dev->rid != 0 && dev->rid != rid)) {
		error_sock(sock, "server offers image '%s', requested '%s'",
				name, dev->imgname);
		return -EIO;
	}
	if (dev->rid == 0)
		dev->rid = rid;

	reported_size = serializer_get_uint64(&payload_buffer);
	if (dev->reported_size == 0) {
		if (reported_size < 4096) {
			error_sock(sock, "reported size by server is < 4096");
			return -EIO;
		}
		dev->reported_size = reported_size;
		set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte */
	} else if (dev->reported_size != reported_size) {
		error_sock(sock, "reported size by server is %llu but should be %llu",
				reported_size, dev->reported_size);
		return -EIO;
	}
	return result;
}