diff options
author | Frederic Robra | 2019-08-29 16:50:04 +0200 |
---|---|---|
committer | Frederic Robra | 2019-08-29 16:50:04 +0200 |
commit | 4a0c8ebec090c06cf32121efd86ea8fae9e9a9c7 (patch) | |
tree | e4dac84f40082cedd4828ebb4aaa174cb6f5b5d0 /src | |
parent | optimized behaviour with failures (diff) | |
download | dnbd3-ng-4a0c8ebec090c06cf32121efd86ea8fae9e9a9c7.tar.gz dnbd3-ng-4a0c8ebec090c06cf32121efd86ea8fae9e9a9c7.tar.xz dnbd3-ng-4a0c8ebec090c06cf32121efd86ea8fae9e9a9c7.zip |
splitted net
Diffstat (limited to 'src')
-rw-r--r-- | src/kernel/mq.c | 2 | ||||
-rw-r--r-- | src/kernel/net-txrx.c | 608 | ||||
-rw-r--r-- | src/kernel/net-txrx.h | 116 | ||||
-rw-r--r-- | src/kernel/net.c | 608 | ||||
-rw-r--r-- | src/kernel/net.h | 4 | ||||
-rw-r--r-- | src/kernel/utils.c | 19 | ||||
-rw-r--r-- | src/kernel/utils.h | 12 |
7 files changed, 765 insertions, 604 deletions
diff --git a/src/kernel/mq.c b/src/kernel/mq.c index b1c1e99..94f06f2 100644 --- a/src/kernel/mq.c +++ b/src/kernel/mq.c @@ -19,7 +19,7 @@ */ #include "mq.h" -#include "net.h" +#include "net-txrx.h" #include <linux/wait.h> #include <linux/blk-mq.h> diff --git a/src/kernel/net-txrx.c b/src/kernel/net-txrx.c new file mode 100644 index 0000000..d905ce9 --- /dev/null +++ b/src/kernel/net-txrx.c @@ -0,0 +1,608 @@ +/* + * This file is part of the Distributed Network Block Device 3 + * + * Copyright(c) 2019 Frederic Robra <frederic@robra.org> + * Parts copyright 2011-2012 Johann Latocha <johann@latocha.de> + * + * This file may be licensed under the terms of of the + * GNU General Public License Version 2 (the ``GPL''). + * + * Software distributed under the License is distributed + * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See the GPL for the specific language + * governing rights and limitations. + * + * You should have received a copy of the GPL along with this + * program. If not, go to http://www.gnu.org/licenses/gpl.html + * or write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#include "net-txrx.h" +#include "utils.h" +#include "clientconfig.h" +#include "mq.h" + +#include <net/sock.h> + + + +#define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN +#define DNBD3_REQ_OP_CONNECT REQ_OP_DRV_OUT + +#define dnbd3_cmd_to_op_special(req, cmd) \ + (req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS) + +#define dnbd3_op_special_to_cmd(req) \ + ((req)->cmd_flags >> REQ_FLAG_BITS) + +#define dnbd3_connect_to_req(req) \ + (req)->cmd_flags = DNBD3_REQ_OP_CONNECT \ + | ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS) + +#define dnbd3_test_block_to_req(req) \ + do { \ + (req)->cmd_flags = REQ_OP_READ; \ + (req)->__data_len = RTT_BLOCK_SIZE; \ + (req)->__sector = 0; \ + } while (0) + + +#define REQUEST_TIMEOUT \ + (HZ * SOCKET_TIMEOUT_CLIENT_DATA) + + +#define dnbd3_init_msghdr(h) \ + do { \ + (h).msg_name = NULL; \ + (h).msg_namelen = 0; \ + (h).msg_control = NULL; \ + (h).msg_controllen = 0; \ + (h).msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \ + } while (0) + + + +/* + * Methods for request and receive commands + */ + + +/** + * dnbd3_to_handle - convert tag and cookie to handle + * @tag: the tag to convert + * @cookie: the cookie to convert + */ +static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie) { + return ((uint64_t) tag << 32) | cookie; +} + +/** + * dnbd3_tag_from_handle - get tag from handle + * @handle: the handle + */ +static inline uint32_t dnbd3_tag_from_handle(uint64_t handle) { + return (uint32_t)(handle >> 32); +} + +/** + * dnbd3_cookie_from_handle - get cookie from handle + * @handle: the handle + */ +static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle) { + return (uint32_t) handle; +} + +/** + * dnbd3_send_request - send a request + * @sock: the socket where the request is send + * @req: the request to send + * @cmd: optional - the dnbd3_cmd from mq + * + * the tx_lock of the socket must be held + */ +int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, + struct dnbd3_cmd *cmd) +{ + dnbd3_request_t request; + struct msghdr msg; + struct kvec iov[2]; + size_t iov_num = 1; + size_t lng; + int result; + uint32_t tag; + uint64_t handle; + serialized_buffer_t payload_buffer; + sock->pending = req; + dnbd3_init_msghdr(msg); + + request.magic = dnbd3_packet_magic; + + switch (req_op(req)) { + case REQ_OP_READ: + debug_sock(sock, "request operation read"); + request.cmd = CMD_GET_BLOCK; + request.offset = blk_rq_pos(req) << 9; // * 512 + request.size = blk_rq_bytes(req); + break; + case DNBD3_REQ_OP_SPECIAL: + debug_sock(sock, "request operation special"); + request.cmd = dnbd3_op_special_to_cmd(req); + request.size = 0; + break; + case DNBD3_REQ_OP_CONNECT: + debug_sock(sock, "request operation connect to %s", + sock->device->imgname); + request.cmd = CMD_SELECT_IMAGE; + serializer_reset_write(&payload_buffer); + serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION); + serializer_put_string(&payload_buffer, sock->device->imgname); + serializer_put_uint16(&payload_buffer, sock->device->rid); + serializer_put_uint8(&payload_buffer, 0); // is_server = false + iov[1].iov_base = &payload_buffer; + request.size = serializer_get_written_length(&payload_buffer); + iov[1].iov_len = request.size; + iov_num = 2; + break; + default: + return -EIO; + } + sock->cookie++; + if (cmd != NULL) { + cmd->cookie = sock->cookie; + tag = blk_mq_unique_tag(req); + handle = dnbd3_to_handle(tag, sock->cookie); + } else { + handle = sock->cookie; + } + memcpy(&request.handle, &handle, sizeof(handle)); + + fixup_request(request); + iov[0].iov_base = &request; + iov[0].iov_len = sizeof(request); + lng = iov_num == 1 ? iov[0].iov_len : iov[0].iov_len + iov[1].iov_len; + result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, lng); + if (result != lng) { + error_sock(sock, "connection to server lost"); + if (cmd) { + dnbd3_requeue_cmd(cmd); + } + sock->panic = true; + sock->server->failures++; + goto error; + } + +error: + sock->pending = NULL; + return result; +} + + +/** + * dnbd3_send_request_cmd - send a dndb3 cmd + * @sock: the socket where the request is send + * @dnbd3_cmd: the dnbd3 cmd to send + */ +int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd) +{ + int result; + struct request *req = kmalloc(sizeof(struct request), GFP_KERNEL); + if (!req) { + error_sock(sock, "kmalloc failed"); + result = -EIO; + goto error; + } + + switch (dnbd3_cmd) { + case CMD_KEEPALIVE: + case CMD_GET_SERVERS: + dnbd3_cmd_to_op_special(req, dnbd3_cmd); + break; + case CMD_SELECT_IMAGE: + dnbd3_connect_to_req(req); + break; + case CMD_GET_BLOCK: + dnbd3_test_block_to_req(req); + break; + default: + warn_sock(sock, "unsupported command %d", dnbd3_cmd); + result = -EINVAL; + goto error; + } + + mutex_lock(&sock->tx_lock); + sock->pending = req; + result = dnbd3_send_request(sock, req, NULL); + mutex_unlock(&sock->tx_lock); + +error: + if (req) { + kfree(req); + } + return result; +} + +/** + * dnbd3_receive_cmd - receive a command + * @sock: the socket where the request is received + * @reply: an unused reply will be filled with the reply of the server + * + * this method should be called directly after the dnbd3_send_request_ method + */ +int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply) +{ + int result; + struct msghdr msg; + struct kvec iov; + dnbd3_init_msghdr(msg); + iov.iov_base = reply; + iov.iov_len = sizeof(dnbd3_reply_t); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); + if (result <= 0) { + return result; + } + fixup_reply(dnbd3_reply); + + if (reply->magic != dnbd3_packet_magic) { + error_sock(sock, "receive cmd wrong magic packet"); + return -EIO; + } + + if (reply->cmd == 0) { + error_sock(sock, "receive command was 0"); + return -EIO; + } + return result; +} + +static int dnbd3_clear_socket(struct dnbd3_sock *sock, dnbd3_reply_t *reply, + int remaining) +{ + int result = 0; + char *buf; + struct kvec iov; + struct msghdr msg; + dnbd3_init_msghdr(msg); + warn_sock(sock, "clearing socket %d bytes", remaining); + buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL); + if (!buf) { + error_sock(sock, "kmalloc failed"); + return -EIO; + } + /* hold the tx_lock so no new requests are send */ + mutex_lock(&sock->tx_lock); + iov.iov_base = buf; + iov.iov_len = RTT_BLOCK_SIZE; + while (remaining > 0) { + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); + if (result <= 0) { + goto error; + } + remaining -= result; + } + + debug_sock(sock, "cleared socket"); +error: + mutex_unlock(&sock->tx_lock); + if (buf) { + kfree(buf); + } + return result; +} + +/** + * dnbd3_receive_cmd_get_block_mq - receive a block for mq + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * this method copies the data to user space according to the request which is + * encoded in the handle by the send request method and decoded here. + */ +int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, + dnbd3_reply_t *reply) +{ + struct dnbd3_cmd *cmd; + struct msghdr msg; + struct request *req = NULL; + struct kvec iov; + struct req_iterator iter; + struct bio_vec bvec_inst; + struct dnbd3_device *dev = sock->device; + struct bio_vec *bvec = &bvec_inst; + sigset_t blocked, oldset; + void *kaddr; + uint32_t tag, cookie; + uint16_t hwq; + uint32_t remaining = reply->size; + int result = 0; + uint64_t handle; + dnbd3_init_msghdr(msg); + + memcpy(&handle, &reply->handle, sizeof(handle)); + cookie = dnbd3_cookie_from_handle(handle); + tag = dnbd3_tag_from_handle(handle); + + hwq = blk_mq_unique_tag_to_hwq(tag); + if (hwq < dev->tag_set.nr_hw_queues) { + req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq], + blk_mq_unique_tag_to_tag(tag)); + } + if (!req || !blk_mq_request_started(req)) { + error_sock(sock, "unexpected reply (%d) %p", tag, req); + if (req) { + debug_sock(sock, "requeue request"); + dnbd3_requeue_cmd(blk_mq_rq_to_pdu(req)); + } + dnbd3_clear_socket(sock, reply, remaining); + return -EIO; + } + cmd = blk_mq_rq_to_pdu(req); + + mutex_lock(&cmd->lock); + if (cmd->cookie != cookie) { + error_sock(sock, "double reply on req %p, cookie %u, handle cookie %u", + req, cmd->cookie, cookie); + mutex_unlock(&cmd->lock); + dnbd3_clear_socket(sock, reply, remaining); + return -EIO; + } + + rq_for_each_segment(bvec_inst, req, iter) { + siginitsetinv(&blocked, sigmask(SIGKILL)); + sigprocmask(SIG_SETMASK, &blocked, &oldset); + + kaddr = kmap(bvec->bv_page) + bvec->bv_offset; + iov.iov_base = kaddr; + iov.iov_len = bvec->bv_len; + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, + msg.msg_flags); + remaining -= result; + if (result != bvec->bv_len) { + kunmap(bvec->bv_page); + sigprocmask(SIG_SETMASK, &oldset, NULL ); + error_sock(sock, "could not receive from net to block layer"); + dnbd3_requeue_cmd(cmd); + mutex_unlock(&cmd->lock); + if (result >= 0) { + dnbd3_clear_socket(sock, reply, remaining); + return -EIO; + } else { + return result; + } + } + kunmap(bvec->bv_page); + + sigprocmask(SIG_SETMASK, &oldset, NULL ); + } + mutex_unlock(&cmd->lock); + dnbd3_end_cmd(cmd, 0); + return result; +} + + + +/** + * dnbd3_receive_cmd_get_block_test - receive a test block + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * the received data is just thrown away + */ +int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock, + dnbd3_reply_t *reply) +{ + struct msghdr msg; + struct kvec iov; + int result = 0; + char *buf = kmalloc(reply->size, GFP_KERNEL); + if (!buf) { + error_sock(sock, "kmalloc failed"); + goto error; + } + + dnbd3_init_msghdr(msg); + iov.iov_base = buf; + iov.iov_len = reply->size; + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size, + msg.msg_flags); + if (result != RTT_BLOCK_SIZE) { + error_sock(sock, "receive test block failed"); + goto error; + } + +error: + if (buf) { + kfree(buf); + } + return result; +} + +/** + * dnbd3_receive_cmd_get_servers - receive new servers + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * the new servers are copied to dnbd3_device.new_servers and + * dnbd3_device.new_server_num is set accordingly + */ +int dnbd3_receive_cmd_get_servers(struct dnbd3_sock *sock, dnbd3_reply_t *reply) +{ + struct msghdr msg; + struct kvec iov; + struct dnbd3_device *dev = sock->device; + int result = 1; + int count, remaining; + dnbd3_init_msghdr(msg); + + debug_sock(sock, "get servers received"); + mutex_lock(&dev->device_lock); + if (!dev->use_server_provided_alts) { + remaining = reply->size; + goto consume_payload; + } + dev->new_servers_num = 0; + count = MIN(NUMBER_SERVERS, reply->size / sizeof(dnbd3_server_entry_t)); + + if (count != 0) { + iov.iov_base = dev->new_servers; + iov.iov_len = count * sizeof(dnbd3_server_entry_t); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); + if (result <= 0) { + error_sock(sock, "failed to receive get servers %d", + result); + mutex_unlock(&dev->device_lock); + return result; + } else if (result != (count * sizeof(dnbd3_server_entry_t))) { + error_sock(sock, "failed to get servers"); + mutex_unlock(&dev->device_lock); + return -EIO; + } + dev->new_servers_num = count; + } + /* + * if there were more servers than accepted, remove the remaining data + * from the socket buffer + * abuse the reply struct as the receive buffer + */ + remaining = reply->size - (count * sizeof(dnbd3_server_entry_t)); +consume_payload: + while (remaining > 0) { + count = MIN(sizeof(dnbd3_reply_t), remaining); + iov.iov_base = reply; + iov.iov_len = count; + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count, + msg.msg_flags); + if (result <= 0) { + error_sock(sock, "failed to receive payload from get servers"); + mutex_unlock(&dev->device_lock); + return result; + } + remaining -= result; + } + mutex_unlock(&dev->device_lock); + return result; +} + +/** + * dnbd3_receive_cmd_latest_rid - receive latest rid + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * dnbd3_device.update_available is set if a new RID is received + */ +int dnbd3_receive_cmd_latest_rid(struct dnbd3_sock *sock, dnbd3_reply_t *reply) +{ + struct kvec iov; + uint16_t rid; + int result; + struct msghdr msg; + struct dnbd3_device *dev = sock->device; + dnbd3_init_msghdr(msg); + debug_sock(sock, "latest rid received"); + + if (reply->size != 2) { + error_sock(sock, "failed to get latest rid, wrong size"); + return -EIO; + } + iov.iov_base = &rid; + iov.iov_len = sizeof(rid); + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); + if (result <= 0) { + error_sock(sock, "failed to receive latest rid"); + return result; + } + rid = net_order_16(rid); + debug_sock(sock, "latest rid of %s is %d (currently using %d)", + dev->imgname, (int)rid, (int)dev->rid); + dev->update_available = (rid > dev->rid ? true : false); + return result; +} + + +/** + * dnbd3_receive_cmd_latest_rid - select the image + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * if this is the first connection the image name, file size and rid will be set + * if this is a further connection image name, file size and rid will be checked + */ +int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock, + dnbd3_reply_t *reply) +{ + struct kvec iov; + uint16_t rid; + char *name; + int result; + struct msghdr msg; + serialized_buffer_t payload_buffer; + uint64_t reported_size; + struct dnbd3_device *dev = sock->device; + dnbd3_init_msghdr(msg); + debug_sock(sock, "select image received"); + iov.iov_base = &payload_buffer; + iov.iov_len = reply->size; + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); + if (result <= 0) { + error_sock(sock, "failed to receive select image %d", result); + return result; + } else if (result != reply->size) { + error_sock(sock, "could not read CMD_SELECT_IMAGE payload on handshake, size is %d and should be %d", + result, reply->size); + return -EIO; + } + + /* handle/check reply payload */ + serializer_reset_read(&payload_buffer, reply->size); + sock->server->protocol_version = serializer_get_uint16(&payload_buffer); + if (sock->server->protocol_version < MIN_SUPPORTED_SERVER) { + error_sock(sock, "server version is lower than min supported version"); + return -EIO; + } + + //TODO compare RID + + name = serializer_get_string(&payload_buffer); + rid = serializer_get_uint16(&payload_buffer); + if (dev->rid != rid && strcmp(name, dev->imgname) != 0) { + error_sock(sock, "server offers image '%s', requested '%s'", + name, dev->imgname); + return -EIO; + } + + reported_size = serializer_get_uint64(&payload_buffer); + if (!dev->reported_size) { + if (reported_size < 4096) { + error_sock(sock, "reported size by server is < 4096"); + return -EIO; + } + dev->reported_size = reported_size; + set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte */ + } else if (dev->reported_size != reported_size) { + error_sock(sock, "reported size by server is %llu but should be %llu", + reported_size, dev->reported_size); + return -EIO; + } + return result; + +} + + + + diff --git a/src/kernel/net-txrx.h b/src/kernel/net-txrx.h new file mode 100644 index 0000000..3636ffe --- /dev/null +++ b/src/kernel/net-txrx.h @@ -0,0 +1,116 @@ +/* + * This file is part of the Distributed Network Block Device 3 + * + * Copyright(c) 2019 Frederic Robra <frederic@robra.org> + * Parts copyright 2011-2012 Johann Latocha <johann@latocha.de> + * + * This file may be licensed under the terms of of the + * GNU General Public License Version 2 (the ``GPL''). + * + * Software distributed under the License is distributed + * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See the GPL for the specific language + * governing rights and limitations. + * + * You should have received a copy of the GPL along with this + * program. If not, go to http://www.gnu.org/licenses/gpl.html + * or write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + + +#ifndef NET_TXRX_H_ +#define NET_TXRX_H_ + + + +#include "dnbd3.h" + +#define dnbd3_is_sock_alive(s) ((s).sock && (s).server) + +int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, struct dnbd3_cmd *cmd); + +/** + * dnbd3_send_request_cmd - send a dndb3 cmd + * @sock: the socket where the request is send + * @dnbd3_cmd: the dnbd3 cmd to send + */ +int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd); + + +/** + * dnbd3_receive_cmd - receive a command + * @sock: the socket where the request is received + * @reply: an unused reply will be filled with the reply of the server + * + * this method should be called directly after the dnbd3_send_request_ method + */ +int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply); + + +/** + * dnbd3_receive_cmd_get_block_mq - receive a block for mq + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * this method copies the data to user space according to the request which is + * encoded in the handle by the send request method and decoded here. + */ +int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, + dnbd3_reply_t *reply); + + +/** + * dnbd3_receive_cmd_get_block_test - receive a test block + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * the received data is just thrown away + */ +int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock, + dnbd3_reply_t *reply); + + +/** + * dnbd3_receive_cmd_get_servers - receive new servers + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * the new servers are copied to dnbd3_device.new_servers and + * dnbd3_device.new_server_num is set accordingly + */ +int dnbd3_receive_cmd_get_servers(struct dnbd3_sock *sock, dnbd3_reply_t *reply); + + +/** + * dnbd3_receive_cmd_latest_rid - receive latest rid + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * dnbd3_device.update_available is set if a new RID is received + */ +int dnbd3_receive_cmd_latest_rid(struct dnbd3_sock *sock, dnbd3_reply_t *reply); + +/** + * dnbd3_receive_cmd_latest_rid - select the image + * @sock: the socket where the request is received + * @reply: the reply initialized by dnbd3_receive_cmd + * + * this method should be called directly after the dnbd3_receive_cmd method + * + * if this is the first connection the image name, file size and rid will be set + * if this is a further connection image name, file size and rid will be checked + */ +int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock, + dnbd3_reply_t *reply); + +#endif /* KERNEL_NET_TXRX_H_ */ diff --git a/src/kernel/net.c b/src/kernel/net.c index 1207d0a..cb4e76c 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -25,613 +25,26 @@ #include <linux/sort.h> #include "net.h" +#include "net-txrx.h" #include "utils.h" #include "clientconfig.h" #include "mq.h" -#define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN -#define DNBD3_REQ_OP_CONNECT REQ_OP_DRV_OUT - -#define dnbd3_cmd_to_op_special(req, cmd) \ - (req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS) - -#define dnbd3_op_special_to_cmd(req) \ - ((req)->cmd_flags >> REQ_FLAG_BITS) - -#define dnbd3_connect_to_req(req) \ - (req)->cmd_flags = DNBD3_REQ_OP_CONNECT \ - | ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS) - -#define dnbd3_test_block_to_req(req) \ - do { \ - (req)->cmd_flags = REQ_OP_READ; \ - (req)->__data_len = RTT_BLOCK_SIZE; \ - (req)->__sector = 0; \ - } while (0) - #define dnbd3_sock_create(af,type,proto,sock) \ sock_create_kern(&init_net, (af) == HOST_IP4 ? AF_INET : AF_INET6, \ type, proto, sock) -#define REQUEST_TIMEOUT \ - (HZ * SOCKET_TIMEOUT_CLIENT_DATA) +#define dnbd3_is_sock_alive(s) ((s).sock && (s).server) -#define dnbd3_init_msghdr(h) \ - do { \ - (h).msg_name = NULL; \ - (h).msg_namelen = 0; \ - (h).msg_control = NULL; \ - (h).msg_controllen = 0; \ - (h).msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \ - } while (0) static int __dnbd3_socket_connect(struct dnbd3_sock *sock, - struct dnbd3_server * server); + struct dnbd3_server *server); static int dnbd3_socket_connect(struct dnbd3_sock *sock, - struct dnbd3_server * server); + struct dnbd3_server *server); static int dnbd3_socket_disconnect(struct dnbd3_sock *sock); -static uint64_t dnbd3_average_rtt(struct dnbd3_server *server) -{ - int i, j = 0; - uint64_t avg = 0; - for (i = 0; i < 4; i++) { - avg += server->rtts[i]; - j += server->rtts[i] == 0 ? 0 : 1; - } - if (avg == 0) { - return RTT_UNKNOWN; - } else { - avg = avg / j; - avg += server->failures * avg / 10; - return avg; - } -} - -/* - * Methods for request and receive commands - */ - - -/** - * dnbd3_to_handle - convert tag and cookie to handle - * @tag: the tag to convert - * @cookie: the cookie to convert - */ -static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie) { - return ((uint64_t) tag << 32) | cookie; -} - -/** - * dnbd3_tag_from_handle - get tag from handle - * @handle: the handle - */ -static inline uint32_t dnbd3_tag_from_handle(uint64_t handle) { - return (uint32_t)(handle >> 32); -} - -/** - * dnbd3_cookie_from_handle - get cookie from handle - * @handle: the handle - */ -static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle) { - return (uint32_t) handle; -} - -/** - * dnbd3_send_request - send a request - * @sock: the socket where the request is send - * @req: the request to send - * @cmd: optional - the dnbd3_cmd from mq - * - * the tx_lock of the socket must be held - */ -int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, - struct dnbd3_cmd *cmd) -{ - dnbd3_request_t request; - struct msghdr msg; - struct kvec iov[2]; - size_t iov_num = 1; - size_t lng; - int result; - uint32_t tag; - uint64_t handle; - serialized_buffer_t payload_buffer; - sock->pending = req; - dnbd3_init_msghdr(msg); - - request.magic = dnbd3_packet_magic; - - switch (req_op(req)) { - case REQ_OP_READ: - debug_sock(sock, "request operation read"); - request.cmd = CMD_GET_BLOCK; - request.offset = blk_rq_pos(req) << 9; // * 512 - request.size = blk_rq_bytes(req); - break; - case DNBD3_REQ_OP_SPECIAL: - debug_sock(sock, "request operation special"); - request.cmd = dnbd3_op_special_to_cmd(req); - request.size = 0; - break; - case DNBD3_REQ_OP_CONNECT: - debug_sock(sock, "request operation connect to %s", - sock->device->imgname); - request.cmd = CMD_SELECT_IMAGE; - serializer_reset_write(&payload_buffer); - serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION); - serializer_put_string(&payload_buffer, sock->device->imgname); - serializer_put_uint16(&payload_buffer, sock->device->rid); - serializer_put_uint8(&payload_buffer, 0); // is_server = false - iov[1].iov_base = &payload_buffer; - request.size = serializer_get_written_length(&payload_buffer); - iov[1].iov_len = request.size; - iov_num = 2; - break; - default: - return -EIO; - } - sock->cookie++; - if (cmd != NULL) { - cmd->cookie = sock->cookie; - tag = blk_mq_unique_tag(req); - handle = dnbd3_to_handle(tag, sock->cookie); - } else { - handle = sock->cookie; - } - memcpy(&request.handle, &handle, sizeof(handle)); - - fixup_request(request); - iov[0].iov_base = &request; - iov[0].iov_len = sizeof(request); - lng = iov_num == 1 ? iov[0].iov_len : iov[0].iov_len + iov[1].iov_len; - result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, lng); - if (result != lng) { - error_sock(sock, "connection to server lost"); - if (cmd) { - dnbd3_requeue_cmd(cmd); - } - sock->panic = true; - sock->server->failures++; - goto error; - } - -error: - sock->pending = NULL; - return result; -} - - -/** - * dnbd3_send_request_cmd - send a dndb3 cmd - * @sock: the socket where the request is send - * @dnbd3_cmd: the dnbd3 cmd to send - */ -static int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd) -{ - int result; - struct request *req = kmalloc(sizeof(struct request), GFP_KERNEL); - if (!req) { - error_sock(sock, "kmalloc failed"); - result = -EIO; - goto error; - } - - switch (dnbd3_cmd) { - case CMD_KEEPALIVE: - case CMD_GET_SERVERS: - dnbd3_cmd_to_op_special(req, dnbd3_cmd); - break; - case CMD_SELECT_IMAGE: - dnbd3_connect_to_req(req); - break; - case CMD_GET_BLOCK: - dnbd3_test_block_to_req(req); - break; - default: - warn_sock(sock, "unsupported command %d", dnbd3_cmd); - result = -EINVAL; - goto error; - } - - mutex_lock(&sock->tx_lock); - sock->pending = req; - result = dnbd3_send_request(sock, req, NULL); - mutex_unlock(&sock->tx_lock); - -error: - if (req) { - kfree(req); - } - return result; -} - -/** - * dnbd3_receive_cmd - receive a command - * @sock: the socket where the request is received - * @reply: an unused reply will be filled with the reply of the server - * - * this method should be called directly after the dnbd3_send_request_ method - */ -static int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply) -{ - int result; - struct msghdr msg; - struct kvec iov; - dnbd3_init_msghdr(msg); - iov.iov_base = reply; - iov.iov_len = sizeof(dnbd3_reply_t); - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, - msg.msg_flags); - if (result <= 0) { - return result; - } - fixup_reply(dnbd3_reply); - - if (reply->magic != dnbd3_packet_magic) { - error_sock(sock, "receive cmd wrong magic packet"); - return -EIO; - } - - if (reply->cmd == 0) { - error_sock(sock, "receive command was 0"); - return -EIO; - } - return result; -} - -static int dnbd3_clear_socket(struct dnbd3_sock *sock, dnbd3_reply_t *reply, - int remaining) -{ - int result = 0; - char *buf; - struct kvec iov; - struct msghdr msg; - dnbd3_init_msghdr(msg); - warn_sock(sock, "clearing socket %d bytes", remaining); - buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL); - if (!buf) { - error_sock(sock, "kmalloc failed"); - return -EIO; - } - /* hold the tx_lock so no new requests are send */ - mutex_lock(&sock->tx_lock); - iov.iov_base = buf; - iov.iov_len = RTT_BLOCK_SIZE; - while (remaining > 0) { - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, - msg.msg_flags); - if (result <= 0) { - goto error; - } - remaining -= result; - } - - debug_sock(sock, "cleared socket"); -error: - mutex_unlock(&sock->tx_lock); - if (buf) { - kfree(buf); - } - return result; -} - -/** - * dnbd3_receive_cmd_get_block_mq - receive a block for mq - * @sock: the socket where the request is received - * @reply: the reply initialized by dnbd3_receive_cmd - * - * this method should be called directly after the dnbd3_receive_cmd method - * - * this method copies the data to user space according to the request which is - * encoded in the handle by the send request method and decoded here. - */ -static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, - dnbd3_reply_t *reply) -{ - struct dnbd3_cmd *cmd; - struct msghdr msg; - struct request *req = NULL; - struct kvec iov; - struct req_iterator iter; - struct bio_vec bvec_inst; - struct dnbd3_device *dev = sock->device; - struct bio_vec *bvec = &bvec_inst; - sigset_t blocked, oldset; - void *kaddr; - uint32_t tag, cookie; - uint16_t hwq; - uint32_t remaining = reply->size; - int result = 0; - uint64_t handle; - dnbd3_init_msghdr(msg); - - memcpy(&handle, &reply->handle, sizeof(handle)); - cookie = dnbd3_cookie_from_handle(handle); - tag = dnbd3_tag_from_handle(handle); - - hwq = blk_mq_unique_tag_to_hwq(tag); - if (hwq < dev->tag_set.nr_hw_queues) { - req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq], - blk_mq_unique_tag_to_tag(tag)); - } - if (!req || !blk_mq_request_started(req)) { - error_sock(sock, "unexpected reply (%d) %p", tag, req); - if (req) { - debug_sock(sock, "requeue request"); - dnbd3_requeue_cmd(blk_mq_rq_to_pdu(req)); - } - dnbd3_clear_socket(sock, reply, remaining); - return -EIO; - } - cmd = blk_mq_rq_to_pdu(req); - - mutex_lock(&cmd->lock); - if (cmd->cookie != cookie) { - error_sock(sock, "double reply on req %p, cookie %u, handle cookie %u", - req, cmd->cookie, cookie); - mutex_unlock(&cmd->lock); - dnbd3_clear_socket(sock, reply, remaining); - return -EIO; - } - - rq_for_each_segment(bvec_inst, req, iter) { - siginitsetinv(&blocked, sigmask(SIGKILL)); - sigprocmask(SIG_SETMASK, &blocked, &oldset); - - kaddr = kmap(bvec->bv_page) + bvec->bv_offset; - iov.iov_base = kaddr; - iov.iov_len = bvec->bv_len; - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, bvec->bv_len, - msg.msg_flags); - remaining -= result; - if (result != bvec->bv_len) { - kunmap(bvec->bv_page); - sigprocmask(SIG_SETMASK, &oldset, NULL ); - error_sock(sock, "could not receive from net to block layer"); - dnbd3_requeue_cmd(cmd); - mutex_unlock(&cmd->lock); - if (result >= 0) { - dnbd3_clear_socket(sock, reply, remaining); - return -EIO; - } else { - return result; - } - } - kunmap(bvec->bv_page); - - sigprocmask(SIG_SETMASK, &oldset, NULL ); - } - mutex_unlock(&cmd->lock); - dnbd3_end_cmd(cmd, 0); - return result; -} - - - -/** - * dnbd3_receive_cmd_get_block_test - receive a test block - * @sock: the socket where the request is received - * @reply: the reply initialized by dnbd3_receive_cmd - * - * this method should be called directly after the dnbd3_receive_cmd method - * - * the received data is just thrown away - */ -static int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock, - dnbd3_reply_t *reply) -{ - struct msghdr msg; - struct kvec iov; - int result = 0; - char *buf = kmalloc(reply->size, GFP_KERNEL); - if (!buf) { - error_sock(sock, "kmalloc failed"); - goto error; - } - - dnbd3_init_msghdr(msg); - iov.iov_base = buf; - iov.iov_len = reply->size; - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size, - msg.msg_flags); - if (result != RTT_BLOCK_SIZE) { - error_sock(sock, "receive test block failed"); - goto error; - } - -error: - if (buf) { - kfree(buf); - } - return result; -} - -/** - * dnbd3_receive_cmd_get_servers - receive new servers - * @sock: the socket where the request is received - * @reply: the reply initialized by dnbd3_receive_cmd - * - * this method should be called directly after the dnbd3_receive_cmd method - * - * the new servers are copied to dnbd3_device.new_servers and - * dnbd3_device.new_server_num is set accordingly - */ -static int dnbd3_receive_cmd_get_servers(struct dnbd3_sock *sock, - dnbd3_reply_t *reply) -{ - struct msghdr msg; - struct kvec iov; - struct dnbd3_device *dev = sock->device; - int result = 1; - int count, remaining; - dnbd3_init_msghdr(msg); - - debug_sock(sock, "get servers received"); - mutex_lock(&dev->device_lock); - if (!dev->use_server_provided_alts) { - remaining = reply->size; - goto consume_payload; - } - dev->new_servers_num = 0; - count = MIN(NUMBER_SERVERS, reply->size / sizeof(dnbd3_server_entry_t)); - - if (count != 0) { - iov.iov_base = dev->new_servers; - iov.iov_len = count * sizeof(dnbd3_server_entry_t); - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, - msg.msg_flags); - if (result <= 0) { - error_sock(sock, "failed to receive get servers %d", - result); - mutex_unlock(&dev->device_lock); - return result; - } else if (result != (count * sizeof(dnbd3_server_entry_t))) { - error_sock(sock, "failed to get servers"); - mutex_unlock(&dev->device_lock); - return -EIO; - } - dev->new_servers_num = count; - } - /* - * if there were more servers than accepted, remove the remaining data - * from the socket buffer - * abuse the reply struct as the receive buffer - */ - remaining = reply->size - (count * sizeof(dnbd3_server_entry_t)); -consume_payload: - while (remaining > 0) { - count = MIN(sizeof(dnbd3_reply_t), remaining); - iov.iov_base = reply; - iov.iov_len = count; - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count, - msg.msg_flags); - if (result <= 0) { - error_sock(sock, "failed to receive payload from get servers"); - mutex_unlock(&dev->device_lock); - return result; - } - remaining -= result; - } - mutex_unlock(&dev->device_lock); - return result; -} - -/** - * dnbd3_receive_cmd_latest_rid - receive latest rid - * @sock: the socket where the request is received - * @reply: the reply initialized by dnbd3_receive_cmd - * - * this method should be called directly after the dnbd3_receive_cmd method - * - * dnbd3_device.update_available is set if a new RID is received - */ -static int dnbd3_receive_cmd_latest_rid(struct dnbd3_sock *sock, - dnbd3_reply_t *reply) -{ - struct kvec iov; - uint16_t rid; - int result; - struct msghdr msg; - struct dnbd3_device *dev = sock->device; - dnbd3_init_msghdr(msg); - debug_sock(sock, "latest rid received"); - - if (reply->size != 2) { - error_sock(sock, "failed to get latest rid, wrong size"); - return -EIO; - } - iov.iov_base = &rid; - iov.iov_len = sizeof(rid); - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, - msg.msg_flags); - if (result <= 0) { - error_sock(sock, "failed to receive latest rid"); - return result; - } - rid = net_order_16(rid); - debug_sock(sock, "latest rid of %s is %d (currently using %d)", - dev->imgname, (int)rid, (int)dev->rid); - dev->update_available = (rid > dev->rid ? true : false); - return result; -} - - -/** - * dnbd3_receive_cmd_latest_rid - select the image - * @sock: the socket where the request is received - * @reply: the reply initialized by dnbd3_receive_cmd - * - * this method should be called directly after the dnbd3_receive_cmd method - * - * if this is the first connection the image name, file size and rid will be set - * if this is a further connection image name, file size and rid will be checked - */ -static int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock, - dnbd3_reply_t *reply) -{ - struct kvec iov; - uint16_t rid; - char *name; - int result; - struct msghdr msg; - serialized_buffer_t payload_buffer; - uint64_t reported_size; - struct dnbd3_device *dev = sock->device; - dnbd3_init_msghdr(msg); - debug_sock(sock, "select image received"); - iov.iov_base = &payload_buffer; - iov.iov_len = reply->size; - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, - msg.msg_flags); - if (result <= 0) { - error_sock(sock, "failed to receive select image %d", result); - return result; - } else if (result != reply->size) { - error_sock(sock, "could not read CMD_SELECT_IMAGE payload on handshake, size is %d and should be %d", - result, reply->size); - return -EIO; - } - - /* handle/check reply payload */ - serializer_reset_read(&payload_buffer, reply->size); - sock->server->protocol_version = serializer_get_uint16(&payload_buffer); - if (sock->server->protocol_version < MIN_SUPPORTED_SERVER) { - error_sock(sock, "server version is lower than min supported version"); - return -EIO; - } - - //TODO compare RID - - name = serializer_get_string(&payload_buffer); - rid = serializer_get_uint16(&payload_buffer); - if (dev->rid != rid && strcmp(name, dev->imgname) != 0) { - error_sock(sock, "server offers image '%s', requested '%s'", - name, dev->imgname); - return -EIO; - } - - reported_size = serializer_get_uint64(&payload_buffer); - if (!dev->reported_size) { - if (reported_size < 4096) { - error_sock(sock, "reported size by server is < 4096"); - return -EIO; - } - dev->reported_size = reported_size; - set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte */ - } else if (dev->reported_size != reported_size) { - error_sock(sock, "reported size by server is %llu but should be %llu", - reported_size, dev->reported_size); - return -EIO; - } - return result; - -} - - /* * Timer and workers */ @@ -838,7 +251,7 @@ static int dnbd3_panic_connect(struct dnbd3_device *dev) } } if (working == NULL) { - return -EIO; + return -ENOENT; } for (i = 0; i < NUMBER_CONNECTIONS; i++) { if (dev->socks[i].server != working) { @@ -925,7 +338,8 @@ static void dnbd3_panic_worker(struct work_struct *work) int sock_alive = 0; dev = container_of(work, struct dnbd3_device, panic_worker); for (i = 0; i < NUMBER_CONNECTIONS; i++) { - if (dev->socks[i].panic) { + if (dev->socks[i].panic + || dev->socks[i].server->failures > 1000) { panic = true; dnbd3_set_rtt_unreachable(dev->socks[i].server); dnbd3_socket_disconnect(&dev->socks[i]); @@ -962,7 +376,6 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, { struct timeval start, end; dnbd3_reply_t reply; - struct request req; int result; uint64_t rtt = RTT_UNREACHABLE; struct dnbd3_sock sock = { @@ -977,7 +390,6 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, error_sock(&sock, "socket connect failed in rtt measurement"); goto error; } - dnbd3_connect_to_req(&req); result = dnbd3_send_request_cmd(&sock, CMD_SELECT_IMAGE); if (result <= 0) { error_sock(&sock, "request select image failed in rtt measurement"); @@ -1138,8 +550,10 @@ static void dnbd3_discovery_worker(struct work_struct *work) for (i = 0; i < NUMBER_SERVERS; i++) { server = &dev->alt_servers[i]; if (server->host.type) { + /* failure count is divided*/ + server->failures = server->failures / 2; if (dnbd3_meassure_rtt(dev, server) <= 0) { - server->failures++; + server->failures = server->failures * 2; warn_server(dev, server, "failed to meassure rtt"); } @@ -1316,8 +730,6 @@ static int dnbd3_socket_connect(struct dnbd3_sock *sock, if (dnbd3_send_request_cmd(sock, CMD_GET_SERVERS) <= 0) { error_sock(sock, "failed to get servers in discovery"); } - /* failure count is divided on each successful connect */ - server->failures = server->failures / 2; return 0; error: diff --git a/src/kernel/net.h b/src/kernel/net.h index 7efe859..b8df66e 100644 --- a/src/kernel/net.h +++ b/src/kernel/net.h @@ -25,10 +25,6 @@ #include "dnbd3.h" -#define dnbd3_is_sock_alive(s) ((s).sock && (s).server) - -int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req, struct dnbd3_cmd *cmd); - int dnbd3_net_connect(struct dnbd3_device *dev); int dnbd3_net_disconnect(struct dnbd3_device *dev); diff --git a/src/kernel/utils.c b/src/kernel/utils.c index 957d69b..164d51a 100644 --- a/src/kernel/utils.c +++ b/src/kernel/utils.c @@ -2,6 +2,7 @@ * This file is part of the Distributed Network Block Device 3 * * Copyright(c) 2011-2012 Johann Latocha <johann@latocha.de> + * Copyright(c) 2019 Frederic Robra <frederic@robra.org> * * This file may be licensed under the terms of of the * GNU General Public License Version 2 (the ``GPL''). @@ -19,6 +20,7 @@ */ #include "utils.h" +#include "clientconfig.h" #include <linux/kernel.h> @@ -40,3 +42,20 @@ void inet_ntoa(struct in_addr addr, char *str) unsigned char *ptr = (unsigned char *) &addr; sprintf(str, "%d.%d.%d.%d", ptr[0] & 0xff, ptr[1] & 0xff, ptr[2] & 0xff, ptr[3] & 0xff); } + +uint64_t dnbd3_average_rtt(struct dnbd3_server *server) +{ + int i, j = 0; + uint64_t avg = 0; + for (i = 0; i < 4; i++) { + avg += server->rtts[i]; + j += server->rtts[i] == 0 ? 0 : 1; + } + if (avg == 0) { + return RTT_UNKNOWN; + } else { + avg = avg / j; + avg += server->failures * avg / 10; + return avg; + } +} diff --git a/src/kernel/utils.h b/src/kernel/utils.h index e54b3cf..d7eb87f 100644 --- a/src/kernel/utils.h +++ b/src/kernel/utils.h @@ -1,7 +1,8 @@ /* * This file is part of the Distributed Network Block Device 3 - * + * Copyright(c) 2011-2012 Johann Latocha <johann@latocha.de> + * Copyright(c) 2019 Frederic Robra <frederic@robra.org> * * This file may be licensed under the terms of of the * GNU General Public License Version 2 (the ``GPL''). @@ -21,9 +22,18 @@ #ifndef UTILS_H_ #define UTILS_H_ +#include "dnbd3.h" + #include <linux/in.h> unsigned int inet_addr(char *str); + void inet_ntoa(struct in_addr addr, char *str); +/** + * dnbd3_average_rtt - calculate the average rtt + * @server: the server to calculate the average rtt + */ +uint64_t dnbd3_average_rtt(struct dnbd3_server *server); + #endif /* UTILS_H_ */ |