/*
 * This file is part of the Distributed Network Block Device 3
 *
 * Copyright(c) 2019 Frederic Robra
 * Parts copyright 2011-2012 Johann Latocha
 *
 * This file may be licensed under the terms of the
 * GNU General Public License Version 2 (the ``GPL'').
 *
 * Software distributed under the License is distributed
 * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
 * express or implied. See the GPL for the specific language
 * governing rights and limitations.
 *
 * You should have received a copy of the GPL along with this
 * program. If not, go to http://www.gnu.org/licenses/gpl.html
 * or write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */

#include "net-txrx.h"
#include "utils.h"
#include "clientconfig.h"
#include "mq.h"

/*
 * NOTE(review): the names of the two system headers included here were lost
 * in the copy under review (only two bare "#include" tokens remained). The
 * headers below cover the block layer and socket calls made in this file;
 * confirm them against the original file.
 */
#include <linux/blk-mq.h>
#include <linux/net.h>

/* driver private request operations used for non-block requests */
#define DNBD3_REQ_OP_SPECIAL REQ_OP_DRV_IN
#define DNBD3_REQ_OP_CONNECT REQ_OP_DRV_OUT

/* mark req as special and store cmd shifted left by REQ_FLAG_BITS */
#define dnbd3_cmd_to_op_special(req, cmd) \
	((req)->cmd_flags = DNBD3_REQ_OP_SPECIAL | ((cmd) << REQ_FLAG_BITS))

/* read back the command stored by dnbd3_cmd_to_op_special */
#define dnbd3_op_special_to_cmd(req) \
	((req)->cmd_flags >> REQ_FLAG_BITS)

/* mark req as a connect (CMD_SELECT_IMAGE) request */
#define dnbd3_connect_to_req(req) \
	((req)->cmd_flags = DNBD3_REQ_OP_CONNECT \
		| ((CMD_SELECT_IMAGE) << REQ_FLAG_BITS))

/* turn req into a read of RTT_BLOCK_SIZE bytes starting at sector 0 */
#define dnbd3_test_block_to_req(req) \
	do { \
		(req)->cmd_flags = REQ_OP_READ; \
		(req)->__data_len = RTT_BLOCK_SIZE; \
		(req)->__sector = 0; \
	} while (0)

#define REQUEST_TIMEOUT \
	(HZ * SOCKET_TIMEOUT_CLIENT_DATA)

/*
 * The whole msghdr is zeroed first so that no field is handed to the
 * socket layer with an indeterminate value.
 */
#define dnbd3_init_msghdr(h) \
	do { \
		memset(&(h), 0, sizeof(h)); \
		(h).msg_name = NULL; \
		(h).msg_namelen = 0; \
		(h).msg_control = NULL; \
		(h).msg_controllen = 0; \
		(h).msg_flags = MSG_WAITALL | MSG_NOSIGNAL; \
	} while (0)

/*
 * Methods for request and receive commands
 */

/**
 * dnbd3_to_handle - convert tag and cookie to handle
 * @tag: the tag to convert, stored in the upper 32 bit
 * @cookie: the cookie to convert, stored in the lower 32 bit
 */
static inline uint64_t dnbd3_to_handle(uint32_t tag, uint32_t cookie)
{
	return ((uint64_t) tag << 32) | cookie;
}

/**
 * dnbd3_tag_from_handle - get tag from handle
 * @handle: the handle
 */
static inline uint32_t dnbd3_tag_from_handle(uint64_t handle)
{
	return (uint32_t)(handle >> 32);
}

/**
 * dnbd3_cookie_from_handle - get cookie from handle
 * @handle: the handle
 */
static inline uint32_t dnbd3_cookie_from_handle(uint64_t handle)
{
	return (uint32_t) handle;
}

/**
 * dnbd3_send_request - send a request
 * @sock: the socket where the request is sent
 * @req: the request to send
 * @cmd: optional - the dnbd3_cmd from mq
 *
 * the tx_lock of the socket must be held
 *
 * Returns the number of bytes sent on success and a negative error code
 * otherwise. sock->pending points to @req while the request is processed
 * and is reset to NULL on every return path.
 */
int dnbd3_send_request(struct dnbd3_sock *sock, struct request *req,
		struct dnbd3_cmd *cmd)
{
	dnbd3_request_t request;
	struct msghdr msg;
	struct kvec iov[2];
	size_t iov_num = 1;
	size_t lng;
	int result;
	uint64_t handle;
	serialized_buffer_t payload_buffer;

	sock->pending = req;
	dnbd3_init_msghdr(msg);

	/* fields not set below (e.g. offset) must not carry stack garbage */
	memset(&request, 0, sizeof(request));
	request.magic = dnbd3_packet_magic;

	switch (req_op(req)) {
	case REQ_OP_READ:
		request.cmd = CMD_GET_BLOCK;
		request.offset = blk_rq_pos(req) << 9; /* sectors * 512 */
		request.size = blk_rq_bytes(req);
		break;
	case DNBD3_REQ_OP_SPECIAL:
		debug_sock(sock, "request operation special");
		request.cmd = dnbd3_op_special_to_cmd(req);
		request.size = 0;
		break;
	case DNBD3_REQ_OP_CONNECT:
		debug_sock(sock, "request operation connect to %s",
				sock->device->imgname);
		request.cmd = CMD_SELECT_IMAGE;
		serializer_reset_write(&payload_buffer);
		serializer_put_uint16(&payload_buffer, PROTOCOL_VERSION);
		serializer_put_string(&payload_buffer, sock->device->imgname);
		serializer_put_uint16(&payload_buffer, sock->device->rid);
		serializer_put_uint8(&payload_buffer, 0); /* is_server = false */
		iov[1].iov_base = &payload_buffer;
		request.size = serializer_get_written_length(&payload_buffer);
		iov[1].iov_len = request.size;
		iov_num = 2;
		break;
	default:
		/* leave through the common exit so sock->pending is cleared */
		result = -EIO;
		goto error;
	}

	sock->cookie++;
	if (cmd != NULL) {
		uint32_t tag = blk_mq_unique_tag(req);

		cmd->cookie = sock->cookie;
		handle = dnbd3_to_handle(tag, sock->cookie);
	} else {
		handle = sock->cookie;
	}
	memcpy(&request.handle, &handle, sizeof(handle));

	fixup_request(request);
	iov[0].iov_base = &request;
	iov[0].iov_len = sizeof(request);
	lng = iov[0].iov_len;
	if (iov_num == 2) {
		lng += iov[1].iov_len;
	}

	if (unlikely(!sock->sock)) {
		warn_sock(sock, "socket was shutdown while sending");
		result = -EIO;
		goto error;
	}

	result = kernel_sendmsg(sock->sock, &msg, iov, iov_num, lng);
	if (result < 0 || (size_t) result != lng) {
		error_sock(sock, "connection to server lost");
		/*
		 * requeueing the command here triggers:
		 * kernel BUG at block/blk-mq.c:704!
		 * the command will be requeued by its timeout instead
		 */
		sock->server->failures++;
		/* a partial send is a failure, do not report it as success */
		if (result >= 0) {
			result = -EIO;
		}
	}

error:
	sock->pending = NULL;
	return result;
}

/**
 * dnbd3_send_request_cmd - send a dnbd3 cmd
 * @sock: the socket where the request is sent
 * @dnbd3_cmd: the dnbd3 cmd to send
 *
 * the tx_lock of the socket must be held
 *
 * A temporary request is allocated, prepared for @dnbd3_cmd, passed to
 * dnbd3_send_request without a dnbd3_cmd and freed again.
 *
 * Returns the result of dnbd3_send_request, -EINVAL for an unsupported
 * command and -EIO if the allocation failed or the socket is gone.
 */
int dnbd3_send_request_cmd(struct dnbd3_sock *sock, uint16_t dnbd3_cmd)
{
	int result;
	/* zeroed, so fields the macros below do not set are well defined */
	struct request *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req) {
		error_sock(sock, "kmalloc failed");
		result = -EIO;
		goto error;
	}

	switch (dnbd3_cmd) {
	case CMD_KEEPALIVE:
	case CMD_GET_SERVERS:
		dnbd3_cmd_to_op_special(req, dnbd3_cmd);
		break;
	case CMD_SELECT_IMAGE:
		dnbd3_connect_to_req(req);
		break;
	case CMD_GET_BLOCK:
		dnbd3_test_block_to_req(req);
		break;
	default:
		warn_sock(sock, "unsupported command %d", dnbd3_cmd);
		result = -EINVAL;
		goto error;
	}

	if (unlikely(!sock->sock)) {
		result = -EIO;
		goto error;
	}
	result = dnbd3_send_request(sock, req, NULL);

error:
	kfree(req); /* kfree(NULL) is a no-op */
	return result;
}

/**
 * dnbd3_receive_cmd - receive a command
 * @sock: the socket where the request is received
 * @reply: an unused reply will be filled with the reply of the server
 *
 * this method should be called directly after the dnbd3_send_request_ method
 *
 * Returns the number of bytes received, the result of kernel_recvmsg if it
 * is <= 0, or -EIO if the header is incomplete, has a wrong magic or
 * carries command 0.
 */
int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply)
{
	int result;
	struct msghdr msg;
	struct kvec iov;

	dnbd3_init_msghdr(msg);
	iov.iov_base = reply;
	iov.iov_len = sizeof(dnbd3_reply_t);
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0) {
		return result;
	}
	if (result != sizeof(dnbd3_reply_t)) {
		/* the remaining header fields would be uninitialized */
		error_sock(sock, "receive cmd incomplete header");
		return -EIO;
	}

	/* convert the header that was just received into host byte order */
	fixup_reply((*reply));

	if (reply->magic != dnbd3_packet_magic) {
		error_sock(sock, "receive cmd wrong magic packet");
		return -EIO;
	}

	if (reply->cmd == 0) {
		error_sock(sock, "receive command was 0");
		return -EIO;
	}
	return result;
}

/**
 * dnbd3_clear_socket - read and discard payload from the socket
 * @sock: the socket to read from
 * @reply: unused
 * @remaining: number of bytes to discard
 *
 * Returns the result of the last kernel_recvmsg (0 if nothing had to be
 * read) or -EIO if the scratch buffer could not be allocated.
 */
static int dnbd3_clear_socket(struct dnbd3_sock *sock, dnbd3_reply_t *reply,
		int remaining)
{
	int result = 0;
	char *buf;
	struct kvec iov;
	struct msghdr msg;

	dnbd3_init_msghdr(msg);
	warn_sock(sock, "clearing socket %d bytes", remaining);
	buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL);
	if (!buf) {
		error_sock(sock, "kmalloc failed");
		return -EIO;
	}

	while (remaining > 0) {
		/*
		 * never ask for more than what is left of the payload:
		 * with MSG_WAITALL a larger read would consume bytes of the
		 * next reply or block until they arrive
		 */
		size_t chunk = (size_t) remaining;

		if (chunk > RTT_BLOCK_SIZE) {
			chunk = RTT_BLOCK_SIZE;
		}
		iov.iov_base = buf;
		iov.iov_len = chunk;
		result = kernel_recvmsg(sock->sock, &msg, &iov, 1, chunk,
				msg.msg_flags);
		if (result <= 0) {
			goto error;
		}
		remaining -= result;
	}
	debug_sock(sock, "cleared socket");

error:
	kfree(buf);
	return result;
}

/**
 * dnbd3_receive_cmd_get_block_mq - receive a block for mq
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * The request is looked up by the tag which was encoded in the handle by
 * the send request method and the payload is received directly into the
 * pages of the segments of that request. The cookie of the handle has to
 * match the cookie stored in the command.
 *
 * On an unexpected reply the payload is discarded and -EIO is returned. If
 * receiving a segment fails the command is requeued.
 *
 * NOTE(review): reply->size is not compared with the size of the request -
 * confirm that the server always answers with exactly the requested length.
 */
int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct dnbd3_cmd *cmd;
	struct msghdr msg;
	struct request *req = NULL;
	struct kvec iov;
	struct req_iterator iter;
	struct bio_vec bvec_inst;
	struct dnbd3_device *dev = sock->device;
	struct bio_vec *bvec = &bvec_inst;
	sigset_t blocked, oldset;
	void *kaddr;
	uint32_t tag, cookie;
	uint16_t hwq;
	uint32_t remaining = reply->size;
	int result = 0;
	uint64_t handle;

	dnbd3_init_msghdr(msg);

	memcpy(&handle, &reply->handle, sizeof(handle));
	cookie = dnbd3_cookie_from_handle(handle);
	tag = dnbd3_tag_from_handle(handle);

	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < dev->tag_set.nr_hw_queues) {
		req = blk_mq_tag_to_rq(dev->tag_set.tags[hwq],
				blk_mq_unique_tag_to_tag(tag));
	}

	if (!req || !blk_mq_request_started(req)) {
		error_sock(sock, "unexpected reply (%u) %p", tag, req);
		if (req) {
			cmd = blk_mq_rq_to_pdu(req);
			mutex_lock(&cmd->lock);
			debug_sock(sock, "requeue request");
			dnbd3_requeue_cmd(cmd);
			mutex_unlock(&cmd->lock);
		}
		dnbd3_clear_socket(sock, reply, remaining);
		return -EIO;
	}

	cmd = blk_mq_rq_to_pdu(req);
	mutex_lock(&cmd->lock);
	if (cmd->cookie != cookie) {
		error_sock(sock,
				"double reply on req %p, cookie %u, handle cookie %u",
				req, cmd->cookie, cookie);
		mutex_unlock(&cmd->lock);
		dnbd3_clear_socket(sock, reply, remaining);
		return -EIO;
	}

	rq_for_each_segment(bvec_inst, req, iter) {
		/* only SIGKILL may interrupt the receive */
		siginitsetinv(&blocked, sigmask(SIGKILL));
		sigprocmask(SIG_SETMASK, &blocked, &oldset);

		kaddr = kmap(bvec->bv_page) + bvec->bv_offset;
		iov.iov_base = kaddr;
		iov.iov_len = bvec->bv_len;
		result = kernel_recvmsg(sock->sock, &msg, &iov, 1,
				bvec->bv_len, msg.msg_flags);

		kunmap(bvec->bv_page);
		sigprocmask(SIG_SETMASK, &oldset, NULL);

		if (result != bvec->bv_len) {
			error_sock(sock,
					"could not receive from net to block layer");
			dnbd3_requeue_cmd(cmd);
			mutex_unlock(&cmd->lock);
			if (result < 0) {
				return result;
			}
			/* drop what is left of the payload after a short read */
			dnbd3_clear_socket(sock, reply, remaining - result);
			return -EIO;
		}
		remaining -= result;
	}
	mutex_unlock(&cmd->lock);
	dnbd3_end_cmd(cmd, 0);
	return result;
}

/**
 * dnbd3_receive_cmd_get_block_test - receive a test block
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * the received data is just thrown away
 *
 * Returns RTT_BLOCK_SIZE on success, -ENOMEM if no buffer could be
 * allocated, -EIO if a different number of bytes was received and the
 * result of kernel_recvmsg if it is <= 0.
 */
int dnbd3_receive_cmd_get_block_test(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct msghdr msg;
	struct kvec iov;
	int result;
	char *buf = kmalloc(reply->size, GFP_KERNEL);

	if (!buf) {
		error_sock(sock, "kmalloc failed");
		return -ENOMEM;
	}

	dnbd3_init_msghdr(msg);
	iov.iov_base = buf;
	iov.iov_len = reply->size;
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, reply->size,
			msg.msg_flags);
	if (result != RTT_BLOCK_SIZE) {
		error_sock(sock, "receive test block failed");
		/* a wrong length must not look like a successful receive */
		if (result > 0) {
			result = -EIO;
		}
	}

	kfree(buf);
	return result;
}

/**
 * dnbd3_receive_cmd_get_servers - receive new servers
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * the new servers are copied to dnbd3_device.new_servers and
 * dnbd3_device.new_servers_num is set accordingly
 *
 * If dnbd3_device.use_server_provided_alts is not set the payload is only
 * removed from the socket. @reply is overwritten while doing so. The
 * device_lock of the device is held for the whole call.
 */
int dnbd3_receive_cmd_get_servers(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct msghdr msg;
	struct kvec iov;
	struct dnbd3_device *dev = sock->device;
	int result = 1; /* positive result if there is no payload at all */
	int count, remaining;

	dnbd3_init_msghdr(msg);

	debug_sock(sock, "get servers received");
	mutex_lock(&dev->device_lock);
	if (!dev->use_server_provided_alts) {
		remaining = reply->size;
		goto consume_payload;
	}
	dev->new_servers_num = 0;
	count = MIN(NUMBER_SERVERS,
			reply->size / sizeof(dnbd3_server_entry_t));

	if (count != 0) {
		iov.iov_base = dev->new_servers;
		iov.iov_len = count * sizeof(dnbd3_server_entry_t);
		result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
				msg.msg_flags);
		if (result <= 0) {
			error_sock(sock, "failed to receive get servers %d",
					result);
			mutex_unlock(&dev->device_lock);
			return result;
		} else if (result != (count * sizeof(dnbd3_server_entry_t))) {
			error_sock(sock, "failed to get servers");
			mutex_unlock(&dev->device_lock);
			return -EIO;
		}
		dev->new_servers_num = count;
	}
	/*
	 * if there were more servers than accepted, remove the remaining data
	 * from the socket buffer
	 * abuse the reply struct as the receive buffer
	 */
	remaining = reply->size - (count * sizeof(dnbd3_server_entry_t));

consume_payload:
	while (remaining > 0) {
		count = MIN(sizeof(dnbd3_reply_t), remaining);
		iov.iov_base = reply;
		iov.iov_len = count;
		result = kernel_recvmsg(sock->sock, &msg, &iov, 1, count,
				msg.msg_flags);
		if (result <= 0) {
			error_sock(sock,
					"failed to receive payload from get servers");
			mutex_unlock(&dev->device_lock);
			return result;
		}
		remaining -= result;
	}
	mutex_unlock(&dev->device_lock);
	return result;
}

/**
 * dnbd3_receive_cmd_latest_rid - receive latest rid
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * dnbd3_device.update_available is set if the received RID is greater than
 * the RID of the device
 */
int dnbd3_receive_cmd_latest_rid(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct kvec iov;
	uint16_t rid;
	int result;
	struct msghdr msg;
	struct dnbd3_device *dev = sock->device;

	dnbd3_init_msghdr(msg);
	debug_sock(sock, "latest rid received");

	if (reply->size != 2) {
		error_sock(sock, "failed to get latest rid, wrong size");
		return -EIO;
	}
	iov.iov_base = &rid;
	iov.iov_len = sizeof(rid);
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0) {
		error_sock(sock, "failed to receive latest rid");
		return result;
	}
	if (result != sizeof(rid)) {
		/* half of rid would be uninitialized */
		error_sock(sock, "failed to receive latest rid");
		return -EIO;
	}
	rid = net_order_16(rid);
	debug_sock(sock, "latest rid of %s is %d (currently using %d)",
			dev->imgname, (int)rid, (int)dev->rid);
	dev->update_available = (rid > dev->rid);
	return result;
}

/**
 * dnbd3_receive_cmd_select_image - select the image
 * @sock: the socket where the request is received
 * @reply: the reply initialized by dnbd3_receive_cmd
 *
 * this method should be called directly after the dnbd3_receive_cmd method
 *
 * The protocol version of the server is stored and checked against
 * MIN_SUPPORTED_SERVER. If the device has no reported size yet, the size
 * reported by the server is stored and the capacity of the disk is set,
 * otherwise the size reported by the server has to match the stored one.
 */
int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock,
		dnbd3_reply_t *reply)
{
	struct kvec iov;
	uint16_t rid;
	char *name;
	int result;
	struct msghdr msg;
	serialized_buffer_t payload_buffer;
	uint64_t reported_size;
	struct dnbd3_device *dev = sock->device;

	dnbd3_init_msghdr(msg);
	debug_sock(sock, "select image received");

	/*
	 * reply->size comes from the server and is used as receive length
	 * for a buffer on the stack, so it has to be bounded first
	 */
	if (reply->size > sizeof(payload_buffer)) {
		error_sock(sock, "select image payload too large");
		return -EIO;
	}

	iov.iov_base = &payload_buffer;
	iov.iov_len = reply->size;
	result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len,
			msg.msg_flags);
	if (result <= 0) {
		error_sock(sock, "failed to receive select image %d", result);
		return result;
	} else if (result != reply->size) {
		error_sock(sock,
				"could not read CMD_SELECT_IMAGE payload on handshake, size is %d and should be %d",
				result, reply->size);
		return -EIO;
	}

	/* handle/check reply payload */
	serializer_reset_read(&payload_buffer, reply->size);
	sock->server->protocol_version =
			serializer_get_uint16(&payload_buffer);
	if (sock->server->protocol_version < MIN_SUPPORTED_SERVER) {
		error_sock(sock,
				"server version is lower than min supported version");
		return -EIO;
	}

	name = serializer_get_string(&payload_buffer);
	rid = serializer_get_uint16(&payload_buffer);
	/* NOTE(review): presumably NULL on a malformed payload - confirm */
	if (!name) {
		error_sock(sock, "server did not supply an image name");
		return -EIO;
	}
	/*
	 * TODO compare RID
	 * NOTE(review): the reply is only rejected if both the rid and the
	 * name differ - confirm that this is intended
	 */
	if (dev->rid != rid && strcmp(name, dev->imgname) != 0) {
		error_sock(sock, "server offers image '%s', requested '%s'",
				name, dev->imgname);
		return -EIO;
	}

	reported_size = serializer_get_uint64(&payload_buffer);
	if (!dev->reported_size) {
		if (reported_size < 4096) {
			error_sock(sock, "reported size by server is < 4096");
			return -EIO;
		}
		dev->reported_size = reported_size;
		set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte */
	} else if (dev->reported_size != reported_size) {
		error_sock(sock,
				"reported size by server is %llu but should be %llu",
				reported_size, dev->reported_size);
		return -EIO;
	}
	return result;
}