From e9c43f1f8a391997d5a8041fdd3bd86b43bdf767 Mon Sep 17 00:00:00 2001 From: Frederic Robra Date: Tue, 27 Aug 2019 17:00:40 +0200 Subject: optimized behaviour with failures --- src/kernel/net.c | 123 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 44 deletions(-) (limited to 'src/kernel/net.c') diff --git a/src/kernel/net.c b/src/kernel/net.c index 79c860f..1207d0a 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -73,6 +73,22 @@ static int dnbd3_socket_connect(struct dnbd3_sock *sock, struct dnbd3_server * server); static int dnbd3_socket_disconnect(struct dnbd3_sock *sock); +static uint64_t dnbd3_average_rtt(struct dnbd3_server *server) +{ + int i, j = 0; + uint64_t avg = 0; + for (i = 0; i < 4; i++) { + avg += server->rtts[i]; + j += server->rtts[i] == 0 ? 0 : 1; + } + if (avg == 0) { + return RTT_UNKNOWN; + } else { + avg = avg / j; + avg += server->failures * avg / 10; + return avg; + } +} /* * Methods for request and receive commands @@ -267,6 +283,42 @@ static int dnbd3_receive_cmd(struct dnbd3_sock *sock, dnbd3_reply_t *reply) return result; } +static int dnbd3_clear_socket(struct dnbd3_sock *sock, dnbd3_reply_t *reply, + int remaining) +{ + int result = 0; + char *buf; + struct kvec iov; + struct msghdr msg; + dnbd3_init_msghdr(msg); + warn_sock(sock, "clearing socket %d bytes", remaining); + buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL); + if (!buf) { + error_sock(sock, "kmalloc failed"); + return -EIO; + } + /* hold the tx_lock so no new requests are send */ + mutex_lock(&sock->tx_lock); + iov.iov_base = buf; + iov.iov_len = RTT_BLOCK_SIZE; + while (remaining > 0) { + result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, + msg.msg_flags); + if (result <= 0) { + goto error; + } + remaining -= result; + } + + debug_sock(sock, "cleared socket"); +error: + mutex_unlock(&sock->tx_lock); + if (buf) { + kfree(buf); + } + return result; +} + /** * dnbd3_receive_cmd_get_block_mq - receive a block for mq * @sock: the socket where the request is received @@ -312,8 +364,8 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, debug_sock(sock, "requeue request"); dnbd3_requeue_cmd(blk_mq_rq_to_pdu(req)); } -// return -EIO; - goto clear_socket; + dnbd3_clear_socket(sock, reply, remaining); + return -EIO; } cmd = blk_mq_rq_to_pdu(req); @@ -322,8 +374,8 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, error_sock(sock, "double reply on req %p, cookie %u, handle cookie %u", req, cmd->cookie, cookie); mutex_unlock(&cmd->lock); -// return -EIO; - goto clear_socket; + dnbd3_clear_socket(sock, reply, remaining); + return -EIO; } rq_for_each_segment(bvec_inst, req, iter) { @@ -343,7 +395,8 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, dnbd3_requeue_cmd(cmd); mutex_unlock(&cmd->lock); if (result >= 0) { - goto clear_socket; + dnbd3_clear_socket(sock, reply, remaining); + return -EIO; } else { return result; } @@ -355,35 +408,10 @@ static int dnbd3_receive_cmd_get_block_mq(struct dnbd3_sock *sock, mutex_unlock(&cmd->lock); dnbd3_end_cmd(cmd, 0); return result; -clear_socket: - warn_sock(sock, "caught an error while receiving block, clearing buffer"); - char *buf = kmalloc(RTT_BLOCK_SIZE, GFP_KERNEL); - if (!buf) { - error_sock(sock, "kmalloc failed"); - return -EIO; - } - - iov.iov_base = buf; - iov.iov_len = RTT_BLOCK_SIZE; - while (remaining > 0) { - result = kernel_recvmsg(sock->sock, &msg, &iov, 1, iov.iov_len, - msg.msg_flags); - if (result <= 0) { - goto error; - } - remaining -= result; - } - - debug_sock(sock, "cleared buffer %d bytes, reply size is %d", result, - reply->size); -error: - if (buf) { - kfree(buf); - } - return -EIO; } + /** * dnbd3_receive_cmd_get_block_test - receive a test block * @sock: the socket where the request is received @@ -620,26 +648,29 @@ static int dnbd3_receive_cmd_select_image(struct dnbd3_sock *sock, static void dnbd3_timer(struct timer_list *arg) { struct dnbd3_device *dev; + unsigned long busy; int i; dev = container_of(arg, struct dnbd3_device, timer); queue_work(dnbd3_wq, &dev->panic_worker); + busy = dnbd3_is_mq_busy(dev); - if (!dnbd3_is_mq_busy(dev)) { - if (dev->timer_count % TIMER_INTERVAL_KEEPALIVE_PACKET == 0) { - for (i = 0; i < NUMBER_CONNECTIONS; i++) { - if (dnbd3_is_sock_alive(dev->socks[i])) { - queue_work(dnbd3_wq, &dev->socks[i].keepalive_worker); - } + if (dev->timer_count % TIMER_INTERVAL_KEEPALIVE_PACKET == 0) { + for (i = 0; i < NUMBER_CONNECTIONS; i++) { + if (!test_bit(i, &busy) && dnbd3_is_sock_alive(dev->socks[i])) { + queue_work(dnbd3_wq, &dev->socks[i].keepalive_worker); } } + } + + if (!busy) { /* start after 4 seconds */ if (dev->timer_count % TIMER_INTERVAL_PROBE_NORMAL == 4) { queue_work(dnbd3_wq, &dev->discovery_worker); } - dev->timer_count++; } + dev->timer_count++; dev->timer.expires = jiffies + HZ; add_timer(&dev->timer); } @@ -739,8 +770,10 @@ static void dnbd3_keepalive_worker(struct work_struct *work) { struct dnbd3_sock *sock; sock = container_of(work, struct dnbd3_sock, keepalive_worker); - debug_sock(sock, "starting keepalive worker"); - dnbd3_send_request_cmd(sock, CMD_KEEPALIVE); + if (sock->server != NULL && !sock->panic) { + debug_sock(sock, "starting keepalive worker"); + dnbd3_send_request_cmd(sock, CMD_KEEPALIVE); + } } /** @@ -754,9 +787,9 @@ static int dnbd3_compare_servers(const void *lhs, const void *rhs) { struct dnbd3_server *rhs_server = *((struct dnbd3_server **) rhs); l = lhs_server->host.type != 0 ? lhs_server->avg_rtt - : RTT_UNREACHABLE + 1; + : RTT_UNREACHABLE; r = rhs_server->host.type != 0 ? rhs_server->avg_rtt - : RTT_UNREACHABLE + 1; + : RTT_UNREACHABLE; if (l < r) { return -1; } else if (l > r) { @@ -998,7 +1031,7 @@ static int dnbd3_meassure_rtt(struct dnbd3_device *dev, error: sock.server->rtts[dev->discovery_count % 4] = rtt; - sock.server->avg_rtt = dnbd3_avg_rtt(sock.server); + sock.server->avg_rtt = dnbd3_average_rtt(sock.server); if (result <= 0) { server->failures++; } @@ -1279,10 +1312,12 @@ static int dnbd3_socket_connect(struct dnbd3_sock *sock, INIT_WORK(&sock->keepalive_worker, dnbd3_keepalive_worker); - /* TODO not on every connect? request alternative servers receiver will handle this */ + /* request alternative servers receiver will handle this */ if (dnbd3_send_request_cmd(sock, CMD_GET_SERVERS) <= 0) { error_sock(sock, "failed to get servers in discovery"); } + /* failure count is divided on each successful connect */ + server->failures = server->failures / 2; return 0; error: -- cgit v1.2.3-55-g7522