diff options
-rw-r--r-- | .github/workflows/test-cow-fuse.yml | 2 | ||||
-rw-r--r-- | cmake/FindKernelHeaders.cmake | 4 | ||||
-rw-r--r-- | src/fuse/cowDoc/readme.md | 16 | ||||
-rw-r--r-- | src/fuse/cowfile.c | 31 | ||||
-rw-r--r-- | src/kernel/net.c | 70 |
5 files changed, 82 insertions, 41 deletions
diff --git a/.github/workflows/test-cow-fuse.yml b/.github/workflows/test-cow-fuse.yml index 9b42532..46b6933 100644 --- a/.github/workflows/test-cow-fuse.yml +++ b/.github/workflows/test-cow-fuse.yml @@ -146,6 +146,6 @@ jobs: /mnt/work/logs/randomLog1.out \ /mnt/work/logs/randomTestLog1.out \ /mnt/work/logs/randomLog2.out \ - /mnt/work/tmp/status.txt \ + /mnt/work/tmp/status \ ${{ github.workspace }}/build/src/server/dnbd3.log \ ${{ github.workspace }}/../cow_server/cow_merger_service/publish/log.out diff --git a/cmake/FindKernelHeaders.cmake b/cmake/FindKernelHeaders.cmake index c04243e..ce71aa8 100644 --- a/cmake/FindKernelHeaders.cmake +++ b/cmake/FindKernelHeaders.cmake @@ -31,10 +31,10 @@ find_path(KernelHeaders_INCLUDE_DIR # get Linux kernel headers version file(READ "${KERNEL_BUILD_DIR}/include/generated/utsrelease.h" tmpvar) -string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION ${tmpvar}) +string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION "${tmpvar}") if("${KernelHeaders_VERSION}" EQUAL "") file(READ "${KERNEL_BUILD_DIR}/include/config/kernel.release" tmpvar) - string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION ${tmpvar}) + string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION "${tmpvar}") endif() if("${KernelHeaders_VERSION}" EQUAL "") string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION ${KernelHeaders_INCLUDE_DIR}) diff --git a/src/fuse/cowDoc/readme.md b/src/fuse/cowDoc/readme.md index 2b752bb..51a0052 100644 --- a/src/fuse/cowDoc/readme.md +++ b/src/fuse/cowDoc/readme.md @@ -24,12 +24,12 @@ A typical use case is updating or adding software to an existing image. - `- L <path>` Similar to `-c <path>`, but instead of creating a new session, an existing one is loaded from the specified path. - `-m` the client requests a merge after the image has been unmounted and all changes have been uploaded. -- `--cowStatFile` creates a status file at the same location as the data and meta file. The file contains information about the current session, for more information see [here](#status). -- `--cowStatStdout` similar to `--cowStatFile` but the information will be printed in the stdout. +- `--cow-stats-file` creates a status file at the same location as the data and meta file. The file contains information about the current session, for more information see [here](#status). +- `--cow-stats-stdout` similar to `--cow-stats-file` but the information will be printed in the stdout. Example parameters for creating a new cow session: ``` -./dnbd3-fuse "/home/user/VMs/mount" -f -h localhost -i imagename -c "/home/user/temp" -C "192.168.178.20:5000" --cowStatStdout -m +./dnbd3-fuse "/home/user/VMs/mount" -f -h localhost -i imagename -c "/home/user/temp" -C "192.168.178.20:5000" --cow-stats-stdout -m ``` @@ -117,11 +117,11 @@ There are two different limits for the number of parallel uploads in the [config ## Files -When a new CoW session is started, a new `meta`, `data` and, if so set in the command line arguments, a `status.txt` file is created. +When a new CoW session is started, a new `meta`, `data` and, if so set in the command line arguments, a `status` file is created. ### status -The file `status.txt` can be activated with the command line parameter `--cowStatFile`. +The file `status` can be activated with the command line parameter `--cow-stats-file`. The file will contain the following: @@ -147,9 +147,9 @@ It is `done` when the image has been unmounted and all clusters have been upload - `ulspeed` the current upload speed in kb/s. Once all clusters have been uploaded, the status is set to `done`. -If you define `COW_DUMP_BLOCK_UPLOADS`, a list of all clusters, sorted by the number of uploads, is copied to the status.txt file after the cluster upload is completed. +If you define `COW_DUMP_BLOCK_UPLOADS`, a list of all clusters, sorted by the number of uploads, is copied to the status file after the cluster upload is completed. -With the command line parameter `--cowStatStdout` the same output of the stats file will be printed in stdout. +With the command line parameter `--cow-stats-stdout` the same output of the stats file will be printed in stdout. ### meta @@ -231,7 +231,7 @@ The following configuration variables have been added to `config/cow.h`. - `COW_MIN_UPLOAD_DELAY` sets the minimum time in seconds that must have elapsed since the last change to a cow cluster before it is uploaded. This value can be fine-tuned. A larger value usually reduces redundant uploading of clusters. A smaller value reduces the time for the final upload after the image has been unmounted. -If you set `COW_DUMP_BLOCK_UPLOADS` and set the command line parameter `--cowStatFile`, then a list of all clusters, sorted by the number of uploads, will be written to the status.txt file after the cluster upload is complete. +If you set `COW_DUMP_BLOCK_UPLOADS` and set the command line parameter `--cow-stats-file`, then a list of all clusters, sorted by the number of uploads, will be written to the status file after the cluster upload is complete. This can help in fine-tuning `COW_MIN_UPLOAD_DELAY`. - `COW_STATS_UPDATE_TIME` defines the update frequency of the stdout output/statistics file in seconds. Setting it too low could affect performance as a loop runs over all clusters. - `COW_MAX_PARALLEL_BACKGROUND_UPLOADS` defines the maximum number of parallel cluster uploads. This number is used when the image is still mounted and the user is still using it. diff --git a/src/fuse/cowfile.c b/src/fuse/cowfile.c index b183202..a5d6e4d 100644 --- a/src/fuse/cowfile.c +++ b/src/fuse/cowfile.c @@ -261,6 +261,10 @@ static size_t curlReadCallbackUploadBlock( char *ptr, size_t size, size_t nmemb, { cow_curl_read_upload_t *uploadBlock = (cow_curl_read_upload_t *)userdata; size_t len = 0; + + if ( size * nmemb < DNBD3_BLOCK_SIZE ) { + logadd( LOG_INFO, "Wow, curl read callback with %d bytes left", (int)( size * nmemb ) ); + } // Check if we're still in the bitfield if ( uploadBlock->position < COW_BITFIELD_SIZE ) { size_t lenCpy = MIN( COW_BITFIELD_SIZE - uploadBlock->position, size * nmemb ); @@ -276,8 +280,9 @@ static size_t curlReadCallbackUploadBlock( char *ptr, size_t size, size_t nmemb, ssize_t spaceLeft = ( size * nmemb ) - len; // Only read blocks that have been written to the cluster. Saves bandwidth. Not optimal since // we do a lot of 4k/32k reads, but it's not that performance critical I guess... - while ( spaceLeft >= (ssize_t)DNBD3_BLOCK_SIZE && inClusterOffset < (off_t)COW_DATA_CLUSTER_SIZE ) { + while ( spaceLeft > 0 && inClusterOffset < (off_t)COW_DATA_CLUSTER_SIZE ) { int bitNumber = (int)( inClusterOffset / DNBD3_BLOCK_SIZE ); + uint32_t blockOffset = (uint32_t)( inClusterOffset % DNBD3_BLOCK_SIZE ); size_t readSize; // Small performance hack: All bits one in a byte, do a 32k instead of 4k read // TODO: preadv with a large iov, reading unchanged blocks into a trash-buffer @@ -288,8 +293,13 @@ static size_t curlReadCallbackUploadBlock( char *ptr, size_t size, size_t nmemb, } else { readSize = DNBD3_BLOCK_SIZE; } + readSize -= blockOffset; + if ( (ssize_t)readSize > spaceLeft ) { + readSize = spaceLeft; + } // If handling single block, check bits in our copy, as global bitfield could change - if ( readSize != DNBD3_BLOCK_SIZE || checkBit( uploadBlock->bitfield, bitNumber ) ) { + // If uploading 8 blocks at once, check already happened above + if ( readSize > DNBD3_BLOCK_SIZE || checkBit( uploadBlock->bitfield, bitNumber ) ) { ssize_t lengthRead = pread( cow.fdData, ( ptr + len ), readSize, uploadBlock->cluster->offset + inClusterOffset ); if ( lengthRead == -1 ) { @@ -424,9 +434,10 @@ static void dumpBlockUploads() } qsort( blockUploads, currentBlock, sizeof( cow_cluster_statistics_t ), cmpfunc ); - dprintf( cow.fdStats, "\n\nclusterNumber: uploads\n==Block Upload Dump===\n" ); + dprintf( cow.fdStats, "\n\n[BlockStats]\n" ); for ( uint64_t i = 0; i < currentBlock; i++ ) { - dprintf( cow.fdStats, "%" PRIu64 ": %" PRIu64 " \n", blockUploads[i].clusterNumber, blockUploads[i].uploads ); + dprintf( cow.fdStats, "%" PRIu64 "=%" PRIu64 " \n", + blockUploads[i].clusterNumber, blockUploads[i].uploads ); } } #endif @@ -454,6 +465,7 @@ static void updateCowStatsFile( uint64_t inQueue, uint64_t modified, uint64_t id int len = snprintf( buffer, sizeof buffer, "[General]\n" + "uuid=%s\n" "state=%s\n" "inQueue=%" PRIu64 "\n" "modifiedClusters=%" PRIu64 "\n" @@ -461,6 +473,7 @@ static void updateCowStatsFile( uint64_t inQueue, uint64_t modified, uint64_t id "totalClustersUploaded=%" PRIu64 "\n" "activeUploads=%i\n" "%s%s\n", + metadata->uuid, state, inQueue, modified, idle, totalBlocksUploaded, activeUploads, COW_SHOW_UL_SPEED ? "avgSpeedKb=" : "", speedBuffer ); @@ -478,12 +491,12 @@ static void updateCowStatsFile( uint64_t inQueue, uint64_t modified, uint64_t id // Pad with a bunch of newlines so we don't change the file size all the time ssize_t extra = MIN( 20, (ssize_t)sizeof(buffer) - len - 1 ); memset( buffer + len, '\n', extra ); - lseek( cow.fdStats, 43, SEEK_SET ); - if ( write( cow.fdStats, buffer, len + extra ) != len + extra ) { + if ( pwrite( cow.fdStats, buffer, len + extra, 0 ) != len + extra ) { logadd( LOG_WARNING, "Could not update cow status file" ); } #ifdef COW_DUMP_BLOCK_UPLOADS if ( !uploadLoop && uploadLoopDone ) { + lseek( cow.fdStats, len + extra, SEEK_SET ); dumpBlockUploads(); } #endif @@ -549,7 +562,7 @@ static size_t curlHeaderCallbackUploadBlock( char *buffer, size_t size, size_t n if ( len < 13 ) return len; for ( int i = 0; i < 11; ++i ) { - buffer[i] |= 0x60; + buffer[i] |= 0x20; } if ( strncmp( buffer, "retry-after:", 12 ) != 0 ) return len; @@ -883,10 +896,10 @@ static bool createCowStatsFile( char *path ) { char pathStatus[strlen( path ) + 12]; - snprintf( pathStatus, strlen( path ) + 12, "%s%s", path, "/status.txt" ); + snprintf( pathStatus, strlen( path ) + 12, "%s%s", path, "/status" ); char buffer[100]; - int len = snprintf( buffer, 100, "uuid=%s\nstate: active\n", metadata->uuid ); + int len = snprintf( buffer, 100, "[General]\nuuid=%s\nstate=active\n", metadata->uuid ); if ( statStdout ) { logadd( LOG_INFO, "%s", buffer ); } diff --git a/src/kernel/net.c b/src/kernel/net.c index 5ef4016..fcea31b 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -86,7 +86,7 @@ static bool dnbd3_execute_handshake(dnbd3_device_t *dev, struct socket *sock, struct sockaddr_storage *addr, uint16_t *remote_version, bool copy_image_info); static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr, - struct socket *sock); + struct socket *sock, u64 test_start, u32 test_size); static bool dnbd3_send_empty_request(dnbd3_device_t *dev, u16 cmd); @@ -143,6 +143,7 @@ static void dnbd3_start_discover(dnbd3_device_t *dev, bool panic) // Panic freshly turned on dev->panic = true; dev->discover_interval = TIMER_INTERVAL_PROBE_PANIC; + dev->discover_count = 0; } spin_unlock_irqrestore(&dev->blk_lock, irqflags); dnbd3_flag_reset(dev->connection_lock); @@ -192,10 +193,14 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) { struct socket *sock, *best_sock = NULL; dnbd3_alt_server_t *alt; + struct request *blk_request; struct sockaddr_storage host_compare, best_server; uint16_t remote_version; ktime_t start, end; unsigned long rtt = 0, best_rtt = 0; + u64 test_start = 0; + u32 test_size = RTT_BLOCK_SIZE; + unsigned long irqflags; int i, j, k, isize, fails, rtt_threshold; int do_change = 0; u8 check_order[NUMBER_SERVERS]; @@ -219,6 +224,10 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) best_server.ss_family = 0; best_rtt = RTT_UNREACHABLE; + if (dev->panic) { + dnbd3_dev_dbg_host(dev, &host_compare, "Discover in panic mode\n"); + } + if (!ready || dev->panic) isize = NUMBER_SERVERS; else @@ -249,6 +258,24 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) if (!dnbd3_execute_handshake(dev, sock, &host_compare, &remote_version, false)) goto error; + if (dev->panic) { + // In panic mode, use next pending request for testing, this has a higher chance of + // filtering out a server which can't actually handle our requests, instead of just + // requesting the very first block which should be cached by every server. + spin_lock_irqsave(&dev->send_queue_lock, irqflags); + if (!list_empty(&dev->send_queue)) { + blk_request = list_entry(dev->send_queue.next, struct request, queuelist); + test_start = blk_rq_pos(blk_request) << 9; /* sectors to bytes */ + test_size = blk_rq_bytes(blk_request); + } + spin_unlock_irqrestore(&dev->send_queue_lock, irqflags); + } + + // actual rtt measurement is just the first block request and reply + start = ktime_get_real(); + if (!dnbd3_request_test_block(dev, &host_compare, sock, test_start, test_size)) + goto error; + end = ktime_get_real(); // panic mode, take first responding server if (dev->panic) { @@ -259,7 +286,10 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) // Check global flag, a connect might have been in progress if (best_sock != NULL) sock_release(best_sock); - set_socket_timeout(sock, false, SOCKET_TIMEOUT_RECV * 1000 + 1000); + set_socket_timeout(sock, false, MAX( + SOCKET_TIMEOUT_RECV * 1000, + (int)ktime_ms_delta(end, start) + ) + 1000); if (dnbd3_set_primary_connection(dev, sock, &host_compare, remote_version) != 0) sock_release(sock); dnbd3_flag_reset(dev->connection_lock); @@ -267,12 +297,6 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev) } } - // actual rtt measurement is just the first block requests and reply - start = ktime_get_real(); - if (!dnbd3_request_test_block(dev, &host_compare, sock)) - goto error; - end = ktime_get_real(); - mutex_lock(&dev->alt_servers_lock); if (is_same_server(&dev->alt_servers[i].host, &host_compare)) { dev->alt_servers[i].protocol_version = remote_version; @@ -446,7 +470,9 @@ static void dnbd3_recv_workfn(struct work_struct *work) int remaining; int ret; + dnbd3_dev_dbg_cur(dev, "starting receive worker...\n"); mutex_lock(&dev->recv_mutex); + dnbd3_dev_dbg_cur(dev, "receive worker started\n"); while (dev->sock) { // receive net reply ret = dnbd3_recv_reply(dev->sock, &reply_hdr); @@ -594,6 +620,7 @@ static void dnbd3_recv_workfn(struct work_struct *work) out_unlock: // This will check if we actually still need a new connection dnbd3_start_discover(dev, true); + dnbd3_dev_dbg_cur(dev, "Receive worker exited\n"); mutex_unlock(&dev->recv_mutex); } @@ -623,7 +650,7 @@ static void set_socket_timeout(struct socket *sock, bool set_send, int timeout_m static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, struct socket **sock_out) { ktime_t start; - int ret, connect_time_ms; + int ret, connect_time_ms, diff; struct socket *sock; int retries = 4; const int addrlen = addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) @@ -659,7 +686,7 @@ static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, str if (dev->panic && dev->panic_count > 1) { /* in panic mode for some time, start increasing timeouts */ - connect_time_ms = dev->panic_count * 1000; + connect_time_ms = dev->panic_count * 333; } else { /* otherwise, use 2*RTT of current server */ connect_time_ms = dev->cur_server.rtt * 2 / 1000; @@ -667,21 +694,21 @@ static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, str /* but obey a minimal configurable value, and maximum sanity check */ if (connect_time_ms < SOCKET_TIMEOUT_SEND * 1000) connect_time_ms = SOCKET_TIMEOUT_SEND * 1000; - else if (connect_time_ms > 60000) - connect_time_ms = 60000; + else if (connect_time_ms > 15000) + connect_time_ms = 15000; set_socket_timeout(sock, false, connect_time_ms); // recv set_socket_timeout(sock, true, connect_time_ms); // send start = ktime_get_real(); while (--retries > 0) { ret = kernel_connect(sock, (struct sockaddr *)addr, addrlen, 0); - connect_time_ms = (int)ktime_ms_delta(ktime_get_real(), start); - if (connect_time_ms > 2 * SOCKET_TIMEOUT_SEND * 1000) { + diff = (int)ktime_ms_delta(ktime_get_real(), start); + if (diff > 2 * connect_time_ms) { /* Either I'm losing my mind or there was a specific build of kernel * 5.x where SO_RCVTIMEO didn't affect the connect call above, so * this function would hang for over a minute for unreachable hosts. - * Leave in this debug check for twice the configured timeout + * Leave in this debug check for twice the configured timeout. */ - dnbd3_dev_dbg_host(dev, addr, "connect: call took %dms\n", + dnbd3_dev_err_host(dev, addr, "connect: call took %dms\n", connect_time_ms); } if (ret != 0) { @@ -916,23 +943,24 @@ static bool dnbd3_drain_socket(dnbd3_device_t *dev, struct socket *sock, int byt return true; } -static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr, struct socket *sock) +static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr, + struct socket *sock, u64 test_start, u32 test_size) { dnbd3_reply_t reply_hdr; // Request block - if (!dnbd3_send_request(sock, CMD_GET_BLOCK, 0, 0, RTT_BLOCK_SIZE)) { + if (!dnbd3_send_request(sock, CMD_GET_BLOCK, 0, test_start, test_size)) { dnbd3_err_dbg_host(dev, addr, "requesting test block failed\n"); return false; } - // receive net reply + // receive reply header if (dnbd3_recv_reply(sock, &reply_hdr) != sizeof(reply_hdr)) { dnbd3_err_dbg_host(dev, addr, "receiving test block header packet failed\n"); return false; } if (reply_hdr.magic != dnbd3_packet_magic || reply_hdr.cmd != CMD_GET_BLOCK - || reply_hdr.size != RTT_BLOCK_SIZE || reply_hdr.handle != 0) { + || reply_hdr.size != test_size || reply_hdr.handle != 0) { dnbd3_err_dbg_host(dev, addr, "unexpected reply to block request: cmd=%d, size=%d, handle=%llu (discover)\n", (int)reply_hdr.cmd, (int)reply_hdr.size, reply_hdr.handle); @@ -940,7 +968,7 @@ static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storag } // receive data - return dnbd3_drain_socket(dev, sock, RTT_BLOCK_SIZE); + return dnbd3_drain_socket(dev, sock, test_size); } #undef dnbd3_err_dbg_host |