summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/test-cow-fuse.yml2
-rw-r--r--cmake/FindKernelHeaders.cmake4
-rw-r--r--src/fuse/cowDoc/readme.md16
-rw-r--r--src/fuse/cowfile.c31
-rw-r--r--src/kernel/net.c70
5 files changed, 82 insertions, 41 deletions
diff --git a/.github/workflows/test-cow-fuse.yml b/.github/workflows/test-cow-fuse.yml
index 9b42532..46b6933 100644
--- a/.github/workflows/test-cow-fuse.yml
+++ b/.github/workflows/test-cow-fuse.yml
@@ -146,6 +146,6 @@ jobs:
/mnt/work/logs/randomLog1.out \
/mnt/work/logs/randomTestLog1.out \
/mnt/work/logs/randomLog2.out \
- /mnt/work/tmp/status.txt \
+ /mnt/work/tmp/status \
${{ github.workspace }}/build/src/server/dnbd3.log \
${{ github.workspace }}/../cow_server/cow_merger_service/publish/log.out
diff --git a/cmake/FindKernelHeaders.cmake b/cmake/FindKernelHeaders.cmake
index c04243e..ce71aa8 100644
--- a/cmake/FindKernelHeaders.cmake
+++ b/cmake/FindKernelHeaders.cmake
@@ -31,10 +31,10 @@ find_path(KernelHeaders_INCLUDE_DIR
# get Linux kernel headers version
file(READ "${KERNEL_BUILD_DIR}/include/generated/utsrelease.h" tmpvar)
-string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION ${tmpvar})
+string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION "${tmpvar}")
if("${KernelHeaders_VERSION}" EQUAL "")
file(READ "${KERNEL_BUILD_DIR}/include/config/kernel.release" tmpvar)
- string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION ${tmpvar})
+ string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION "${tmpvar}")
endif()
if("${KernelHeaders_VERSION}" EQUAL "")
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" KernelHeaders_VERSION ${KernelHeaders_INCLUDE_DIR})
diff --git a/src/fuse/cowDoc/readme.md b/src/fuse/cowDoc/readme.md
index 2b752bb..51a0052 100644
--- a/src/fuse/cowDoc/readme.md
+++ b/src/fuse/cowDoc/readme.md
@@ -24,12 +24,12 @@ A typical use case is updating or adding software to an existing image.
- `- L <path>` Similar to `-c <path>`, but instead of creating a new session, an existing one is loaded from the specified path.
- `-m` the client requests a merge after the image has been unmounted and all changes have been uploaded.
-- `--cowStatFile` creates a status file at the same location as the data and meta file. The file contains information about the current session, for more information see [here](#status).
-- `--cowStatStdout` similar to `--cowStatFile` but the information will be printed in the stdout.
+- `--cow-stats-file` creates a status file at the same location as the data and meta file. The file contains information about the current session, for more information see [here](#status).
+- `--cow-stats-stdout` similar to `--cow-stats-file` but the information will be printed in the stdout.
Example parameters for creating a new cow session:
```
-./dnbd3-fuse "/home/user/VMs/mount" -f -h localhost -i imagename -c "/home/user/temp" -C "192.168.178.20:5000" --cowStatStdout -m
+./dnbd3-fuse "/home/user/VMs/mount" -f -h localhost -i imagename -c "/home/user/temp" -C "192.168.178.20:5000" --cow-stats-stdout -m
```
@@ -117,11 +117,11 @@ There are two different limits for the number of parallel uploads in the [config
## Files
-When a new CoW session is started, a new `meta`, `data` and, if so set in the command line arguments, a `status.txt` file is created.
+When a new CoW session is started, a new `meta`, `data` and, if so set in the command line arguments, a `status` file is created.
### status
-The file `status.txt` can be activated with the command line parameter `--cowStatFile`.
+The file `status` can be activated with the command line parameter `--cow-stats-file`.
The file will contain the following:
@@ -147,9 +147,9 @@ It is `done` when the image has been unmounted and all clusters have been upload
- `ulspeed` the current upload speed in kb/s.
Once all clusters have been uploaded, the status is set to `done`.
-If you define `COW_DUMP_BLOCK_UPLOADS`, a list of all clusters, sorted by the number of uploads, is copied to the status.txt file after the cluster upload is completed.
+If you define `COW_DUMP_BLOCK_UPLOADS`, a list of all clusters, sorted by the number of uploads, is copied to the status file after the cluster upload is completed.
-With the command line parameter `--cowStatStdout` the same output of the stats file will be printed in stdout.
+With the command line parameter `--cow-stats-stdout` the same output of the stats file will be printed in stdout.
### meta
@@ -231,7 +231,7 @@ The following configuration variables have been added to `config/cow.h`.
- `COW_MIN_UPLOAD_DELAY` sets the minimum time in seconds that must have elapsed since the last change to a cow cluster before it is uploaded.
This value can be fine-tuned. A larger value usually reduces redundant uploading of clusters.
A smaller value reduces the time for the final upload after the image has been unmounted.
-If you set `COW_DUMP_BLOCK_UPLOADS` and set the command line parameter `--cowStatFile`, then a list of all clusters, sorted by the number of uploads, will be written to the status.txt file after the cluster upload is complete.
+If you set `COW_DUMP_BLOCK_UPLOADS` and set the command line parameter `--cow-stats-file`, then a list of all clusters, sorted by the number of uploads, will be written to the status file after the cluster upload is complete.
This can help in fine-tuning `COW_MIN_UPLOAD_DELAY`.
- `COW_STATS_UPDATE_TIME` defines the update frequency of the stdout output/statistics file in seconds. Setting it too low could affect performance as a loop runs over all clusters.
- `COW_MAX_PARALLEL_BACKGROUND_UPLOADS` defines the maximum number of parallel cluster uploads. This number is used when the image is still mounted and the user is still using it.
diff --git a/src/fuse/cowfile.c b/src/fuse/cowfile.c
index b183202..a5d6e4d 100644
--- a/src/fuse/cowfile.c
+++ b/src/fuse/cowfile.c
@@ -261,6 +261,10 @@ static size_t curlReadCallbackUploadBlock( char *ptr, size_t size, size_t nmemb,
{
cow_curl_read_upload_t *uploadBlock = (cow_curl_read_upload_t *)userdata;
size_t len = 0;
+
+ if ( size * nmemb < DNBD3_BLOCK_SIZE ) {
+ logadd( LOG_INFO, "Wow, curl read callback with %d bytes left", (int)( size * nmemb ) );
+ }
// Check if we're still in the bitfield
if ( uploadBlock->position < COW_BITFIELD_SIZE ) {
size_t lenCpy = MIN( COW_BITFIELD_SIZE - uploadBlock->position, size * nmemb );
@@ -276,8 +280,9 @@ static size_t curlReadCallbackUploadBlock( char *ptr, size_t size, size_t nmemb,
ssize_t spaceLeft = ( size * nmemb ) - len;
// Only read blocks that have been written to the cluster. Saves bandwidth. Not optimal since
// we do a lot of 4k/32k reads, but it's not that performance critical I guess...
- while ( spaceLeft >= (ssize_t)DNBD3_BLOCK_SIZE && inClusterOffset < (off_t)COW_DATA_CLUSTER_SIZE ) {
+ while ( spaceLeft > 0 && inClusterOffset < (off_t)COW_DATA_CLUSTER_SIZE ) {
int bitNumber = (int)( inClusterOffset / DNBD3_BLOCK_SIZE );
+ uint32_t blockOffset = (uint32_t)( inClusterOffset % DNBD3_BLOCK_SIZE );
size_t readSize;
// Small performance hack: All bits one in a byte, do a 32k instead of 4k read
// TODO: preadv with a large iov, reading unchanged blocks into a trash-buffer
@@ -288,8 +293,13 @@ static size_t curlReadCallbackUploadBlock( char *ptr, size_t size, size_t nmemb,
} else {
readSize = DNBD3_BLOCK_SIZE;
}
+ readSize -= blockOffset;
+ if ( (ssize_t)readSize > spaceLeft ) {
+ readSize = spaceLeft;
+ }
// If handling single block, check bits in our copy, as global bitfield could change
- if ( readSize != DNBD3_BLOCK_SIZE || checkBit( uploadBlock->bitfield, bitNumber ) ) {
+ // If uploading 8 blocks at once, check already happened above
+ if ( readSize > DNBD3_BLOCK_SIZE || checkBit( uploadBlock->bitfield, bitNumber ) ) {
ssize_t lengthRead = pread( cow.fdData, ( ptr + len ), readSize,
uploadBlock->cluster->offset + inClusterOffset );
if ( lengthRead == -1 ) {
@@ -424,9 +434,10 @@ static void dumpBlockUploads()
}
qsort( blockUploads, currentBlock, sizeof( cow_cluster_statistics_t ), cmpfunc );
- dprintf( cow.fdStats, "\n\nclusterNumber: uploads\n==Block Upload Dump===\n" );
+ dprintf( cow.fdStats, "\n\n[BlockStats]\n" );
for ( uint64_t i = 0; i < currentBlock; i++ ) {
- dprintf( cow.fdStats, "%" PRIu64 ": %" PRIu64 " \n", blockUploads[i].clusterNumber, blockUploads[i].uploads );
+ dprintf( cow.fdStats, "%" PRIu64 "=%" PRIu64 " \n",
+ blockUploads[i].clusterNumber, blockUploads[i].uploads );
}
}
#endif
@@ -454,6 +465,7 @@ static void updateCowStatsFile( uint64_t inQueue, uint64_t modified, uint64_t id
int len = snprintf( buffer, sizeof buffer,
"[General]\n"
+ "uuid=%s\n"
"state=%s\n"
"inQueue=%" PRIu64 "\n"
"modifiedClusters=%" PRIu64 "\n"
@@ -461,6 +473,7 @@ static void updateCowStatsFile( uint64_t inQueue, uint64_t modified, uint64_t id
"totalClustersUploaded=%" PRIu64 "\n"
"activeUploads=%i\n"
"%s%s\n",
+ metadata->uuid,
state, inQueue, modified, idle, totalBlocksUploaded, activeUploads,
COW_SHOW_UL_SPEED ? "avgSpeedKb=" : "",
speedBuffer );
@@ -478,12 +491,12 @@ static void updateCowStatsFile( uint64_t inQueue, uint64_t modified, uint64_t id
// Pad with a bunch of newlines so we don't change the file size all the time
ssize_t extra = MIN( 20, (ssize_t)sizeof(buffer) - len - 1 );
memset( buffer + len, '\n', extra );
- lseek( cow.fdStats, 43, SEEK_SET );
- if ( write( cow.fdStats, buffer, len + extra ) != len + extra ) {
+ if ( pwrite( cow.fdStats, buffer, len + extra, 0 ) != len + extra ) {
logadd( LOG_WARNING, "Could not update cow status file" );
}
#ifdef COW_DUMP_BLOCK_UPLOADS
if ( !uploadLoop && uploadLoopDone ) {
+ lseek( cow.fdStats, len + extra, SEEK_SET );
dumpBlockUploads();
}
#endif
@@ -549,7 +562,7 @@ static size_t curlHeaderCallbackUploadBlock( char *buffer, size_t size, size_t n
if ( len < 13 )
return len;
for ( int i = 0; i < 11; ++i ) {
- buffer[i] |= 0x60;
+ buffer[i] |= 0x20;
}
if ( strncmp( buffer, "retry-after:", 12 ) != 0 )
return len;
@@ -883,10 +896,10 @@ static bool createCowStatsFile( char *path )
{
char pathStatus[strlen( path ) + 12];
- snprintf( pathStatus, strlen( path ) + 12, "%s%s", path, "/status.txt" );
+ snprintf( pathStatus, strlen( path ) + 12, "%s%s", path, "/status" );
char buffer[100];
- int len = snprintf( buffer, 100, "uuid=%s\nstate: active\n", metadata->uuid );
+ int len = snprintf( buffer, 100, "[General]\nuuid=%s\nstate=active\n", metadata->uuid );
if ( statStdout ) {
logadd( LOG_INFO, "%s", buffer );
}
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 5ef4016..fcea31b 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -86,7 +86,7 @@ static bool dnbd3_execute_handshake(dnbd3_device_t *dev, struct socket *sock,
struct sockaddr_storage *addr, uint16_t *remote_version, bool copy_image_info);
static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr,
- struct socket *sock);
+ struct socket *sock, u64 test_start, u32 test_size);
static bool dnbd3_send_empty_request(dnbd3_device_t *dev, u16 cmd);
@@ -143,6 +143,7 @@ static void dnbd3_start_discover(dnbd3_device_t *dev, bool panic)
// Panic freshly turned on
dev->panic = true;
dev->discover_interval = TIMER_INTERVAL_PROBE_PANIC;
+ dev->discover_count = 0;
}
spin_unlock_irqrestore(&dev->blk_lock, irqflags);
dnbd3_flag_reset(dev->connection_lock);
@@ -192,10 +193,14 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
{
struct socket *sock, *best_sock = NULL;
dnbd3_alt_server_t *alt;
+ struct request *blk_request;
struct sockaddr_storage host_compare, best_server;
uint16_t remote_version;
ktime_t start, end;
unsigned long rtt = 0, best_rtt = 0;
+ u64 test_start = 0;
+ u32 test_size = RTT_BLOCK_SIZE;
+ unsigned long irqflags;
int i, j, k, isize, fails, rtt_threshold;
int do_change = 0;
u8 check_order[NUMBER_SERVERS];
@@ -219,6 +224,10 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
best_server.ss_family = 0;
best_rtt = RTT_UNREACHABLE;
+ if (dev->panic) {
+ dnbd3_dev_dbg_host(dev, &host_compare, "Discover in panic mode\n");
+ }
+
if (!ready || dev->panic)
isize = NUMBER_SERVERS;
else
@@ -249,6 +258,24 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
if (!dnbd3_execute_handshake(dev, sock, &host_compare, &remote_version, false))
goto error;
+ if (dev->panic) {
+ // In panic mode, use next pending request for testing, this has a higher chance of
+ // filtering out a server which can't actually handle our requests, instead of just
+ // requesting the very first block which should be cached by every server.
+ spin_lock_irqsave(&dev->send_queue_lock, irqflags);
+ if (!list_empty(&dev->send_queue)) {
+ blk_request = list_entry(dev->send_queue.next, struct request, queuelist);
+ test_start = blk_rq_pos(blk_request) << 9; /* sectors to bytes */
+ test_size = blk_rq_bytes(blk_request);
+ }
+ spin_unlock_irqrestore(&dev->send_queue_lock, irqflags);
+ }
+
+ // actual rtt measurement is just the first block request and reply
+ start = ktime_get_real();
+ if (!dnbd3_request_test_block(dev, &host_compare, sock, test_start, test_size))
+ goto error;
+ end = ktime_get_real();
// panic mode, take first responding server
if (dev->panic) {
@@ -259,7 +286,10 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
// Check global flag, a connect might have been in progress
if (best_sock != NULL)
sock_release(best_sock);
- set_socket_timeout(sock, false, SOCKET_TIMEOUT_RECV * 1000 + 1000);
+ set_socket_timeout(sock, false, MAX(
+ SOCKET_TIMEOUT_RECV * 1000,
+ (int)ktime_ms_delta(end, start)
+ ) + 1000);
if (dnbd3_set_primary_connection(dev, sock, &host_compare, remote_version) != 0)
sock_release(sock);
dnbd3_flag_reset(dev->connection_lock);
@@ -267,12 +297,6 @@ static void dnbd3_internal_discover(dnbd3_device_t *dev)
}
}
- // actual rtt measurement is just the first block requests and reply
- start = ktime_get_real();
- if (!dnbd3_request_test_block(dev, &host_compare, sock))
- goto error;
- end = ktime_get_real();
-
mutex_lock(&dev->alt_servers_lock);
if (is_same_server(&dev->alt_servers[i].host, &host_compare)) {
dev->alt_servers[i].protocol_version = remote_version;
@@ -446,7 +470,9 @@ static void dnbd3_recv_workfn(struct work_struct *work)
int remaining;
int ret;
+ dnbd3_dev_dbg_cur(dev, "starting receive worker...\n");
mutex_lock(&dev->recv_mutex);
+ dnbd3_dev_dbg_cur(dev, "receive worker started\n");
while (dev->sock) {
// receive net reply
ret = dnbd3_recv_reply(dev->sock, &reply_hdr);
@@ -594,6 +620,7 @@ static void dnbd3_recv_workfn(struct work_struct *work)
out_unlock:
// This will check if we actually still need a new connection
dnbd3_start_discover(dev, true);
+ dnbd3_dev_dbg_cur(dev, "Receive worker exited\n");
mutex_unlock(&dev->recv_mutex);
}
@@ -623,7 +650,7 @@ static void set_socket_timeout(struct socket *sock, bool set_send, int timeout_m
static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, struct socket **sock_out)
{
ktime_t start;
- int ret, connect_time_ms;
+ int ret, connect_time_ms, diff;
struct socket *sock;
int retries = 4;
const int addrlen = addr->ss_family == AF_INET ? sizeof(struct sockaddr_in)
@@ -659,7 +686,7 @@ static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, str
if (dev->panic && dev->panic_count > 1) {
/* in panic mode for some time, start increasing timeouts */
- connect_time_ms = dev->panic_count * 1000;
+ connect_time_ms = dev->panic_count * 333;
} else {
/* otherwise, use 2*RTT of current server */
connect_time_ms = dev->cur_server.rtt * 2 / 1000;
@@ -667,21 +694,21 @@ static int dnbd3_connect(dnbd3_device_t *dev, struct sockaddr_storage *addr, str
/* but obey a minimal configurable value, and maximum sanity check */
if (connect_time_ms < SOCKET_TIMEOUT_SEND * 1000)
connect_time_ms = SOCKET_TIMEOUT_SEND * 1000;
- else if (connect_time_ms > 60000)
- connect_time_ms = 60000;
+ else if (connect_time_ms > 15000)
+ connect_time_ms = 15000;
set_socket_timeout(sock, false, connect_time_ms); // recv
set_socket_timeout(sock, true, connect_time_ms); // send
start = ktime_get_real();
while (--retries > 0) {
ret = kernel_connect(sock, (struct sockaddr *)addr, addrlen, 0);
- connect_time_ms = (int)ktime_ms_delta(ktime_get_real(), start);
- if (connect_time_ms > 2 * SOCKET_TIMEOUT_SEND * 1000) {
+ diff = (int)ktime_ms_delta(ktime_get_real(), start);
+ if (diff > 2 * connect_time_ms) {
/* Either I'm losing my mind or there was a specific build of kernel
* 5.x where SO_RCVTIMEO didn't affect the connect call above, so
* this function would hang for over a minute for unreachable hosts.
- * Leave in this debug check for twice the configured timeout
+ * Leave in this debug check for twice the configured timeout.
*/
- dnbd3_dev_dbg_host(dev, addr, "connect: call took %dms\n",
+ dnbd3_dev_err_host(dev, addr, "connect: call took %dms\n",
connect_time_ms);
}
if (ret != 0) {
@@ -916,23 +943,24 @@ static bool dnbd3_drain_socket(dnbd3_device_t *dev, struct socket *sock, int byt
return true;
}
-static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr, struct socket *sock)
+static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storage *addr,
+ struct socket *sock, u64 test_start, u32 test_size)
{
dnbd3_reply_t reply_hdr;
// Request block
- if (!dnbd3_send_request(sock, CMD_GET_BLOCK, 0, 0, RTT_BLOCK_SIZE)) {
+ if (!dnbd3_send_request(sock, CMD_GET_BLOCK, 0, test_start, test_size)) {
dnbd3_err_dbg_host(dev, addr, "requesting test block failed\n");
return false;
}
- // receive net reply
+ // receive reply header
if (dnbd3_recv_reply(sock, &reply_hdr) != sizeof(reply_hdr)) {
dnbd3_err_dbg_host(dev, addr, "receiving test block header packet failed\n");
return false;
}
if (reply_hdr.magic != dnbd3_packet_magic || reply_hdr.cmd != CMD_GET_BLOCK
- || reply_hdr.size != RTT_BLOCK_SIZE || reply_hdr.handle != 0) {
+ || reply_hdr.size != test_size || reply_hdr.handle != 0) {
dnbd3_err_dbg_host(dev, addr,
"unexpected reply to block request: cmd=%d, size=%d, handle=%llu (discover)\n",
(int)reply_hdr.cmd, (int)reply_hdr.size, reply_hdr.handle);
@@ -940,7 +968,7 @@ static bool dnbd3_request_test_block(dnbd3_device_t *dev, struct sockaddr_storag
}
// receive data
- return dnbd3_drain_socket(dev, sock, RTT_BLOCK_SIZE);
+ return dnbd3_drain_socket(dev, sock, test_size);
}
#undef dnbd3_err_dbg_host