From 609ca9001343b90338d9e61d63ffc78b5b99a63c Mon Sep 17 00:00:00 2001
From: Simon Rettberg
Date: Tue, 11 May 2021 09:43:37 +0200
Subject: [SERVER] Add minRequestSize: Enlarge relayed requests

Any request from a client being relayed to an uplink server will have
its size extended to this value. It will also be applied to background
replication requests, if the BGR mode is FULL.
As request coalescing is currently very primitive, this setting should
usually be left disabled, and bgrWindowSize used instead, if appropriate.
If you enable this, set it to something large (1M+), or it might have
adverse effects.
---
 src/server/globals.c | 37 +++++++++++++++++++++++++++----------
 src/server/globals.h |  6 ++++++
 src/server/uplink.c  | 21 +++++++++++++++------
 3 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/src/server/globals.c b/src/server/globals.c
index 8ef7aec..f6432cb 100644
--- a/src/server/globals.c
+++ b/src/server/globals.c
@@ -38,6 +38,7 @@ atomic_int _maxImages = SERVER_MAX_IMAGES;
 atomic_uint _maxPayload = 9000000; // 9MB
 atomic_uint_fast64_t _maxReplicationSize = (uint64_t)100000000000LL;
 atomic_uint _maxPrefetch = 262144; // 256KB
+atomic_uint _minRequestSize = 0;
 
 /**
  * True when loading config the first time. Consecutive loads will
@@ -89,6 +90,7 @@ static int ini_handler(void *custom UNUSED, const char* section, const char* key
 	SAVE_TO_VAR_UINT( limits, maxPayload );
 	SAVE_TO_VAR_UINT64( limits, maxReplicationSize );
 	SAVE_TO_VAR_UINT( limits, maxPrefetch );
+	SAVE_TO_VAR_UINT( limits, minRequestSize );
 	SAVE_TO_VAR_BOOL( dnbd3, pretendClient );
 	SAVE_TO_VAR_INT( dnbd3, autoFreeDiskSpaceDelay );
 	if ( strcmp( section, "dnbd3" ) == 0 && strcmp( key, "backgroundReplication" ) == 0 ) {
@@ -134,16 +136,30 @@ void globals_loadConfig()
 	if ( initialLoad ) {
 		sanitizeFixedConfig();
 	}
-	if ( _backgroundReplication == BGR_FULL && _sparseFiles && _bgrMinClients < 5 ) {
-		logadd( LOG_WARNING, "Ignoring 'sparseFiles=true' since backgroundReplication is set to true and bgrMinClients is too low" );
-		_sparseFiles = false;
-	}
-	if ( _bgrWindowSize < 1 ) {
-		_bgrWindowSize = 1;
-	} else if ( _bgrWindowSize > UPLINK_MAX_QUEUE - 10 ) {
-		_bgrWindowSize = UPLINK_MAX_QUEUE - 10;
-		logadd( LOG_MINOR, "Limiting bgrWindowSize to %d, because of UPLINK_MAX_QUEUE",
-				_bgrWindowSize );
+	if ( _isProxy ) {
+		if ( _backgroundReplication == BGR_FULL && _sparseFiles && _bgrMinClients < 5 ) {
+			logadd( LOG_WARNING, "Ignoring 'sparseFiles=true' since backgroundReplication is set to true and bgrMinClients is too low" );
+			_sparseFiles = false;
+		}
+		if ( _bgrWindowSize < 1 ) {
+			_bgrWindowSize = 1;
+		} else if ( _bgrWindowSize > UPLINK_MAX_QUEUE - 10 ) {
+			_bgrWindowSize = UPLINK_MAX_QUEUE - 10;
+			logadd( LOG_MINOR, "Limiting bgrWindowSize to %d, because of UPLINK_MAX_QUEUE",
+					_bgrWindowSize );
+		}
+		if ( _maxPayload < 256 * 1024 ) {
+			logadd( LOG_WARNING, "maxPayload was increased to 256k" );
+			_maxPayload = 256 * 1024;
+		}
+		if ( _maxPrefetch > _maxPayload ) {
+			logadd( LOG_WARNING, "Reducing maxPrefetch to maxPayload" );
+			_maxPrefetch = _maxPayload;
+		}
+		if ( _minRequestSize > _maxPayload ) {
+			logadd( LOG_WARNING, "Reducing minRequestSize to maxPayload" );
+			_minRequestSize = _maxPayload;
+		}
 	}
 	// Dump config as interpreted
 	char buffer[2000];
@@ -354,6 +370,7 @@ size_t globals_dumpConfig(char *buffer, size_t size)
 	PINT(maxPayload);
 	PUINT64(maxReplicationSize);
 	PINT(maxPrefetch);
+	PINT(minRequestSize);
 	return size - rem;
 }
 
diff --git a/src/server/globals.h b/src/server/globals.h
index b255668..bde1184 100644
--- a/src/server/globals.h
+++ b/src/server/globals.h
@@ -340,6 +340,12 @@ extern atomic_int _autoFreeDiskSpaceDelay;
  */
 extern atomic_uint _maxPrefetch;
 
+/**
+ * Use with care. Can severely degrade performance.
+ * Set to either 0 or a very high value.
+ */
+extern atomic_uint _minRequestSize;
+
 /**
  * Load the server configuration.
  */
diff --git a/src/server/uplink.c b/src/server/uplink.c
index 4329663..8a83124 100644
--- a/src/server/uplink.c
+++ b/src/server/uplink.c
@@ -360,7 +360,7 @@ static bool uplink_requestInternal(dnbd3_uplink_t *uplink, void *data, uplink_ca
 	if ( callback == NULL ) {
 		// Set upper-most bit for replication requests that we fire
 		// In client mode, at least set prefetch flag to prevent prefetch cascading
-		hops |= _pretendClient ? HOP_FLAG_PREFETCH : HOP_FLAG_BGR;
+		hops |= (uint8_t)( _pretendClient ? HOP_FLAG_PREFETCH : HOP_FLAG_BGR );
 	}
 
 	req_t req, preReq;
@@ -369,7 +369,7 @@ static bool uplink_requestInternal(dnbd3_uplink_t *uplink, void *data, uplink_ca
 	const uint64_t end = start + length;
 	req.start = start & ~(DNBD3_BLOCK_SIZE - 1);
 	req.end = end;
-	/* Don't do this for now -- this breaks matching of prefetch jobs, since they'd
+	/* Don't do this  -- this breaks matching of prefetch jobs, since they'd
 	 * be misaligned, and the next client request wouldn't match anything.
 	 * To improve this, we need to be able to attach a queue_client to multiple queue_entries
 	 * and then serve it once all the queue_entries are done (atomic_int in queue_client).
@@ -379,11 +379,11 @@ static bool uplink_requestInternal(dnbd3_uplink_t *uplink, void *data, uplink_ca
 	 * and we should just drop all affected clients. Then as a next step, don't serve the
 	 * clients form the receive buffer, but just issue a normal sendfile() call after writing
 	 * the received data to the local cache.
-	if ( callback != NULL ) {
+	 */
+	if ( callback != NULL && _minRequestSize != 0 ) {
 		// Not background replication request, extend request size
 		extendRequest( req.start, &req.end, uplink->image, _minRequestSize );
 	}
-	*/
 	req.end = (req.end + DNBD3_BLOCK_SIZE - 1) & ~(DNBD3_BLOCK_SIZE - 1);
 	// Critical section - work with the queue
 	mutex_lock( &uplink->queueLock );
@@ -889,9 +889,18 @@ static bool sendReplicationRequest(dnbd3_uplink_t *uplink)
 			uplink->nextReplicationIndex = -1;
 			break;
 		}
-		const uint64_t offset = (uint64_t)replicationIndex * FILE_BYTES_PER_MAP_BYTE;
-		const uint32_t size = (uint32_t)MIN( image->virtualFilesize - offset, FILE_BYTES_PER_MAP_BYTE );
 		const uint64_t handle = ++uplink->queueId;
+		const uint64_t offset = (uint64_t)replicationIndex * FILE_BYTES_PER_MAP_BYTE;
+		uint32_t size = (uint32_t)MIN( image->virtualFilesize - offset, FILE_BYTES_PER_MAP_BYTE );
+		// Extend the default 32k request size if _minRequestSize is > 32k
+		for ( size_t extra = 1; extra < ( _minRequestSize / FILE_BYTES_PER_MAP_BYTE )
+				&& offset + size < image->virtualFilesize
+				&& _backgroundReplication == BGR_FULL; ++extra ) {
+			if ( atomic_load_explicit( &cache->map[replicationIndex+1], memory_order_relaxed ) == 0xff )
+				break; // Hit complete 32k block, stop here
+			replicationIndex++;
+			size += (uint32_t)MIN( image->virtualFilesize - offset - size, FILE_BYTES_PER_MAP_BYTE );
+		}
 		if ( !uplink_requestInternal( uplink, NULL, NULL, handle, offset, size, 0 ) ) {
 			logadd( LOG_DEBUG1, "Error sending background replication request to uplink server (%s:%d)",
 					PIMG(uplink->image) );
-- 
cgit v1.2.3-55-g7522