summaryrefslogtreecommitdiffstats
path: root/src/server
diff options
context:
space:
mode:
authorSimon Rettberg2018-03-15 22:03:36 +0100
committerSimon Rettberg2018-03-15 22:03:36 +0100
commit09dbfb968c4ef469f4a43dad3b9818fd2ed4ef82 (patch)
treea2e936ba7e199badad7b18efa71576203bf5b48b /src/server
parent[SERVER] Make TSAN happy (diff)
downloaddnbd3-09dbfb968c4ef469f4a43dad3b9818fd2ed4ef82.tar.gz
dnbd3-09dbfb968c4ef469f4a43dad3b9818fd2ed4ef82.tar.xz
dnbd3-09dbfb968c4ef469f4a43dad3b9818fd2ed4ef82.zip
[SERVER] Experimental support for sparse files in proxy mode
Will not preallocate images in this mode. Old images are only deleted if the disk is full, determined by write() calls to the cache file yielding ENOSPC or EDQUOT. In such a case, the least recently used image(s) will be deleted to free up at least 256MiB, and then the write() call will be repeated. This *should* work somewhat reliably unless the cache partition is ridiculously small. Performance might suffer a little, and disk fragmentation might occur much faster than in prealloc mode. Testing is needed.
Diffstat (limited to 'src/server')
-rw-r--r--src/server/globals.c7
-rw-r--r--src/server/globals.h14
-rw-r--r--src/server/image.c44
-rw-r--r--src/server/image.h2
-rw-r--r--src/server/uplink.c11
5 files changed, 67 insertions, 11 deletions
diff --git a/src/server/globals.c b/src/server/globals.c
index b1775f0..0502bbc 100644
--- a/src/server/globals.c
+++ b/src/server/globals.c
@@ -18,6 +18,7 @@ int _clientPenalty = 0;
bool _isProxy = false;
bool _backgroundReplication = true;
bool _lookupMissingForProxy = true;
+bool _sparseFiles = false;
bool _removeMissingImages = true;
int _uplinkTimeout = SOCKET_TIMEOUT_UPLINK;
int _clientTimeout = SOCKET_TIMEOUT_CLIENT;
@@ -54,6 +55,7 @@ static int ini_handler(void *custom UNUSED, const char* section, const char* key
SAVE_TO_VAR_BOOL( dnbd3, proxyPrivateOnly );
SAVE_TO_VAR_BOOL( dnbd3, backgroundReplication );
SAVE_TO_VAR_BOOL( dnbd3, lookupMissingForProxy );
+ SAVE_TO_VAR_BOOL( dnbd3, sparseFiles );
SAVE_TO_VAR_BOOL( dnbd3, removeMissingImages );
SAVE_TO_VAR_BOOL( dnbd3, closeUnusedFd );
SAVE_TO_VAR_UINT( dnbd3, serverPenalty );
@@ -146,6 +148,10 @@ void globals_loadConfig()
}
}
}
+ if ( _backgroundReplication && _sparseFiles ) {
+ logadd( LOG_WARNING, "Ignoring 'sparseFiles=true' since backgroundReplication is set to true" );
+ _sparseFiles = false;
+ }
// Dump config as interpreted
char buffer[2000];
globals_dumpConfig( buffer, sizeof(buffer) );
@@ -257,6 +263,7 @@ size_t globals_dumpConfig(char *buffer, size_t size)
PBOOL(isProxy);
PBOOL(backgroundReplication);
PBOOL(lookupMissingForProxy);
+ PBOOL(sparseFiles);
PBOOL(removeMissingImages);
PINT(uplinkTimeout);
PINT(clientTimeout);
diff --git a/src/server/globals.h b/src/server/globals.h
index c1d5d78..d43878f 100644
--- a/src/server/globals.h
+++ b/src/server/globals.h
@@ -214,6 +214,20 @@ extern bool _backgroundReplication;
extern bool _lookupMissingForProxy;
/**
+ * Should we preallocate proxied images right at the start to make
+ * sure we can cache it entirely, or rather create sparse files
+ * with holes in them? With sparse files, we just keep writing
+ * cached blocks to disk until it is full, and only then will we
+ * start to delete old images. This might be a bit flaky so use
+ * only in space restricted environments. Also make sure your
+ * file system actually supports sparse files / files with holes
+ * in them, or you might get really poor performance.
+ * This setting will have no effect if background replication is
+ * turned on.
+ */
+extern bool _sparseFiles;
+
+/**
* Port to listen on (default: #define PORT (5003))
*/
extern int _listenPort;
diff --git a/src/server/image.c b/src/server/image.c
index ca00c63..78091f1 100644
--- a/src/server/image.c
+++ b/src/server/image.c
@@ -49,7 +49,7 @@ static bool image_addToList(dnbd3_image_t *image);
static bool image_load(char *base, char *path, int withUplink);
static bool image_clone(int sock, char *name, uint16_t revision, uint64_t imageSize);
static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_t realFilesize, uint32_t *crc);
-static bool image_ensureDiskSpace(uint64_t size);
+static bool image_ensureDiskSpace(uint64_t size, bool force);
static uint8_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize);
static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t fileSize, uint32_t *masterCrc);
@@ -1125,13 +1125,16 @@ bool image_create(char *image, int revision, uint64_t size)
// Try cache map first
if ( !file_alloc( fdCache, 0, mapsize ) ) {
const int err = errno;
- logadd( LOG_ERROR, "Could not allocate %d bytes for %s (errno=%d)", mapsize, cache, err );
- goto failure_cleanup;
+ logadd( LOG_DEBUG1, "Could not allocate %d bytes for %s (errno=%d)", mapsize, cache, err );
}
// Now write image
- if ( !file_alloc( fdImage, 0, size ) ) {
+ if ( !_sparseFiles && !file_alloc( fdImage, 0, size ) ) {
const int err = errno;
logadd( LOG_ERROR, "Could not allocate %" PRIu64 " bytes for %s (errno=%d)", size, path, err );
+ logadd( LOG_ERROR, "It is highly recommended to use a file system that supports preallocating disk"
+ " space without actually writing all zeroes to the block device." );
+ logadd( LOG_ERROR, "If you cannot fix this, try setting sparseFiles=true, but don't expect"
+ " divine performance during replication." );
goto failure_cleanup;
}
close( fdImage );
@@ -1275,8 +1278,13 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
goto server_fail;
}
pthread_mutex_lock( &reloadLock );
- ok = image_ensureDiskSpace( remoteImageSize )
- && image_clone( sock, name, remoteRid, remoteImageSize ); // This sets up the file+map+crc and loads the img
+ // Ensure disk space entirely if not using sparse files, otherwise just make sure we have some room at least
+ if ( _sparseFiles ) {
+ ok = image_ensureDiskSpace( 2ull * 1024 * 1024 * 1024, false ); // 2GiB, maybe configurable one day
+ } else {
+ ok = image_ensureDiskSpace( remoteImageSize + ( 10 * 1024 * 1024 ), false ); // some extra space for cache map etc.
+ }
+ ok = ok && image_clone( sock, name, remoteRid, remoteImageSize ); // This sets up the file+map+crc and loads the img
pthread_mutex_unlock( &reloadLock );
if ( !ok ) goto server_fail;
@@ -1677,14 +1685,30 @@ static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_
}
/**
+ * Acquire reloadLock, call image_ensureDiskSpace (below) with the
+ * given size and force, release the lock and return its result.
+ */
+bool image_ensureDiskSpaceLocked(uint64_t size, bool force)
+{
+ bool ret;
+ pthread_mutex_lock( &reloadLock );
+ ret = image_ensureDiskSpace( size, force );
+ pthread_mutex_unlock( &reloadLock );
+ return ret;
+}
+
+/**
* Make sure at least size bytes are available in _basePath.
* Will delete old images to make room for new ones.
* TODO: Store last access time of images. Currently the
- * last access time is reset on server restart. Thus it will
- * currently only delete images if server uptime is > 10 hours
+ * last access time is reset to the file modification time
+ * on server restart. Thus it will
+ * currently only delete images if server uptime is > 10 hours.
+ * This can be overridden by setting force to true, in case
+ * free space is desperately needed.
* Return true iff enough space is available. false in random other cases
*/
-static bool image_ensureDiskSpace(uint64_t size)
+static bool image_ensureDiskSpace(uint64_t size, bool force)
{
for ( int maxtries = 0; maxtries < 20; ++maxtries ) {
uint64_t available;
@@ -1694,7 +1718,7 @@ static bool image_ensureDiskSpace(uint64_t size)
return true;
}
if ( available > size ) return true;
- if ( dnbd3_serverUptime() < 10 * 3600 ) {
+ if ( !force && dnbd3_serverUptime() < 10 * 3600 ) {
logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, but server uptime < 10 hours...", (int)(available / (1024ll * 1024ll)),
(int)(size / (1024 * 1024)) );
return false;
diff --git a/src/server/image.h b/src/server/image.h
index 213734d..2290744 100644
--- a/src/server/image.h
+++ b/src/server/image.h
@@ -43,6 +43,8 @@ int image_getCompletenessEstimate(dnbd3_image_t * const image);
void image_closeUnusedFd();
+bool image_ensureDiskSpaceLocked(uint64_t size, bool force);
+
// one byte in the map covers 8 4kib blocks, so 32kib per byte
// "+ (1 << 15) - 1" is required to account for the last bit of
// the image that is smaller than 32kib
diff --git a/src/server/uplink.c b/src/server/uplink.c
index aec47cb..b166c6d 100644
--- a/src/server/uplink.c
+++ b/src/server/uplink.c
@@ -592,10 +592,19 @@ static void uplink_handleReceive(dnbd3_connection_t *link)
spin_unlock( &link->image->lock );
// 1) Write to cache file
if ( link->image->cacheFd != -1 ) {
+ bool tryFree = true;
uint32_t done = 0;
while ( done < inReply.size ) {
ret = (int)pwrite( link->image->cacheFd, link->recvBuffer + done, inReply.size - done, start + done );
- if ( ret == -1 && errno == EINTR ) continue;
+ if ( ret == -1 ) {
+ if ( errno == EINTR ) continue;
+ if ( errno == ENOSPC || errno == EDQUOT ) {
+ // try to free 256MiB
+ if ( !tryFree || !image_ensureDiskSpaceLocked( 256ull * 1024 * 1024, true ) ) break;
+ tryFree = false;
+ continue; // Success, retry write
+ }
+ }
if ( ret <= 0 ) break;
done += (uint32_t)ret;
}