summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/server/altservers.c20
-rw-r--r--src/server/altservers.h2
-rw-r--r--src/server/globals.h3
-rw-r--r--src/server/image.c136
-rw-r--r--src/server/image.h2
-rw-r--r--src/server/uplink.c76
-rw-r--r--src/serverconfig.h5
7 files changed, 168 insertions, 76 deletions
diff --git a/src/server/altservers.c b/src/server/altservers.c
index a6ad235..380737c 100644
--- a/src/server/altservers.c
+++ b/src/server/altservers.c
@@ -274,6 +274,26 @@ int altservers_getHostListForReplication(const char *image, dnbd3_host_t *server
}
/**
+ * Returns true if there is at least one alt-server the
+ * given image name would be allowed to be cloned from.
+ */
+bool altservers_imageHasAltServers(const char *image)
+{
+ bool ret = false;
+ mutex_lock( &altServersLock );
+ for ( int i = 0; i < numAltServers; ++i ) {
+ if ( altServers[i].isClientOnly || ( !altServers[i].isPrivate && _proxyPrivateOnly ) )
+ continue;
+ if ( !isImageAllowed( &altServers[i], image ) )
+ continue;
+ ret = true;
+ break;
+ }
+ mutex_unlock( &altServersLock );
+ return ret;
+}
+
+/**
* Get <size> alt servers. If there are more alt servers than
* requested, random servers will be picked.
* This function is suited for finding uplink servers as
diff --git a/src/server/altservers.h b/src/server/altservers.h
index 8e29aaa..78f6fcc 100644
--- a/src/server/altservers.h
+++ b/src/server/altservers.h
@@ -19,6 +19,8 @@ int altservers_getListForClient(dnbd3_client_t *client, dnbd3_server_entry_t *ou
int altservers_getHostListForReplication(const char *image, dnbd3_host_t *servers, int size);
+bool altservers_imageHasAltServers(const char *image);
+
bool altservers_toString(int server, char *buffer, size_t len);
int altservers_netCloseness(dnbd3_host_t *host1, dnbd3_host_t *host2);
diff --git a/src/server/globals.h b/src/server/globals.h
index 5de4180..10d3ee3 100644
--- a/src/server/globals.h
+++ b/src/server/globals.h
@@ -93,6 +93,7 @@ struct _dnbd3_uplink
// If BGR == BGR_HASHBLOCK, -1 means "currently no incomplete block"
uint64_t replicationHandle; // Handle of pending replication request
atomic_uint_fast64_t bytesReceived; // Number of bytes received by the uplink since startup.
+ atomic_uint_fast64_t bytesReceivedLastSave; // Number of bytes received when we last saved the cache map
int queueLen; // length of queue
uint32_t idleTime; // How many seconds the uplink was idle (apart from keep-alives)
dnbd3_queued_request_t queue[SERVER_MAX_UPLINK_QUEUE];
@@ -128,7 +129,6 @@ struct _dnbd3_image
uint64_t virtualFilesize; // virtual size of image (real size rounded up to multiple of 4k)
uint64_t realFilesize; // actual file size on disk
ticks atime; // last access time
- ticks lastWorkCheck; // last time a non-working image has been checked
ticks nextCompletenessEstimate; // next time the completeness estimate should be updated
uint32_t *crc32; // list of crc32 checksums for each 16MiB block in image
uint32_t masterCrc32; // CRC-32 of the crc-32 list
@@ -144,6 +144,7 @@ struct _dnbd3_image
atomic_bool queue; // Too many requests waiting on uplink
} problem;
uint16_t rid; // revision of image
+ atomic_bool mapDirty; // Cache map has been modified outside uplink (only integrity checker for now)
pthread_mutex_t lock;
};
diff --git a/src/server/image.c b/src/server/image.c
index 3583f86..5a9e15b 100644
--- a/src/server/image.c
+++ b/src/server/image.c
@@ -55,6 +55,8 @@ static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const
static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t fileSize, uint32_t *masterCrc);
static bool image_checkRandomBlocks(dnbd3_image_t *image, const int count, int fromFd);
static void* closeUnusedFds(void*);
+static void* saveAllCacheMaps(void*);
+static bool saveCacheMap(dnbd3_image_t *image);
static void allocCacheMap(dnbd3_image_t *image, bool complete);
static void cmfree(ref *ref)
@@ -73,6 +75,7 @@ void image_serverStartup()
mutex_init( &remoteCloneLock, LOCK_REMOTE_CLONE );
mutex_init( &reloadLock, LOCK_RELOAD );
server_addJob( &closeUnusedFds, NULL, 10, 900 );
+ server_addJob( &saveAllCacheMaps, NULL, 9, 20 );
}
/**
@@ -160,6 +163,8 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
integrity_check( image, block, false );
}
}
+ } else if ( !set ) {
+ image->mapDirty = true;
}
ref_put( &cache->reference );
}
@@ -624,6 +629,7 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image)
// this will get called again when the uplink is done.
if ( !uplink_shutdown( image ) )
return NULL;
+ saveCacheMap( image );
mutex_lock( &image->lock );
ref_setref( &image->ref_cacheMap, NULL );
free( image->crc32 );
@@ -1830,6 +1836,135 @@ static void* closeUnusedFds(void* nix UNUSED)
return NULL;
}
+#define IMGCOUNT 5
+static void* saveAllCacheMaps(void* nix UNUSED)
+{
+ static ticks nextSave;
+ dnbd3_image_t *list[IMGCOUNT];
+ int count = 0;
+ declare_now;
+ bool full = timing_reached( &nextSave, &now );
+ mutex_lock( &imageListLock );
+ for ( int i = 0; i < _num_images; ++i ) {
+ dnbd3_image_t * const image = _images[i];
+ if ( image->mapDirty ) {
+ // Flag is set if integrity checker found a problem - save out
+ image->users++;
+ list[count++] = image;
+ image->mapDirty = false;
+ } else {
+ // Otherwise, consider longer timeout and byte count limits of uplink
+ dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref );
+ if ( uplink != NULL ) {
+ assert( uplink->bytesReceivedLastSave <= uplink->bytesReceived );
+ uint64_t diff = uplink->bytesReceived - uplink->bytesReceivedLastSave;
+ if ( diff > CACHE_MAP_MAX_UNSAVED_BYTES
+ || ( full && diff != 0 ) ) {
+ image->users++;
+ list[count++] = image;
+ uplink->bytesReceivedLastSave = uplink->bytesReceived;
+ }
+ ref_put( &uplink->reference );
+ }
+ }
+ if ( count == IMGCOUNT )
+ break;
+ }
+ mutex_unlock( &imageListLock );
+ if ( full && count < IMGCOUNT ) {
+ // Only update nextSave once we handled all images in the list
+ timing_addSeconds( &nextSave, &now, CACHE_MAP_MAX_SAVE_DELAY );
+ }
+ for ( int i = 0; i < count; ++i ) {
+ saveCacheMap( list[i] );
+ image_release( list[i] );
+ }
+ return NULL;
+}
+#undef IMGCOUNT
+
+/**
+ * Saves the cache map of the given image.
+ * Return true on success.
+ * @param image the image
+ */
+static bool saveCacheMap(dnbd3_image_t *image)
+{
+ if ( !_isProxy )
+ return true; // Nothing to do
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL )
+ return true; // Nothing to do
+ // Check if we're a "hybrid proxy", i.e. there are only some namespaces (directories)
+ // for which we have any upstream servers configured. If there's none, don't touch
+ // the cache map on disk.
+ if ( !altservers_imageHasAltServers( image->name ) ) {
+ ref_put( &cache->reference );
+ return true; // Nothing to do
+ }
+
+ logadd( LOG_DEBUG2, "Saving cache map of %s:%d", image->name, (int)image->rid );
+ const size_t size = IMGSIZE_TO_MAPBYTES(image->virtualFilesize);
+ char mapfile[strlen( image->path ) + 4 + 1];
+ strcpy( mapfile, image->path );
+ strcat( mapfile, ".map" );
+
+ int fd = open( mapfile, O_WRONLY | O_CREAT, 0644 );
+ if ( fd == -1 ) {
+ const int err = errno;
+ ref_put( &cache->reference );
+ logadd( LOG_WARNING, "Could not open file to write cache map to disk (errno=%d) file %s", err, mapfile );
+ return false;
+ }
+
+ // On Linux we could use readFd, but in general it's not guaranteed to work
+ int imgFd = open( image->path, O_WRONLY );
+ if ( imgFd == -1 ) {
+ logadd( LOG_WARNING, "Cannot open %s for fsync(): errno=%d", image->path, errno );
+ } else {
+ if ( fsync( imgFd ) == -1 ) {
+ logadd( LOG_ERROR, "fsync() on image file %s failed with errno %d. Resetting cache map.", image->path, errno );
+ dnbd3_cache_map_t *old = image_loadCacheMap(image->path, image->virtualFilesize);
+ const int mapSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
+ if ( old == NULL ) {
+ // Could not load old map. FS might be toast.
+ logadd( LOG_ERROR, "Cannot load old cache map. Setting all zero." );
+ memset( cache->map, 0, mapSize );
+ } else {
+ // AND the maps together to be safe
+ for ( int i = 0; i < mapSize; ++i ) {
+ cache->map[i] &= old->map[i];
+ }
+ old->reference.free( &old->reference );
+ }
+ }
+ close( imgFd );
+ }
+
+ // Write current map to file
+ size_t done = 0;
+ while ( done < size ) {
+ const ssize_t ret = write( fd, cache->map + done, size - done );
+ if ( ret == -1 ) {
+ if ( errno == EINTR ) continue;
+ logadd( LOG_WARNING, "Could not write cache map (errno=%d) file %s", errno, mapfile );
+ break;
+ }
+ if ( ret <= 0 ) {
+ logadd( LOG_WARNING, "Unexpected return value %d for write() to %s", (int)ret, mapfile );
+ break;
+ }
+ done += (size_t)ret;
+ }
+ ref_put( &cache->reference );
+ if ( fsync( fd ) == -1 ) {
+ logadd( LOG_WARNING, "fsync() on image map %s failed with errno %d", mapfile, errno );
+ }
+ close( fd );
+ // TODO fsync on parent directory
+ return true;
+}
+
static void allocCacheMap(dnbd3_image_t *image, bool complete)
{
const uint8_t val = complete ? 0xff : 0;
@@ -1846,4 +1981,3 @@ static void allocCacheMap(dnbd3_image_t *image, bool complete)
}
mutex_unlock( &image->lock );
}
-
diff --git a/src/server/image.h b/src/server/image.h
index 89791fc..4614c74 100644
--- a/src/server/image.h
+++ b/src/server/image.h
@@ -49,6 +49,8 @@ void image_closeUnusedFd();
bool image_ensureDiskSpaceLocked(uint64_t size, bool force);
+bool image_saveCacheMap(dnbd3_image_t *image);
+
// one byte in the map covers 8 4kib blocks, so 32kib per byte
// "+ (1 << 15) - 1" is required to account for the last bit of
// the image that is smaller than 32kib
diff --git a/src/server/uplink.c b/src/server/uplink.c
index 97cb2a9..e5ab9c0 100644
--- a/src/server/uplink.c
+++ b/src/server/uplink.c
@@ -57,7 +57,6 @@ static bool uplink_sendKeepalive(dnbd3_uplink_t *uplink);
static void uplink_addCrc32(dnbd3_uplink_t *uplink);
static bool uplink_sendReplicationRequest(dnbd3_uplink_t *uplink);
static bool uplink_reopenCacheFd(dnbd3_uplink_t *uplink, const bool force);
-static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink);
static bool uplink_connectionShouldShutdown(dnbd3_uplink_t *uplink);
static void uplink_connectionFailed(dnbd3_uplink_t *uplink, bool findNew);
@@ -103,6 +102,7 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version
mutex_init( &uplink->sendMutex, LOCK_UPLINK_SEND );
uplink->image = image;
uplink->bytesReceived = 0;
+ uplink->bytesReceivedLastSave = 0;
uplink->idleTime = 0;
uplink->queueLen = 0;
uplink->cacheFd = -1;
@@ -445,7 +445,6 @@ static void* uplink_mainloop(void *data)
int altCheckInterval = SERVER_RTT_INTERVAL_INIT;
int rttTestResult;
uint32_t discoverFailCount = 0;
- uint32_t unsavedSeconds = 0;
ticks nextAltCheck, lastKeepalive;
char buffer[200];
memset( events, 0, sizeof(events) );
@@ -561,12 +560,6 @@ static void* uplink_mainloop(void *data)
if ( timepassed >= SERVER_UPLINK_KEEPALIVE_INTERVAL ) {
lastKeepalive = now;
uplink->idleTime += timepassed;
- unsavedSeconds += timepassed;
- if ( unsavedSeconds > 240 || ( unsavedSeconds > 60 && uplink->idleTime >= 20 && uplink->idleTime <= 70 ) ) {
- // fsync/save every 4 minutes, or every 60 seconds if uplink is idle
- unsavedSeconds = 0;
- uplink_saveCacheMap( uplink );
- }
// Keep-alive
if ( uplink->current.fd != -1 && uplink->replicationHandle == REP_NONE ) {
// Send keep-alive if nothing is happening, and try to trigger background rep.
@@ -639,9 +632,9 @@ static void* uplink_mainloop(void *data)
}
#endif
}
- cleanup: ;
- uplink_saveCacheMap( uplink );
+cleanup: ;
dnbd3_image_t *image = uplink->image;
+ image->mapDirty = true; // Force writeout of cache map
mutex_lock( &image->lock );
bool exp = false;
if ( atomic_compare_exchange_strong( &uplink->shutdown, &exp, true ) ) {
@@ -1135,69 +1128,6 @@ static bool uplink_reopenCacheFd(dnbd3_uplink_t *uplink, const bool force)
return uplink->cacheFd != -1;
}
-/**
- * Saves the cache map of the given image.
- * Return true on success.
- * Locks on: imageListLock, image.lock
- */
-static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink)
-{
- dnbd3_image_t *image = uplink->image;
- assert( image != NULL );
-
- if ( uplink->cacheFd != -1 ) {
- if ( fsync( uplink->cacheFd ) == -1 ) {
- // A failing fsync means we have no guarantee that any data
- // since the last fsync (or open if none) has been saved. Apart
- // from keeping the cache map from the last successful fsync
- // around and restoring it there isn't much we can do to recover
- // a consistent state. Bail out.
- logadd( LOG_ERROR, "fsync() on image file %s failed with errno %d", image->path, errno );
- logadd( LOG_ERROR, "Bailing out immediately" );
- exit( 1 );
- }
- }
-
- dnbd3_cache_map_t *cache = ref_get_cachemap( image );
- if ( cache == NULL )
- return true;
- logadd( LOG_DEBUG2, "Saving cache map of %s:%d", image->name, (int)image->rid );
- const size_t size = IMGSIZE_TO_MAPBYTES(image->virtualFilesize);
- assert( image->path != NULL );
- char mapfile[strlen( image->path ) + 4 + 1];
- strcpy( mapfile, image->path );
- strcat( mapfile, ".map" );
-
- int fd = open( mapfile, O_WRONLY | O_CREAT, 0644 );
- if ( fd == -1 ) {
- const int err = errno;
- ref_put( &cache->reference );
- logadd( LOG_WARNING, "Could not open file to write cache map to disk (errno=%d) file %s", err, mapfile );
- return false;
- }
-
- size_t done = 0;
- while ( done < size ) {
- const ssize_t ret = write( fd, cache->map + done, size - done );
- if ( ret == -1 ) {
- if ( errno == EINTR ) continue;
- logadd( LOG_WARNING, "Could not write cache map (errno=%d) file %s", errno, mapfile );
- break;
- }
- if ( ret <= 0 ) {
- logadd( LOG_WARNING, "Unexpected return value %d for write() to %s", (int)ret, mapfile );
- break;
- }
- done += (size_t)ret;
- }
- ref_put( &cache->reference );
- if ( fsync( fd ) == -1 ) {
- logadd( LOG_WARNING, "fsync() on image map %s failed with errno %d", mapfile, errno );
- }
- close( fd );
- return true;
-}
-
static bool uplink_connectionShouldShutdown(dnbd3_uplink_t *uplink)
{
return ( uplink->idleTime > SERVER_UPLINK_IDLE_TIMEOUT
diff --git a/src/serverconfig.h b/src/serverconfig.h
index 239f0a2..5c7301d 100644
--- a/src/serverconfig.h
+++ b/src/serverconfig.h
@@ -17,7 +17,10 @@
#define SERVER_UPLINK_QUEUELEN_THRES 900 // Threshold where we start dropping incoming clients
#define SERVER_MAX_PENDING_ALT_CHECKS 500 // Length of queue for pending alt checks requested by uplinks
-#define SERVER_CACHE_MAP_SAVE_INTERVAL 90
+// Wait a maximum of 5 minutes before saving cache map (if data was received at all)
+#define CACHE_MAP_MAX_SAVE_DELAY 300
+// If more than 500MB have been received from uplink without saving cache map, do so
+#define CACHE_MAP_MAX_UNSAVED_BYTES ((uint64_t)500 * 1000 * 1000)
// Time in ms to wait for a read/write call to complete on an uplink connection
#define SOCKET_TIMEOUT_UPLINK 5000