summary refs log tree commit diff stats
path: root/src/server/image.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/server/image.c')
-rw-r--r-- src/server/image.c 519
1 files changed, 274 insertions, 245 deletions
diff --git a/src/server/image.c b/src/server/image.c
index bfba6cb..6259e38 100644
--- a/src/server/image.c
+++ b/src/server/image.c
@@ -8,6 +8,7 @@
#include "../shared/protocol.h"
#include "../shared/timing.h"
#include "../shared/crc32.h"
+#include "reference.h"
#include <assert.h>
#include <fcntl.h>
@@ -50,24 +51,33 @@ static bool image_clone(int sock, char *name, uint16_t revision, uint64_t imageS
static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_t realFilesize, uint32_t *crc);
static bool image_ensureDiskSpace(uint64_t size, bool force);
-static uint8_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize);
+static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize);
static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t fileSize, uint32_t *masterCrc);
-static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t fileSize, uint32_t * const crc32list, uint8_t * const cache_map);
+static void image_checkRandomBlocks(dnbd3_image_t *image, const int count);
+static void* closeUnusedFds(void*);
+static void allocCacheMap(dnbd3_image_t *image, bool complete);
+
+static void cmfree(ref *ref) // Callback handed to ref_init() for cache maps; presumably run once the last reference is put -- confirm in reference.h
+{
+ dnbd3_cache_map_t *cache = container_of(ref, dnbd3_cache_map_t, reference); // Recover the enclosing cache map from its embedded 'reference' member
+ logadd( LOG_DEBUG2, "Freeing a cache map" );
+ free( cache ); // Header and map bytes are allocated as one chunk, so a single free() releases both
+}
// ##########################################
void image_serverStartup() // Module setup: seeds rand(), initializes the three module-level mutexes and registers the closeUnusedFds job
{
srand( (unsigned int)time( NULL ) ); // Seed rand() with the current time; rand() is used later in this file (random block selection, estimate jitter)
- mutex_init( &imageListLock );
- mutex_init( &remoteCloneLock );
- mutex_init( &reloadLock );
+ mutex_init( &imageListLock, LOCK_IMAGE_LIST ); // Second argument is a per-lock identifier constant
+ mutex_init( &remoteCloneLock, LOCK_REMOTE_CLONE );
+ mutex_init( &reloadLock, LOCK_RELOAD );
+ server_addJob( &closeUnusedFds, NULL, 10, 900 ); // NOTE(review): 10 and 900 look like initial delay / repeat interval -- confirm meaning and units against server_addJob
}
/**
* Update cache-map of given image for the given byte range
* start (inclusive) - end (exclusive)
- * Locks on: images[].lock
*/
void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, const bool set)
{
@@ -88,33 +98,59 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
if ( start >= end )
return;
bool setNewBlocks = false;
- uint64_t pos = start;
- mutex_lock( &image->lock );
- if ( image->cache_map == NULL ) {
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL ) {
// Image seems already complete
if ( set ) {
// This makes no sense
- mutex_unlock( &image->lock );
- logadd( LOG_DEBUG1, "image_updateCachemap(true) with no cache_map: %s", image->path );
+ logadd( LOG_DEBUG1, "image_updateCachemap(true) with no cache map: %s", image->path );
return;
}
// Recreate a cache map, set it to all 1 initially as we assume the image was complete
- const int byteSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
- image->cache_map = malloc( byteSize );
- memset( image->cache_map, 0xff, byteSize );
- }
- while ( pos < end ) {
- const size_t map_y = (int)( pos >> 15 );
- const int map_x = (int)( (pos >> 12) & 7 ); // mod 8
- const int bit_mask = 1 << map_x;
- if ( set ) {
- if ( (image->cache_map[map_y] & bit_mask) == 0 ) setNewBlocks = true;
- image->cache_map[map_y] |= (uint8_t)bit_mask;
- } else {
- image->cache_map[map_y] &= (uint8_t)~bit_mask;
+ allocCacheMap( image, true );
+ cache = ref_get_cachemap( image );
+ if ( cache == NULL ) {
+ logadd( LOG_WARNING, "WHAT!!!?!?!= No cache map right after alloc?! %s", image->path );
+ return;
}
- pos += DNBD3_BLOCK_SIZE;
}
+ // Set/unset
+ const uint64_t firstByteInMap = start >> 15;
+ const uint64_t lastByteInMap = (end - 1) >> 15;
+ uint64_t pos;
+ // First byte
+ uint8_t fb = 0, lb = 0;
+ for ( pos = start; firstByteInMap == (pos >> 15) && pos < end; pos += DNBD3_BLOCK_SIZE ) {
+ const int map_x = (pos >> 12) & 7; // mod 8
+ const uint8_t bit_mask = (uint8_t)( 1 << map_x );
+ fb |= bit_mask;
+ }
+ // Last byte
+ if ( lastByteInMap != firstByteInMap ) {
+ for ( pos = lastByteInMap << 15; pos < end; pos += DNBD3_BLOCK_SIZE ) {
+ assert( lastByteInMap == (pos >> 15) );
+ const int map_x = (pos >> 12) & 7; // mod 8
+ const uint8_t bit_mask = (uint8_t)( 1 << map_x );
+ lb |= bit_mask;
+ }
+ }
+ atomic_thread_fence( memory_order_acquire );
+ if ( set ) {
+ uint8_t fo = atomic_fetch_or_explicit( &cache->map[firstByteInMap], fb, memory_order_relaxed );
+ uint8_t lo = atomic_fetch_or_explicit( &cache->map[lastByteInMap], lb, memory_order_relaxed );
+ setNewBlocks = ( fo != cache->map[firstByteInMap] || lo != cache->map[lastByteInMap] );
+ } else {
+ atomic_fetch_and_explicit( &cache->map[firstByteInMap], (uint8_t)~fb, memory_order_relaxed );
+ atomic_fetch_and_explicit( &cache->map[lastByteInMap], (uint8_t)~lb, memory_order_relaxed );
+ }
+ const uint8_t nval = set ? 0xff : 0;
+ // Everything in between
+ for ( pos = firstByteInMap + 1; pos < lastByteInMap; ++pos ) {
+ if ( atomic_exchange_explicit( &cache->map[pos], nval, memory_order_relaxed ) != nval && set ) {
+ setNewBlocks = true;
+ }
+ }
+ atomic_thread_fence( memory_order_release );
if ( setNewBlocks && image->crc32 != NULL ) {
// If setNewBlocks is set, at least one of the blocks was not cached before, so queue all hash blocks
// for checking, even though this might lead to checking some hash block again, if it was
@@ -122,19 +158,14 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
// First set start and end to borders of hash blocks
start &= ~(uint64_t)(HASH_BLOCK_SIZE - 1);
end = (end + HASH_BLOCK_SIZE - 1) & ~(uint64_t)(HASH_BLOCK_SIZE - 1);
- pos = start;
- while ( pos < end ) {
- if ( image->cache_map == NULL ) break;
+ for ( pos = start; pos < end; pos += HASH_BLOCK_SIZE ) {
const int block = (int)( pos / HASH_BLOCK_SIZE );
- if ( image_isHashBlockComplete( image->cache_map, block, image->realFilesize ) ) {
- mutex_unlock( &image->lock );
- integrity_check( image, block );
- mutex_lock( &image->lock );
+ if ( image_isHashBlockComplete( cache->map, block, image->realFilesize ) ) {
+ integrity_check( image, block, false );
}
- pos += HASH_BLOCK_SIZE;
}
}
- mutex_unlock( &image->lock );
+ ref_put( &cache->reference );
}
/**
@@ -146,20 +177,18 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
bool image_isComplete(dnbd3_image_t *image)
{
assert( image != NULL );
- mutex_lock( &image->lock );
if ( image->virtualFilesize == 0 ) {
- mutex_unlock( &image->lock );
return false;
}
- if ( image->cache_map == NULL ) {
- mutex_unlock( &image->lock );
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL ) {
return true;
}
bool complete = true;
int j;
const int map_len_bytes = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
for (j = 0; j < map_len_bytes - 1; ++j) {
- if ( image->cache_map[j] != 0xFF ) {
+ if ( cache->map[j] != 0xFF ) {
complete = false;
break;
}
@@ -174,18 +203,27 @@ bool image_isComplete(dnbd3_image_t *image)
for (j = 0; j < blocks_in_last_byte; ++j)
last_byte |= (uint8_t)(1 << j);
}
- complete = ((image->cache_map[map_len_bytes - 1] & last_byte) == last_byte);
+ complete = ((cache->map[map_len_bytes - 1] & last_byte) == last_byte);
}
- if ( !complete ) {
- mutex_unlock( &image->lock );
+ ref_put( &cache->reference );
+ if ( !complete )
return false;
+ mutex_lock( &image->lock );
+ // Lock and make sure current cache map is still the one we saw complete
+ dnbd3_cache_map_t *current = ref_get_cachemap( image );
+ if ( current == cache ) {
+ // Set cache map NULL as it's complete
+ ref_setref( &image->ref_cacheMap, NULL );
+ }
+ if ( current != NULL ) {
+ ref_put( &current->reference );
}
- char mapfile[PATHLEN] = "";
- free( image->cache_map );
- image->cache_map = NULL;
- snprintf( mapfile, PATHLEN, "%s.map", image->path );
mutex_unlock( &image->lock );
- unlink( mapfile );
+ if ( current == cache ) { // Successfully set cache map to NULL above
+ char mapfile[PATHLEN] = "";
+ snprintf( mapfile, PATHLEN, "%s.map", image->path );
+ unlink( mapfile );
+ }
return true;
}
@@ -203,7 +241,9 @@ bool image_ensureOpen(dnbd3_image_t *image)
{
if ( image->readFd != -1 ) return image;
int newFd = open( image->path, O_RDONLY );
- if ( newFd != -1 ) {
+ if ( newFd == -1 ) {
+ logadd( LOG_WARNING, "Cannot open %s for reading", image->path );
+ } else {
// Check size
const off_t flen = lseek( newFd, 0, SEEK_END );
if ( flen == -1 ) {
@@ -234,6 +274,22 @@ bool image_ensureOpen(dnbd3_image_t *image)
return image->readFd != -1;
}
+dnbd3_image_t* image_byId(int imgId) // Look up a listed image by numeric id; bumps its users counter on success, returns NULL if no image matches
+{
+ int i;
+ mutex_lock( &imageListLock ); // The list scan and the users++ below both happen under imageListLock
+ for (i = 0; i < _num_images; ++i) {
+ dnbd3_image_t * const image = _images[i]; // Slots may be NULL (removed images), hence the check below
+ if ( image != NULL && image->id == imgId ) {
+ image->users++; // NOTE(review): caller presumably has to pair this with image_release() -- confirm
+ mutex_unlock( &imageListLock );
+ return image;
+ }
+ }
+ mutex_unlock( &imageListLock );
+ return NULL; // No image with this id in the list
+}
+
/**
* Get an image by name+rid. This function increases a reference counter,
* so you HAVE TO CALL image_release for every image_get() call at some
@@ -267,14 +323,12 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
return NULL ;
}
- mutex_lock( &candidate->lock );
- mutex_unlock( &imageListLock );
candidate->users++;
- mutex_unlock( &candidate->lock );
+ mutex_unlock( &imageListLock );
// Found, see if it works
-// TODO: Also make sure a non-working image still has old fd open but created a new one and removed itself from the list
-// TODO: But remember size-changed images forever
+ // TODO: Also make sure a non-working image still has old fd open but created a new one and removed itself from the list
+ // TODO: But remember size-changed images forever
if ( candidate->working || checkIfWorking ) {
// Is marked working, but might not have an fd open
if ( !image_ensureOpen( candidate ) ) {
@@ -317,14 +371,14 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
logadd( LOG_WARNING, "lseek() on %s failed (errno=%d)%s.", candidate->path, errno, removingText );
reload = true;
} else if ( (uint64_t)len != candidate->realFilesize ) {
- logadd( LOG_DEBUG1, "Size of %s changed at runtime, keeping disabled! Expected: %" PRIu64 ", found: %" PRIu64
+ logadd( LOG_WARNING, "Size of %s changed at runtime, keeping disabled! Expected: %" PRIu64 ", found: %" PRIu64
". Try sending SIGHUP to server if you know what you're doing.",
candidate->path, candidate->realFilesize, (uint64_t)len );
} else {
// Seek worked, file size is same, now see if we can read from file
char buffer[100];
if ( pread( candidate->readFd, buffer, sizeof(buffer), 0 ) == -1 ) {
- logadd( LOG_DEBUG2, "Reading first %d bytes from %s failed (errno=%d)%s.",
+ logadd( LOG_WARNING, "Reading first %d bytes from %s failed (errno=%d)%s.",
(int)sizeof(buffer), candidate->path, errno, removingText );
reload = true;
} else if ( !candidate->working ) {
@@ -338,6 +392,7 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
// Could not access the image with existing fd - mark for reload which will re-open the file.
// make a copy of the image struct but keep the old one around. If/When it's not being used
// anymore, it will be freed automatically.
+ logadd( LOG_DEBUG1, "Reloading image file %s", candidate->path );
dnbd3_image_t *img = calloc( sizeof(dnbd3_image_t), 1 );
img->path = strdup( candidate->path );
img->name = strdup( candidate->name );
@@ -349,19 +404,18 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
img->rid = candidate->rid;
img->users = 1;
img->working = false;
- mutex_init( &img->lock );
+ img->ref_cacheMap = NULL;
+ mutex_init( &img->lock, LOCK_IMAGE );
if ( candidate->crc32 != NULL ) {
const size_t mb = IMGSIZE_TO_HASHBLOCKS( candidate->virtualFilesize ) * sizeof(uint32_t);
img->crc32 = malloc( mb );
memcpy( img->crc32, candidate->crc32, mb );
}
- mutex_lock( &candidate->lock );
- if ( candidate->cache_map != NULL ) {
- const size_t mb = IMGSIZE_TO_MAPBYTES( candidate->virtualFilesize );
- img->cache_map = malloc( mb );
- memcpy( img->cache_map, candidate->cache_map, mb );
+ dnbd3_cache_map_t *cache = ref_get_cachemap( candidate );
+ if ( cache != NULL ) {
+ ref_setref( &img->ref_cacheMap, &cache->reference );
+ ref_put( &cache->reference );
}
- mutex_unlock( &candidate->lock );
if ( image_addToList( img ) ) {
image_release( candidate );
candidate = img;
@@ -369,19 +423,16 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
img->users = 0;
image_free( img );
}
+ // Check if image is incomplete, initialize uplink
+ if ( candidate->ref_cacheMap != NULL ) {
+ uplink_init( candidate, -1, NULL, -1 );
+ }
// readFd == -1 and working == FALSE at this point,
// this function needs some splitting up for handling as we need to run most
// of the above code again. for now we know that the next call for this
// name:rid will get the newly inserted "img" and try to re-open the file.
}
- // Check if image is incomplete, handle
- if ( candidate->cache_map != NULL ) {
- if ( candidate->uplink == NULL ) {
- uplink_init( candidate, -1, NULL, -1 );
- }
- }
-
return candidate; // We did all we can, hopefully it's working
}
@@ -391,17 +442,15 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
* Every call to image_lock() needs to be followed by a call to image_release() at some point.
* Locks on: imageListLock, _images[].lock
*/
-dnbd3_image_t* image_lock(dnbd3_image_t *image) // TODO: get rid, fix places that do image->users--
+dnbd3_image_t* image_lock(dnbd3_image_t *image)
{
if ( image == NULL ) return NULL ;
int i;
mutex_lock( &imageListLock );
for (i = 0; i < _num_images; ++i) {
if ( _images[i] == image ) {
- mutex_lock( &image->lock );
- mutex_unlock( &imageListLock );
image->users++;
- mutex_unlock( &image->lock );
+ mutex_unlock( &imageListLock );
return image;
}
}
@@ -419,12 +468,9 @@ dnbd3_image_t* image_release(dnbd3_image_t *image)
{
if ( image == NULL ) return NULL;
mutex_lock( &imageListLock );
- mutex_lock( &image->lock );
assert( image->users > 0 );
- image->users--;
- bool inUse = image->users != 0;
- mutex_unlock( &image->lock );
- if ( inUse ) { // Still in use, do nothing
+ // Decrement and check for 0
+ if ( --image->users != 0 ) { // Still in use, do nothing
mutex_unlock( &imageListLock );
return NULL;
}
@@ -439,7 +485,7 @@ dnbd3_image_t* image_release(dnbd3_image_t *image)
}
mutex_unlock( &imageListLock );
// So it wasn't in the images list anymore either, get rid of it
- if ( !inUse ) image = image_free( image );
+ image = image_free( image );
return NULL;
}
@@ -470,7 +516,6 @@ static dnbd3_image_t* image_remove(dnbd3_image_t *image)
{
bool mustFree = false;
mutex_lock( &imageListLock );
- mutex_lock( &image->lock );
for ( int i = _num_images - 1; i >= 0; --i ) {
if ( _images[i] == image ) {
_images[i] = NULL;
@@ -478,7 +523,6 @@ static dnbd3_image_t* image_remove(dnbd3_image_t *image)
}
if ( _images[i] == NULL && i + 1 == _num_images ) _num_images--;
}
- mutex_unlock( &image->lock );
mutex_unlock( &imageListLock );
if ( mustFree ) image = image_free( image );
return image;
@@ -493,17 +537,7 @@ void image_killUplinks()
mutex_lock( &imageListLock );
for (i = 0; i < _num_images; ++i) {
if ( _images[i] == NULL ) continue;
- mutex_lock( &_images[i]->lock );
- if ( _images[i]->uplink != NULL ) {
- mutex_lock( &_images[i]->uplink->queueLock );
- if ( !_images[i]->uplink->shutdown ) {
- thread_detach( _images[i]->uplink->thread );
- _images[i]->uplink->shutdown = true;
- }
- mutex_unlock( &_images[i]->uplink->queueLock );
- signal_call( _images[i]->uplink->signal );
- }
- mutex_unlock( &_images[i]->lock );
+ uplink_shutdown( _images[i] );
}
mutex_unlock( &imageListLock );
}
@@ -542,18 +576,14 @@ bool image_loadAll(char *path)
// Lock again, see if image is still there, free if required
mutex_lock( &imageListLock );
if ( ret || i >= _num_images || _images[i] == NULL || _images[i]->id != imgId ) continue;
- // Image needs to be removed
+ // File not readable but still in list -- needs to be removed
imgHandle = _images[i];
_images[i] = NULL;
if ( i + 1 == _num_images ) _num_images--;
- mutex_lock( &imgHandle->lock );
- const bool freeImg = ( imgHandle->users == 0 );
- mutex_unlock( &imgHandle->lock );
- // We unlocked, but the image has been removed from the list already, so
- // there's no way the users-counter can increase at this point.
- if ( freeImg ) {
+ if ( imgHandle->users == 0 ) {
// Image is not in use anymore, free the dangling entry immediately
- mutex_unlock( &imageListLock ); // image_free might do several fs operations; unlock
+ mutex_unlock( &imageListLock ); // image_free locks on this, and
+ // might do several fs operations; unlock
image_free( imgHandle );
mutex_lock( &imageListLock );
}
@@ -581,12 +611,10 @@ bool image_tryFreeAll()
{
mutex_lock( &imageListLock );
for (int i = _num_images - 1; i >= 0; --i) {
- if ( _images[i] != NULL && _images[i]->users == 0 ) { // XXX Data race...
+ if ( _images[i] != NULL && _images[i]->users == 0 ) {
dnbd3_image_t *image = _images[i];
_images[i] = NULL;
- mutex_unlock( &imageListLock );
image = image_free( image );
- mutex_lock( &imageListLock );
}
if ( i + 1 == _num_images && _images[i] == NULL ) _num_images--;
}
@@ -596,35 +624,34 @@ bool image_tryFreeAll()
/**
* Free image. DOES NOT check if it's in use.
- * Indirectly locks on imageListLock, image.lock, uplink.queueLock
+ * (Indirectly) locks on image.lock, uplink.queueLock
*/
static dnbd3_image_t* image_free(dnbd3_image_t *image) // Tear down an image: shut down uplink, drop cache map reference, free crc list and strings, close readFd, free the struct. Always returns NULL
{
assert( image != NULL );
- if ( !_shutdown ) {
- logadd( LOG_INFO, "Freeing image %s:%d", image->name, (int)image->rid );
- }
- //
- uplink_shutdown( image );
+ assert( image->users == 0 ); // Only images nobody holds anymore may be freed
+ logadd( ( _shutdown ? LOG_DEBUG1 : LOG_INFO ), "Freeing image %s:%d", image->name, (int)image->rid ); // Less prominent log level while the server is shutting down
+ // uplink_shutdown might return false to tell us
+ // that the shutdown is in progress. Bail out since
+ // this will get called again when the uplink is done.
+ if ( !uplink_shutdown( image ) )
+ return NULL;
mutex_lock( &image->lock );
- free( image->cache_map );
+ ref_setref( &image->ref_cacheMap, NULL ); // Drop this image's reference to the cache map rather than freeing the map directly
free( image->crc32 );
free( image->path );
free( image->name );
- image->cache_map = NULL;
image->crc32 = NULL;
image->path = NULL;
image->name = NULL;
mutex_unlock( &image->lock );
if ( image->readFd != -1 ) close( image->readFd ); // -1 means no fd is open
mutex_destroy( &image->lock );
- //
- memset( image, 0, sizeof(*image) );
free( image );
return NULL ; // Lets callers write: image = image_free( image );
}
-bool image_isHashBlockComplete(const uint8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize)
+bool image_isHashBlockComplete(atomic_uint_least8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize)
{
if ( cacheMap == NULL ) return true;
const uint64_t end = (block + 1) * HASH_BLOCK_SIZE;
@@ -731,7 +758,7 @@ static bool image_load(char *base, char *path, int withUplink)
{
int revision = -1;
struct stat st;
- uint8_t *cache_map = NULL;
+ dnbd3_cache_map_t *cache = NULL;
uint32_t *crc32list = NULL;
dnbd3_image_t *existing = NULL;
int fdImage = -1;
@@ -814,24 +841,15 @@ static bool image_load(char *base, char *path, int withUplink)
}
// 1. Allocate memory for the cache map if the image is incomplete
- cache_map = image_loadCacheMap( path, virtualFilesize );
+ cache = image_loadCacheMap( path, virtualFilesize );
// XXX: Maybe try sha-256 or 512 first if you're paranoid (to be implemented)
// 2. Load CRC-32 list of image
- bool doFullCheck = false;
uint32_t masterCrc = 0;
const int hashBlockCount = IMGSIZE_TO_HASHBLOCKS( virtualFilesize );
crc32list = image_loadCrcList( path, virtualFilesize, &masterCrc );
- // Check CRC32
- if ( crc32list != NULL ) {
- if ( !image_checkRandomBlocks( 4, fdImage, realFilesize, crc32list, cache_map ) ) {
- logadd( LOG_ERROR, "quick crc32 check of %s failed. Data corruption?", path );
- doFullCheck = true;
- }
- }
-
// Compare data just loaded to identical image we apparently already loaded
if ( existing != NULL ) {
if ( existing->realFilesize != realFilesize ) {
@@ -850,7 +868,7 @@ static bool image_load(char *base, char *path, int withUplink)
crc32list = NULL;
function_return = true;
goto load_error; // Keep existing
- } else if ( existing->cache_map != NULL && cache_map == NULL ) {
+ } else if ( existing->ref_cacheMap != NULL && cache == NULL ) {
// Just ignore that fact, if replication is really complete the cache map will be removed anyways
logadd( LOG_INFO, "Image '%s:%d' has no cache map on disk!", existing->name, (int)existing->rid );
function_return = true;
@@ -870,19 +888,20 @@ static bool image_load(char *base, char *path, int withUplink)
dnbd3_image_t *image = calloc( 1, sizeof(dnbd3_image_t) );
image->path = strdup( path );
image->name = strdup( imgName );
- image->cache_map = cache_map;
+ image->ref_cacheMap = NULL;
+ ref_setref( &image->ref_cacheMap, &cache->reference );
image->crc32 = crc32list;
image->masterCrc32 = masterCrc;
- image->uplink = NULL;
+ image->uplinkref = NULL;
image->realFilesize = realFilesize;
image->virtualFilesize = virtualFilesize;
image->rid = (uint16_t)revision;
image->users = 0;
image->readFd = -1;
- image->working = (image->cache_map == NULL );
+ image->working = ( cache == NULL );
timing_get( &image->nextCompletenessEstimate );
image->completenessEstimate = -1;
- mutex_init( &image->lock );
+ mutex_init( &image->lock, LOCK_IMAGE );
int32_t offset;
if ( stat( path, &st ) == 0 ) {
// Negatively offset atime by file modification time
@@ -894,16 +913,16 @@ static bool image_load(char *base, char *path, int withUplink)
timing_gets( &image->atime, offset );
// Prevent freeing in cleanup
- cache_map = NULL;
+ cache = NULL;
crc32list = NULL;
// Get rid of cache map if image is complete
- if ( image->cache_map != NULL ) {
+ if ( image->ref_cacheMap != NULL ) {
image_isComplete( image );
}
// Image is definitely incomplete, initialize uplink worker
- if ( image->cache_map != NULL ) {
+ if ( image->ref_cacheMap != NULL ) {
image->working = false;
if ( withUplink ) {
uplink_init( image, -1, NULL, -1 );
@@ -915,6 +934,8 @@ static bool image_load(char *base, char *path, int withUplink)
if ( image_addToList( image ) ) {
// Keep fd for reading
fdImage = -1;
+ // Check CRC32
+ image_checkRandomBlocks( image, 4 );
} else {
logadd( LOG_ERROR, "Image list full: Could not add image %s", path );
image->readFd = -1; // Keep fdImage instead, will be closed below
@@ -922,33 +943,28 @@ static bool image_load(char *base, char *path, int withUplink)
goto load_error;
}
logadd( LOG_DEBUG1, "Loaded image '%s:%d'\n", image->name, (int)image->rid );
- // CRC errors found...
- if ( doFullCheck ) {
- logadd( LOG_INFO, "Queueing full CRC32 check for '%s:%d'\n", image->name, (int)image->rid );
- integrity_check( image, -1 );
- }
-
function_return = true;
// Clean exit:
load_error: ;
if ( existing != NULL ) existing = image_release( existing );
if ( crc32list != NULL ) free( crc32list );
- if ( cache_map != NULL ) free( cache_map );
+ if ( cache != NULL ) free( cache );
if ( fdImage != -1 ) close( fdImage );
return function_return;
}
-static uint8_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize)
+static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize)
{
- uint8_t *retval = NULL;
+ dnbd3_cache_map_t *retval = NULL;
char mapFile[strlen( imagePath ) + 10 + 1];
sprintf( mapFile, "%s.map", imagePath );
int fdMap = open( mapFile, O_RDONLY );
- if ( fdMap >= 0 ) {
+ if ( fdMap != -1 ) {
const int map_size = IMGSIZE_TO_MAPBYTES( fileSize );
- retval = calloc( 1, map_size );
- const ssize_t rd = read( fdMap, retval, map_size );
+ retval = calloc( 1, sizeof(*retval) + map_size );
+ ref_init( &retval->reference, cmfree, 0 );
+ const ssize_t rd = read( fdMap, retval->map, map_size );
if ( map_size != rd ) {
logadd( LOG_WARNING, "Could only read %d of expected %d bytes of cache map of '%s'", (int)rd, (int)map_size, imagePath );
// Could not read complete map, that means the rest of the image file will be considered incomplete
@@ -1009,18 +1025,26 @@ static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t f
return retval;
}
-static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t realFilesize, uint32_t * const crc32list, uint8_t * const cache_map)
+static void image_checkRandomBlocks(dnbd3_image_t *image, const int count)
{
+ if ( image->crc32 == NULL )
+ return;
// This checks the first block and (up to) count - 1 random blocks for corruption
// via the known crc32 list. This is very sloppy and is merely supposed to detect
// accidental corruption due to broken dnbd3-proxy functionality or file system
- // corruption.
+ // corruption, or people replacing/updating images which is a very stupid thing.
assert( count > 0 );
- const int hashBlocks = IMGSIZE_TO_HASHBLOCKS( realFilesize );
- int blocks[count + 1];
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ const int hashBlocks = IMGSIZE_TO_HASHBLOCKS( image->virtualFilesize );
+ int blocks[count];
int index = 0, j;
int block;
- if ( image_isHashBlockComplete( cache_map, 0, realFilesize ) ) blocks[index++] = 0;
+ if ( image_isHashBlockComplete( cache->map, 0, image->virtualFilesize ) ) {
+ blocks[index++] = 0;
+ }
+ if ( hashBlocks > 1 && image_isHashBlockComplete( cache->map, hashBlocks - 1, image->virtualFilesize ) ) {
+ blocks[index++] = hashBlocks - 1;
+ }
int tries = count * 5; // Try only so many times to find a non-duplicate complete block
while ( index + 1 < count && --tries > 0 ) {
block = rand() % hashBlocks; // Random block
@@ -1028,11 +1052,15 @@ static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t
if ( blocks[j] == block ) goto while_end;
}
// Block complete? If yes, add to list
- if ( image_isHashBlockComplete( cache_map, block, realFilesize ) ) blocks[index++] = block;
+ if ( image_isHashBlockComplete( cache->map, block, image->virtualFilesize ) ) {
+ blocks[index++] = block;
+ }
while_end: ;
}
- blocks[MIN(index, count)] = -1; // End of array has to be marked by a -1
- return image_checkBlocksCrc32( fdImage, crc32list, blocks, realFilesize ); // Return result of check
+ ref_put( &cache->reference );
+ for ( int i = 0; i < index; ++i ) {
+ integrity_check( image, blocks[i], true );
+ }
}
/**
@@ -1191,7 +1219,7 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
dnbd3_host_t servers[REP_NUM_SRV];
int uplinkSock = -1;
dnbd3_host_t uplinkServer;
- const int count = altservers_getListForUplink( servers, REP_NUM_SRV, false );
+ const int count = altservers_getHostListForReplication( name, servers, REP_NUM_SRV );
uint16_t remoteProtocolVersion;
uint16_t remoteRid = revision;
uint64_t remoteImageSize;
@@ -1504,9 +1532,9 @@ json_t* image_getListAsJson()
json_t *imagesJson = json_array();
json_t *jsonImage;
int i;
- char uplinkName[100] = { 0 };
+ char uplinkName[100];
uint64_t bytesReceived;
- int users, completeness, idleTime;
+ int completeness, idleTime;
declare_now;
mutex_lock( &imageListLock );
@@ -1514,27 +1542,26 @@ json_t* image_getListAsJson()
if ( _images[i] == NULL ) continue;
dnbd3_image_t *image = _images[i];
mutex_lock( &image->lock );
- mutex_unlock( &imageListLock );
- users = image->users;
idleTime = (int)timing_diff( &image->atime, &now );
completeness = image_getCompletenessEstimate( image );
- if ( image->uplink == NULL ) {
+ mutex_unlock( &image->lock );
+ dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref );
+ if ( uplink == NULL ) {
bytesReceived = 0;
uplinkName[0] = '\0';
} else {
- bytesReceived = image->uplink->bytesReceived;
- if ( image->uplink->fd == -1 || !host_to_string( &image->uplink->currentServer, uplinkName, sizeof(uplinkName) ) ) {
+ bytesReceived = uplink->bytesReceived;
+ if ( !uplink_getHostString( uplink, uplinkName, sizeof(uplinkName) ) ) {
uplinkName[0] = '\0';
}
+ ref_put( &uplink->reference );
}
- image->users++; // Prevent freeing after we unlock
- mutex_unlock( &image->lock );
jsonImage = json_pack( "{sisssisisisisI}",
"id", image->id, // id, name, rid never change, so access them without locking
"name", image->name,
"rid", (int) image->rid,
- "users", users,
+ "users", image->users,
"complete", completeness,
"idle", idleTime,
"size", (json_int_t)image->virtualFilesize );
@@ -1546,8 +1573,6 @@ json_t* image_getListAsJson()
}
json_array_append_new( imagesJson, jsonImage );
- image = image_release( image ); // Since we did image->users++;
- mutex_lock( &imageListLock );
}
mutex_unlock( &imageListLock );
return imagesJson;
@@ -1556,30 +1581,37 @@ json_t* image_getListAsJson()
/**
* Get completeness of an image in percent. Only estimated, not exact.
* Returns: 0-100
- * DOES NOT LOCK, so make sure to do so before calling
*/
int image_getCompletenessEstimate(dnbd3_image_t * const image) // Estimate how much of the image is cached, derived from the cache map; result is cached in image->completenessEstimate for a while
{
assert( image != NULL );
- if ( image->cache_map == NULL ) return image->working ? 100 : 0;
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image ); // Holds a reference; every return path below that got a non-NULL map calls ref_put
+ if ( cache == NULL ) // No cache map: report 100 if the image is marked working, otherwise 0
+ return image->working ? 100 : 0;
+ const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
+ if ( unlikely( len == 0 ) ) { // Guards the division by len further down
+ ref_put( &cache->reference );
+ return 0;
+ }
declare_now;
if ( !timing_reached( &image->nextCompletenessEstimate, &now ) ) {
// Since this operation is relatively expensive, we cache the result for a while
+ ref_put( &cache->reference );
return image->completenessEstimate;
}
int i;
int percent = 0; // NOTE(review): grows by up to 100 per map byte; may exceed INT_MAX for very large cache maps -- confirm and consider a wider type
- const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
- if ( len == 0 ) return 0;
for ( i = 0; i < len; ++i ) {
- if ( image->cache_map[i] == 0xff ) {
+ const uint8_t v = atomic_load_explicit( &cache->map[i], memory_order_relaxed ); // Relaxed load: this is only an estimate
+ if ( v == 0xff ) {
percent += 100; // All bits of this map byte set: counted as fully cached
- } else if ( image->cache_map[i] != 0 ) {
+ } else if ( v != 0 ) {
percent += 50; // Some bits set: counted as half cached
}
}
+ ref_put( &cache->reference );
image->completenessEstimate = percent / len; // Average over all map bytes
- timing_set( &image->nextCompletenessEstimate, &now, 8 + rand() % 32 );
+ timing_set( &image->nextCompletenessEstimate, &now, 4 + rand() % 16 ); // Next recalculation after a randomized offset of 4..19 (units as defined by timing_set -- TODO confirm)
return image->completenessEstimate;
}
@@ -1611,7 +1643,7 @@ bool image_checkBlocksCrc32(const int fd, uint32_t *crc32list, const int *blocks
static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_t realFilesize, uint32_t *crc)
{
// Make buffer 4k aligned in case fd has O_DIRECT set
-#define BSIZE 262144
+#define BSIZE (512*1024)
char rawBuffer[BSIZE + DNBD3_BLOCK_SIZE];
char * const buffer = (char*)( ( (uintptr_t)rawBuffer + ( DNBD3_BLOCK_SIZE - 1 ) ) & ~( DNBD3_BLOCK_SIZE - 1 ) );
// How many bytes to read from the input file
@@ -1669,7 +1701,7 @@ bool image_ensureDiskSpaceLocked(uint64_t size, bool force)
* TODO: Store last access time of images. Currently the
* last access time is reset to the file modification time
* on server restart. Thus it will
- * currently only delete images if server uptime is > 10 hours.
+ * currently only delete images if server uptime is > 24 hours.
* This can be overridden by setting force to true, in case
* free space is desperately needed.
* Return true iff enough space is available. false in random other cases
@@ -1679,48 +1711,55 @@ static bool image_ensureDiskSpace(uint64_t size, bool force)
for ( int maxtries = 0; maxtries < 20; ++maxtries ) {
uint64_t available;
if ( !file_freeDiskSpace( _basePath, NULL, &available ) ) {
- const int e = errno;
- logadd( LOG_WARNING, "Could not get free disk space (errno %d), will assume there is enough space left... ;-)\n", e );
+ logadd( LOG_WARNING, "Could not get free disk space (errno %d), will assume there is enough space left... ;-)\n", errno );
return true;
}
- if ( available > size ) return true;
- if ( !force && dnbd3_serverUptime() < 10 * 3600 ) {
- logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, but server uptime < 10 hours...", (int)(available / (1024ll * 1024ll)),
- (int)(size / (1024 * 1024)) );
+ if ( available > size )
+ return true; // Yay
+ if ( !_isProxy || _autoFreeDiskSpaceDelay == -1 )
+ return false; // If not in proxy mode at all, or explicitly disabled, never delete anything
+ if ( !force && dnbd3_serverUptime() < (uint32_t)_autoFreeDiskSpaceDelay ) {
+ logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, but server uptime < %d minutes...", (int)(available / (1024ll * 1024ll)),
+ (int)(size / (1024 * 1024)), _autoFreeDiskSpaceDelay / 60 );
return false;
}
logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, freeing an image...", (int)(available / (1024ll * 1024ll)),
(int)(size / (1024 * 1024)) );
// Find least recently used image
dnbd3_image_t *oldest = NULL;
- int i; // XXX improve locking
+ int i;
+ mutex_lock( &imageListLock );
for (i = 0; i < _num_images; ++i) {
- if ( _images[i] == NULL ) continue;
- dnbd3_image_t *current = image_lock( _images[i] );
+ dnbd3_image_t *current = _images[i];
if ( current == NULL ) continue;
- if ( current->users == 1 ) { // Just from the lock above
+ if ( current->users == 0 ) { // Not in use :-)
if ( oldest == NULL || timing_1le2( &current->atime, &oldest->atime ) ) {
// Oldest access time so far
oldest = current;
}
}
- current = image_release( current );
+ }
+ if ( oldest != NULL ) {
+ oldest->users++;
+ }
+ mutex_unlock( &imageListLock );
+ if ( oldest == NULL ) {
+ logadd( LOG_INFO, "All images are currently in use :-(" );
+ return false;
}
declare_now;
- if ( oldest == NULL || ( !_sparseFiles && timing_diff( &oldest->atime, &now ) < 86400 ) ) {
- if ( oldest == NULL ) {
- logadd( LOG_INFO, "All images are currently in use :-(" );
- } else {
- logadd( LOG_INFO, "Won't free any image, all have been in use in the past 24 hours :-(" );
- }
+ if ( !_sparseFiles && timing_diff( &oldest->atime, &now ) < 86400 ) {
+ logadd( LOG_INFO, "Won't free any image, all have been in use in the past 24 hours :-(" );
+ image_release( oldest ); // We did users++ above; image might have to be freed entirely
return false;
}
- oldest = image_lock( oldest );
- if ( oldest == NULL ) continue; // Image freed in the meantime? Try again
logadd( LOG_INFO, "'%s:%d' has to go!", oldest->name, (int)oldest->rid );
- char *filename = strdup( oldest->path );
- oldest = image_remove( oldest );
- oldest = image_release( oldest );
+ char *filename = strdup( oldest->path ); // Copy name as we remove the image first
+ oldest = image_remove( oldest ); // Remove from list first...
+ oldest = image_release( oldest ); // Decrease users counter; if it falls to 0, image will be freed
+ // Technically the image might have been grabbed again, but the chances of this
+ // should be close to zero anyway, since the image went unused for more than 24 hours (unless _sparseFiles is set).
+ // A proper fix would be a "delete" flag in the image struct that is checked in image_free.
unlink( filename );
size_t len = strlen( filename ) + 10;
char buffer[len];
@@ -1735,62 +1774,52 @@ static bool image_ensureDiskSpace(uint64_t size, bool force)
return false;
}
-void image_closeUnusedFd()
+#define FDCOUNT (400)
+static void* closeUnusedFds(void* nix UNUSED)
{
- int fd, i;
+ if ( !_closeUnusedFd )
+ return NULL;
ticks deadline;
timing_gets( &deadline, -UNUSED_FD_TIMEOUT );
- char imgstr[300];
+ int fds[FDCOUNT];
+ int fdindex = 0;
mutex_lock( &imageListLock );
- for (i = 0; i < _num_images; ++i) {
+ for ( int i = 0; i < _num_images; ++i ) {
dnbd3_image_t * const image = _images[i];
- if ( image == NULL )
+ if ( image == NULL || image->readFd == -1 )
continue;
- mutex_lock( &image->lock );
- mutex_unlock( &imageListLock );
- if ( image->users == 0 && image->uplink == NULL && timing_reached( &image->atime, &deadline ) ) {
- snprintf( imgstr, sizeof(imgstr), "%s:%d", image->name, (int)image->rid );
- fd = image->readFd;
- image->readFd = -1;
- } else {
- fd = -1;
- }
- mutex_unlock( &image->lock );
- if ( fd != -1 ) {
- close( fd );
- logadd( LOG_DEBUG1, "Inactive fd closed for %s", imgstr );
+ // TODO: Also close for idle uplinks (uplink_connectionShouldShutdown)
+ // TODO: And close writeFd for idle uplinks....
+ if ( image->users == 0 && image->uplinkref == NULL && timing_reached( &image->atime, &deadline ) ) {
+ logadd( LOG_DEBUG1, "Inactive fd closed for %s:%d", image->name, (int)image->rid );
+ fds[fdindex++] = image->readFd;
+ image->readFd = -1; // Not a race; image->users is 0 and to increase it you need imageListLock
+ if ( fdindex == FDCOUNT )
+ break;
}
- mutex_lock( &imageListLock );
}
mutex_unlock( &imageListLock );
+ // Do this after unlock since close might block
+ for ( int i = 0; i < fdindex; ++i ) {
+ close( fds[i] );
+ }
+ return NULL;
+}
+
+static void allocCacheMap(dnbd3_image_t *image, bool complete)
+{
+ const uint8_t val = complete ? 0xff : 0;
+ const int byteSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
+ dnbd3_cache_map_t *cache = malloc( sizeof(*cache) + byteSize );
+ ref_init( &cache->reference, cmfree, 0 );
+ memset( cache->map, val, byteSize );
+ mutex_lock( &image->lock );
+ if ( image->ref_cacheMap != NULL ) {
+ logadd( LOG_WARNING, "BUG: allocCacheMap called but there already is a cache map for %s:%d", image->name, (int)image->rid );
+ free( cache );
+ } else {
+ ref_setref( &image->ref_cacheMap, &cache->reference );
+ }
+ mutex_unlock( &image->lock );
}
-/*
- void image_find_latest()
- {
- // Not in array or most recent rid is requested, try file system
- if (revision != 0) {
- // Easy case - specific RID
- char
- } else {
- // Determine base directory where the image in question has to reside.
- // Eg, the _basePath is "/srv/", requested image is "rz/ubuntu/default-13.04"
- // Then searchPath has to be set to "/srv/rz/ubuntu"
- char searchPath[strlen(_basePath) + len + 1];
- char *lastSlash = strrchr(name, '/');
- char *baseName; // Name of the image. In the example above, it will be "default-13.04"
- if ( lastSlash == NULL ) {
- *searchPath = '\0';
- baseName = name;
- } else {
- char *from = name, *to = searchPath;
- while (from < lastSlash) *to++ = *from++;
- *to = '\0';
- baseName = lastSlash + 1;
- }
- // Now we have the search path in our real file system and the expected image name.
- // The revision naming sceme is <IMAGENAME>.r<RID>, so if we're looking for revision 13,
- // our example image has to be named default-13.04.r13
- }
- }
- */