summaryrefslogtreecommitdiffstats
path: root/src/server/image.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/server/image.c')
-rw-r--r--src/server/image.c685
1 files changed, 490 insertions, 195 deletions
diff --git a/src/server/image.c b/src/server/image.c
index 16dae45..51fd5b6 100644
--- a/src/server/image.c
+++ b/src/server/image.c
@@ -5,9 +5,9 @@
#include "locks.h"
#include "integrity.h"
#include "altservers.h"
-#include "../shared/protocol.h"
-#include "../shared/timing.h"
-#include "../shared/crc32.h"
+#include <dnbd3/shared/protocol.h>
+#include <dnbd3/shared/timing.h>
+#include <dnbd3/shared/crc32.h>
#include "reference.h"
#include <assert.h>
@@ -46,16 +46,21 @@ static dnbd3_image_t* image_remove(dnbd3_image_t *image);
static dnbd3_image_t* image_free(dnbd3_image_t *image);
static bool image_load_all_internal(char *base, char *path);
static bool image_addToList(dnbd3_image_t *image);
-static bool image_load(char *base, char *path, int withUplink);
+static bool image_load(char *base, char *path, bool withUplink);
static bool image_clone(int sock, char *name, uint16_t revision, uint64_t imageSize);
static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_t realFilesize, uint32_t *crc);
static bool image_ensureDiskSpace(uint64_t size, bool force);
static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize);
static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t fileSize, uint32_t *masterCrc);
-static void image_checkRandomBlocks(dnbd3_image_t *image, const int count);
+static bool image_checkRandomBlocks(dnbd3_image_t *image, const int count, int fromFd);
static void* closeUnusedFds(void*);
+static bool isImageFromUpstream(dnbd3_image_t *image);
+static void* saveLoadAllCacheMaps(void*);
+static void saveCacheMap(dnbd3_image_t *image);
static void allocCacheMap(dnbd3_image_t *image, bool complete);
+static void saveMetaData(dnbd3_image_t *image, ticks *now, time_t walltime);
+static void loadImageMeta(dnbd3_image_t *image);
static void cmfree(ref *ref)
{
@@ -73,6 +78,7 @@ void image_serverStartup()
mutex_init( &remoteCloneLock, LOCK_REMOTE_CLONE );
mutex_init( &reloadLock, LOCK_RELOAD );
server_addJob( &closeUnusedFds, NULL, 10, 900 );
+ server_addJob( &saveLoadAllCacheMaps, NULL, 9, 20 );
}
/**
@@ -118,39 +124,35 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
const uint64_t firstByteInMap = start >> 15;
const uint64_t lastByteInMap = (end - 1) >> 15;
uint64_t pos;
- // First byte
- uint8_t fb = 0, lb = 0;
- for ( pos = start; firstByteInMap == (pos >> 15) && pos < end; pos += DNBD3_BLOCK_SIZE ) {
- const int map_x = (pos >> 12) & 7; // mod 8
- const uint8_t bit_mask = (uint8_t)( 1 << map_x );
- fb |= bit_mask;
- }
- // Last byte
- if ( lastByteInMap != firstByteInMap ) {
- for ( pos = lastByteInMap << 15; pos < end; pos += DNBD3_BLOCK_SIZE ) {
- assert( lastByteInMap == (pos >> 15) );
- const int map_x = (pos >> 12) & 7; // mod 8
- const uint8_t bit_mask = (uint8_t)( 1 << map_x );
- lb |= bit_mask;
- }
- }
- atomic_thread_fence( memory_order_acquire );
- if ( set ) {
- uint8_t fo = atomic_fetch_or_explicit( &cache->map[firstByteInMap], fb, memory_order_relaxed );
- uint8_t lo = atomic_fetch_or_explicit( &cache->map[lastByteInMap], lb, memory_order_relaxed );
- setNewBlocks = ( fo != cache->map[firstByteInMap] || lo != cache->map[lastByteInMap] );
+ // First and last byte masks
+ const uint8_t fb = (uint8_t)(0xff << ((start >> 12) & 7));
+ const uint8_t lb = (uint8_t)(~(0xff << ((((end - 1) >> 12) & 7) + 1)));
+ if ( firstByteInMap == lastByteInMap ) {
+ if ( set ) {
+ uint8_t o = atomic_fetch_or( &cache->map[firstByteInMap], (uint8_t)(fb & lb) );
+ setNewBlocks = o != ( o | (fb & lb) );
+ } else {
+ atomic_fetch_and( &cache->map[firstByteInMap], (uint8_t)~(fb & lb) );
+ }
} else {
- atomic_fetch_and_explicit( &cache->map[firstByteInMap], (uint8_t)~fb, memory_order_relaxed );
- atomic_fetch_and_explicit( &cache->map[lastByteInMap], (uint8_t)~lb, memory_order_relaxed );
- }
- const uint8_t nval = set ? 0xff : 0;
- // Everything in between
- for ( pos = firstByteInMap + 1; pos < lastByteInMap; ++pos ) {
- if ( atomic_exchange_explicit( &cache->map[pos], nval, memory_order_relaxed ) != nval && set ) {
- setNewBlocks = true;
+ atomic_thread_fence( memory_order_acquire );
+ if ( set ) {
+ uint8_t fo = atomic_fetch_or_explicit( &cache->map[firstByteInMap], fb, memory_order_relaxed );
+ uint8_t lo = atomic_fetch_or_explicit( &cache->map[lastByteInMap], lb, memory_order_relaxed );
+ setNewBlocks = ( fo != ( fo | fb ) || lo != ( lo | lb ) );
+ } else {
+ atomic_fetch_and_explicit( &cache->map[firstByteInMap], (uint8_t)~fb, memory_order_relaxed );
+ atomic_fetch_and_explicit( &cache->map[lastByteInMap], (uint8_t)~lb, memory_order_relaxed );
+ }
+ // Everything in between
+ const uint8_t nval = set ? 0xff : 0;
+ for ( pos = firstByteInMap + 1; pos < lastByteInMap; ++pos ) {
+ if ( atomic_exchange_explicit( &cache->map[pos], nval, memory_order_relaxed ) != nval && set ) {
+ setNewBlocks = true;
+ }
}
+ atomic_thread_fence( memory_order_release );
}
- atomic_thread_fence( memory_order_release );
if ( setNewBlocks && image->crc32 != NULL ) {
// If setNewBlocks is set, at least one of the blocks was not cached before, so queue all hash blocks
// for checking, even though this might lead to checking some hash block again, if it was
@@ -164,6 +166,8 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
integrity_check( image, block, false );
}
}
+ } else if ( !set ) {
+ cache->dirty = true;
}
ref_put( &cache->reference );
}
@@ -239,35 +243,74 @@ bool image_isComplete(dnbd3_image_t *image)
*/
bool image_ensureOpen(dnbd3_image_t *image)
{
- if ( image->readFd != -1 ) return image;
- int newFd = open( image->path, O_RDONLY );
+ bool sizeChanged = false;
+ if ( image->readFd != -1 && !image->problem.changed )
+ return true;
+ int newFd = image->readFd == -1 ? open( image->path, O_RDONLY ) : dup( image->readFd );
if ( newFd == -1 ) {
- logadd( LOG_WARNING, "Cannot open %s for reading", image->path );
+ if ( !image->problem.read ) {
+ logadd( LOG_WARNING, "[access] Cannot open '%s' for reading (errno=%d)", image->path, errno );
+ image->problem.read = true;
+ }
} else {
- // Check size
+ // Check size + read access
+ char buffer[100];
const off_t flen = lseek( newFd, 0, SEEK_END );
if ( flen == -1 ) {
- logadd( LOG_WARNING, "Could not seek to end of %s (errno %d)", image->path, errno );
+ if ( !image->problem.read ) {
+ logadd( LOG_WARNING, "Could not seek to end of %s (errno=%d)", image->path, errno );
+ image->problem.read = true;
+ }
close( newFd );
newFd = -1;
} else if ( (uint64_t)flen != image->realFilesize ) {
- logadd( LOG_WARNING, "Size of active image with closed fd changed from %" PRIu64 " to %" PRIu64, image->realFilesize, (uint64_t)flen );
+ if ( !image->problem.changed ) {
+ logadd( LOG_WARNING, "Size of active image with closed fd changed from %" PRIu64 " to %" PRIu64,
+ image->realFilesize, (uint64_t)flen );
+ }
+ sizeChanged = true;
+ } else if ( pread( newFd, buffer, sizeof(buffer), 0 ) == -1 ) {
+ if ( !image->problem.read ) {
+ logadd( LOG_WARNING, "Reading first %d bytes from %s failed (errno=%d)",
+ (int)sizeof(buffer), image->path, errno );
+ image->problem.read = true;
+ }
close( newFd );
newFd = -1;
}
}
if ( newFd == -1 ) {
- mutex_lock( &image->lock );
- image->working = false;
- mutex_unlock( &image->lock );
+ if ( sizeChanged ) {
+ image->problem.changed = true;
+ }
return false;
}
+
+ // Re-opened. Check if the "size/content changed" flag was set before and if so, check crc32,
+ // but only if the size we just got above is correct.
+ if ( image->problem.changed && !sizeChanged ) {
+ if ( image->crc32 == NULL ) {
+ // Cannot verify further, hope for the best
+ image->problem.changed = false;
+ logadd( LOG_DEBUG1, "Size of image %s:%d changed back to expected value", PIMG(image) );
+ } else if ( image_checkRandomBlocks( image, 1, newFd ) ) {
+ // This should have checked the first block (if complete) -> All is well again
+ image->problem.changed = false;
+ logadd( LOG_DEBUG1, "Size and CRC of image %s:%d changed back to expected value", PIMG(image) );
+ }
+ } else {
+ image->problem.changed = sizeChanged;
+ }
+
mutex_lock( &image->lock );
if ( image->readFd == -1 ) {
image->readFd = newFd;
+ image->problem.read = false;
mutex_unlock( &image->lock );
} else {
- // There was a race while opening the file (happens cause not locked cause blocking), we lost the race so close new fd and proceed
+ // There was a race while opening the file (happens cause not locked cause blocking),
+ // we lost the race so close new fd and proceed.
+ // *OR* we dup()'ed above for cheating when the image changed before.
mutex_unlock( &image->lock );
close( newFd );
}
@@ -296,10 +339,9 @@ dnbd3_image_t* image_byId(int imgId)
* point...
* Locks on: imageListLock, _images[].lock
*/
-dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
+dnbd3_image_t* image_get(const char *name, uint16_t revision, bool ensureFdOpen)
{
int i;
- const char *removingText = _removeMissingImages ? ", removing from list" : "";
dnbd3_image_t *candidate = NULL;
// Simple sanity check
const size_t slen = strlen( name );
@@ -326,84 +368,36 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
candidate->users++;
mutex_unlock( &imageListLock );
- // Found, see if it works
- // TODO: Also make sure a non-working image still has old fd open but created a new one and removed itself from the list
- // TODO: But remember size-changed images forever
- if ( candidate->working || checkIfWorking ) {
- // Is marked working, but might not have an fd open
- if ( !image_ensureOpen( candidate ) ) {
- mutex_lock( &candidate->lock );
- timing_get( &candidate->lastWorkCheck );
- mutex_unlock( &candidate->lock );
- if ( _removeMissingImages ) {
- candidate = image_remove( candidate ); // No release here, the image is still returned and should be released by caller
- }
- return candidate;
- }
- }
-
- if ( !checkIfWorking ) return candidate; // Not interested in re-cechking working state
-
- // ...not working...
-
- // Don't re-check too often
- mutex_lock( &candidate->lock );
- bool check;
- declare_now;
- check = timing_diff( &candidate->lastWorkCheck, &now ) > NONWORKING_RECHECK_INTERVAL_SECONDS;
- if ( check ) {
- candidate->lastWorkCheck = now;
- }
- mutex_unlock( &candidate->lock );
- if ( !check ) {
+ if ( !ensureFdOpen ) // Don't want to re-check
return candidate;
- }
- // reaching this point means:
- // 1) We should check if the image is working, it might or might not be in working state right now
- // 2) The image is open for reading (or at least was at some point, the fd might be stale if images lie on an NFS share etc.)
- // 3) We made sure not to re-check this image too often
-
- // Common for ro and rw images: Size check, read check
- const off_t len = lseek( candidate->readFd, 0, SEEK_END );
- bool reload = false;
- if ( len == -1 ) {
- logadd( LOG_WARNING, "lseek() on %s failed (errno=%d)%s.", candidate->path, errno, removingText );
- reload = true;
- } else if ( (uint64_t)len != candidate->realFilesize ) {
- logadd( LOG_WARNING, "Size of %s changed at runtime, keeping disabled! Expected: %" PRIu64 ", found: %" PRIu64
- ". Try sending SIGHUP to server if you know what you're doing.",
- candidate->path, candidate->realFilesize, (uint64_t)len );
- } else {
- // Seek worked, file size is same, now see if we can read from file
- char buffer[100];
- if ( pread( candidate->readFd, buffer, sizeof(buffer), 0 ) == -1 ) {
- logadd( LOG_WARNING, "Reading first %d bytes from %s failed (errno=%d)%s.",
- (int)sizeof(buffer), candidate->path, errno, removingText );
- reload = true;
- } else if ( !candidate->working ) {
- // Seems everything is fine again \o/
- candidate->working = true;
- logadd( LOG_INFO, "Changed state of %s:%d to 'working'", candidate->name, candidate->rid );
- }
- }
+ if ( image_ensureOpen( candidate ) && !candidate->problem.read )
+ return candidate; // We have a read fd and no read or changed problems
- if ( reload ) {
+ // -- image could not be opened again, or is open but has problem --
+
+ if ( _removeMissingImages && !file_isReadable( candidate->path ) ) {
+ candidate = image_remove( candidate );
+ // No image_release here, the image is still returned and should be released by caller
+ } else if ( candidate->readFd != -1 ) {
+ // We cannot just close the fd as it might be in use. Make a copy and remove old entry.
+ candidate = image_remove( candidate );
// Could not access the image with exising fd - mark for reload which will re-open the file.
// make a copy of the image struct but keep the old one around. If/When it's not being used
// anymore, it will be freed automatically.
- logadd( LOG_DEBUG1, "Reloading image file %s", candidate->path );
+ logadd( LOG_DEBUG1, "Reloading image file %s because of read problem/changed", candidate->path );
dnbd3_image_t *img = calloc( sizeof(dnbd3_image_t), 1 );
img->path = strdup( candidate->path );
img->name = strdup( candidate->name );
img->virtualFilesize = candidate->virtualFilesize;
img->realFilesize = candidate->realFilesize;
- img->atime = now;
+ timing_get( &img->atime );
img->masterCrc32 = candidate->masterCrc32;
img->readFd = -1;
img->rid = candidate->rid;
img->users = 1;
- img->working = false;
+ img->problem.read = true;
+ img->problem.changed = candidate->problem.changed;
img->ref_cacheMap = NULL;
mutex_init( &img->lock, LOCK_IMAGE );
if ( candidate->crc32 != NULL ) {
@@ -419,18 +413,17 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
if ( image_addToList( img ) ) {
image_release( candidate );
candidate = img;
+ // Check if image is incomplete, initialize uplink
+ if ( candidate->ref_cacheMap != NULL ) {
+ uplink_init( candidate, -1, NULL, -1 );
+ }
+ // Try again with new instance
+ image_ensureOpen( candidate );
} else {
img->users = 0;
image_free( img );
}
- // Check if image is incomplete, initialize uplink
- if ( candidate->ref_cacheMap != NULL ) {
- uplink_init( candidate, -1, NULL, -1 );
- }
- // readFd == -1 and working == FALSE at this point,
- // this function needs some splitting up for handling as we need to run most
- // of the above code again. for now we know that the next call for this
- // name:rid will get ne newly inserted "img" and try to re-open the file.
+ // readFd == -1 and problem.read == true
}
return candidate; // We did all we can, hopefully it's working
@@ -449,6 +442,7 @@ dnbd3_image_t* image_lock(dnbd3_image_t *image)
mutex_lock( &imageListLock );
for (i = 0; i < _num_images; ++i) {
if ( _images[i] == image ) {
+ assert( _images[i]->id == image->id );
image->users++;
mutex_unlock( &imageListLock );
return image;
@@ -479,6 +473,7 @@ dnbd3_image_t* image_release(dnbd3_image_t *image)
// responsible for freeing it
for (int i = 0; i < _num_images; ++i) {
if ( _images[i] == image ) { // Found, do nothing
+ assert( _images[i]->id == image->id );
mutex_unlock( &imageListLock );
return NULL;
}
@@ -518,6 +513,7 @@ static dnbd3_image_t* image_remove(dnbd3_image_t *image)
mutex_lock( &imageListLock );
for ( int i = _num_images - 1; i >= 0; --i ) {
if ( _images[i] == image ) {
+ assert( _images[i]->id == image->id );
_images[i] = NULL;
mustFree = ( image->users == 0 );
}
@@ -630,12 +626,18 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image)
{
assert( image != NULL );
assert( image->users == 0 );
- logadd( ( _shutdown ? LOG_DEBUG1 : LOG_INFO ), "Freeing image %s:%d", image->name, (int)image->rid );
+ logadd( ( _shutdown ? LOG_DEBUG1 : LOG_INFO ), "Freeing image %s:%d", PIMG(image) );
// uplink_shutdown might return false to tell us
// that the shutdown is in progress. Bail out since
// this will get called again when the uplink is done.
if ( !uplink_shutdown( image ) )
return NULL;
+ if ( isImageFromUpstream( image ) ) {
+ saveMetaData( image, NULL, 0 );
+ if ( image->ref_cacheMap != NULL ) {
+ saveCacheMap( image );
+ }
+ }
mutex_lock( &image->lock );
ref_setref( &image->ref_cacheMap, NULL );
free( image->crc32 );
@@ -700,7 +702,8 @@ static bool image_load_all_internal(char *base, char *path)
while ( !_shutdown && (entryPtr = readdir( dir )) != NULL ) {
entry = *entryPtr;
- if ( strcmp( entry.d_name, "." ) == 0 || strcmp( entry.d_name, ".." ) == 0 ) continue;
+ if ( entry.d_name[0] == '.' )
+ continue; // No hidden files, no . or ..
if ( strlen( entry.d_name ) > SUBDIR_LEN ) {
logadd( LOG_WARNING, "Skipping entry %s: Too long (max %d bytes)", entry.d_name, (int)SUBDIR_LEN );
continue;
@@ -717,7 +720,7 @@ static bool image_load_all_internal(char *base, char *path)
if ( S_ISDIR( st.st_mode ) ) {
image_load_all_internal( base, subpath ); // Recurse
} else if ( !isForbiddenExtension( subpath ) ) {
- image_load( base, subpath, true ); // Load image if possible
+ image_load( base, subpath, false ); // Load image if possible
}
}
closedir( dir );
@@ -756,10 +759,9 @@ static bool image_addToList(dnbd3_image_t *image)
* Note that this is NOT THREAD SAFE so make sure its always
* called on one thread only.
*/
-static bool image_load(char *base, char *path, int withUplink)
+static bool image_load(char *base, char *path, bool withUplink)
{
int revision = -1;
- struct stat st;
dnbd3_cache_map_t *cache = NULL;
uint32_t *crc32list = NULL;
dnbd3_image_t *existing = NULL;
@@ -824,7 +826,9 @@ static bool image_load(char *base, char *path, int withUplink)
fdImage = open( path, O_RDONLY );
}
if ( fdImage == -1 ) {
- logadd( LOG_ERROR, "Could not open '%s' for reading...", path );
+ if ( errno != ENOENT ) {
+ logadd( LOG_ERROR, "[load] Cannot open '%s' for reading (errno=%d)", path, errno );
+ }
goto load_error;
}
// Determine file size
@@ -855,16 +859,16 @@ static bool image_load(char *base, char *path, int withUplink)
// Compare data just loaded to identical image we apparently already loaded
if ( existing != NULL ) {
if ( existing->realFilesize != realFilesize ) {
- logadd( LOG_WARNING, "Size of image '%s:%d' has changed.", existing->name, (int)existing->rid );
+ logadd( LOG_WARNING, "Size of image '%s:%d' has changed.", PIMG(existing) );
// Image will be replaced below
} else if ( existing->crc32 != NULL && crc32list != NULL
&& memcmp( existing->crc32, crc32list, sizeof(uint32_t) * hashBlockCount ) != 0 ) {
- logadd( LOG_WARNING, "CRC32 list of image '%s:%d' has changed.", existing->name, (int)existing->rid );
+ logadd( LOG_WARNING, "CRC32 list of image '%s:%d' has changed.", PIMG(existing) );
logadd( LOG_WARNING, "The image will be reloaded, but you should NOT replace existing images while the server is running." );
logadd( LOG_WARNING, "Actually even if it's not running this should never be done. Use a new RID instead!" );
// Image will be replaced below
} else if ( existing->crc32 == NULL && crc32list != NULL ) {
- logadd( LOG_INFO, "Found CRC-32 list for already loaded image '%s:%d', adding...", existing->name, (int)existing->rid );
+ logadd( LOG_INFO, "Found CRC-32 list for already loaded image '%s:%d', adding...", PIMG(existing) );
existing->crc32 = crc32list;
existing->masterCrc32 = masterCrc;
crc32list = NULL;
@@ -872,7 +876,7 @@ static bool image_load(char *base, char *path, int withUplink)
goto load_error; // Keep existing
} else if ( existing->ref_cacheMap != NULL && cache == NULL ) {
// Just ignore that fact, if replication is really complete the cache map will be removed anyways
- logadd( LOG_INFO, "Image '%s:%d' has no cache map on disk!", existing->name, (int)existing->rid );
+ logadd( LOG_INFO, "Image '%s:%d' has no cache map on disk!", PIMG(existing) );
function_return = true;
goto load_error; // Keep existing
} else {
@@ -900,19 +904,10 @@ static bool image_load(char *base, char *path, int withUplink)
image->rid = (uint16_t)revision;
image->users = 0;
image->readFd = -1;
- image->working = ( cache == NULL );
timing_get( &image->nextCompletenessEstimate );
image->completenessEstimate = -1;
mutex_init( &image->lock, LOCK_IMAGE );
- int32_t offset;
- if ( stat( path, &st ) == 0 ) {
- // Negatively offset atime by file modification time
- offset = (int32_t)( st.st_mtime - time( NULL ) );
- if ( offset > 0 ) offset = 0;
- } else {
- offset = 0;
- }
- timing_gets( &image->atime, offset );
+ loadImageMeta( image );
// Prevent freeing in cleanup
cache = NULL;
@@ -925,7 +920,7 @@ static bool image_load(char *base, char *path, int withUplink)
// Image is definitely incomplete, initialize uplink worker
if ( image->ref_cacheMap != NULL ) {
- image->working = false;
+ image->problem.uplink = true;
if ( withUplink ) {
uplink_init( image, -1, NULL, -1 );
}
@@ -937,14 +932,14 @@ static bool image_load(char *base, char *path, int withUplink)
// Keep fd for reading
fdImage = -1;
// Check CRC32
- image_checkRandomBlocks( image, 4 );
+ image_checkRandomBlocks( image, 4, -1 );
} else {
logadd( LOG_ERROR, "Image list full: Could not add image %s", path );
image->readFd = -1; // Keep fdImage instead, will be closed below
image = image_free( image );
goto load_error;
}
- logadd( LOG_DEBUG1, "Loaded image '%s:%d'\n", image->name, (int)image->rid );
+ logadd( LOG_DEBUG1, "Loaded image '%s:%d'\n", PIMG(image) );
function_return = true;
// Clean exit:
@@ -1027,10 +1022,19 @@ static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t f
return retval;
}
-static void image_checkRandomBlocks(dnbd3_image_t *image, const int count)
+/**
+ * Check up to count random blocks from given image. If fromFd is -1, the check will
+ * be run asynchronously using the integrity checker. Otherwise, the check will
+ * happen in the function and return the result of the check.
+ * @param image image to check
+ * @param count number of blocks to check (max)
+ * @param fromFd, check synchronously and use this fd for reading, -1 = async
+ * @return true = OK, false = error. Meaningless if fromFd == -1
+ */
+static bool image_checkRandomBlocks(dnbd3_image_t *image, const int count, int fromFd)
{
if ( image->crc32 == NULL )
- return;
+ return true;
// This checks the first block and (up to) count - 1 random blocks for corruption
// via the known crc32 list. This is very sloppy and is merely supposed to detect
// accidental corruption due to broken dnbd3-proxy functionality or file system
@@ -1038,7 +1042,7 @@ static void image_checkRandomBlocks(dnbd3_image_t *image, const int count)
assert( count > 0 );
dnbd3_cache_map_t *cache = ref_get_cachemap( image );
const int hashBlocks = IMGSIZE_TO_HASHBLOCKS( image->virtualFilesize );
- int blocks[count];
+ int blocks[count+1]; // +1 for "-1" in sync case
int index = 0, j;
int block;
if ( image_isHashBlockComplete( cache, 0, image->virtualFilesize ) ) {
@@ -1062,9 +1066,16 @@ while_end: ;
if ( cache != NULL ) {
ref_put( &cache->reference );
}
- for ( int i = 0; i < index; ++i ) {
- integrity_check( image, blocks[i], true );
+ if ( fromFd == -1 ) {
+ // Async
+ for ( int i = 0; i < index; ++i ) {
+ integrity_check( image, blocks[i], true );
+ }
+ return true;
}
+ // Sync
+ blocks[index] = -1;
+ return image_checkBlocksCrc32( fromFd, image->crc32, blocks, image->realFilesize );
}
/**
@@ -1079,7 +1090,7 @@ bool image_create(char *image, int revision, uint64_t size)
logadd( LOG_ERROR, "revision id invalid: %d", revision );
return false;
}
- char path[PATHLEN], cache[PATHLEN];
+ char path[PATHLEN], cache[PATHLEN+4];
char *lastSlash = strrchr( image, '/' );
if ( lastSlash == NULL ) {
snprintf( path, PATHLEN, "%s/%s.r%d", _basePath, image, revision );
@@ -1090,7 +1101,7 @@ bool image_create(char *image, int revision, uint64_t size)
*lastSlash = '/';
snprintf( path, PATHLEN, "%s/%s.r%d", _basePath, image, revision );
}
- snprintf( cache, PATHLEN, "%s.map", path );
+ snprintf( cache, PATHLEN+4, "%s.map", path );
size = (size + DNBD3_BLOCK_SIZE - 1) & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1);
const int mapsize = IMGSIZE_TO_MAPBYTES(size);
// Write files
@@ -1111,14 +1122,19 @@ bool image_create(char *image, int revision, uint64_t size)
logadd( LOG_DEBUG1, "Could not allocate %d bytes for %s (errno=%d)", mapsize, cache, err );
}
// Now write image
+ bool fallback = false;
if ( !_sparseFiles && !file_alloc( fdImage, 0, size ) ) {
logadd( LOG_ERROR, "Could not allocate %" PRIu64 " bytes for %s (errno=%d)", size, path, errno );
logadd( LOG_ERROR, "It is highly recommended to use a file system that supports preallocating disk"
" space without actually writing all zeroes to the block device." );
logadd( LOG_ERROR, "If you cannot fix this, try setting sparseFiles=true, but don't expect"
" divine performance during replication." );
- goto failure_cleanup;
- } else if ( _sparseFiles && !file_setSize( fdImage, size ) ) {
+ if ( !_ignoreAllocErrors ) {
+ goto failure_cleanup;
+ }
+ fallback = true;
+ }
+ if ( ( _sparseFiles || fallback ) && !file_setSize( fdImage, size ) ) {
logadd( LOG_ERROR, "Could not create sparse file of %" PRIu64 " bytes for %s (errno=%d)", size, path, errno );
logadd( LOG_ERROR, "Make sure you have enough disk space, check directory permissions, fs errors etc." );
goto failure_cleanup;
@@ -1162,14 +1178,18 @@ dnbd3_image_t* image_getOrLoad(char * const name, const uint16_t revision)
// Sanity check
if ( len == 0 || name[len - 1] == '/' || name[0] == '/'
|| name[0] == '.' || strstr( name, "/." ) != NULL ) return NULL;
- // If in proxy mode, check with upstream server first
+ // Re-check latest local revision
+ image = loadImageServer( name, revision );
+ // If in proxy mode, check with upstream servers
if ( _isProxy ) {
+ // Forget the locally loaded one
+ image_release( image );
+ // Check with upstream - if unsuccessful, will return the same
+ // as loadImageServer did
image = loadImageProxy( name, revision, len );
- if ( image != NULL )
- return image;
}
// Lookup on local storage
- return loadImageServer( name, revision );
+ return image;
}
/**
@@ -1227,19 +1247,20 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
int uplinkSock = -1;
dnbd3_host_t uplinkServer;
const int count = altservers_getHostListForReplication( name, servers, REP_NUM_SRV );
- uint16_t remoteProtocolVersion;
uint16_t remoteRid = revision;
- uint64_t remoteImageSize;
+ uint16_t acceptedRemoteRid = 0;
+ uint16_t remoteProtocolVersion = 0;
struct sockaddr_storage sa;
socklen_t salen;
poll_list_t *cons = sock_newPollList();
logadd( LOG_DEBUG2, "Trying to clone %s:%d from %d hosts", name, (int)revision, count );
for (int i = 0; i < count + 5; ++i) { // "i < count + 5" for 5 additional iterations, waiting on pending connects
- char *remoteName;
+ char *remoteName = NULL;
+ uint64_t remoteImageSize = 0;
bool ok = false;
int sock;
if ( i >= count ) {
- sock = sock_multiConnect( cons, NULL, 100, 1000 );
+ sock = sock_multiConnect( cons, NULL, 100, _uplinkTimeout );
if ( sock == -2 ) break;
} else {
if ( log_hasMask( LOG_DEBUG2 ) ) {
@@ -1248,7 +1269,7 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
host[len] = '\0';
logadd( LOG_DEBUG2, "Trying to replicate from %s", host );
}
- sock = sock_multiConnect( cons, &servers[i], 100, 1000 );
+ sock = sock_multiConnect( cons, &servers[i], 100, _uplinkTimeout );
}
if ( sock == -1 || sock == -2 ) continue;
salen = sizeof(sa);
@@ -1273,7 +1294,11 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
} else {
ok = image_ensureDiskSpace( remoteImageSize + ( 10 * 1024 * 1024 ), false ); // some extra space for cache map etc.
}
- ok = ok && image_clone( sock, name, remoteRid, remoteImageSize ); // This sets up the file+map+crc and loads the img
+ if ( ok ) {
+ ok = image_clone( sock, name, remoteRid, remoteImageSize ); // This sets up the file+map+crc and loads the img
+ } else {
+ logadd( LOG_INFO, "Not enough space to replicate '%s:%d'", name, (int)revision );
+ }
mutex_unlock( &reloadLock );
if ( !ok ) goto server_fail;
@@ -1282,26 +1307,32 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
if ( !sock_sockaddrToDnbd3( (struct sockaddr*)&sa, &uplinkServer ) ) {
uplinkServer.type = 0;
}
- break;
+ acceptedRemoteRid = remoteRid;
+ break; // TODO: Maybe we should try the remaining servers if rid == 0, in case there's an even newer one
server_fail: ;
close( sock );
}
sock_destroyPollList( cons );
- // If we still have a pointer to a local image, release the reference
- if ( image != NULL ) image_release( image );
+ // If we still have a pointer to a local image, compare rid
+ if ( image != NULL ) {
+ if ( ( revision == 0 && image->rid >= acceptedRemoteRid ) || ( image->rid == revision ) ) {
+ return image;
+ }
+ // release the reference
+ image_release( image );
+ }
// If everything worked out, this call should now actually return the image
- image = image_get( name, remoteRid, false );
+ image = image_get( name, acceptedRemoteRid, false );
if ( image != NULL && uplinkSock != -1 ) {
// If so, init the uplink and pass it the socket
- sock_setTimeout( uplinkSock, _uplinkTimeout );
if ( !uplink_init( image, uplinkSock, &uplinkServer, remoteProtocolVersion ) ) {
close( uplinkSock );
} else {
// Clumsy busy wait, but this should only take as long as it takes to start a thread, so is it really worth using a signalling mechanism?
int i = 0;
- while ( !image->working && ++i < 100 )
+ while ( image->problem.uplink && ++i < 100 )
usleep( 2000 );
}
} else if ( uplinkSock != -1 ) {
@@ -1318,6 +1349,7 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste
{
char imageFile[PATHLEN] = "";
uint16_t detectedRid = 0;
+ bool isLegacyFile = false;
if ( requestedRid != 0 ) {
snprintf( imageFile, PATHLEN, "%s/%s.r%d", _basePath, name, (int)requestedRid );
@@ -1354,6 +1386,7 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste
&& ( detectedRid == 0 || !file_isReadable( imageFile ) ) ) {
snprintf( imageFile, PATHLEN, "%s/%s", _basePath, name );
detectedRid = 1;
+ isLegacyFile = true;
}
logadd( LOG_DEBUG2, "Trying to load %s:%d ( -> %d) as %s", name, (int)requestedRid, (int)detectedRid, imageFile );
// No file was determined, or it doesn't seem to exist/be readable
@@ -1361,7 +1394,7 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste
logadd( LOG_DEBUG2, "Not found, bailing out" );
return image_get( name, requestedRid, true );
}
- if ( !_vmdkLegacyMode && requestedRid == 0 ) {
+ if ( !isLegacyFile && requestedRid == 0 ) {
// rid 0 requested - check if detected rid is readable, decrease rid if not until we reach 0
while ( detectedRid != 0 ) {
dnbd3_image_t *image = image_get( name, detectedRid, true );
@@ -1429,9 +1462,13 @@ static bool image_clone(int sock, char *name, uint16_t revision, uint64_t imageS
logadd( LOG_WARNING, "OTF-Clone: Corrupted CRC-32 list. ignored. (%s)", name );
} else {
int fd = open( crcFile, O_WRONLY | O_CREAT, 0644 );
- write( fd, &masterCrc, sizeof(uint32_t) );
- write( fd, crc32list, crc32len );
+ ssize_t ret = write( fd, &masterCrc, sizeof(masterCrc) );
+ ret += write( fd, crc32list, crc32len );
close( fd );
+ if ( (size_t)ret != crc32len + sizeof(masterCrc) ) {
+ logadd( LOG_WARNING, "Could not save freshly received crc32 list for %s:%d", name, (int)revision );
+ unlink( crcFile );
+ }
}
}
free( crc32list );
@@ -1564,14 +1601,23 @@ json_t* image_getListAsJson()
ref_put( &uplink->reference );
}
- jsonImage = json_pack( "{sisssisisisisI}",
+ int problems = 0;
+#define addproblem(name,val) if (image->problem.name) problems |= (1 << val)
+ addproblem(read, 0);
+ addproblem(write, 1);
+ addproblem(changed, 2);
+ addproblem(uplink, 3);
+ addproblem(queue, 4);
+
+ jsonImage = json_pack( "{sisssisisisisIsi}",
"id", image->id, // id, name, rid never change, so access them without locking
"name", image->name,
"rid", (int) image->rid,
"users", image->users,
"complete", completeness,
"idle", idleTime,
- "size", (json_int_t)image->virtualFilesize );
+ "size", (json_int_t)image->virtualFilesize,
+ "problems", problems );
if ( bytesReceived != 0 ) {
json_object_set_new( jsonImage, "bytesReceived", json_integer( (json_int_t) bytesReceived ) );
}
@@ -1594,7 +1640,7 @@ int image_getCompletenessEstimate(dnbd3_image_t * const image)
assert( image != NULL );
dnbd3_cache_map_t *cache = ref_get_cachemap( image );
if ( cache == NULL )
- return image->working ? 100 : 0;
+ return 100;
const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
if ( unlikely( len == 0 ) ) {
ref_put( &cache->reference );
@@ -1705,46 +1751,51 @@ bool image_ensureDiskSpaceLocked(uint64_t size, bool force)
/**
* Make sure at least size bytes are available in _basePath.
* Will delete old images to make room for new ones.
- * TODO: Store last access time of images. Currently the
- * last access time is reset to the file modification time
- * on server restart. Thus it will
- * currently only delete images if server uptime is > 24 hours.
+ * It will only delete images if a configurable uptime is
+ * reached.
* This can be overridden by setting force to true, in case
* free space is desperately needed.
* Return true iff enough space is available. false in random other cases
*/
static bool image_ensureDiskSpace(uint64_t size, bool force)
{
- for ( int maxtries = 0; maxtries < 20; ++maxtries ) {
+ for ( int maxtries = 0; maxtries < 50; ++maxtries ) {
uint64_t available;
if ( !file_freeDiskSpace( _basePath, NULL, &available ) ) {
- logadd( LOG_WARNING, "Could not get free disk space (errno %d), will assume there is enough space left... ;-)\n", errno );
+ logadd( LOG_WARNING, "Could not get free disk space (errno %d), will assume there is enough space left.", errno );
return true;
}
if ( available > size )
return true; // Yay
- if ( !_isProxy || _autoFreeDiskSpaceDelay == -1 )
+ if ( !_isProxy || _autoFreeDiskSpaceDelay == -1 ) {
+ logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, but auto-freeing of disk space is disabled.",
+ (int)(available / (1024ll * 1024)),
+ (int)(size / (1024ll * 1024)) );
return false; // If not in proxy mode at all, or explicitly disabled, never delete anything
+ }
if ( !force && dnbd3_serverUptime() < (uint32_t)_autoFreeDiskSpaceDelay ) {
- logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, but server uptime < %d minutes...", (int)(available / (1024ll * 1024ll)),
- (int)(size / (1024 * 1024)), _autoFreeDiskSpaceDelay / 60 );
+ logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, but server uptime < %d minutes...",
+ (int)(available / (1024ll * 1024)),
+ (int)(size / (1024ll * 1024)), _autoFreeDiskSpaceDelay / 60 );
return false;
}
- logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, freeing an image...", (int)(available / (1024ll * 1024ll)),
- (int)(size / (1024 * 1024)) );
+ logadd( LOG_INFO, "Only %dMiB free, %dMiB requested, freeing an image...",
+ (int)(available / (1024ll * 1024)),
+ (int)(size / (1024ll * 1024)) );
// Find least recently used image
dnbd3_image_t *oldest = NULL;
int i;
mutex_lock( &imageListLock );
for (i = 0; i < _num_images; ++i) {
dnbd3_image_t *current = _images[i];
- if ( current == NULL ) continue;
- if ( current->users == 0 ) { // Not in use :-)
- if ( oldest == NULL || timing_1le2( &current->atime, &oldest->atime ) ) {
- // Oldest access time so far
- oldest = current;
- }
- }
+ if ( current == NULL || current->users != 0 )
+ continue; // Empty slot or in use
+ if ( oldest != NULL && timing_1le2( &oldest->atime, &current->atime ) )
+ continue; // Already got a newer one
+ if ( !isImageFromUpstream( current ) )
+ continue; // Not replicated, don't touch
+ // Oldest access time so far
+ oldest = current;
}
if ( oldest != NULL ) {
oldest->users++;
@@ -1760,7 +1811,7 @@ static bool image_ensureDiskSpace(uint64_t size, bool force)
image_release( oldest ); // We did users++ above; image might have to be freed entirely
return false;
}
- logadd( LOG_INFO, "'%s:%d' has to go!", oldest->name, (int)oldest->rid );
+ logadd( LOG_INFO, "'%s:%d' has to go!", PIMG(oldest) );
char *filename = strdup( oldest->path ); // Copy name as we remove the image first
oldest = image_remove( oldest ); // Remove from list first...
oldest = image_release( oldest ); // Decrease users counter; if it falls to 0, image will be freed
@@ -1790,15 +1841,14 @@ static void* closeUnusedFds(void* nix UNUSED)
timing_gets( &deadline, -UNUSED_FD_TIMEOUT );
int fds[FDCOUNT];
int fdindex = 0;
+ setThreadName( "unused-fd-close" );
mutex_lock( &imageListLock );
for ( int i = 0; i < _num_images; ++i ) {
dnbd3_image_t * const image = _images[i];
if ( image == NULL || image->readFd == -1 )
continue;
- // TODO: Also close for idle uplinks (uplink_connectionShouldShutdown)
- // TODO: And close writeFd for idle uplinks....
if ( image->users == 0 && image->uplinkref == NULL && timing_reached( &image->atime, &deadline ) ) {
- logadd( LOG_DEBUG1, "Inactive fd closed for %s:%d", image->name, (int)image->rid );
+ logadd( LOG_DEBUG1, "Inactive fd closed for %s:%d", PIMG(image) );
fds[fdindex++] = image->readFd;
image->readFd = -1; // Not a race; image->users is 0 and to increase it you need imageListLock
if ( fdindex == FDCOUNT )
@@ -1813,6 +1863,177 @@ static void* closeUnusedFds(void* nix UNUSED)
return NULL;
}
+static bool isImageFromUpstream(dnbd3_image_t *image)
+{
+ if ( !_isProxy )
+ return false; // Nothing to do
+ // Check if we're a "hybrid proxy", i.e. there are only some namespaces (directories)
+ // for which we have any upstream servers configured. If there's none, don't touch
+ // the cache map on disk.
+ if ( !altservers_imageHasAltServers( image->name ) )
+ return false; // Nothing to do
+ return true;
+}
+
+static void* saveLoadAllCacheMaps(void* nix UNUSED)
+{
+ static ticks nextSave;
+ declare_now;
+ bool full = timing_reached( &nextSave, &now );
+ time_t walltime = 0;
+ setThreadName( "cache-mapper" );
+ if ( full ) {
+ walltime = time( NULL );
+ // Update at start to avoid concurrent runs
+ timing_addSeconds( &nextSave, &now, CACHE_MAP_MAX_SAVE_DELAY );
+ }
+ mutex_lock( &imageListLock );
+ for ( int i = 0; i < _num_images; ++i ) {
+ dnbd3_image_t * const image = _images[i];
+ if ( image == NULL )
+ continue;
+ image->users++;
+ mutex_unlock( &imageListLock );
+ const bool fromUpstream = isImageFromUpstream( image );
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache != NULL ) {
+ if ( fromUpstream ) {
+ // Replicated image, we're responsible for updating the map, so save it
+ // Save if dirty bit is set, blocks were invalidated
+ bool save = cache->dirty;
+ dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref );
+ if ( !save ) {
+ // Otherwise, consider longer timeout and byte count limits of uplink
+ if ( uplink != NULL ) {
+ assert( uplink->bytesReceivedLastSave <= uplink->bytesReceived );
+ uint64_t diff = uplink->bytesReceived - uplink->bytesReceivedLastSave;
+ if ( diff > CACHE_MAP_MAX_UNSAVED_BYTES || ( full && diff != 0 ) ) {
+ save = true;
+ }
+ }
+ }
+ if ( save ) {
+ cache->dirty = false;
+ if ( uplink != NULL ) {
+ uplink->bytesReceivedLastSave = uplink->bytesReceived;
+ }
+ saveCacheMap( image );
+ }
+ if ( uplink != NULL ) {
+ ref_put( &uplink->reference );
+ }
+ } else {
+ // We're not replicating this image, if there's a cache map, reload
+ // it periodically, since we might read from a shared storage that
+ // another server instance is writing to.
+ if ( full || ( !cache->unchanged && !image->problem.read ) ) {
+ logadd( LOG_DEBUG2, "Reloading cache map of %s:%d", PIMG(image) );
+ dnbd3_cache_map_t *onDisk = image_loadCacheMap(image->path, image->virtualFilesize);
+ if ( onDisk == NULL ) {
+ // Should be complete now
+ logadd( LOG_DEBUG1, "External replication of %s:%d complete", PIMG(image) );
+ ref_setref( &image->ref_cacheMap, NULL );
+ } else {
+ const int mapSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
+ if ( memcmp( cache->map, onDisk->map, mapSize ) == 0 ) {
+ // Unchanged
+ cache->unchanged = true;
+ onDisk->reference.free( &onDisk->reference );
+ } else {
+ // Replace
+ ref_setref( &image->ref_cacheMap, &onDisk->reference );
+ logadd( LOG_DEBUG2, "Map changed" );
+ }
+ }
+ }
+ } // end reload cache map
+ ref_put( &cache->reference );
+ } // end has cache map
+ if ( full && fromUpstream ) {
+ saveMetaData( image, &now, walltime );
+ }
+ image_release( image ); // Always do this instead of users-- to handle freeing
+ mutex_lock( &imageListLock );
+ }
+ mutex_unlock( &imageListLock );
+ return NULL;
+}
+
+/**
+ * Saves the cache map of the given image.
+ * Return false if this image doesn't have a cache map, or if the image
+ * doesn't have any uplink to replicate from. In this case the image might
+ * still have a cache map that was loaded from disk, and should be reloaded
+ * periodically.
+ * @param image the image
+ */
+static void saveCacheMap(dnbd3_image_t *image)
+{
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL )
+ return; // Race - wasn't NULL in function call above...
+
+ logadd( LOG_DEBUG2, "Saving cache map of %s:%d", PIMG(image) );
+ const size_t size = IMGSIZE_TO_MAPBYTES(image->virtualFilesize);
+ char mapfile[strlen( image->path ) + 4 + 1];
+ strcpy( mapfile, image->path );
+ strcat( mapfile, ".map" );
+
+ int fd = open( mapfile, O_WRONLY | O_CREAT, 0644 );
+ if ( fd == -1 ) {
+ const int err = errno;
+ ref_put( &cache->reference );
+ logadd( LOG_WARNING, "Could not open file to write cache map to disk (errno=%d) file %s", err, mapfile );
+ return;
+ }
+
+ // On Linux we could use readFd, but in general it's not guaranteed to work
+ int imgFd = open( image->path, O_WRONLY );
+ if ( imgFd == -1 ) {
+ logadd( LOG_WARNING, "Cannot open %s for fsync(): errno=%d", image->path, errno );
+ } else {
+ if ( fsync( imgFd ) == -1 ) {
+ logadd( LOG_ERROR, "fsync() on image file %s failed with errno %d. Resetting cache map.", image->path, errno );
+ dnbd3_cache_map_t *old = image_loadCacheMap(image->path, image->virtualFilesize);
+ const int mapSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
+ if ( old == NULL ) {
+ // Could not load old map. FS might be toast.
+ logadd( LOG_ERROR, "Cannot load old cache map. Setting all zero." );
+ memset( cache->map, 0, mapSize );
+ } else {
+ // AND the maps together to be safe
+ for ( int i = 0; i < mapSize; ++i ) {
+ cache->map[i] &= old->map[i];
+ }
+ old->reference.free( &old->reference );
+ }
+ }
+ close( imgFd );
+ }
+
+ // Write current map to file
+ size_t done = 0;
+ while ( done < size ) {
+ const ssize_t ret = write( fd, cache->map + done, size - done );
+ if ( ret == -1 ) {
+ if ( errno == EINTR ) continue;
+ logadd( LOG_WARNING, "Could not write cache map (errno=%d) file %s", errno, mapfile );
+ break;
+ }
+ if ( ret <= 0 ) {
+ logadd( LOG_WARNING, "Unexpected return value %d for write() to %s", (int)ret, mapfile );
+ break;
+ }
+ done += (size_t)ret;
+ }
+ ref_put( &cache->reference );
+ if ( fsync( fd ) == -1 ) {
+ logadd( LOG_WARNING, "fsync() on image map %s failed with errno %d", mapfile, errno );
+ }
+ close( fd );
+ // TODO fsync on parent directory
+}
+
static void allocCacheMap(dnbd3_image_t *image, bool complete)
{
const uint8_t val = complete ? 0xff : 0;
@@ -1822,7 +2043,7 @@ static void allocCacheMap(dnbd3_image_t *image, bool complete)
memset( cache->map, val, byteSize );
mutex_lock( &image->lock );
if ( image->ref_cacheMap != NULL ) {
- logadd( LOG_WARNING, "BUG: allocCacheMap called but there already is a cache map for %s:%d", image->name, (int)image->rid );
+ logadd( LOG_WARNING, "BUG: allocCacheMap called but there already is a map for %s:%d", PIMG(image) );
free( cache );
} else {
ref_setref( &image->ref_cacheMap, &cache->reference );
@@ -1830,3 +2051,77 @@ static void allocCacheMap(dnbd3_image_t *image, bool complete)
mutex_unlock( &image->lock );
}
+/**
+ * It's assumed you hold a reference to the image
+ */
+static void saveMetaData(dnbd3_image_t *image, ticks *now, time_t walltime)
+{
+ if ( !image->accessed )
+ return;
+ ticks tmp;
+ uint32_t diff;
+ char *fn;
+ if ( asprintf( &fn, "%s.meta", image->path ) == -1 ) {
+ logadd( LOG_WARNING, "Cannot asprintf meta" );
+ return;
+ }
+ if ( now == NULL ) {
+ timing_get( &tmp );
+ now = &tmp;
+ walltime = time( NULL );
+ }
+ mutex_lock( &image->lock );
+ image->accessed = false;
+ diff = timing_diff( &image->atime, now );
+ mutex_unlock( &image->lock );
+ FILE *f = fopen( fn, "w" );
+ if ( f == NULL ) {
+ logadd( LOG_WARNING, "Cannot open %s for writing", fn );
+ } else {
+ fprintf( f, "[main]\natime=%"PRIu64"\n", (uint64_t)( walltime - diff ) );
+ fclose( f );
+ }
+ free( fn );
+ // TODO: fsync() dir
+}
+
+static void loadImageMeta(dnbd3_image_t *image)
+{
+ int32_t offset = 1;
+ char *fn;
+ if ( asprintf( &fn, "%s.meta", image->path ) == -1 ) {
+ logadd( LOG_WARNING, "asprintf load" );
+ } else {
+ int fh = open( fn, O_RDONLY );
+ free( fn );
+ if ( fh != -1 ) {
+ char buf[200];
+ ssize_t ret = read( fh, buf, sizeof(buf)-1 );
+ close( fh );
+ if ( ret > 0 ) {
+ buf[ret] = '\0';
+ // Do it the cheap way until we actually store more stuff
+ char *pos = strstr( buf, "atime=" );
+ if ( pos != NULL ) {
+ offset = (int32_t)( atol( pos + 6 ) - time( NULL ) );
+ }
+ }
+ }
+ }
+ if ( offset == 1 ) {
+ // Nothing from .meta file, use old guesstimate
+ struct stat st;
+ if ( stat( image->path, &st ) == 0 ) {
+ // Negatively offset atime by file modification time
+ offset = (int32_t)( st.st_mtime - time( NULL ) );
+ } else {
+ offset = 0;
+ }
+ image->accessed = true;
+ }
+ if ( offset > 0 ) {
+ offset = 0;
+ }
+ timing_gets( &image->atime, offset );
+}
+