summaryrefslogtreecommitdiffstats
path: root/src/server/image.c
diff options
context:
space:
mode:
authorSimon Rettberg2015-12-15 17:45:44 +0100
committerSimon Rettberg2015-12-15 17:45:44 +0100
commit72104f2e83fa724f9667c876dca17a2c5ee9b2a2 (patch)
tree38837580c70b390f0bc35c15d2bc4d0865a9f3c4 /src/server/image.c
parent[SERVER] Make listen port configurable (diff)
downloaddnbd3-72104f2e83fa724f9667c876dca17a2c5ee9b2a2.tar.gz
dnbd3-72104f2e83fa724f9667c876dca17a2c5ee9b2a2.tar.xz
dnbd3-72104f2e83fa724f9667c876dca17a2c5ee9b2a2.zip
[SERVER] Remove non-working images from list, plus refactoring
Now that we can automatically load unknown images from disk on request, it makes sense to remove non-working images from the image list. On future requests, we will look for them on disk again, which is nice in case of temporary storage hiccups. Also, some more or less related locking has been refined (loading images, replicating images).
Diffstat (limited to 'src/server/image.c')
-rw-r--r--src/server/image.c226
1 files changed, 175 insertions, 51 deletions
diff --git a/src/server/image.c b/src/server/image.c
index fb3f8ba..d4df26d 100644
--- a/src/server/image.c
+++ b/src/server/image.c
@@ -26,6 +26,7 @@
#include <glob.h>
#define PATHLEN (2000)
+#define NONWORKING_RECHECK_INTERVAL_SECONDS (60)
// ##########################################
@@ -44,10 +45,10 @@ typedef struct
time_t deadline;
} imagecache;
static imagecache remoteCloneCache[CACHELEN];
-static int remoteCloneCacheIndex = 0;
// ##########################################
+static void image_remove(dnbd3_image_t *image);
static dnbd3_image_t* image_free(dnbd3_image_t *image);
static bool image_isHashBlockComplete(uint8_t * const cacheMap, const uint64_t block, const uint64_t fileSize);
static bool image_load_all_internal(char *base, char *path);
@@ -283,18 +284,65 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
candidate->users++;
spin_unlock( &candidate->lock );
- if ( !checkIfWorking ) return candidate;
+ if ( !checkIfWorking ) return candidate; // Found, but not interested in working state
// Found, see if it works
- if ( !candidate->working && candidate->cache_map != NULL && candidate->uplink == NULL && file_isWritable( candidate->path ) ) {
- // Not working and has file + cache-map, try to init uplink (uplink_init will check if proxy mode is enabled)
- uplink_init( candidate, -1, NULL );
- } else if ( candidate->working && candidate->uplink != NULL && candidate->uplink->queueLen > SERVER_UPLINK_QUEUELEN_THRES ) {
- // To many pending uplink requests. We take that as a hint that the uplink is clogged or no working uplink server
- // exists, so "working" is changed to false for now. Should a new uplink server be found the uplink thread will
- // set this back to true some time.
- candidate->working = false;
+
+ if ( candidate->working ) {
+ // Last known state was "working", see if that should change
+ if ( candidate->readFd == -1 ) {
+ candidate->working = false;
+ }
+ } else { // ...not working...
+ // Don't re-check too often
+ spin_lock( &candidate->lock );
+ bool check;
+ const time_t now = time( NULL );
+ check = ( now - candidate->lastWorkCheck ) > NONWORKING_RECHECK_INTERVAL_SECONDS;
+ if ( check ) {
+ candidate->lastWorkCheck = now;
+ }
+ spin_unlock( &candidate->lock );
+ if ( !check ) {
+ return candidate;
+ }
+ // Check if the local file exists, has the right size, and is readable (writable for incomplete image)
+ if ( candidate->cache_map != NULL ) {
+ // -- Incomplete - rw check
+ if ( candidate->cacheFd == -1 ) { // Make sure file is open for writing
+ candidate->cacheFd = open( candidate->path, O_RDWR );
+ // It might have failed - still offer proxy mode, we just can't cache
+ if ( candidate->cacheFd == -1 ) {
+ logadd( LOG_WARNING, "Cannot re-open %s for writing - replication disabled", candidate->path );
+ }
+ }
+ if ( candidate->uplink == NULL && candidate->cacheFd != -1 ) {
+ uplink_init( candidate, -1, NULL );
+ }
+ }
+ // Common for ro and rw images
+ const off_t len = lseek( candidate->readFd, 0, SEEK_END );
+ if ( len == -1 ) {
+ logadd( LOG_WARNING, "lseek() on %s failed (errno=%d), removing image", candidate->path, errno );
+ image_remove( candidate ); // No release here, the image is still returned and should be released by caller
+ } else if ( (uint64_t)len != candidate->realFilesize ) {
+ logadd( LOG_DEBUG1, "Size of %s changed at runtime, keeping disabled! Expected: %" PRIu64 ", found: %" PRIu64
+ ". Try sending SIGHUP to server if you know what you're doing.",
+ candidate->path, candidate->realFilesize, (uint64_t)len );
+ } else {
+ // Seek worked, file size is same, now see if we can read from file
+ char buffer[100];
+ if ( pread( candidate->readFd, buffer, sizeof(buffer), 0 ) == -1 ) {
+ logadd( LOG_DEBUG2, "Reading first %d bytes from %s failed (errno=%d), removing image",
+ (int)sizeof(buffer), candidate->path, errno );
+ image_remove( candidate );
+ } else {
+ // Seems everything is fine again \o/
+ candidate->working = true;
+ }
+ }
}
+
return candidate; // Success :-)
}
@@ -304,7 +352,7 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
* Every call to image_lock() needs to be followed by a call to image_release() at some point.
* Locks on: imageListLock, _images[].lock
*/
-dnbd3_image_t* image_lock(dnbd3_image_t *image)
+dnbd3_image_t* image_lock(dnbd3_image_t *image) // TODO: get rid, fix places that do image->users--
{
if ( image == NULL ) return NULL ;
int i;
@@ -361,7 +409,7 @@ dnbd3_image_t* image_release(dnbd3_image_t *image)
* no active users
* Locks on: imageListLock, image[].lock
*/
-void image_remove(dnbd3_image_t *image)
+static void image_remove(dnbd3_image_t *image)
{
bool wasInList = false;
spin_lock( &imageListLock );
@@ -399,13 +447,46 @@ void image_killUplinks()
/**
* Load all images in given path recursively.
* Pass NULL to use path from config.
- * NOT THREAD SAFE, make sure this is only running
- * on one thread at a time!
*/
bool image_loadAll(char *path)
{
bool ret;
+ char imgPath[PATHLEN];
+ int imgId;
+ dnbd3_image_t *imgHandle;
+
if ( path == NULL ) path = _basePath;
+ if ( _removeMissingImages ) {
+ // Check if all loaded images still exist on disk
+ logadd( LOG_DEBUG1, "Checking for vanished images" );
+ spin_lock( &imageListLock );
+ for (int i = _num_images - 1; i >= 0; --i) {
+ if ( _images[i] == NULL ) {
+ if ( i + 1 == _num_images ) _num_images--;
+ continue;
+ }
+ imgId = _images[i]->id;
+ snprintf( imgPath, PATHLEN, "%s", _images[i]->path );
+ spin_unlock( &imageListLock ); // isReadable hits the fs; unlock
+ // Check if file can still be opened for reading
+ ret = file_isReadable( imgPath );
+ // Lock again, see if image is still there, free if required
+ spin_lock( &imageListLock );
+ if ( ret || i >= _num_images || _images[i] == NULL || _images[i]->id != imgId ) continue;
+ // Image needs to be removed
+ imgHandle = _images[i];
+ _images[i] = NULL;
+ if ( i + 1 == _num_images ) _num_images--;
+ if ( imgHandle->users != 0 ) continue; // Still in use, do not free (last releasing user will trigger)
+ // Image is not in use anymore, free the dangling entry immediately
+ spin_unlock( &imageListLock ); // image_free might do several fs operations; unlock
+ image_free( imgHandle );
+ spin_lock( &imageListLock );
+ }
+ spin_unlock( &imageListLock );
+ }
+ // Now scan for new images
+ logadd( LOG_DEBUG1, "Scanning for new or modified images" );
pthread_mutex_lock( &reloadLock );
ret = image_load_all_internal( path, path );
pthread_mutex_unlock( &reloadLock );
@@ -438,6 +519,9 @@ bool image_tryFreeAll()
static dnbd3_image_t* image_free(dnbd3_image_t *image)
{
assert( image != NULL );
+ if ( !_shutdown ) {
+ logadd( LOG_INFO, "Freeing image %s:%d", image->lower_name, (int)image->rid );
+ }
//
image_saveCacheMap( image );
uplink_shutdown( image );
@@ -511,7 +595,7 @@ static bool image_load_all_internal(char *base, char *path)
logadd( LOG_WARNING, "stat() for '%s' failed. Ignoring....", subpath );
continue;
}
- if ( S_ISDIR( st.st_mode )) {
+ if ( S_ISDIR( st.st_mode ) ) {
image_load_all_internal( base, subpath ); // Recurse
} else {
image_load( base, subpath, true ); // Load image if possible
@@ -531,7 +615,7 @@ static bool image_load_all_internal(char *base, char *path)
static bool image_load(char *base, char *path, int withUplink)
{
static int imgIdCounter = 0; // Used to assign unique numeric IDs to images
- int i, revision;
+ int i, revision = -1;
struct stat st;
uint8_t *cache_map = NULL;
uint32_t *crc32list = NULL;
@@ -563,7 +647,7 @@ static bool image_load(char *base, char *path, int withUplink)
// Easy - legacy mode, simply append full file name and set rid to 1
strcat( dst, fileName );
revision = 1;
- } else {
+ } else if ( !_vmdkLegacyMode ) {
// Try to parse *.r<ID> syntax
for (i = fileNameLen - 1; i > 1; --i) {
if ( fileName[i] < '0' || fileName[i] > '9' ) break;
@@ -578,7 +662,7 @@ static bool image_load(char *base, char *path, int withUplink)
}
*dst = '\0';
}
- if ( revision <= 0 ) {
+ if ( revision <= 0 || revision >= 65536 ) {
logadd( LOG_WARNING, "Image '%s' has invalid revision ID %d", path, revision );
goto load_error;
}
@@ -612,7 +696,7 @@ static bool image_load(char *base, char *path, int withUplink)
// 1. Allocate memory for the cache map if the image is incomplete
cache_map = image_loadCacheMap( path, virtualFilesize );
- // TODO: Maybe try sha-256 or 512 first if you're paranoid (to be implemented)
+ // XXX: Maybe try sha-256 or 512 first if you're paranoid (to be implemented)
// 2. Load CRC-32 list of image
uint32_t masterCrc;
@@ -630,34 +714,34 @@ static bool image_load(char *base, char *path, int withUplink)
// Compare data just loaded to identical image we apparently already loaded
if ( existing != NULL ) {
if ( existing->realFilesize != realFilesize ) {
- // Image will be replaced below
logadd( LOG_WARNING, "Size of image '%s:%d' has changed.", existing->lower_name, (int)existing->rid );
+ // Image will be replaced below
} else if ( existing->crc32 != NULL && crc32list != NULL
&& memcmp( existing->crc32, crc32list, sizeof(uint32_t) * hashBlockCount ) != 0 ) {
- // Image will be replaced below
logadd( LOG_WARNING, "CRC32 list of image '%s:%d' has changed.", existing->lower_name, (int)existing->rid );
logadd( LOG_WARNING, "The image will be reloaded, but you should NOT replace existing images while the server is running." );
logadd( LOG_WARNING, "Actually even if it's not running this should never be done. Use a new RID instead!" );
+ // Image will be replaced below
} else if ( existing->crc32 == NULL && crc32list != NULL ) {
logadd( LOG_INFO, "Found CRC-32 list for already loaded image '%s:%d', adding...", existing->lower_name, (int)existing->rid );
existing->crc32 = crc32list;
existing->masterCrc32 = masterCrc;
crc32list = NULL;
function_return = true;
- goto load_error;
+ goto load_error; // Keep existing
} else if ( existing->cache_map != NULL && cache_map == NULL ) {
// Just ignore that fact, if replication is really complete the cache map will be removed anyways
logadd( LOG_INFO, "Image '%s:%d' has no cache map on disk!", existing->lower_name, (int)existing->rid );
function_return = true;
- goto load_error;
+ goto load_error; // Keep existing
} else {
// Nothing changed about the existing image, so do nothing
function_return = true;
- goto load_error;
+ goto load_error; // Keep existing
}
- // Remove image from images array
- image_release( existing );
+ // Remove existing image from images array, so it will be replaced by the reloaded image
image_remove( existing );
+ image_release( existing );
existing = NULL;
}
@@ -714,7 +798,7 @@ static bool image_load(char *base, char *path, int withUplink)
spin_lock( &imageListLock );
// Now we're locked, assign unique ID to image (unique for this running server instance!)
image->id = ++imgIdCounter;
- for (i = 0; i < _num_images; ++i) {
+ for ( i = 0; i < _num_images; ++i ) {
if ( _images[i] != NULL ) continue;
_images[i] = image;
break;
@@ -727,7 +811,7 @@ static bool image_load(char *base, char *path, int withUplink)
goto load_error;
}
_images[_num_images++] = image;
- logadd( LOG_DEBUG1, "Loaded image '%s'\n", image->lower_name );
+ logadd( LOG_DEBUG1, "Loaded image '%s:%d'\n", image->lower_name, (int)image->rid );
}
// Keep fd for reading
image->readFd = fdImage;
@@ -888,7 +972,7 @@ bool image_create(char *image, int revision, uint64_t size)
close( fdCache );
return true;
//
- failure_cleanup: ;
+failure_cleanup: ;
if ( fdImage >= 0 ) close( fdImage );
if ( fdCache >= 0 ) close( fdCache );
remove( path );
@@ -913,8 +997,11 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste
*/
dnbd3_image_t* image_getOrLoad(char * const name, const uint16_t revision)
{
- // not proxy, specific revision - nothing to do
- if ( !_isProxy && revision != 0 ) return image_get( name, revision, true );
+ // specific revision - try shortcut
+ if ( revision != 0 ) {
+ dnbd3_image_t *image = image_get( name, revision, true );
+ if ( image != NULL ) return image;
+ }
const size_t len = strlen( name );
// Sanity check
if ( len == 0 || name[len - 1] == '/' || name[0] == '/'
@@ -935,14 +1022,15 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
{
int i;
// Already existing locally?
- dnbd3_image_t *image = image_get( name, revision, true );
- // exists and specific revision - nothing to do
- if ( image != NULL && revision != 0 ) return image;
+ dnbd3_image_t *image = NULL;
+ if ( revision == 0 ) {
+ image = image_get( name, revision, true );
+ }
// Doesn't exist or is rid 0, try remote if not already tried it recently
const time_t now = time( NULL );
char *cmpname = name;
- int useIndex = -1;
+ int useIndex = -1, fallbackIndex = 0;
if ( len >= NAMELEN ) cmpname += 1 + len - NAMELEN;
pthread_mutex_lock( &remoteCloneLock );
for (i = 0; i < CACHELEN; ++i) {
@@ -952,6 +1040,9 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
pthread_mutex_unlock( &remoteCloneLock ); // Was recently checked...
return image;
}
+ if ( remoteCloneCache[i].deadline < remoteCloneCache[fallbackIndex].deadline ) {
+ fallbackIndex = i;
+ }
}
// Re-check to prevent two clients at the same time triggering this,
// but only if rid != 0, since we would just get an old rid then
@@ -966,11 +1057,13 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
serialized_buffer_t serialized;
// Mark as recently checked
if ( useIndex == -1 ) {
- useIndex = remoteCloneCacheIndex = (remoteCloneCacheIndex + 1) % CACHELEN;
+ useIndex = fallbackIndex;
}
remoteCloneCache[useIndex].deadline = now + SERVER_REMOTE_IMAGE_CHECK_CACHETIME;
snprintf( remoteCloneCache[useIndex].name, NAMELEN, "%s", cmpname );
remoteCloneCache[useIndex].rid = revision;
+ pthread_mutex_unlock( &remoteCloneLock );
+
// Get some alt servers and try to get the image from there
dnbd3_host_t servers[4];
int uplinkSock = -1;
@@ -980,26 +1073,32 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision,
uint16_t remoteRid = revision;
uint64_t remoteImageSize;
for (i = 0; i < count; ++i) {
+ char *remoteName;
+ bool ok = false;
int sock = sock_connect( &servers[i], 750, _uplinkTimeout );
- if ( sock < 0 ) continue;
+ if ( sock == -1 ) continue;
if ( !dnbd3_select_image( sock, name, revision, FLAGS8_SERVER ) ) goto server_fail;
- char *remoteName;
if ( !dnbd3_select_image_reply( &serialized, sock, &remoteProtocolVersion, &remoteName, &remoteRid, &remoteImageSize ) ) goto server_fail;
if ( remoteProtocolVersion < MIN_SUPPORTED_SERVER || remoteRid == 0 ) goto server_fail;
- if ( revision != 0 && remoteRid != revision ) goto server_fail;
+ if ( revision != 0 && remoteRid != revision ) goto server_fail; // Want specific revision but uplink supplied different rid
if ( revision == 0 && image != NULL && image->rid >= remoteRid ) goto server_fail; // Not actually a failure: Highest remote rid is <= highest local rid - don't clone!
if ( remoteImageSize < DNBD3_BLOCK_SIZE || remoteName == NULL || strcmp( name, remoteName ) != 0 ) goto server_fail;
if ( remoteImageSize > SERVER_MAX_PROXY_IMAGE_SIZE ) goto server_fail;
- if ( !image_ensureDiskSpace( remoteImageSize ) ) goto server_fail;
- if ( !image_clone( sock, name, remoteRid, remoteImageSize ) ) goto server_fail; // This sets up the file+map+crc
+ pthread_mutex_lock( &reloadLock );
+ ok = image_ensureDiskSpace( remoteImageSize )
+ && image_clone( sock, name, remoteRid, remoteImageSize ); // This sets up the file+map+crc and loads the img
+ pthread_mutex_unlock( &reloadLock );
+ if ( !ok ) goto server_fail;
+
// Cloning worked :-)
uplinkSock = sock;
uplinkServer = &servers[i];
break;
- server_fail: ;
+
+server_fail: ;
close( sock );
}
- pthread_mutex_unlock( &remoteCloneLock );
+
// If we still have a pointer to a local image, release the reference
if ( image != NULL ) image_release( image );
// If everything worked out, this call should now actually return the image
@@ -1030,9 +1129,10 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste
uint16_t detectedRid = 0;
if ( _vmdkLegacyMode ) {
- // TODO
- assert( 0 );
- detectedRid = requestedRid;
+ if ( strend( name, ".vmdk" ) ) {
+ snprintf( imageFile, PATHLEN, "%s/%s", _basePath, name );
+ detectedRid = MAX( 1, requestedRid );
+ }
} else if ( requestedRid != 0 ) {
snprintf( imageFile, PATHLEN, "%s/%s.r%d", _basePath, name, requestedRid );
detectedRid = requestedRid;
@@ -1044,8 +1144,9 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste
if ( ret == 0 ) {
long int best = 0;
for ( size_t i = 0; i < g.gl_pathc; ++i ) {
- char *rev = strrchr( g.gl_pathv[i], 'r' );
- if ( rev == NULL ) continue;
+ const char * const path = g.gl_pathv[i];
+ const char * rev = strrchr( path, 'r' );
+ if ( rev == NULL || rev == path || *(rev - 1) != '.' ) continue;
rev++;
if ( *rev < '0' || *rev > '9' ) continue;
char *err = NULL;
@@ -1062,22 +1163,45 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste
}
globfree( &g );
}
+ logadd( LOG_DEBUG2, "Trying to load %s:%d ( -> %d) as %s", name, (int)requestedRid, (int)detectedRid, imageFile );
// No file was determined, or it doesn't seem to exist/be readable
- if ( imageFile[0] == '\0' || !file_isReadable( imageFile ) ) { // XXX glob fallback to rid-1? Rework above
+ if ( imageFile[0] == '\0' ) {
+ logadd( LOG_DEBUG2, "Not found, bailing out" );
return image_get( name, detectedRid, true );
}
+ if ( requestedRid == 0 ) {
+ // rid 0 requested - check if detected rid is readable, decrease rid if not until we reach 0
+ while ( detectedRid != 0 ) {
+ dnbd3_image_t *image = image_get( name, detectedRid, true );
+ if ( image != NULL ) {
+ // globbed rid already loaded, return
+ return image;
+ }
+ if ( file_isReadable( imageFile ) ) {
+ // globbed rid is readable on disk, stop searching and try to load it
+ break;
+ }
+ logadd( LOG_DEBUG2, "%s: rid %d globbed but not readable, trying lower rid...", name, (int)detectedRid );
+ detectedRid--;
+ snprintf( imageFile, PATHLEN, "%s/%s.r%d", _basePath, name, requestedRid );
+ }
+ }
+
// Now lock on the loading mutex, then check again if the image exists (we're multi-threaded)
pthread_mutex_lock( &reloadLock );
dnbd3_image_t* image = image_get( name, detectedRid, true );
if ( image != NULL ) {
// The image magically appeared in the meantime
+ logadd( LOG_DEBUG2, "Magically appeared" );
pthread_mutex_unlock( &reloadLock );
return image;
}
// Still not loaded, let's try to do so
+ logadd( LOG_DEBUG2, "Calling load" );
image_load( _basePath, imageFile, false );
pthread_mutex_unlock( &reloadLock );
// If loading succeeded, this will return the image
+ logadd( LOG_DEBUG2, "Calling get" );
return image_get( name, requestedRid, true );
}
@@ -1333,7 +1457,7 @@ static bool image_calcBlockCrc32(const int fd, const int block, const uint32_t r
*/
static bool image_ensureDiskSpace(uint64_t size)
{
- for (;;) {
+ for ( int maxtries = 0; maxtries < 20; ++maxtries ) {
const int64_t available = file_freeDiskSpace( _basePath );
if ( available == -1 ) {
const int e = errno;
@@ -1368,7 +1492,7 @@ static bool image_ensureDiskSpace(uint64_t size)
return false;
}
oldest = image_lock( oldest );
- if ( oldest == NULL ) return false;
+ if ( oldest == NULL ) continue; // Image freed in the meantime? Try again
logadd( LOG_INFO, "'%s:%d' has to go!", oldest->lower_name, (int)oldest->rid );
unlink( oldest->path );
size_t len = strlen( oldest->path ) + 5 + 1;