diff options
Diffstat (limited to 'src/server/image.c')
-rw-r--r-- | src/server/image.c | 226 |
1 files changed, 175 insertions, 51 deletions
diff --git a/src/server/image.c b/src/server/image.c index fb3f8ba..d4df26d 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -26,6 +26,7 @@ #include <glob.h> #define PATHLEN (2000) +#define NONWORKING_RECHECK_INTERVAL_SECONDS (60) // ########################################## @@ -44,10 +45,10 @@ typedef struct time_t deadline; } imagecache; static imagecache remoteCloneCache[CACHELEN]; -static int remoteCloneCacheIndex = 0; // ########################################## +static void image_remove(dnbd3_image_t *image); static dnbd3_image_t* image_free(dnbd3_image_t *image); static bool image_isHashBlockComplete(uint8_t * const cacheMap, const uint64_t block, const uint64_t fileSize); static bool image_load_all_internal(char *base, char *path); @@ -283,18 +284,65 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) candidate->users++; spin_unlock( &candidate->lock ); - if ( !checkIfWorking ) return candidate; + if ( !checkIfWorking ) return candidate; // Found, but not interested in working state // Found, see if it works - if ( !candidate->working && candidate->cache_map != NULL && candidate->uplink == NULL && file_isWritable( candidate->path ) ) { - // Not working and has file + cache-map, try to init uplink (uplink_init will check if proxy mode is enabled) - uplink_init( candidate, -1, NULL ); - } else if ( candidate->working && candidate->uplink != NULL && candidate->uplink->queueLen > SERVER_UPLINK_QUEUELEN_THRES ) { - // To many pending uplink requests. We take that as a hint that the uplink is clogged or no working uplink server - // exists, so "working" is changed to false for now. Should a new uplink server be found the uplink thread will - // set this back to true some time. - candidate->working = false; + + if ( candidate->working ) { + // Last known state was "working", see if that should change + if ( candidate->readFd == -1 ) { + candidate->working = false; + } + } else { // ...not working... + // Don't re-check too often + spin_lock( &candidate->lock ); + bool check; + const time_t now = time( NULL ); + check = ( now - candidate->lastWorkCheck ) > NONWORKING_RECHECK_INTERVAL_SECONDS; + if ( check ) { + candidate->lastWorkCheck = now; + } + spin_unlock( &candidate->lock ); + if ( !check ) { + return candidate; + } + // Check if the local file exists, has the right size, and is readable (writable for incomplete image) + if ( candidate->cache_map != NULL ) { + // -- Incomplete - rw check + if ( candidate->cacheFd == -1 ) { // Make sure file is open for writing + candidate->cacheFd = open( candidate->path, O_RDWR ); + // It might have failed - still offer proxy mode, we just can't cache + if ( candidate->cacheFd == -1 ) { + logadd( LOG_WARNING, "Cannot re-open %s for writing - replication disabled", candidate->path ); + } + } + if ( candidate->uplink == NULL && candidate->cacheFd != -1 ) { + uplink_init( candidate, -1, NULL ); + } + } + // Common for ro and rw images + const off_t len = lseek( candidate->readFd, 0, SEEK_END ); + if ( len == -1 ) { + logadd( LOG_WARNING, "lseek() on %s failed (errno=%d), removing image", candidate->path, errno ); + image_remove( candidate ); // No release here, the image is still returned and should be released by caller + } else if ( (uint64_t)len != candidate->realFilesize ) { + logadd( LOG_DEBUG1, "Size of %s changed at runtime, keeping disabled! Expected: %" PRIu64 ", found: %" PRIu64 + ". Try sending SIGHUP to server if you know what you're doing.", + candidate->path, candidate->realFilesize, (uint64_t)len ); + } else { + // Seek worked, file size is same, now see if we can read from file + char buffer[100]; + if ( pread( candidate->readFd, buffer, sizeof(buffer), 0 ) == -1 ) { + logadd( LOG_DEBUG2, "Reading first %d bytes from %s failed (errno=%d), removing image", + (int)sizeof(buffer), candidate->path, errno ); + image_remove( candidate ); + } else { + // Seems everything is fine again \o/ + candidate->working = true; + } + } } + return candidate; // Success :-) } @@ -304,7 +352,7 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking) * Every call to image_lock() needs to be followed by a call to image_release() at some point. * Locks on: imageListLock, _images[].lock */ -dnbd3_image_t* image_lock(dnbd3_image_t *image) +dnbd3_image_t* image_lock(dnbd3_image_t *image) // TODO: get rid, fix places that do image->users-- { if ( image == NULL ) return NULL ; int i; @@ -361,7 +409,7 @@ dnbd3_image_t* image_release(dnbd3_image_t *image) * no active users * Locks on: imageListLock, image[].lock */ -void image_remove(dnbd3_image_t *image) +static void image_remove(dnbd3_image_t *image) { bool wasInList = false; spin_lock( &imageListLock ); @@ -399,13 +447,46 @@ void image_killUplinks() /** * Load all images in given path recursively. * Pass NULL to use path from config. - * NOT THREAD SAFE, make sure this is only running - * on one thread at a time! */ bool image_loadAll(char *path) { bool ret; + char imgPath[PATHLEN]; + int imgId; + dnbd3_image_t *imgHandle; + if ( path == NULL ) path = _basePath; + if ( _removeMissingImages ) { + // Check if all loaded images still exist on disk + logadd( LOG_DEBUG1, "Checking for vanished images" ); + spin_lock( &imageListLock ); + for (int i = _num_images - 1; i >= 0; --i) { + if ( _images[i] == NULL ) { + if ( i + 1 == _num_images ) _num_images--; + continue; + } + imgId = _images[i]->id; + snprintf( imgPath, PATHLEN, "%s", _images[i]->path ); + spin_unlock( &imageListLock ); // isReadable hits the fs; unlock + // Check if fill can still be opened for reading + ret = file_isReadable( imgPath ); + // Lock again, see if image is still there, free if required + spin_lock( &imageListLock ); + if ( ret || i >= _num_images || _images[i] == NULL || _images[i]->id != imgId ) continue; + // Image needs to be removed + imgHandle = _images[i]; + _images[i] = NULL; + if ( i + 1 == _num_images ) _num_images--; + if ( imgHandle->users != 0 ) continue; // Still in use, do not free (last releasing user will trigger) + // Image is not in use anymore, free the dangling entry immediately + spin_unlock( &imageListLock ); // image_free might do several fs operations; unlock + image_free( imgHandle ); + spin_lock( &imageListLock ); + } + spin_unlock( &imageListLock ); + } + // Now scan for new images + logadd( LOG_DEBUG1, "Scanning for new or modified images" ); pthread_mutex_lock( &reloadLock ); ret = image_load_all_internal( path, path ); pthread_mutex_unlock( &reloadLock ); @@ -438,6 +519,9 @@ bool image_tryFreeAll() static dnbd3_image_t* image_free(dnbd3_image_t *image) { assert( image != NULL ); + if ( !_shutdown ) { + logadd( LOG_INFO, "Freeing image %s:%d", image->lower_name, (int)image->rid ); + } // image_saveCacheMap( image ); uplink_shutdown( image ); @@ -511,7 +595,7 @@ static bool image_load_all_internal(char *base, char *path) logadd( LOG_WARNING, "stat() for '%s' failed. Ignoring....", subpath ); continue; } - if ( S_ISDIR( st.st_mode )) { + if ( S_ISDIR( st.st_mode ) ) { image_load_all_internal( base, subpath ); // Recurse } else { image_load( base, subpath, true ); // Load image if possible @@ -531,7 +615,7 @@ static bool image_load_all_internal(char *base, char *path) static bool image_load(char *base, char *path, int withUplink) { static int imgIdCounter = 0; // Used to assign unique numeric IDs to images - int i, revision; + int i, revision = -1; struct stat st; uint8_t *cache_map = NULL; uint32_t *crc32list = NULL; @@ -563,7 +647,7 @@ static bool image_load(char *base, char *path, int withUplink) // Easy - legacy mode, simply append full file name and set rid to 1 strcat( dst, fileName ); revision = 1; - } else { + } else if ( !_vmdkLegacyMode ) { // Try to parse *.r<ID> syntax for (i = fileNameLen - 1; i > 1; --i) { if ( fileName[i] < '0' || fileName[i] > '9' ) break; @@ -578,7 +662,7 @@ static bool image_load(char *base, char *path, int withUplink) } *dst = '\0'; } - if ( revision <= 0 ) { + if ( revision <= 0 || revision >= 65536 ) { logadd( LOG_WARNING, "Image '%s' has invalid revision ID %d", path, revision ); goto load_error; } @@ -612,7 +696,7 @@ static bool image_load(char *base, char *path, int withUplink) // 1. Allocate memory for the cache map if the image is incomplete cache_map = image_loadCacheMap( path, virtualFilesize ); - // TODO: Maybe try sha-256 or 512 first if you're paranoid (to be implemented) + // XXX: Maybe try sha-256 or 512 first if you're paranoid (to be implemented) // 2. Load CRC-32 list of image uint32_t masterCrc; @@ -630,34 +714,34 @@ static bool image_load(char *base, char *path, int withUplink) // Compare data just loaded to identical image we apparently already loaded if ( existing != NULL ) { if ( existing->realFilesize != realFilesize ) { - // Image will be replaced below logadd( LOG_WARNING, "Size of image '%s:%d' has changed.", existing->lower_name, (int)existing->rid ); + // Image will be replaced below } else if ( existing->crc32 != NULL && crc32list != NULL && memcmp( existing->crc32, crc32list, sizeof(uint32_t) * hashBlockCount ) != 0 ) { - // Image will be replaced below logadd( LOG_WARNING, "CRC32 list of image '%s:%d' has changed.", existing->lower_name, (int)existing->rid ); logadd( LOG_WARNING, "The image will be reloaded, but you should NOT replace existing images while the server is running." ); logadd( LOG_WARNING, "Actually even if it's not running this should never be done. Use a new RID instead!" ); + // Image will be replaced below } else if ( existing->crc32 == NULL && crc32list != NULL ) { logadd( LOG_INFO, "Found CRC-32 list for already loaded image '%s:%d', adding...", existing->lower_name, (int)existing->rid ); existing->crc32 = crc32list; existing->masterCrc32 = masterCrc; crc32list = NULL; function_return = true; - goto load_error; + goto load_error; // Keep existing } else if ( existing->cache_map != NULL && cache_map == NULL ) { // Just ignore that fact, if replication is really complete the cache map will be removed anyways logadd( LOG_INFO, "Image '%s:%d' has no cache map on disk!", existing->lower_name, (int)existing->rid ); function_return = true; - goto load_error; + goto load_error; // Keep existing } else { // Nothing changed about the existing image, so do nothing function_return = true; - goto load_error; + goto load_error; // Keep existing } - // Remove image from images array - image_release( existing ); + // Remove existing image from images array, so it will be replaced by the reloaded image image_remove( existing ); + image_release( existing ); existing = NULL; } @@ -714,7 +798,7 @@ static bool image_load(char *base, char *path, int withUplink) spin_lock( &imageListLock ); // Now we're locked, assign unique ID to image (unique for this running server instance!) image->id = ++imgIdCounter; - for (i = 0; i < _num_images; ++i) { + for ( i = 0; i < _num_images; ++i ) { if ( _images[i] != NULL ) continue; _images[i] = image; break; @@ -727,7 +811,7 @@ static bool image_load(char *base, char *path, int withUplink) goto load_error; } _images[_num_images++] = image; - logadd( LOG_DEBUG1, "Loaded image '%s'\n", image->lower_name ); + logadd( LOG_DEBUG1, "Loaded image '%s:%d'\n", image->lower_name, (int)image->rid ); } // Keep fd for reading image->readFd = fdImage; @@ -888,7 +972,7 @@ bool image_create(char *image, int revision, uint64_t size) close( fdCache ); return true; // - failure_cleanup: ; +failure_cleanup: ; if ( fdImage >= 0 ) close( fdImage ); if ( fdCache >= 0 ) close( fdCache ); remove( path ); @@ -913,8 +997,11 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste */ dnbd3_image_t* image_getOrLoad(char * const name, const uint16_t revision) { - // not proxy, specific revision - nothing to do - if ( !_isProxy && revision != 0 ) return image_get( name, revision, true ); + // specific revision - try shortcut + if ( revision != 0 ) { + dnbd3_image_t *image = image_get( name, revision, true ); + if ( image != NULL ) return image; + } const size_t len = strlen( name ); // Sanity check if ( len == 0 || name[len - 1] == '/' || name[0] == '/' @@ -935,14 +1022,15 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision, { int i; // Already existing locally? - dnbd3_image_t *image = image_get( name, revision, true ); - // exists and specific revision - nothing to do - if ( image != NULL && revision != 0 ) return image; + dnbd3_image_t *image = NULL; + if ( revision == 0 ) { + image = image_get( name, revision, true ); + } // Doesn't exist or is rid 0, try remote if not already tried it recently const time_t now = time( NULL ); char *cmpname = name; - int useIndex = -1; + int useIndex = -1, fallbackIndex = 0; if ( len >= NAMELEN ) cmpname += 1 + len - NAMELEN; pthread_mutex_lock( &remoteCloneLock ); for (i = 0; i < CACHELEN; ++i) { @@ -952,6 +1040,9 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision, pthread_mutex_unlock( &remoteCloneLock ); // Was recently checked... return image; } + if ( remoteCloneCache[i].deadline < remoteCloneCache[fallbackIndex].deadline ) { + fallbackIndex = i; + } } // Re-check to prevent two clients at the same time triggering this, // but only if rid != 0, since we would just get an old rid then @@ -966,11 +1057,13 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision, serialized_buffer_t serialized; // Mark as recently checked if ( useIndex == -1 ) { - useIndex = remoteCloneCacheIndex = (remoteCloneCacheIndex + 1) % CACHELEN; + useIndex = fallbackIndex; } remoteCloneCache[useIndex].deadline = now + SERVER_REMOTE_IMAGE_CHECK_CACHETIME; snprintf( remoteCloneCache[useIndex].name, NAMELEN, "%s", cmpname ); remoteCloneCache[useIndex].rid = revision; + pthread_mutex_unlock( &remoteCloneLock ); + // Get some alt servers and try to get the image from there dnbd3_host_t servers[4]; int uplinkSock = -1; @@ -980,26 +1073,32 @@ static dnbd3_image_t *loadImageProxy(char * const name, const uint16_t revision, uint16_t remoteRid = revision; uint64_t remoteImageSize; for (i = 0; i < count; ++i) { + char *remoteName; + bool ok = false; int sock = sock_connect( &servers[i], 750, _uplinkTimeout ); - if ( sock < 0 ) continue; + if ( sock == -1 ) continue; if ( !dnbd3_select_image( sock, name, revision, FLAGS8_SERVER ) ) goto server_fail; - char *remoteName; if ( !dnbd3_select_image_reply( &serialized, sock, &remoteProtocolVersion, &remoteName, &remoteRid, &remoteImageSize ) ) goto server_fail; if ( remoteProtocolVersion < MIN_SUPPORTED_SERVER || remoteRid == 0 ) goto server_fail; - if ( revision != 0 && remoteRid != revision ) goto server_fail; + if ( revision != 0 && remoteRid != revision ) goto server_fail; // Want specific revision but uplink supplied different rid if ( revision == 0 && image != NULL && image->rid >= remoteRid ) goto server_fail; // Not actually a failure: Highest remote rid is <= highest local rid - don't clone! if ( remoteImageSize < DNBD3_BLOCK_SIZE || remoteName == NULL || strcmp( name, remoteName ) != 0 ) goto server_fail; if ( remoteImageSize > SERVER_MAX_PROXY_IMAGE_SIZE ) goto server_fail; - if ( !image_ensureDiskSpace( remoteImageSize ) ) goto server_fail; - if ( !image_clone( sock, name, remoteRid, remoteImageSize ) ) goto server_fail; // This sets up the file+map+crc + pthread_mutex_lock( &reloadLock ); + ok = image_ensureDiskSpace( remoteImageSize ) + && image_clone( sock, name, remoteRid, remoteImageSize ); // This sets up the file+map+crc and loads the img + pthread_mutex_unlock( &reloadLock ); + if ( !ok ) goto server_fail; + // Cloning worked :-) uplinkSock = sock; uplinkServer = &servers[i]; break; - server_fail: ; + +server_fail: ; close( sock ); } - pthread_mutex_unlock( &remoteCloneLock ); + // If we still have a pointer to a local image, release the reference if ( image != NULL ) image_release( image ); // If everything worked out, this call should now actually return the image @@ -1030,9 +1129,10 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste uint16_t detectedRid = 0; if ( _vmdkLegacyMode ) { - // TODO - assert( 0 ); - detectedRid = requestedRid; + if ( strend( name, ".vmdk" ) ) { + snprintf( imageFile, PATHLEN, "%s/%s", _basePath, name ); + detectedRid = MAX( 1, requestedRid ); + } } else if ( requestedRid != 0 ) { snprintf( imageFile, PATHLEN, "%s/%s.r%d", _basePath, name, requestedRid ); detectedRid = requestedRid; @@ -1044,8 +1144,9 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste if ( ret == 0 ) { long int best = 0; for ( size_t i = 0; i < g.gl_pathc; ++i ) { - char *rev = strrchr( g.gl_pathv[i], 'r' ); - if ( rev == NULL ) continue; + const char * const path = g.gl_pathv[i]; + const char * rev = strrchr( path, 'r' ); + if ( rev == NULL || rev == path || *(rev - 1) != '.' ) continue; rev++; if ( *rev < '0' || *rev > '9' ) continue; char *err = NULL; @@ -1062,22 +1163,45 @@ static dnbd3_image_t *loadImageServer(char * const name, const uint16_t requeste } globfree( &g ); } + logadd( LOG_DEBUG2, "Trying to load %s:%d ( -> %d) as %s", name, (int)requestedRid, (int)detectedRid, imageFile ); // No file was determined, or it doesn't seem to exist/be readable - if ( imageFile[0] == '\0' || !file_isReadable( imageFile ) ) { // XXX glob fallback to rid-1? Rework above + if ( imageFile[0] == '\0' ) { + logadd( LOG_DEBUG2, "Not found, bailing out" ); return image_get( name, detectedRid, true ); } + if ( requestedRid == 0 ) { + // rid 0 requested - check if detected rid is readable, decrease rid if not until we reach 0 + while ( detectedRid != 0 ) { + dnbd3_image_t *image = image_get( name, detectedRid, true ); + if ( image != NULL ) { + // globbed rid already loaded, return + return image; + } + if ( file_isReadable( imageFile ) ) { + // globbed rid is + break; + } + logadd( LOG_DEBUG2, "%s: rid %d globbed but not readable, trying lower rid...", name, (int)detectedRid ); + detectedRid--; + snprintf( imageFile, PATHLEN, "%s/%s.r%d", _basePath, name, requestedRid ); + } + } + // Now lock on the loading mutex, then check again if the image exists (we're multi-threaded) pthread_mutex_lock( &reloadLock ); dnbd3_image_t* image = image_get( name, detectedRid, true ); if ( image != NULL ) { // The image magically appeared in the meantime + logadd( LOG_DEBUG2, "Magically appeared" ); pthread_mutex_unlock( &reloadLock ); return image; } // Still not loaded, let's try to do so + logadd( LOG_DEBUG2, "Calling load" ); image_load( _basePath, imageFile, false ); pthread_mutex_unlock( &reloadLock ); // If loading succeeded, this will return the image + logadd( LOG_DEBUG2, "Calling get" ); return image_get( name, requestedRid, true ); } @@ -1333,7 +1457,7 @@ static bool image_calcBlockCrc32(const int fd, const int block, const uint32_t r */ static bool image_ensureDiskSpace(uint64_t size) { - for (;;) { + for ( int maxtries = 0; maxtries < 20; ++maxtries ) { const int64_t available = file_freeDiskSpace( _basePath ); if ( available == -1 ) { const int e = errno; @@ -1368,7 +1492,7 @@ static bool image_ensureDiskSpace(uint64_t size) return false; } oldest = image_lock( oldest ); - if ( oldest == NULL ) return false; + if ( oldest == NULL ) continue; // Image freed in the meantime? Try again logadd( LOG_INFO, "'%s:%d' has to go!", oldest->lower_name, (int)oldest->rid ); unlink( oldest->path ); size_t len = strlen( oldest->path ) + 5 + 1; |