summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Rettberg2019-08-29 14:49:18 +0200
committerSimon Rettberg2019-08-29 14:49:18 +0200
commit88695877f085af475a6ca8a01c2fbb08eb5b15da (patch)
tree7e196a12eca25c72802c255f42dac77af13cf484
parent[SERVER] reference: Fix error msg usage (diff)
downloaddnbd3-88695877f085af475a6ca8a01c2fbb08eb5b15da.tar.gz
dnbd3-88695877f085af475a6ca8a01c2fbb08eb5b15da.tar.xz
dnbd3-88695877f085af475a6ca8a01c2fbb08eb5b15da.zip
[SERVER] Use weakref for cache maps
Gets rid of a bunch of locking, especially the hot path in net.c where clients are requesting data. Many clients using the same incomplete image previously created a bottleneck here.
-rw-r--r--src/server/globals.h10
-rw-r--r--src/server/image.c208
-rw-r--r--src/server/image.h2
-rw-r--r--src/server/integrity.c10
-rw-r--r--src/server/net.c81
-rw-r--r--src/server/reference.h5
-rw-r--r--src/server/uplink.c64
7 files changed, 220 insertions, 160 deletions
diff --git a/src/server/globals.h b/src/server/globals.h
index f940666..221af78 100644
--- a/src/server/globals.h
+++ b/src/server/globals.h
@@ -99,6 +99,12 @@ typedef struct
int permissions;
} dnbd3_access_rule_t;
+typedef struct
+{
+ ref reference;
+ atomic_uint_least8_t map[];
+} dnbd3_cache_map_t;
+
/**
* Image struct. An image path could be something like
* /mnt/images/rz/zfs/Windows7 ZfS.vmdk.r1
@@ -110,7 +116,7 @@ struct _dnbd3_image
char *path; // absolute path of the image
char *name; // public name of the image (usually relative path minus revision ID)
weakref uplinkref; // pointer to a server connection
- uint8_t *cache_map; // cache map telling which parts are locally cached, NULL if complete
+ weakref ref_cacheMap; // cache map telling which parts are locally cached, NULL if complete
uint64_t virtualFilesize; // virtual size of image (real size rounded up to multiple of 4k)
uint64_t realFilesize; // actual file size on disk
ticks atime; // last access time
@@ -119,7 +125,7 @@ struct _dnbd3_image
uint32_t *crc32; // list of crc32 checksums for each 16MiB block in image
uint32_t masterCrc32; // CRC-32 of the crc-32 list
int readFd; // used to read the image. Used from multiple threads, so use atomic operations (pread et al)
- int completenessEstimate; // Completeness estimate in percent
+ atomic_int completenessEstimate; // Completeness estimate in percent
atomic_int users; // clients currently using this image. XXX Lock on imageListLock when modifying and checking whether the image should be freed. Reading it elsewhere is fine without the lock.
int id; // Unique ID of this image. Only unique in the context of this running instance of DNBD3-Server
atomic_bool working; // true if image exists and completeness is == 100% or a working upstream proxy is connected
diff --git a/src/server/image.c b/src/server/image.c
index 4eab1d2..1972f48 100644
--- a/src/server/image.c
+++ b/src/server/image.c
@@ -51,10 +51,18 @@ static bool image_clone(int sock, char *name, uint16_t revision, uint64_t imageS
static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_t realFilesize, uint32_t *crc);
static bool image_ensureDiskSpace(uint64_t size, bool force);
-static uint8_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize);
+static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize);
static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t fileSize, uint32_t *masterCrc);
-static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t fileSize, uint32_t * const crc32list, uint8_t * const cache_map);
+static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t fileSize, uint32_t * const crc32list, atomic_uint_least8_t * const cache_map);
static void* closeUnusedFds(void*);
+static void allocCacheMap(dnbd3_image_t *image, bool complete);
+
+static void cmfree(ref *ref)
+{
+ dnbd3_cache_map_t *cache = container_of(ref, dnbd3_cache_map_t, reference);
+ logadd( LOG_DEBUG2, "Freeing a cache map" );
+ free( cache );
+}
// ##########################################
@@ -70,7 +78,6 @@ void image_serverStartup()
/**
* Update cache-map of given image for the given byte range
* start (inclusive) - end (exclusive)
- * Locks on: images[].lock
*/
void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, const bool set)
{
@@ -91,33 +98,55 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
if ( start >= end )
return;
bool setNewBlocks = false;
- uint64_t pos = start;
- mutex_lock( &image->lock );
- if ( image->cache_map == NULL ) {
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL ) {
// Image seems already complete
if ( set ) {
// This makes no sense
- mutex_unlock( &image->lock );
- logadd( LOG_DEBUG1, "image_updateCachemap(true) with no cache_map: %s", image->path );
+ logadd( LOG_DEBUG1, "image_updateCachemap(true) with no cache map: %s", image->path );
return;
}
// Recreate a cache map, set it to all 1 initially as we assume the image was complete
- const int byteSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
- image->cache_map = malloc( byteSize );
- memset( image->cache_map, 0xff, byteSize );
- }
- while ( pos < end ) {
- const size_t map_y = (int)( pos >> 15 );
- const int map_x = (int)( (pos >> 12) & 7 ); // mod 8
- const int bit_mask = 1 << map_x;
- if ( set ) {
- if ( (image->cache_map[map_y] & bit_mask) == 0 ) setNewBlocks = true;
- image->cache_map[map_y] |= (uint8_t)bit_mask;
- } else {
- image->cache_map[map_y] &= (uint8_t)~bit_mask;
+ allocCacheMap( image, true );
+ cache = ref_get_cachemap( image );
+ if ( cache == NULL ) {
+ logadd( LOG_WARNING, "WHAT!!!?!?!= No cache map right after alloc?! %s", image->path );
+ return;
}
- pos += DNBD3_BLOCK_SIZE;
}
+ // Set/unset
+ const uint64_t firstByteInMap = start >> 15;
+ const uint64_t lastByteInMap = (end - 1) >> 15;
+ uint64_t pos;
+ // First byte
+ uint8_t fb = 0, lb = 0;
+ for ( pos = start; firstByteInMap == (pos >> 15) && pos < end; pos += DNBD3_BLOCK_SIZE ) {
+ const int map_x = (pos >> 12) & 7; // mod 8
+ const uint8_t bit_mask = (uint8_t)( 1 << map_x );
+ fb |= bit_mask;
+ }
+ // Last byte
+ for ( pos = lastByteInMap << 15; pos < end; pos += DNBD3_BLOCK_SIZE ) {
+ const int map_x = (pos >> 12) & 7; // mod 8
+ const uint8_t bit_mask = (uint8_t)( 1 << map_x );
+ lb |= bit_mask;
+ }
+ if ( set ) {
+ uint8_t fo = atomic_fetch_or_explicit( &cache->map[firstByteInMap], fb, memory_order_relaxed );
+ uint8_t lo = atomic_fetch_or_explicit( &cache->map[lastByteInMap], lb, memory_order_relaxed );
+ setNewBlocks = ( fo != cache->map[firstByteInMap] || lo != cache->map[lastByteInMap] );
+ } else {
+ atomic_fetch_and_explicit( &cache->map[firstByteInMap], (uint8_t)~fb, memory_order_relaxed );
+ atomic_fetch_and_explicit( &cache->map[lastByteInMap], (uint8_t)~lb, memory_order_relaxed );
+ }
+ const uint8_t nval = set ? 0xff : 0;
+ // Everything in between
+ for ( pos = firstByteInMap + 1; pos < lastByteInMap; ++pos ) {
+ if ( atomic_exchange_explicit( &cache->map[pos], nval, memory_order_relaxed ) != nval && set ) {
+ setNewBlocks = true;
+ }
+ }
+ atomic_thread_fence( memory_order_release );
if ( setNewBlocks && image->crc32 != NULL ) {
// If setNewBlocks is set, at least one of the blocks was not cached before, so queue all hash blocks
// for checking, even though this might lead to checking some hash block again, if it was
@@ -125,19 +154,14 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
// First set start and end to borders of hash blocks
start &= ~(uint64_t)(HASH_BLOCK_SIZE - 1);
end = (end + HASH_BLOCK_SIZE - 1) & ~(uint64_t)(HASH_BLOCK_SIZE - 1);
- pos = start;
- while ( pos < end ) {
- if ( image->cache_map == NULL ) break;
+ for ( pos = start; pos < end; pos += HASH_BLOCK_SIZE ) {
const int block = (int)( pos / HASH_BLOCK_SIZE );
- if ( image_isHashBlockComplete( image->cache_map, block, image->realFilesize ) ) {
- mutex_unlock( &image->lock );
+ if ( image_isHashBlockComplete( cache->map, block, image->realFilesize ) ) {
integrity_check( image, block );
- mutex_lock( &image->lock );
}
- pos += HASH_BLOCK_SIZE;
}
}
- mutex_unlock( &image->lock );
+ ref_put( &cache->reference );
}
/**
@@ -149,20 +173,18 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
bool image_isComplete(dnbd3_image_t *image)
{
assert( image != NULL );
- mutex_lock( &image->lock );
if ( image->virtualFilesize == 0 ) {
- mutex_unlock( &image->lock );
return false;
}
- if ( image->cache_map == NULL ) {
- mutex_unlock( &image->lock );
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL ) {
return true;
}
bool complete = true;
int j;
const int map_len_bytes = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
for (j = 0; j < map_len_bytes - 1; ++j) {
- if ( image->cache_map[j] != 0xFF ) {
+ if ( cache->map[j] != 0xFF ) {
complete = false;
break;
}
@@ -177,18 +199,27 @@ bool image_isComplete(dnbd3_image_t *image)
for (j = 0; j < blocks_in_last_byte; ++j)
last_byte |= (uint8_t)(1 << j);
}
- complete = ((image->cache_map[map_len_bytes - 1] & last_byte) == last_byte);
+ complete = ((cache->map[map_len_bytes - 1] & last_byte) == last_byte);
}
- if ( !complete ) {
- mutex_unlock( &image->lock );
+ ref_put( &cache->reference );
+ if ( !complete )
return false;
+ mutex_lock( &image->lock );
+ // Lock and make sure current cache map is still the one we saw complete
+ dnbd3_cache_map_t *current = ref_get_cachemap( image );
+ if ( current == cache ) {
+ // Set cache map NULL as it's complete
+ ref_setref( &image->ref_cacheMap, NULL );
+ }
+ if ( current != NULL ) {
+ ref_put( &current->reference );
}
- char mapfile[PATHLEN] = "";
- free( image->cache_map );
- image->cache_map = NULL;
- snprintf( mapfile, PATHLEN, "%s.map", image->path );
mutex_unlock( &image->lock );
- unlink( mapfile );
+ if ( current == cache ) { // Successfully set cache map to NULL above
+ char mapfile[PATHLEN] = "";
+ snprintf( mapfile, PATHLEN, "%s.map", image->path );
+ unlink( mapfile );
+ }
return true;
}
@@ -350,19 +381,18 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
img->rid = candidate->rid;
img->users = 1;
img->working = false;
+ img->ref_cacheMap = NULL;
mutex_init( &img->lock, LOCK_IMAGE );
if ( candidate->crc32 != NULL ) {
const size_t mb = IMGSIZE_TO_HASHBLOCKS( candidate->virtualFilesize ) * sizeof(uint32_t);
img->crc32 = malloc( mb );
memcpy( img->crc32, candidate->crc32, mb );
}
- mutex_lock( &candidate->lock );
- if ( candidate->cache_map != NULL ) {
- const size_t mb = IMGSIZE_TO_MAPBYTES( candidate->virtualFilesize );
- img->cache_map = malloc( mb );
- memcpy( img->cache_map, candidate->cache_map, mb );
+ dnbd3_cache_map_t *cache = ref_get_cachemap( candidate );
+ if ( cache != NULL ) {
+ ref_setref( &img->ref_cacheMap, &cache->reference );
+ ref_put( &cache->reference );
}
- mutex_unlock( &candidate->lock );
if ( image_addToList( img ) ) {
image_release( candidate );
candidate = img;
@@ -377,7 +407,7 @@ dnbd3_image_t* image_get(char *name, uint16_t revision, bool checkIfWorking)
}
// Check if image is incomplete, handle
- if ( candidate->cache_map != NULL ) {
+ if ( candidate->ref_cacheMap != NULL ) {
uplink_init( candidate, -1, NULL, -1 );
}
@@ -585,11 +615,10 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image)
if ( !uplink_shutdown( image ) )
return NULL;
mutex_lock( &image->lock );
- free( image->cache_map );
+ ref_setref( &image->ref_cacheMap, NULL );
free( image->crc32 );
free( image->path );
free( image->name );
- image->cache_map = NULL;
image->crc32 = NULL;
image->path = NULL;
image->name = NULL;
@@ -600,7 +629,7 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image)
return NULL ;
}
-bool image_isHashBlockComplete(const uint8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize)
+bool image_isHashBlockComplete(atomic_uint_least8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize)
{
if ( cacheMap == NULL ) return true;
const uint64_t end = (block + 1) * HASH_BLOCK_SIZE;
@@ -707,7 +736,7 @@ static bool image_load(char *base, char *path, int withUplink)
{
int revision = -1;
struct stat st;
- uint8_t *cache_map = NULL;
+ dnbd3_cache_map_t *cache = NULL;
uint32_t *crc32list = NULL;
dnbd3_image_t *existing = NULL;
int fdImage = -1;
@@ -790,7 +819,7 @@ static bool image_load(char *base, char *path, int withUplink)
}
// 1. Allocate memory for the cache map if the image is incomplete
- cache_map = image_loadCacheMap( path, virtualFilesize );
+ cache = image_loadCacheMap( path, virtualFilesize );
// XXX: Maybe try sha-256 or 512 first if you're paranoid (to be implemented)
@@ -802,7 +831,7 @@ static bool image_load(char *base, char *path, int withUplink)
// Check CRC32
if ( crc32list != NULL ) {
- if ( !image_checkRandomBlocks( 4, fdImage, realFilesize, crc32list, cache_map ) ) {
+ if ( !image_checkRandomBlocks( 4, fdImage, realFilesize, crc32list, cache != NULL ? cache->map : NULL ) ) {
logadd( LOG_ERROR, "quick crc32 check of %s failed. Data corruption?", path );
doFullCheck = true;
}
@@ -826,7 +855,7 @@ static bool image_load(char *base, char *path, int withUplink)
crc32list = NULL;
function_return = true;
goto load_error; // Keep existing
- } else if ( existing->cache_map != NULL && cache_map == NULL ) {
+ } else if ( existing->ref_cacheMap != NULL && cache == NULL ) {
// Just ignore that fact, if replication is really complete the cache map will be removed anyways
logadd( LOG_INFO, "Image '%s:%d' has no cache map on disk!", existing->name, (int)existing->rid );
function_return = true;
@@ -846,7 +875,8 @@ static bool image_load(char *base, char *path, int withUplink)
dnbd3_image_t *image = calloc( 1, sizeof(dnbd3_image_t) );
image->path = strdup( path );
image->name = strdup( imgName );
- image->cache_map = cache_map;
+ image->ref_cacheMap = NULL;
+ ref_setref( &image->ref_cacheMap, &cache->reference );
image->crc32 = crc32list;
image->masterCrc32 = masterCrc;
image->uplinkref = NULL;
@@ -855,7 +885,7 @@ static bool image_load(char *base, char *path, int withUplink)
image->rid = (uint16_t)revision;
image->users = 0;
image->readFd = -1;
- image->working = (image->cache_map == NULL );
+ image->working = ( cache == NULL );
timing_get( &image->nextCompletenessEstimate );
image->completenessEstimate = -1;
mutex_init( &image->lock, LOCK_IMAGE );
@@ -870,16 +900,16 @@ static bool image_load(char *base, char *path, int withUplink)
timing_gets( &image->atime, offset );
// Prevent freeing in cleanup
- cache_map = NULL;
+ cache = NULL;
crc32list = NULL;
// Get rid of cache map if image is complete
- if ( image->cache_map != NULL ) {
+ if ( image->ref_cacheMap != NULL ) {
image_isComplete( image );
}
// Image is definitely incomplete, initialize uplink worker
- if ( image->cache_map != NULL ) {
+ if ( image->ref_cacheMap != NULL ) {
image->working = false;
if ( withUplink ) {
uplink_init( image, -1, NULL, -1 );
@@ -910,21 +940,22 @@ static bool image_load(char *base, char *path, int withUplink)
load_error: ;
if ( existing != NULL ) existing = image_release( existing );
if ( crc32list != NULL ) free( crc32list );
- if ( cache_map != NULL ) free( cache_map );
+ if ( cache != NULL ) free( cache );
if ( fdImage != -1 ) close( fdImage );
return function_return;
}
-static uint8_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize)
+static dnbd3_cache_map_t* image_loadCacheMap(const char * const imagePath, const int64_t fileSize)
{
- uint8_t *retval = NULL;
+ dnbd3_cache_map_t *retval = NULL;
char mapFile[strlen( imagePath ) + 10 + 1];
sprintf( mapFile, "%s.map", imagePath );
int fdMap = open( mapFile, O_RDONLY );
- if ( fdMap >= 0 ) {
+ if ( fdMap != -1 ) {
const int map_size = IMGSIZE_TO_MAPBYTES( fileSize );
- retval = calloc( 1, map_size );
- const ssize_t rd = read( fdMap, retval, map_size );
+ retval = calloc( 1, sizeof(*retval) + map_size );
+ ref_init( &retval->reference, cmfree, 0 );
+ const ssize_t rd = read( fdMap, retval->map, map_size );
if ( map_size != rd ) {
logadd( LOG_WARNING, "Could only read %d of expected %d bytes of cache map of '%s'", (int)rd, (int)map_size, imagePath );
// Could not read complete map, that means the rest of the image file will be considered incomplete
@@ -985,7 +1016,7 @@ static uint32_t* image_loadCrcList(const char * const imagePath, const int64_t f
return retval;
}
-static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t realFilesize, uint32_t * const crc32list, uint8_t * const cache_map)
+static bool image_checkRandomBlocks(const int count, int fdImage, const int64_t realFilesize, uint32_t * const crc32list, atomic_uint_least8_t * const cache_map)
{
// This checks the first block and (up to) count - 1 random blocks for corruption
// via the known crc32 list. This is very sloppy and is merely supposed to detect
@@ -1529,30 +1560,37 @@ json_t* image_getListAsJson()
/**
* Get completeness of an image in percent. Only estimated, not exact.
* Returns: 0-100
- * DOES NOT LOCK, so make sure to do so before calling
*/
int image_getCompletenessEstimate(dnbd3_image_t * const image)
{
assert( image != NULL );
- if ( image->cache_map == NULL ) return image->working ? 100 : 0;
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL )
+ return image->working ? 100 : 0;
+ const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
+ if ( unlikely( len == 0 ) ) {
+ ref_put( &cache->reference );
+ return 0;
+ }
declare_now;
if ( !timing_reached( &image->nextCompletenessEstimate, &now ) ) {
// Since this operation is relatively expensive, we cache the result for a while
+ ref_put( &cache->reference );
return image->completenessEstimate;
}
int i;
int percent = 0;
- const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
- if ( len == 0 ) return 0;
for ( i = 0; i < len; ++i ) {
- if ( image->cache_map[i] == 0xff ) {
+ const uint8_t v = atomic_load_explicit( &cache->map[i], memory_order_relaxed );
+ if ( v == 0xff ) {
percent += 100;
- } else if ( image->cache_map[i] != 0 ) {
+ } else if ( v != 0 ) {
percent += 50;
}
}
+ ref_put( &cache->reference );
image->completenessEstimate = percent / len;
- timing_set( &image->nextCompletenessEstimate, &now, 8 + rand() % 32 );
+ timing_set( &image->nextCompletenessEstimate, &now, 4 + rand() % 16 );
return image->completenessEstimate;
}
@@ -1744,3 +1782,21 @@ static void* closeUnusedFds(void* nix UNUSED)
}
return NULL;
}
+
+static void allocCacheMap(dnbd3_image_t *image, bool complete)
+{
+ const uint8_t val = complete ? 0xff : 0;
+ const int byteSize = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
+ dnbd3_cache_map_t *cache = malloc( sizeof(*cache) + byteSize );
+ ref_init( &cache->reference, cmfree, 0 );
+ memset( cache->map, val, byteSize );
+ mutex_lock( &image->lock );
+ if ( image->ref_cacheMap != NULL ) {
+ logadd( LOG_WARNING, "BUG: allocCacheMap called but there already is a cache map for %s:%d", image->name, (int)image->rid );
+ free( cache );
+ } else {
+ ref_setref( &image->ref_cacheMap, &cache->reference );
+ }
+ mutex_unlock( &image->lock );
+}
+
diff --git a/src/server/image.h b/src/server/image.h
index 4668eff..cd87f03 100644
--- a/src/server/image.h
+++ b/src/server/image.h
@@ -9,7 +9,7 @@ void image_serverStartup();
bool image_isComplete(dnbd3_image_t *image);
-bool image_isHashBlockComplete(const uint8_t * const cacheMap, const uint64_t block, const uint64_t fileSize);
+bool image_isHashBlockComplete(atomic_uint_least8_t * const cacheMap, const uint64_t block, const uint64_t fileSize);
void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, const bool set);
diff --git a/src/server/integrity.c b/src/server/integrity.c
index 1fcb558..a9fbae6 100644
--- a/src/server/integrity.c
+++ b/src/server/integrity.c
@@ -181,10 +181,12 @@ static void* integrity_main(void * data UNUSED)
const uint64_t end = MIN( (uint64_t)(blocks[0] + 1) * HASH_BLOCK_SIZE, image->virtualFilesize );
bool complete = true;
if ( qCount == CHECK_ALL ) {
- // When checking full image, skip incomplete blocks, otherwise assume block is complete
- mutex_lock( &image->lock );
- complete = image_isHashBlockComplete( image->cache_map, blocks[0], fileSize );
- mutex_unlock( &image->lock );
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache != NULL ) {
+ // When checking full image, skip incomplete blocks, otherwise assume block is complete
+ complete = image_isHashBlockComplete( cache->map, blocks[0], fileSize );
+ ref_put( &cache->reference );
+ }
}
#if defined(linux) || defined(__linux)
while ( sync_file_range( fd, start, end - start, SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER ) == -1 )
diff --git a/src/server/net.c b/src/server/net.c
index 9c855e4..12bcdad 100644
--- a/src/server/net.c
+++ b/src/server/net.c
@@ -246,7 +246,7 @@ void* net_handleNewConnection(void *clientPtr)
// We're a proxy, client is another proxy, we don't do BGR, but connecting proxy does...
// Reject, as this would basically force this proxy to do BGR too.
image = image_get( image_name, rid, true );
- if ( image != NULL && image->cache_map != NULL ) {
+ if ( image != NULL && image->ref_cacheMap != NULL ) {
// Only exception is if the image is complete locally
image = image_release( image );
}
@@ -268,7 +268,7 @@ void* net_handleNewConnection(void *clientPtr)
} else {
// Image is fine so far, but occasionally drop a client if the uplink for the image is clogged or unavailable
bOk = true;
- if ( image->cache_map != NULL ) {
+ if ( image->ref_cacheMap != NULL ) {
dnbd3_uplink_t *uplink = ref_get_uplink( &image->uplinkref );
if ( uplink == NULL || uplink->cacheFd == -1 || uplink->queueLen > SERVER_UPLINK_QUEUELEN_THRES ) {
bOk = ( rand() % 4 ) == 1;
@@ -338,57 +338,52 @@ void* net_handleNewConnection(void *clientPtr)
break;
}
- if ( request.size != 0 && image->cache_map != NULL ) {
+ dnbd3_cache_map_t *cache;
+ if ( request.size != 0 && ( cache = ref_get_cachemap( image ) ) != NULL ) {
// This is a proxyed image, check if we need to relay the request...
start = offset & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1);
end = (offset + request.size + DNBD3_BLOCK_SIZE - 1) & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1);
bool isCached = true;
- mutex_lock( &image->lock );
- // Check again as we only aquired the lock just now
- if ( image->cache_map != NULL ) {
- const uint64_t firstByteInMap = start >> 15;
- const uint64_t lastByteInMap = (end - 1) >> 15;
- uint64_t pos;
- // Middle - quick checking
- if ( isCached ) {
- pos = firstByteInMap + 1;
- while ( pos < lastByteInMap ) {
- if ( image->cache_map[pos] != 0xff ) {
- isCached = false;
- break;
- }
- ++pos;
+ const uint64_t firstByteInMap = start >> 15;
+ const uint64_t lastByteInMap = (end - 1) >> 15;
+ uint64_t pos;
+ uint8_t b;
+ atomic_thread_fence( memory_order_acquire );
+ // Middle - quick checking
+ if ( isCached ) {
+ for ( pos = firstByteInMap + 1; pos < lastByteInMap; ++pos ) {
+ if ( atomic_load_explicit( &cache->map[pos], memory_order_relaxed ) != 0xff ) {
+ isCached = false;
+ break;
}
}
- // First byte
- if ( isCached ) {
- pos = start;
- do {
- const int map_x = (pos >> 12) & 7; // mod 8
- const uint8_t bit_mask = (uint8_t)( 1 << map_x );
- if ( (image->cache_map[firstByteInMap] & bit_mask) == 0 ) {
- isCached = false;
- break;
- }
- pos += DNBD3_BLOCK_SIZE;
- } while ( firstByteInMap == (pos >> 15) && pos < end );
+ }
+ // First byte
+ if ( isCached ) {
+ b = atomic_load_explicit( &cache->map[firstByteInMap], memory_order_relaxed );
+ for ( pos = start; firstByteInMap == (pos >> 15) && pos < end; pos += DNBD3_BLOCK_SIZE ) {
+ const int map_x = (pos >> 12) & 7; // mod 8
+ const uint8_t bit_mask = (uint8_t)( 1 << map_x );
+ if ( (b & bit_mask) == 0 ) {
+ isCached = false;
+ break;
+ }
}
- // Last byte - only check if request spans multiple bytes in cache map
- if ( isCached && firstByteInMap != lastByteInMap ) {
- pos = lastByteInMap << 15;
- while ( pos < end ) {
- assert( lastByteInMap == (pos >> 15) );
- const int map_x = (pos >> 12) & 7; // mod 8
- const uint8_t bit_mask = (uint8_t)( 1 << map_x );
- if ( (image->cache_map[lastByteInMap] & bit_mask) == 0 ) {
- isCached = false;
- break;
- }
- pos += DNBD3_BLOCK_SIZE;
+ }
+ // Last byte - only check if request spans multiple bytes in cache map
+ if ( isCached && firstByteInMap != lastByteInMap ) {
+ b = atomic_load_explicit( &cache->map[lastByteInMap], memory_order_relaxed );
+ for ( pos = lastByteInMap << 15; pos < end; pos += DNBD3_BLOCK_SIZE ) {
+ assert( lastByteInMap == (pos >> 15) );
+ const int map_x = (pos >> 12) & 7; // mod 8
+ const uint8_t bit_mask = (uint8_t)( 1 << map_x );
+ if ( (b & bit_mask) == 0 ) {
+ isCached = false;
+ break;
}
}
}
- mutex_unlock( &image->lock );
+ ref_put( &cache->reference );
if ( !isCached ) {
if ( !uplink_request( client, request.handle, offset, request.size, request.hops ) ) {
logadd( LOG_DEBUG1, "Could not relay uncached request from %s to upstream proxy, disabling image %s:%d",
diff --git a/src/server/reference.h b/src/server/reference.h
index 8883eb1..2a80955 100644
--- a/src/server/reference.h
+++ b/src/server/reference.h
@@ -51,4 +51,9 @@ static inline void ref_put( ref *ref )
ref == NULL ? NULL : container_of(ref, dnbd3_uplink_t, reference); \
})
+#define ref_get_cachemap(image) ({ \
+ ref* ref = ref_get( &(image)->ref_cacheMap ); \
+ ref == NULL ? NULL : container_of(ref, dnbd3_cache_map_t, reference); \
+})
+
#endif
diff --git a/src/server/uplink.c b/src/server/uplink.c
index d77be9c..0a6bd11 100644
--- a/src/server/uplink.c
+++ b/src/server/uplink.c
@@ -91,7 +91,7 @@ bool uplink_init(dnbd3_image_t *image, int sock, dnbd3_host_t *host, int version
ref_put( &uplink->reference );
return true; // There's already an uplink, so should we consider this success or failure?
}
- if ( image->cache_map == NULL ) {
+ if ( image->ref_cacheMap == NULL ) {
logadd( LOG_WARNING, "Uplink was requested for image %s, but it is already complete", image->name );
goto failure;
}
@@ -170,7 +170,7 @@ bool uplink_shutdown(dnbd3_image_t *image)
mutex_unlock( &uplink->queueLock );
bool retval = ( exp && image->users == 0 );
mutex_unlock( &image->lock );
- return exp;
+ return retval;
}
/**
@@ -214,7 +214,7 @@ static void uplink_free(ref *ref)
dnbd3_image_t *image = image_lock( uplink->image );
if ( image != NULL ) {
// != NULL means image is still in list...
- if ( !_shutdown && image->cache_map != NULL ) {
+ if ( !_shutdown && image->ref_cacheMap != NULL ) {
// Integrity checker must have found something in the meantime
uplink_init( image, -1, NULL, 0 );
}
@@ -707,13 +707,14 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink)
if ( uplink == NULL || uplink->current.fd == -1 ) return;
if ( _backgroundReplication == BGR_DISABLED || uplink->cacheFd == -1 ) return; // Don't do background replication
if ( uplink->nextReplicationIndex == -1 || uplink->replicationHandle != REP_NONE )
- return;
+ return; // Already a replication request on the wire, or no more blocks to replicate
dnbd3_image_t * const image = uplink->image;
if ( image->virtualFilesize < DNBD3_BLOCK_SIZE ) return;
- mutex_lock( &image->lock );
- if ( image == NULL || image->cache_map == NULL || image->users < _bgrMinClients ) {
- // No cache map (=image complete), or replication pending, or not enough users, do nothing
- mutex_unlock( &image->lock );
+ if ( image->users < _bgrMinClients ) return; // Not enough active users
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL || image->users < _bgrMinClients ) {
+ // No cache map (=image complete)
+ ref_put( &cache->reference );
return;
}
const int mapBytes = IMGSIZE_TO_MAPBYTES( image->virtualFilesize );
@@ -727,16 +728,18 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink)
endByte = mapBytes;
}
}
+ atomic_thread_fence( memory_order_acquire );
int replicationIndex = -1;
for ( int j = uplink->nextReplicationIndex; j < endByte; ++j ) {
const int i = j % ( mapBytes ); // Wrap around for BGR_FULL
- if ( image->cache_map[i] != 0xff && ( i != lastBlockIndex || !uplink->replicatedLastBlock ) ) {
+ if ( atomic_load_explicit( &cache->map[i], memory_order_relaxed ) != 0xff
+ && ( i != lastBlockIndex || !uplink->replicatedLastBlock ) ) {
// Found incomplete one
replicationIndex = i;
break;
}
}
- mutex_unlock( &image->lock );
+ ref_put( &cache->reference );
if ( replicationIndex == -1 && _backgroundReplication == BGR_HASHBLOCK ) {
// Nothing left in current block, find next one
replicationIndex = uplink_findNextIncompleteHashBlock( uplink, endByte );
@@ -768,23 +771,24 @@ static void uplink_sendReplicationRequest(dnbd3_uplink_t *uplink)
}
/**
- * find next index into cache_map that corresponds to the beginning
+ * find next index into cache map that corresponds to the beginning
* of a hash block which is neither completely empty nor completely
* replicated yet. Returns -1 if no match.
*/
static int uplink_findNextIncompleteHashBlock(dnbd3_uplink_t *uplink, const int startMapIndex)
{
int retval = -1;
- mutex_lock( &uplink->image->lock );
- const int mapBytes = IMGSIZE_TO_MAPBYTES( uplink->image->virtualFilesize );
- const uint8_t *cache_map = uplink->image->cache_map;
- if ( cache_map != NULL ) {
- int j;
+ dnbd3_cache_map_t *cache = ref_get_cachemap( uplink->image );
+ if ( cache != NULL ) {
+ const int mapBytes = IMGSIZE_TO_MAPBYTES( uplink->image->virtualFilesize );
const int start = ( startMapIndex & MAP_INDEX_HASH_START_MASK );
+ atomic_thread_fence( memory_order_acquire );
+ int j;
for (j = 0; j < mapBytes; ++j) {
const int i = ( start + j ) % mapBytes;
- const bool isFull = cache_map[i] == 0xff || ( i + 1 == mapBytes && uplink->replicatedLastBlock );
- const bool isEmpty = cache_map[i] == 0;
+ const uint8_t b = atomic_load_explicit( &cache->map[i], memory_order_relaxed );
+ const bool isFull = b == 0xff || ( i + 1 == mapBytes && uplink->replicatedLastBlock );
+ const bool isEmpty = b == 0;
if ( !isEmpty && !isFull ) {
// Neither full nor empty, replicate
if ( retval == -1 ) {
@@ -811,7 +815,7 @@ static int uplink_findNextIncompleteHashBlock(dnbd3_uplink_t *uplink, const int
retval = -1;
}
}
- mutex_unlock( &uplink->image->lock );
+ ref_put( &cache->reference );
return retval;
}
@@ -1107,7 +1111,7 @@ static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink)
if ( fsync( uplink->cacheFd ) == -1 ) {
// A failing fsync means we have no guarantee that any data
// since the last fsync (or open if none) has been saved. Apart
- // from keeping the cache_map from the last successful fsync
+ // from keeping the cache map from the last successful fsync
// around and restoring it there isn't much we can do to recover
// a consistent state. Bail out.
logadd( LOG_ERROR, "fsync() on image file %s failed with errno %d", image->path, errno );
@@ -1116,21 +1120,13 @@ static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink)
}
}
- if ( image->cache_map == NULL ) return true;
- logadd( LOG_DEBUG2, "Saving cache map of %s:%d", image->name, (int)image->rid );
- mutex_lock( &image->lock );
- // Lock and get a copy of the cache map, as it could be freed by another thread that is just about to
- // figure out that this image's cache copy is complete
- if ( image->cache_map == NULL || image->virtualFilesize < DNBD3_BLOCK_SIZE ) {
- mutex_unlock( &image->lock );
+ dnbd3_cache_map_t *cache = ref_get_cachemap( image );
+ if ( cache == NULL )
return true;
- }
+ logadd( LOG_DEBUG2, "Saving cache map of %s:%d", image->name, (int)image->rid );
const size_t size = IMGSIZE_TO_MAPBYTES(image->virtualFilesize);
- uint8_t *map = malloc( size );
- memcpy( map, image->cache_map, size );
// Unlock. Use path and cacheFd without locking. path should never change after initialization of the image,
// cacheFd is owned by the uplink thread and we don't want to hold a spinlock during I/O
- mutex_unlock( &image->lock );
assert( image->path != NULL );
char mapfile[strlen( image->path ) + 4 + 1];
strcpy( mapfile, image->path );
@@ -1139,14 +1135,14 @@ static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink)
int fd = open( mapfile, O_WRONLY | O_CREAT, 0644 );
if ( fd == -1 ) {
const int err = errno;
- free( map );
+ ref_put( &cache->reference );
logadd( LOG_WARNING, "Could not open file to write cache map to disk (errno=%d) file %s", err, mapfile );
return false;
}
size_t done = 0;
while ( done < size ) {
- const ssize_t ret = write( fd, map, size - done );
+ const ssize_t ret = write( fd, cache->map + done, size - done );
if ( ret == -1 ) {
if ( errno == EINTR ) continue;
logadd( LOG_WARNING, "Could not write cache map (errno=%d) file %s", errno, mapfile );
@@ -1158,11 +1154,11 @@ static bool uplink_saveCacheMap(dnbd3_uplink_t *uplink)
}
done += (size_t)ret;
}
+ ref_put( &cache->reference );
if ( fsync( fd ) == -1 ) {
logadd( LOG_WARNING, "fsync() on image map %s failed with errno %d", mapfile, errno );
}
close( fd );
- free( map );
return true;
}