From 1460b746b6f5482ce1c56a30af232e824e316e56 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 17 Dec 2015 15:58:06 +0100 Subject: [SERVER] Performance: Optimized some functions (gprof) A run with gprof revealed that background replication is a huge CPU hog. The block selection was very slow and has been improved a lot. Minor improvements were made to other functions that scan the cache map of an image and are thus relatively slow. --- CMakeLists.txt | 2 +- src/server/globals.h | 15 +++++++++------ src/server/image.c | 25 ++++++++++++++++++------- src/server/image.h | 2 +- src/server/uplink.c | 28 +++++++++++++++++----------- 5 files changed, 46 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 628e36e..cf85392 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,7 @@ if(CMAKE_C_COMPILER MATCHES "clang") SET(CMAKE_C_FLAGS_RELEASE "-std=c99 -O2 -Wno-unused-result -D_GNU_SOURCE -DNDEBUG -Wno-multichar -fno-strict-aliasing") elseif (CMAKE_C_COMPILER MATCHES "(cc-)|(cc$)") message( "Using (g)cc flags." ) - SET(CMAKE_C_FLAGS_DEBUG "-std=c99 -O0 -pg -g -Wall -Wextra -Wpedantic -D_GNU_SOURCE -D_DEBUG -Wno-multichar -fno-strict-aliasing") + SET(CMAKE_C_FLAGS_DEBUG "-std=c99 -O0 -g -Wall -Wextra -Wpedantic -D_GNU_SOURCE -D_DEBUG -Wno-multichar -fno-strict-aliasing") SET(CMAKE_C_FLAGS_RELEASE "-std=c99 -O2 -Wno-unused-result -D_GNU_SOURCE -DNDEBUG -Wno-multichar -fno-strict-aliasing") else() message( FATAL_ERROR "Could not determine compiler type." ) diff --git a/src/server/globals.h b/src/server/globals.h index a06e0e0..9cf349a 100644 --- a/src/server/globals.h +++ b/src/server/globals.h @@ -55,7 +55,8 @@ struct _dnbd3_connection uint8_t *recvBuffer; // Buffer for receiving payload uint32_t recvBufferLen; // Len of ^^ volatile bool shutdown; // signal this thread to stop, must only be set from uplink_shutdown() or cleanup in uplink_mainloop() - int replicatedLastBlock; // bool telling if the last block has been replicated yet + bool replicatedLastBlock; // bool telling if the last block has been replicated yet + int nextReplicationIndex; // Which index in the cache map we should start looking for incomplete blocks at uint64_t replicationHandle; // Handle of pending replication request uint64_t bytesReceived; // Number of bytes received by the connection. }; @@ -99,19 +100,21 @@ struct _dnbd3_image { char *path; // absolute path of the image char *lower_name; // relative path, all lowercase, minus revision ID - uint8_t *cache_map; // cache map telling which parts are locally cached, NULL if complete - uint32_t *crc32; // list of crc32 checksums for each 16MiB block in image - uint32_t masterCrc32; // CRC-32 of the crc-32 list dnbd3_connection_t *uplink; // pointer to a server connection + uint8_t *cache_map; // cache map telling which parts are locally cached, NULL if complete uint64_t virtualFilesize; // virtual size of image (real size rounded up to multiple of 4k) uint64_t realFilesize; // actual file size on disk + time_t atime; // last access time + time_t lastWorkCheck; // last time a non-working image has been checked + time_t nextCompletenessEstimate; // last time the completeness estimate was updated + uint32_t *crc32; // list of crc32 checksums for each 16MiB block in image + uint32_t masterCrc32; // CRC-32 of the crc-32 list int readFd; // used to read the image. Used from multiple threads, so use atomic operations (pread et al) int cacheFd; // used to write to the image, in case it is relayed. ONLY USE FROM UPLINK THREAD! int rid; // revision of image + int completenessEstimate; // Completeness estimate in percent int users; // clients currently using this image int id; // Unique ID of this image. Only unique in the context of this running instance of DNBD3-Server - time_t atime; // last access time - time_t lastWorkCheck; // last time a non-working image has been checked bool working; // true if image exists and completeness is == 100% or a working upstream proxy is connected pthread_spinlock_t lock; }; diff --git a/src/server/image.c b/src/server/image.c index 6d486c4..da19b6a 100644 --- a/src/server/image.c +++ b/src/server/image.c @@ -50,7 +50,7 @@ static imagecache remoteCloneCache[CACHELEN]; static void image_remove(dnbd3_image_t *image); static dnbd3_image_t* image_free(dnbd3_image_t *image); -static bool image_isHashBlockComplete(uint8_t * const cacheMap, const uint64_t block, const uint64_t fileSize); +static bool image_isHashBlockComplete(const uint8_t * const cacheMap, const uint64_t block, const uint64_t fileSize); static bool image_load_all_internal(char *base, char *path); static bool image_load(char *base, char *path, int withUplink); static bool image_clone(int sock, char *name, uint16_t revision, uint64_t imageSize); @@ -551,17 +551,21 @@ static dnbd3_image_t* image_free(dnbd3_image_t *image) return NULL ; } -static bool image_isHashBlockComplete(uint8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize) +static bool image_isHashBlockComplete(const uint8_t * const cacheMap, const uint64_t block, const uint64_t realFilesize) { if ( cacheMap == NULL ) return true; const uint64_t end = (block + 1) * HASH_BLOCK_SIZE; if ( end <= realFilesize ) { - for (uint64_t mapPos = block * HASH_BLOCK_SIZE; mapPos < end; mapPos += (DNBD3_BLOCK_SIZE * 8)) { - if ( cacheMap[mapPos / (DNBD3_BLOCK_SIZE * 8)] != 0xff ) { + // Trivial case: block in question is not the last block (well, or image size is multiple of HASH_BLOCK_SIZE) + const int startCacheIndex = (int)( ( block * HASH_BLOCK_SIZE ) / ( DNBD3_BLOCK_SIZE * 8 ) ); + const int endCacheIndex = startCacheIndex + ( HASH_BLOCK_SIZE / ( DNBD3_BLOCK_SIZE * 8 ) ); + for ( int i = startCacheIndex; i < endCacheIndex; ++i ) { + if ( cacheMap[i] != 0xff ) { return false; } } } else { + // Special case: Checking last block, which is smaller than HASH_BLOCK_SIZE for (uint64_t mapPos = block * HASH_BLOCK_SIZE; mapPos < realFilesize; mapPos += DNBD3_BLOCK_SIZE ) { const int map_y = mapPos >> 15; const int map_x = (mapPos >> 12) & 7; // mod 8 @@ -1409,10 +1413,15 @@ json_t* image_getListAsJson() * Returns: 0-100 * DOES NOT LOCK, so make sure to do so before calling */ -int image_getCompletenessEstimate(const dnbd3_image_t * const image) +int image_getCompletenessEstimate(dnbd3_image_t * const image) { assert( image != NULL ); if ( image->cache_map == NULL ) return image->working ? 100 : 0; + const time_t now = time( NULL ); + if ( now < image->nextCompletenessEstimate ) { + // Since this operation is relatively expensive, we cache the result for a while + return image->completenessEstimate; + } int i; int percent = 0; const int len = IMGSIZE_TO_MAPBYTES( image->virtualFilesize ); @@ -1420,11 +1429,13 @@ int image_getCompletenessEstimate(const dnbd3_image_t * const image) for ( i = 0; i < len; ++i ) { if ( image->cache_map[i] == 0xff ) { percent += 100; - } else if ( image->cache_map[i] > 0 ) { + } else if ( image->cache_map[i] != 0 ) { percent += 50; } } - return percent / len; + image->completenessEstimate = percent / len; + image->nextCompletenessEstimate = now + 10 + rand() % 30; + return image->completenessEstimate; } /** diff --git a/src/server/image.h b/src/server/image.h index 25fde37..859507d 100644 --- a/src/server/image.h +++ b/src/server/image.h @@ -43,7 +43,7 @@ bool image_generateCrcFile(char *image); json_t* image_getListAsJson(); -int image_getCompletenessEstimate(const dnbd3_image_t * const image); +int image_getCompletenessEstimate(dnbd3_image_t * const image); // one byte in the map covers 8 4kib blocks, so 32kib per byte // "+ (1 << 15) - 1" is required to account for the last bit of diff --git a/src/server/uplink.c b/src/server/uplink.c index 8be8e5a..726b08b 100644 --- a/src/server/uplink.c +++ b/src/server/uplink.c @@ -285,6 +285,7 @@ static void* uplink_mainloop(void *data) link->currentServer = link->betterServer; link->replicationHandle = 0; link->image->working = true; + link->replicatedLastBlock = false; // Reset this to be safe - request could've been sent but reply was never received buffer[0] = '@'; if ( host_to_string( &link->currentServer, buffer + 1, sizeof(buffer) - 1 ) ) { logadd( LOG_DEBUG1, "(Uplink %s) Now connected to %s\n", link->image->lower_name, buffer + 1 ); @@ -500,7 +501,8 @@ static void uplink_sendReplicationRequest(dnbd3_connection_t *link) const int len = IMGSIZE_TO_MAPBYTES( image->realFilesize ) - 1; // Needs to be 8 (bit->byte, bitmap) const uint32_t requestBlockSize = DNBD3_BLOCK_SIZE * 8; - for (int i = 0; i <= len; ++i) { + for ( int j = 0; j <= len; ++j ) { + const int i = ( j + link->nextReplicationIndex ) % ( len + 1 ); if ( image->cache_map == NULL || link->fd == -1 ) break; if ( image->cache_map[i] == 0xff || (i == len && link->replicatedLastBlock) ) continue; link->replicationHandle = 1; // Prevent race condition @@ -512,6 +514,7 @@ static void uplink_sendReplicationRequest(dnbd3_connection_t *link) logadd( LOG_DEBUG1, "Error sending background replication request to uplink server!\n" ); return; } + link->nextReplicationIndex = i + 1; // Remember last incomplete offset for next time so we don't play Schlemiel the painter if ( i == len ) link->replicatedLastBlock = true; // Special treatment, last byte in map could represent less than 8 blocks return; // Request was sent, bail out, nothing is locked } @@ -552,14 +555,9 @@ static void uplink_handleReceive(dnbd3_connection_t *link) link->recvBufferLen = MIN(9000000, inReply.size + 65536); // XXX dont miss occurrence link->recvBuffer = realloc( link->recvBuffer, link->recvBufferLen ); } - uint32_t done = 0; - while ( done < inReply.size ) { - ret = recv( link->fd, link->recvBuffer + done, inReply.size - done, MSG_NOSIGNAL ); - if ( ret <= 0 ) { - logadd( LOG_INFO, "Lost connection to uplink server of %s (payload)", link->image->path ); - goto error_cleanup; - } - done += ret; + if ( (uint32_t)sock_recv( link->fd, link->recvBuffer, inReply.size ) != inReply.size ) { + logadd( LOG_INFO, "Lost connection to uplink server of %s (payload)", link->image->path ); + goto error_cleanup; } // Payload read completely // Bail out if we're not interested @@ -571,9 +569,17 @@ static void uplink_handleReceive(dnbd3_connection_t *link) link->bytesReceived += inReply.size; // 1) Write to cache file if ( link->image->cacheFd != -1 ) { - ret = (int)pwrite( link->image->cacheFd, link->recvBuffer, inReply.size, start ); - if ( ret > 0 ) image_updateCachemap( link->image, start, start + ret, true ); + uint32_t done = 0; + while ( done < inReply.size ) { + ret = (int)pwrite( link->image->cacheFd, link->recvBuffer + done, inReply.size - done, start + done ); + if ( ret == -1 && errno == EINTR ) continue; + if ( ret <= 0 ) break; + done += (uint32_t)ret; + } + if ( done > 0 ) image_updateCachemap( link->image, start, start + done, true ); if ( ret == -1 && ( errno == EBADF || errno == EINVAL || errno == EIO ) ) { + logadd( LOG_WARNING, "Error writing received data for %s:%d; disabling caching.", + link->image->lower_name, (int)link->image->rid ); const int fd = link->image->cacheFd; link->image->cacheFd = -1; close( fd ); -- cgit v1.2.3-55-g7522