summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/server/image.c16
-rw-r--r--src/server/integrity.c81
2 files changed, 67 insertions, 30 deletions
diff --git a/src/server/image.c b/src/server/image.c
index 673a269..9d36317 100644
--- a/src/server/image.c
+++ b/src/server/image.c
@@ -72,6 +72,8 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
assert( image != NULL );
// This should always be block borders due to how the protocol works, but better be safe
// than accidentally mark blocks as cached when they really aren't entirely cached.
+ assert( end <= image->virtualFilesize );
+ assert( start <= end );
if ( set ) {
// If we set as cached, move "inwards" in case we're not at 4k border
end &= ~(uint64_t)(DNBD3_BLOCK_SIZE - 1);
@@ -81,6 +83,8 @@ void image_updateCachemap(dnbd3_image_t *image, uint64_t start, uint64_t end, co
start &= ~(uint64_t)(DNBD3_BLOCK_SIZE - 1);
end = (uint64_t)(end + DNBD3_BLOCK_SIZE - 1) & ~(uint64_t)(DNBD3_BLOCK_SIZE - 1);
}
+ if ( start >= end )
+ return;
bool setNewBlocks = false;
uint64_t pos = start;
spin_lock( &image->lock );
@@ -1601,7 +1605,10 @@ bool image_checkBlocksCrc32(const int fd, uint32_t *crc32list, const int *blocks
*/
static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_t realFilesize, uint32_t *crc)
{
- char buffer[262144];
+ // Make buffer 4k aligned in case fd has O_DIRECT set
+#define BSIZE 262144
+ char rawBuffer[BSIZE + DNBD3_BLOCK_SIZE];
+ char * const buffer = (char*)( ( (uintptr_t)rawBuffer + ( DNBD3_BLOCK_SIZE - 1 ) ) & ~( DNBD3_BLOCK_SIZE - 1 ) );
// How many bytes to read from the input file
const uint64_t bytesFromFile = MIN( HASH_BLOCK_SIZE, realFilesize - ( block * HASH_BLOCK_SIZE) );
// Determine how many bytes we had to read if the file size were a multiple of 4k
@@ -1614,7 +1621,7 @@ static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_
*crc = crc32( 0, NULL, 0 );
// Calculate the crc32 by reading data from the file
while ( bytes < bytesFromFile ) {
- const size_t n = (size_t)MIN( sizeof(buffer), bytesFromFile - bytes );
+ const size_t n = (size_t)MIN( BSIZE, bytesFromFile - bytes );
const ssize_t r = pread( fd, buffer, n, readPos + bytes );
if ( r <= 0 ) {
logadd( LOG_WARNING, "CRC: Read error (errno=%d)", errno );
@@ -1625,16 +1632,17 @@ static bool image_calcBlockCrc32(const int fd, const size_t block, const uint64_
}
// If the virtual file size is different, keep going using nullbytes
if ( bytesFromFile < virtualBytesFromFile ) {
- memset( buffer, 0, sizeof(buffer) );
+ memset( buffer, 0, BSIZE );
bytes = (size_t)( virtualBytesFromFile - bytesFromFile );
while ( bytes != 0 ) {
- const size_t len = MIN( sizeof(buffer), bytes );
+ const size_t len = MIN( BSIZE, bytes );
*crc = crc32( *crc, (uint8_t*)buffer, len );
bytes -= len;
}
}
*crc = net_order_32( *crc );
return true;
+#undef BSIZE
}
/**
diff --git a/src/server/integrity.c b/src/server/integrity.c
index c9ac798..4d01ba7 100644
--- a/src/server/integrity.c
+++ b/src/server/integrity.c
@@ -8,6 +8,10 @@
#include <assert.h>
#include <sys/syscall.h>
#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
#define CHECK_QUEUE_SIZE 500
@@ -128,14 +132,6 @@ static void* integrity_main(void * data UNUSED)
}
if ( image == NULL ) continue;
// We have the image. Call image_release() some time
- // Make sure the image is open for reading (closeUnusedFd)
- if ( !image_ensureOpen( image ) ) {
- // TODO: Open new fd for file with O_DIRECT in case we do a full scan,
- // so we don't thrash the whole fs cache
- logadd( LOG_MINOR, "Cannot hash check block %d of %s -- no readFd", checkQueue[i].block, image->path );
- image_release( image );
- continue;
- }
bool full = checkQueue[i].full;
bool foundCorrupted = false;
spin_lock( &image->lock );
@@ -153,27 +149,60 @@ static void* integrity_main(void * data UNUSED)
}
memcpy( buffer, image->crc32, required );
spin_unlock( &image->lock );
+ // Open for direct I/O if possible; this prevents polluting the fs cache
+ int fd = open( image->path, O_RDONLY | O_DIRECT );
+ bool direct = fd != -1;
+ if ( unlikely( !direct ) ) {
+ // Try unbuffered; flush to disk for that
+ logadd( LOG_DEBUG1, "O_DIRECT failed for %s", image->path );
+ image_ensureOpen( image );
+ fd = image->readFd;
+ }
int checkCount = full ? 5 : 1;
- while ( blocks[0] < numHashBlocks ) {
- bool complete = true;
- if ( full ) {
- // When checking full image, skip incomplete blocks, otherwise assume block is complete
- spin_lock( &image->lock );
- complete = image_isHashBlockComplete( image->cache_map, blocks[0], fileSize );
- spin_unlock( &image->lock );
- }
- if ( complete && !image_checkBlocksCrc32( image->readFd, (uint32_t*)buffer, blocks, fileSize ) ) {
- logadd( LOG_WARNING, "Hash check for block %d of %s failed!", blocks[0], image->name );
- image_updateCachemap( image, blocks[0] * HASH_BLOCK_SIZE, (blocks[0] + 1) * HASH_BLOCK_SIZE, false );
- // If this is not a full check, queue one
- if ( !full ) {
- logadd( LOG_INFO, "Queueing full check for %s", image->name );
- integrity_check( image, -1 );
+ if ( fd != -1 ) {
+ while ( blocks[0] < numHashBlocks && !_shutdown ) {
+ const uint64_t start = blocks[0] * HASH_BLOCK_SIZE;
+ const uint64_t end = MIN( (uint64_t)(blocks[0] + 1) * HASH_BLOCK_SIZE, image->virtualFilesize );
+ bool complete = true;
+ if ( full ) {
+ // When checking full image, skip incomplete blocks, otherwise assume block is complete
+ spin_lock( &image->lock );
+ complete = image_isHashBlockComplete( image->cache_map, blocks[0], fileSize );
+ spin_unlock( &image->lock );
}
- foundCorrupted = true;
+ if ( fsync( fd ) == -1 ) {
+ logadd( LOG_ERROR, "Cannot flush %s for integrity check", image->path );
+ exit( 1 );
+ }
+ // Use direct I/O only if read length is multiple of 4096 to be on the safe side
+ int tfd;
+ if ( direct && ( end % DNBD3_BLOCK_SIZE ) == 0 ) {
+ // Suitable for direct io
+ tfd = fd;
+ } else if ( !image_ensureOpen( image ) ) {
+ logadd( LOG_WARNING, "Cannot open %s for reading", image->path );
+ break;
+ } else {
+ tfd = image->readFd;
+ // Evict from cache so we have to re-read, making sure data was properly stored
+ posix_fadvise( fd, start, end - start, POSIX_FADV_DONTNEED );
+ }
+ if ( complete && !image_checkBlocksCrc32( tfd, (uint32_t*)buffer, blocks, fileSize ) ) {
+ logadd( LOG_WARNING, "Hash check for block %d of %s failed!", blocks[0], image->name );
+ image_updateCachemap( image, start, end, false );
+ // If this is not a full check, queue one
+ if ( !full ) {
+ logadd( LOG_INFO, "Queueing full check for %s", image->name );
+ integrity_check( image, -1 );
+ }
+ foundCorrupted = true;
+ }
+ if ( complete && --checkCount == 0 ) break;
+ blocks[0]++;
+ }
+ if ( direct ) {
+ close( fd );
}
- if ( complete && --checkCount == 0 ) break;
- blocks[0]++;
}
pthread_mutex_lock( &integrityQueueLock );
if ( full ) {