summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Rettberg2013-11-12 16:39:04 +0100
committerSimon Rettberg2013-11-12 16:39:04 +0100
commit080d2c7799e7d2e3a1378bc94c7d530542a75539 (patch)
tree4f1f8ce5c697f961dbc53a27669a57297f5e8ea1
parent[SERVER] Fix embarrassing deadlock when deleting old images (diff)
parenttiny fix (diff)
downloaddnbd3-080d2c7799e7d2e3a1378bc94c7d530542a75539.tar.gz
dnbd3-080d2c7799e7d2e3a1378bc94c7d530542a75539.tar.xz
dnbd3-080d2c7799e7d2e3a1378bc94c7d530542a75539.zip
Merge branch 'simon' of dnbd3:dnbd3 into simon
Conflicts: src/server/altservers.c
-rw-r--r--CMakeLists.txt9
-rw-r--r--LOCKS6
-rw-r--r--client.conf.example29
-rw-r--r--server.conf.example9
-rw-r--r--src/client/client.c125
-rw-r--r--src/config.h17
-rw-r--r--src/kernel/blk.c13
-rw-r--r--src/kernel/dnbd3.h4
-rw-r--r--src/kernel/net.c151
-rw-r--r--src/server/altservers.c186
-rw-r--r--src/server/globals.c2
-rw-r--r--src/server/globals.h5
-rw-r--r--src/server/image.c14
-rw-r--r--src/server/net.c2
14 files changed, 310 insertions, 262 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 596fa43..03964a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,8 +2,8 @@
# GENERAL #
################################################################################
-PROJECT(dnbd3)
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
+PROJECT(dnbd3 C)
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6.2)
IF (CMAKE_BUILD_TYPE STREQUAL "")
SET(CMAKE_BUILD_TYPE Debug)
ENDIF()
@@ -12,10 +12,9 @@ message( "Build Type selected: ${CMAKE_BUILD_TYPE}" )
SET(CMAKE_C_FLAGS_DEBUG "-std=c99 -O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_DEBUG -Wno-multichar")
SET(CMAKE_C_FLAGS_RELEASE "-std=c99 -O2 -Wno-unused-result -D_GNU_SOURCE -DNDEBUG -Wno-multichar")
-SET(CMAKE_CXX_FLAGS_DEBUG "-std=c99 -O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_DEBUG")
-SET(CMAKE_CXX_FLAGS_RELEASE "-std=c99 -O2 -Wno-unused-result -D_GNU_SOURCE -DNDEBUG" )
+#SET(CMAKE_CXX_FLAGS_DEBUG "-std=c99 -O0 -g -Wall -Wno-unused-result -D_GNU_SOURCE -D_DEBUG")
+#SET(CMAKE_CXX_FLAGS_RELEASE "-std=c99 -O2 -Wno-unused-result -D_GNU_SOURCE -DNDEBUG" )
-ADD_DEFINITIONS(-DIPC_TCP)
ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64)
ADD_DEFINITIONS(-DWITH_IPV6)
diff --git a/LOCKS b/LOCKS
index 5de521a..a108f1d 100644
--- a/LOCKS
+++ b/LOCKS
@@ -17,10 +17,12 @@ remoteCloneLock
_images_lock
_images[].lock
uplink.queueLock
-_alts_lock
+pendingLockProduce
+pendingLockConsume
+altServersLock
client.sendMutex
-If you need to lock multiple clients at once,
+If you need to lock multiple clients/images/... at once,
lock the client with the lowest array index first.
If the program logic would require to aquire the
diff --git a/client.conf.example b/client.conf.example
deleted file mode 100644
index 264632f..0000000
--- a/client.conf.example
+++ /dev/null
@@ -1,29 +0,0 @@
-# This is a sample configuration file for dnbd3-client
-
-[Ubuntu 10.04]
-server=132.230.4.29
-vid=1
-rid=3
-device=/dev/dnbd0
-ahead=256
-
-[Ubuntu 10.10]
-server=132.230.4.29
-vid=2
-rid=1
-device=/dev/dnbd1
-ahead=256
-
-[Ubuntu 11.04]
-server=132.230.4.29
-vid=3
-rid=1
-device=/dev/dnbd2
-ahead=256
-
-[Ubuntu 11.10]
-server=132.230.4.29
-vid=4
-rid=1
-device=/dev/dnbd3
-ahead=256
diff --git a/server.conf.example b/server.conf.example
deleted file mode 100644
index 464a870..0000000
--- a/server.conf.example
+++ /dev/null
@@ -1,9 +0,0 @@
-# This is a sample configuration file for dnbd3-server
-
-[settings]
-default_namespace=uni-freiburg/rz/bunker
-
-[eclipse-cdt linux (tar.gz)]
-file=/home/sr/Downloads/eclipse-cpp-juno-linux-gtk-x86_64.tar.gz
-servers=127.0.0.20:1234;132.230.4.58;2000::dead:beef
-rid=3
diff --git a/src/client/client.c b/src/client/client.c
index ae6879e..c0d316b 100644
--- a/src/client/client.c
+++ b/src/client/client.c
@@ -42,8 +42,9 @@
#define DEV_LEN 15
#define MAX_DEVS 50
+
static int openDevices[MAX_DEVS];
-static const char *optString = "f:h:i:r:d:a:c:s:HV?";
+static const char *optString = "f:h:i:r:d:a:cs:HV?k";
static const struct option longOpts[] = {
{ "file", required_argument, NULL, 'f' },
{ "host", required_argument, NULL, 'h' },
@@ -51,8 +52,10 @@ static const struct option longOpts[] = {
{ "rid", required_argument, NULL, 'r' },
{ "device", required_argument, NULL, 'd' },
{ "ahead", required_argument, NULL, 'a' },
- { "close", required_argument, NULL, 'c' },
+ { "close", no_argument, NULL, 'c' },
{ "switch", required_argument, NULL, 's' },
+ { "add", required_argument, NULL, 'adds' },
+ { "remove", required_argument, NULL, 'rems' },
{ "help", no_argument, NULL, 'H' },
{ "version", no_argument, NULL, 'V' },
{ "daemon", no_argument, NULL, 'D' },
@@ -65,13 +68,43 @@ static const struct option longOpts[] = {
static int dnbd3_ioctl(const char *dev, const int command, dnbd3_ioctl_t * const msg);
static void dnbd3_client_daemon();
static void dnbd3_daemon_action(int client, int argc, char **argv);
-static int dnbd3_daemon_close(int uid, char *device);
+static int dnbd3_daemon_ioctl(int uid, char *device, int action, const char *actionName, char *host);
static char* dnbd3_daemon_open(int uid, char *host, char *image, int rid, int readAhead);
static int dnbd3_daemon_send(int argc, char **argv);
static void dnbd3_print_help(char *argv_0);
static void dnbd3_print_version();
/**
+ * Convert a host and port (network byte order) to printable representation.
+ * Worst case required buffer len is 48, eg. [1234:1234:1234:1234:1234:1234:1234:1234]:12345 (+ \0)
+ * Returns TRUE on success, FALSE on error
+ */
+static char host_to_string(const dnbd3_host_t *host, char *target, size_t targetlen)
+{
+ // Worst case: Port 5 chars, ':' to separate ip and port 1 char, terminating null 1 char = 7, [] for IPv6
+ if ( targetlen < 10 ) return FALSE;
+ if ( host->type == AF_INET6 ) {
+ *target++ = '[';
+ inet_ntop( AF_INET6, host->addr, target, targetlen - 10 );
+ target += strlen( target );
+ *target++ = ']';
+ } else if ( host->type == AF_INET ) {
+ inet_ntop( AF_INET, host->addr, target, targetlen - 8 );
+ target += strlen( target );
+ } else {
+ snprintf( target, targetlen, "<?addrtype=%d>", (int)host->type );
+ return FALSE;
+ }
+ *target = '\0';
+ if ( host->port != 0 ) {
+ // There are still at least 7 bytes left in the buffer, port is at most 5 bytes + ':' + '\0' = 7
+ snprintf( target, 7, ":%d", (int)ntohs( host->port ) );
+ }
+ return TRUE;
+}
+
+
+/**
* Parse IPv4 or IPv6 address in string representation to a suitable format usable by the BSD socket library
* @string eg. "1.2.3.4" or "2a01::10:5", optially with port appended, eg "1.2.3.4:6666" or "[2a01::10:5]:6666"
* @af will contain either AF_INET or AF_INET6
@@ -162,10 +195,9 @@ static int dnbd3_get_ip(char *hostname, dnbd3_host_t *host)
int main(int argc, char *argv[])
{
char *dev = NULL;
+ char host[50];
- int close_dev = 0;
- int switch_host = 0;
- int killSwitch = FALSE;
+ int action = -1;
dnbd3_ioctl_t msg;
memset( &msg, 0, sizeof(dnbd3_ioctl_t) );
@@ -189,6 +221,7 @@ int main(int argc, char *argv[])
if ( !dnbd3_get_ip( optarg, &msg.host ) ) exit( EXIT_FAILURE );
break;
case 'i':
+ action = IOCTL_OPEN;
msg.imgname = strdup( optarg );
break;
case 'r':
@@ -202,12 +235,19 @@ int main(int argc, char *argv[])
msg.read_ahead_kb = atoi( optarg );
break;
case 'c':
- dev = strdup( optarg );
- close_dev = 1;
+ action = IOCTL_CLOSE;
break;
case 's':
dnbd3_get_ip( optarg, &msg.host );
- switch_host = 1;
+ action = IOCTL_SWITCH;
+ break;
+ case 'adds':
+ dnbd3_get_ip( optarg, &msg.host );
+ action = IOCTL_ADD_SRV;
+ break;
+ case 'rems':
+ dnbd3_get_ip( optarg, &msg.host );
+ action = IOCTL_REM_SRV;
break;
case 'H':
dnbd3_print_help( argv[0] );
@@ -221,9 +261,6 @@ int main(int argc, char *argv[])
case 'D':
dnbd3_client_daemon();
break;
- case 'k':
- killSwitch = TRUE;
- break;
}
opt = getopt_long( argc, argv, optString, longOpts, &longIndex );
}
@@ -232,7 +269,7 @@ int main(int argc, char *argv[])
struct stat st;
if ( stat( SOCK_PATH, &st ) == 0 ) {
if ( dnbd3_daemon_send( argc, argv ) ) exit( 0 );
- printf( "Failed.\n" );
+ printf( "\nFailed.\n" );
exit( 1 );
}
@@ -245,8 +282,10 @@ int main(int argc, char *argv[])
setuid( getuid() );
}
+ host_to_string( &msg.host, host, 50 );
+
// close device
- if ( close_dev && msg.host.type == 0 && dev && (msg.imgname == NULL )) {
+ if ( action == IOCTL_CLOSE && msg.host.type == 0 && dev && (msg.imgname == NULL )) {
printf( "INFO: Closing device %s\n", dev );
if ( dnbd3_ioctl( dev, IOCTL_CLOSE, &msg ) ) exit( EXIT_SUCCESS );
printf( "Couldn't close device.\n" );
@@ -254,16 +293,18 @@ int main(int argc, char *argv[])
}
// switch host
- if ( switch_host && msg.host.type != 0 && dev && (msg.imgname == NULL )) {
- printf( "INFO: Switching device %s to %s\n", dev, "<fixme>" );
- if ( dnbd3_ioctl( dev, IOCTL_SWITCH, &msg ) ) exit( EXIT_SUCCESS );
- printf( "Switching server failed. Maybe the device is not connected?\n" );
+ if ( (action == IOCTL_SWITCH || action == IOCTL_ADD_SRV || action == IOCTL_REM_SRV) && msg.host.type != 0 && dev && (msg.imgname == NULL )) {
+ if ( action == IOCTL_SWITCH ) printf( "INFO: Switching device %s to %s\n", dev, host );
+ if ( action == IOCTL_ADD_SRV ) printf( "INFO: %s: adding %s\n", dev, host );
+ if ( action == IOCTL_REM_SRV ) printf( "INFO: %s: removing %s\n", dev, host );
+ if ( dnbd3_ioctl( dev, action, &msg ) ) exit( EXIT_SUCCESS );
+ printf( "Failed! Maybe the device is not connected?\n" );
exit( EXIT_FAILURE );
}
// connect
- if ( msg.host.type != 0 && dev && (msg.imgname != NULL )) {
- printf( "INFO: Connecting device %s to %s for image %s\n", dev, "<fixme>", msg.imgname );
+ if ( action == IOCTL_OPEN && msg.host.type != 0 && dev && (msg.imgname != NULL )) {
+ printf( "INFO: Connecting device %s to %s for image %s\n", dev, host, msg.imgname );
if ( dnbd3_ioctl( dev, IOCTL_OPEN, &msg ) ) exit( EXIT_SUCCESS );
printf( "ERROR: connecting device failed. Maybe it's already connected?\n" );
exit( EXIT_FAILURE );
@@ -372,17 +413,24 @@ static void dnbd3_daemon_action(int client, int argc, char **argv)
char *host = NULL, *image = NULL, *device = NULL;
int rid = 0, uid = 0, killMe = FALSE, ahead = 512;
int len;
+ int action = -1;
+ const char *actionName = NULL;
optind = 1;
opt = getopt_long( argc, argv, optString, longOpts, &longIndex );
while ( opt != -1 ) {
switch ( opt ) {
+ case 'd':
+ device = optarg;
+ break;
case 'h':
host = optarg;
break;
case 'i':
image = optarg;
+ action = IOCTL_OPEN;
+ actionName = "Open";
break;
case 'r':
rid = atoi( optarg );
@@ -391,7 +439,16 @@ static void dnbd3_daemon_action(int client, int argc, char **argv)
uid = atoi( optarg );
break;
case 'c':
- device = optarg;
+ action = IOCTL_CLOSE;
+ actionName = "Close";
+ break;
+ case 'adds':
+ action = IOCTL_ADD_SRV;
+ actionName = "Add Server";
+ break;
+ case 'rems':
+ action = IOCTL_REM_SRV;
+ actionName = "Remove Server";
break;
case 'a':
ahead = atoi( optarg );
@@ -415,8 +472,8 @@ static void dnbd3_daemon_action(int client, int argc, char **argv)
exit( 0 );
}
- if ( device != NULL ) {
- if ( dnbd3_daemon_close( uid, device ) ) {
+ if ( (action == IOCTL_CLOSE || ((action == IOCTL_ADD_SRV || action == IOCTL_REM_SRV) && host != NULL)) && device != NULL ) {
+ if ( dnbd3_daemon_ioctl( uid, device, action, actionName, host ) ) {
len = 0;
} else {
len = -1;
@@ -424,7 +481,7 @@ static void dnbd3_daemon_action(int client, int argc, char **argv)
send( client, &len, sizeof(len), 0 );
return;
}
- if ( host != NULL && image != NULL && rid >= 0 ) {
+ if ( action == IOCTL_OPEN && host != NULL && image != NULL && rid >= 0 ) {
device = dnbd3_daemon_open( uid, host, image, rid, ahead );
if ( device != NULL ) {
len = strlen( device );
@@ -439,7 +496,7 @@ static void dnbd3_daemon_action(int client, int argc, char **argv)
printf( "Received a client request I cannot understand.\n" );
}
-static int dnbd3_daemon_close(int uid, char *device)
+static int dnbd3_daemon_ioctl(int uid, char *device, int action, const char *actionName, char *host)
{
int index = -1;
char dev[DEV_LEN];
@@ -448,25 +505,31 @@ static int dnbd3_daemon_close(int uid, char *device)
} else {
index = atoi( device );
}
+ dnbd3_ioctl_t msg;
+ memset( &msg, 0, sizeof(msg) );
+ msg.len = (uint16_t)sizeof(msg);
+ if ( host != NULL ) {
+ dnbd3_get_ip( host, &msg.host );
+ }
if ( index < 0 || index >= MAX_DEVS ) {
- printf( "Close request with invalid device id %d\n", index );
+ printf( "%s request with invalid device id %d\n", actionName, index );
return FALSE;
}
snprintf( dev, DEV_LEN, "/dev/dnbd%d", index );
if ( openDevices[index] == -1 ) {
- printf( "Close request by %d for closed device %s\n", uid, dev );
+ printf( "%s request by %d for closed device %s\n", actionName, uid, dev );
return TRUE;
}
if ( openDevices[index] != uid ) {
- printf( "User %d is not allowed to close %s owned by %d\n", uid, dev, openDevices[index] );
+ printf( "%s: User %d cannot access %s owned by %d\n", actionName, uid, dev, openDevices[index] );
return FALSE;
}
- if ( dnbd3_ioctl( dev, IOCTL_CLOSE, NULL ) ) {
- printf( "Closed device %s of user %d\n", dev, uid );
+ if ( dnbd3_ioctl( dev, action, &msg ) ) {
+ printf( "%s request for device %s of user %d successful\n", actionName, dev, uid );
openDevices[index] = -1;
return TRUE;
}
- printf( "Error closing device %s, requested by %d\n", dev, uid );
+ printf( "%s: Error on device %s, requested by %d\n", actionName, dev, uid );
return FALSE;
}
diff --git a/src/config.h b/src/config.h
index 7df49a0..0228ae9 100644
--- a/src/config.h
+++ b/src/config.h
@@ -25,19 +25,19 @@
// ########### SERVER ###########
// +++++ Performance related
-#define SERVER_MAX_CLIENTS 5000
+#define SERVER_MAX_CLIENTS 2000
#define SERVER_MAX_IMAGES 5000
-#define SERVER_MAX_ALTS 1000
-#define SERVER_MAX_UPLINK_QUEUE 1500
-#define SERVER_MAX_UPLINK_FAILS 4 // How many times may a server fail until it is considered bad
-#define SERVER_BAD_UPLINK_IGNORE 300 // How many seconds is a server considered bad?
-#define SERVER_UPLINK_QUEUELEN_THRES 900
+#define SERVER_MAX_ALTS 250
+#define SERVER_MAX_UPLINK_QUEUE 1500
+#define SERVER_MAX_UPLINK_FAILS 8 // How many times may a server fail until it is considered bad
+#define SERVER_BAD_UPLINK_IGNORE 120 // How many seconds is a server considered bad?
+#define SERVER_UPLINK_QUEUELEN_THRES 900
#define SERVER_MAX_PENDING_ALT_CHECKS 50
// +++++ Other magic constants
#define SERVER_RTT_PROBES 5
#define SERVER_RTT_DELAY_INIT 5
-#define SERVER_RTT_DELAY_MAX 15
+#define SERVER_RTT_DELAY_MAX 45
#define SERVER_REMOTE_IMAGE_CHECK_CACHETIME 600 // 10 minutes
#define SERVER_MAX_PROXY_IMAGE_SIZE 100000000000LL // 100GB
@@ -61,7 +61,8 @@
#define COMMENT_LENGTH 120
// in seconds if not stated otherwise (MS = milliseconds)
-#define SOCKET_TIMEOUT_SERVER_MS 30000
+#define SOCKET_TIMEOUT_SERVER_MS 15000
+#define SOCKET_TIMEOUT_SERVER_RETRIES 3 // When waiting for next header, max reties * above timeout is the actual total timeout (ping timeout)
#define SOCKET_TIMEOUT_CLIENT_DATA 2
#define SOCKET_TIMEOUT_CLIENT_DISCOVERY 1
diff --git a/src/kernel/blk.c b/src/kernel/blk.c
index 9cd4a76..e55de25 100644
--- a/src/kernel/blk.c
+++ b/src/kernel/blk.c
@@ -200,18 +200,7 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u
break;
case IOCTL_SWITCH:
- if (msg == NULL)
- {
- result = -EINVAL;
- }
- else if (memcmp(&dev->cur_server.host, &msg->host, sizeof(msg->host)))
- {
- dnbd3_net_disconnect(dev);
- dev->cur_server.host = msg->host;
- result = dnbd3_net_connect(dev);
- }
- else
- result = 0;
+ result = -EINVAL;
break;
case IOCTL_ADD_SRV:
diff --git a/src/kernel/dnbd3.h b/src/kernel/dnbd3.h
index aa9ea86..c703019 100644
--- a/src/kernel/dnbd3.h
+++ b/src/kernel/dnbd3.h
@@ -68,8 +68,8 @@ typedef struct
struct socket *better_sock;
// process
- struct task_struct *thread_send;
- struct task_struct *thread_receive;
+ struct task_struct * volatile thread_send;
+ struct task_struct * volatile thread_receive;
struct task_struct *thread_discover;
struct timer_list hb_timer;
wait_queue_head_t process_queue_send;
diff --git a/src/kernel/net.c b/src/kernel/net.c
index 8703d9c..e65801a 100644
--- a/src/kernel/net.c
+++ b/src/kernel/net.c
@@ -122,11 +122,19 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
struct request *req1 = NULL;
struct timeval timeout;
- char get_servers = 0, set_client = 0;
-
- while (dev->disconnecting)
- {
- if (dev->better_sock)
+ if (dev->disconnecting) {
+ debug_dev("CONNECT: Still disconnecting!!!\n");
+ while (dev->disconnecting)
+ schedule();
+ }
+ if (dev->thread_receive != NULL) {
+ debug_dev("CONNECT: Still receiving!!!\n");
+ while (dev->thread_receive != NULL)
+ schedule();
+ }
+ if (dev->thread_send != NULL) {
+ debug_dev("CONNECT: Still sending!!!\n");
+ while (dev->thread_send != NULL)
schedule();
}
@@ -135,24 +143,9 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
// do some checks before connecting
- if (!dev->is_server && is_same_server(&dev->cur_server, &dev->initial_server))
- {
- // Forget all known alt servers
- memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0]) * NUMBER_SERVERS);
- memcpy(dev->alt_servers, &dev->initial_server, sizeof(dev->alt_servers[0]));
- get_servers = 1;
- }
- if (dev->better_sock)
- {
- set_client = 1;
- }
-
- if (get_servers || set_client)
- {
- req1 = kmalloc(sizeof(*req1), GFP_ATOMIC );
- if (!req1)
- error_dev("FATAL: Kmalloc(1) failed.");
- }
+ req1 = kmalloc(sizeof(*req1), GFP_ATOMIC );
+ if (!req1)
+ error_dev("FATAL: Kmalloc(1) failed.");
if (dev->cur_server.host.port == 0 || dev->cur_server.host.type == 0 || dev->imgname == NULL )
error_dev("FATAL: Host, port or image name not set.");
@@ -272,19 +265,10 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
dev->panic = 0;
dev->panic_count = 0;
- if (get_servers) // This connection is established to the initial server (from the ioctl call)
- {
- // Enqueue request to request_queue_send for a fresh list of alt servers
- req1->cmd_type = REQ_TYPE_SPECIAL;
- req1->cmd_flags = CMD_GET_SERVERS;
- list_add(&req1->queuelist, &dev->request_queue_send);
- }
- else if (set_client)
- {
- req1->cmd_type = REQ_TYPE_SPECIAL;
- req1->cmd_flags = CMD_SET_CLIENT_MODE;
- list_add(&req1->queuelist, &dev->request_queue_send);
- }
+ // Enqueue request to request_queue_send for a fresh list of alt servers
+ req1->cmd_type = REQ_TYPE_SPECIAL;
+ req1->cmd_flags = CMD_GET_SERVERS;
+ list_add(&req1->queuelist, &dev->request_queue_send);
// create required threads
dev->thread_send = kthread_create(dnbd3_net_send, dev, dev->disk->disk_name);
@@ -306,7 +290,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev)
add_timer(&dev->hb_timer);
return 0;
- error: if (dev->sock)
+ error: ;
+ if (dev->sock)
{
sock_release(dev->sock);
dev->sock = NULL;
@@ -340,13 +325,11 @@ int dnbd3_net_disconnect(dnbd3_device_t *dev)
if (dev->thread_send)
{
kthread_stop(dev->thread_send);
- dev->thread_send = NULL;
}
if (dev->thread_receive)
{
kthread_stop(dev->thread_receive);
- dev->thread_receive = NULL;
}
if (dev->thread_discover)
@@ -437,9 +420,10 @@ int dnbd3_net_discover(void *data)
struct timeval start, end;
unsigned long rtt, best_rtt = 0;
unsigned long irqflags;
- int i, istart, isize, best_server, current_server;
+ int i, j, isize, best_server, current_server;
int turn = 0;
- int ready = 0, do_change, last_alt_count = 0;
+ int ready = 0, do_change = 0;
+ char check_order[NUMBER_SERVERS];
int mlen;
struct request *last_request = (struct request *)123, *cur_request = (struct request *)456;
@@ -463,6 +447,10 @@ int dnbd3_net_discover(void *data)
dnbd3_request.magic = dnbd3_packet_magic;
+ for (i = 0; i < NUMBER_SERVERS; ++i) {
+ check_order[i] = i;
+ }
+
for (;;)
{
wait_event_interruptible(dev->process_queue_discover,
@@ -525,26 +513,34 @@ int dnbd3_net_discover(void *data)
current_server = best_server = -1;
best_rtt = 0xFFFFFFFul;
- if (dev->heartbeat_count < STARTUP_MODE_DURATION || last_alt_count == 0 || dev->panic)
+ if (dev->heartbeat_count < STARTUP_MODE_DURATION || dev->panic)
{
- istart = 0;
isize = NUMBER_SERVERS;
}
else
{
- istart = jiffies % MAX(last_alt_count - 2, 1);
isize = 3;
}
+ if (NUMBER_SERVERS > isize) {
+ for (i = 0; i < isize; ++i) {
+ j = ((start.tv_sec >> i) ^ (start.tv_usec >> j)) % NUMBER_SERVERS;
+ if (j != i) {
+ mlen = check_order[i];
+ check_order[i] = check_order[j];
+ check_order[j] = mlen;
+ }
+ }
+ }
- for (i = istart; i < NUMBER_SERVERS; ++i)
+ for (j = 0; j < NUMBER_SERVERS; ++j)
{
+ i = check_order[j];
if (dev->alt_servers[i].host.type == 0) // Empty slot
continue;
- last_alt_count = i;
- if (!dev->panic && dev->alt_servers[i].failures > 50 && (jiffies & 7) != 0) // If not in panic mode, skip server if it failed too many times
+ if (!dev->panic && dev->alt_servers[i].failures > 50 && (start.tv_usec & 7) != 0) // If not in panic mode, skip server if it failed too many times
+ continue;
+ if (isize-- <= 0 && !is_same_server(&dev->cur_server, &dev->alt_servers[i]))
continue;
- if (isize-- <= 0)
- break;
// Initialize socket and connect
if (sock_create_kern(dev->alt_servers[i].host.type, SOCK_STREAM, IPPROTO_TCP, &sock) < 0)
@@ -653,13 +649,13 @@ int dnbd3_net_discover(void *data)
}
else if (sizeof(size_t) >= 8)
{
- dnbd3_request.offset = ((((start.tv_usec << 12) ^ start.tv_usec) << 4) % dev->reported_size)
+ dnbd3_request.offset = ((((start.tv_sec << 12) ^ start.tv_usec) << 4) % dev->reported_size)
& ~(uint64_t)(RTT_BLOCK_SIZE - 1);
//printk("Random offset 64bit: %lluMiB\n", (unsigned long long)(dnbd3_request.offset >> 20));
}
else // On 32bit, prevent modulo on a 64bit data type. This limits the random block picking to the first 4GB of the image
{
- dnbd3_request.offset = ((((start.tv_usec << 12) ^ start.tv_usec) << 4) % (uint32_t)dev->reported_size)
+ dnbd3_request.offset = ((((start.tv_sec << 12) ^ start.tv_usec) << 4) % (uint32_t)dev->reported_size)
& ~(RTT_BLOCK_SIZE - 1);
//printk("Random offset 32bit: %lluMiB\n", (unsigned long long)(dnbd3_request.offset >> 20));
}
@@ -727,7 +723,8 @@ int dnbd3_net_discover(void *data)
continue;
- error: ++dev->alt_servers[i].failures;
+ error: ;
+ ++dev->alt_servers[i].failures;
sock_release(sock);
sock = NULL;
dev->alt_servers[i].rtts[turn] = RTT_UNREACHABLE;
@@ -756,7 +753,7 @@ int dnbd3_net_discover(void *data)
continue;
}
- do_change = !dev->is_server && ready && best_server != current_server && (jiffies & 3) != 0
+ do_change = !dev->is_server && ready && best_server != current_server && (start.tv_usec & 3) != 0
&& RTT_THRESHOLD_FACTOR(dev->cur_rtt) > best_rtt + 1500;
if (ready && !do_change) {
@@ -798,7 +795,7 @@ int dnbd3_net_discover(void *data)
best_sock = NULL;
}
- if (!ready || (jiffies & 7) != 0)
+ if (!ready || (start.tv_usec & 7) != 0)
turn = (turn + 1) % 4;
if (turn == 3)
ready = 1;
@@ -811,7 +808,7 @@ int dnbd3_net_discover(void *data)
int dnbd3_net_send(void *data)
{
dnbd3_device_t *dev = data;
- struct request *blk_request;
+ struct request *blk_request, *tmp_request;
dnbd3_request_t dnbd3_request;
struct msghdr msg;
@@ -825,6 +822,19 @@ int dnbd3_net_send(void *data)
set_user_nice(current, -20);
+ // move already sent requests to request_queue_send again
+ while (!list_empty(&dev->request_queue_receive))
+ {
+ printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name);
+ spin_lock_irqsave(&dev->blk_lock, irqflags);
+ list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist)
+ {
+ list_del_init(&blk_request->queuelist);
+ list_add(&blk_request->queuelist, &dev->request_queue_send);
+ }
+ spin_unlock_irqrestore(&dev->blk_lock, irqflags);
+ }
+
for (;;)
{
wait_event_interruptible(dev->process_queue_send, kthread_should_stop() || !list_empty(&dev->request_queue_send));
@@ -879,29 +889,25 @@ int dnbd3_net_send(void *data)
iov.iov_len = sizeof(dnbd3_request);
if (kernel_sendmsg(dev->sock, &msg, &iov, 1, sizeof(dnbd3_request)) != sizeof(dnbd3_request))
{
- printk("Couldn't properly send a request header.\n");
+ debug_dev("ERROR: Connection to server lost (send)");
goto error;
}
wake_up(&dev->process_queue_receive);
}
+ dev->thread_send = NULL;
return 0;
- error:
- debug_dev("ERROR: Connection to server lost (send)");
+ error: ;
if (dev->sock)
- {
kernel_sock_shutdown(dev->sock, SHUT_RDWR);
- dev->sock = NULL;
- }
- dev->thread_send = NULL;
if (!dev->disconnecting)
{
dev->panic = 1;
- // start discover
dev->discover = 1;
wake_up(&dev->process_queue_discover);
}
+ dev->thread_send = NULL;
return -1;
}
@@ -938,7 +944,7 @@ int dnbd3_net_receive(void *data)
continue;
}
if (ret <= 0)
- error_dev_va("Connection closed (%d).", ret);
+ error_dev("ERROR: Connection to server lost (receive)");
if (ret != sizeof(dnbd3_reply))
error_dev("ERROR: Recv msg header.");
fixup_reply(dnbd3_reply);
@@ -1065,30 +1071,19 @@ int dnbd3_net_receive(void *data)
}
printk("dnbd3_net_receive terminated normally.\n");
+ dev->thread_receive = NULL;
return 0;
error:
- // move already sent requests to request_queue_send again
- while (!list_empty(&dev->request_queue_receive))
- {
- printk("WARN: Request queue was not empty on %s\n", dev->disk->disk_name);
- spin_lock_irqsave(&dev->blk_lock, irqflags);
- list_for_each_entry_safe(blk_request, tmp_request, &dev->request_queue_receive, queuelist)
- {
- list_del_init(&blk_request->queuelist);
- list_add(&blk_request->queuelist, &dev->request_queue_send);
- }
- spin_unlock_irqrestore(&dev->blk_lock, irqflags);
- }
if (dev->sock)
kernel_sock_shutdown(dev->sock, SHUT_RDWR);
- dev->thread_receive = NULL;
if (!dev->disconnecting)
{
dev->panic = 1;
- // start discover
dev->discover = 1;
wake_up(&dev->process_queue_discover);
}
+ dev->thread_receive = NULL;
return -1;
}
+
diff --git a/src/server/altservers.c b/src/server/altservers.c
index 44a4b85..8962e15 100644
--- a/src/server/altservers.c
+++ b/src/server/altservers.c
@@ -17,12 +17,13 @@
#include "protocol.h"
static dnbd3_connection_t *pending[SERVER_MAX_PENDING_ALT_CHECKS];
-static pthread_spinlock_t pendingLock;
+static pthread_spinlock_t pendingLockProduce; // Lock for adding something to pending. (NULL -> nonNULL)
+static pthread_mutex_t pendingLockConsume = PTHREAD_MUTEX_INITIALIZER; // Lock for removegin something (nunNULL -> NULL)
static int signalPipe = -1;
-static dnbd3_alt_server_t _alt_servers[SERVER_MAX_ALTS];
-static int _num_alts = 0;
-static pthread_spinlock_t _alts_lock;
+static dnbd3_alt_server_t altServers[SERVER_MAX_ALTS];
+static int numAltServers = 0;
+static pthread_spinlock_t altServersLock;
static int initDone = FALSE;
static pthread_t altThread;
@@ -32,13 +33,13 @@ static unsigned int altservers_updateRtt(const dnbd3_host_t * const host, const
int altservers_getCount()
{
- return _num_alts;
+ return numAltServers;
}
void altservers_init()
{
- spin_init( &_alts_lock, PTHREAD_PROCESS_PRIVATE );
- memset( _alt_servers, 0, SERVER_MAX_ALTS * sizeof(dnbd3_alt_server_t) );
+ spin_init( &altServersLock, PTHREAD_PROCESS_PRIVATE );
+ memset( altServers, 0, SERVER_MAX_ALTS * sizeof(dnbd3_alt_server_t) );
if ( 0 != pthread_create( &altThread, NULL, &altservers_main, (void *)NULL ) ) {
memlogf( "[ERROR] Could not start altservers connector thread" );
exit( EXIT_FAILURE );
@@ -49,7 +50,6 @@ void altservers_init()
void altservers_shutdown()
{
if ( !initDone ) return;
- spin_destroy( &_alts_lock );
pthread_join( altThread, NULL );
}
@@ -92,28 +92,28 @@ int altservers_load()
int altservers_add(dnbd3_host_t *host, const char *comment, const int isPrivate, const int isClientOnly)
{
int i, freeSlot = -1;
- spin_lock( &_alts_lock );
- for (i = 0; i < _num_alts; ++i) {
- if ( isSameAddressPort( &_alt_servers[i].host, host ) ) {
- spin_unlock( &_alts_lock );
+ spin_lock( &altServersLock );
+ for (i = 0; i < numAltServers; ++i) {
+ if ( isSameAddressPort( &altServers[i].host, host ) ) {
+ spin_unlock( &altServersLock );
return FALSE;
- } else if ( freeSlot == -1 && _alt_servers[i].host.type == 0 ) {
+ } else if ( freeSlot == -1 && altServers[i].host.type == 0 ) {
freeSlot = i;
}
}
if ( freeSlot == -1 ) {
- if ( _num_alts >= SERVER_MAX_ALTS ) {
+ if ( numAltServers >= SERVER_MAX_ALTS ) {
memlogf( "[WARNING] Cannot add another alt server, maximum of %d already reached.", (int)SERVER_MAX_ALTS );
- spin_unlock( &_alts_lock );
+ spin_unlock( &altServersLock );
return FALSE;
}
- freeSlot = _num_alts++;
+ freeSlot = numAltServers++;
}
- _alt_servers[freeSlot].host = *host;
- _alt_servers[freeSlot].isPrivate = isPrivate;
- _alt_servers[freeSlot].isClientOnly = isClientOnly;
- if ( comment != NULL ) snprintf( _alt_servers[freeSlot].comment, COMMENT_LENGTH, "%s", comment );
- spin_unlock( &_alts_lock );
+ altServers[freeSlot].host = *host;
+ altServers[freeSlot].isPrivate = isPrivate;
+ altServers[freeSlot].isClientOnly = isClientOnly;
+ if ( comment != NULL ) snprintf( altServers[freeSlot].comment, COMMENT_LENGTH, "%s", comment );
+ spin_unlock( &altServersLock );
return TRUE;
}
@@ -123,25 +123,33 @@ int altservers_add(dnbd3_host_t *host, const char *comment, const int isPrivate,
void altservers_findUplink(dnbd3_connection_t *uplink)
{
int i;
+ // if betterFd != -1 it means the uplink is supposed to switch to another
+ // server. As this function here is called by the uplink thread, it can
+ // never be that the uplink is supposed to switch, but instead calls
+ // this function.
assert( uplink->betterFd == -1 );
- spin_lock( &pendingLock );
+ spin_lock( &pendingLockProduce );
+ // it is however possible that an RTT measurement is currently in progress,
+ // so check for that case and do nothing if one is in progress
if ( uplink->rttTestResult == RTT_INPROGRESS ) {
for (i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) {
if ( pending[i] != uplink ) continue;
- spin_unlock( &pendingLock );
+ // Yep, measuring right now
+ spin_unlock( &pendingLockProduce );
return;
}
}
+ // Find free slot for measurement
for (i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) {
if ( pending[i] != NULL ) continue;
pending[i] = uplink;
uplink->rttTestResult = RTT_INPROGRESS;
- spin_unlock( &pendingLock );
- write( signalPipe, "", 1 );
+ spin_unlock( &pendingLockProduce );
+ write( signalPipe, "", 1 ); // Wake altservers thread up
return;
}
// End of loop - no free slot
- spin_unlock( &pendingLock );
+ spin_unlock( &pendingLockProduce );
memlogf( "[WARNING] No more free RTT measurement slots, ignoring a request..." );
}
@@ -150,39 +158,40 @@ void altservers_findUplink(dnbd3_connection_t *uplink)
*/
void altservers_removeUplink(dnbd3_connection_t *uplink)
{
- spin_lock( &pendingLock );
+ pthread_mutex_lock( &pendingLockConsume );
for (int i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i) {
if ( pending[i] == uplink ) pending[i] = NULL;
}
- spin_unlock( &pendingLock );
+ pthread_mutex_unlock( &pendingLockConsume );
}
/**
* Get <size> known (working) alt servers, ordered by network closeness
* (by finding the smallest possible subnet)
- * Private servers are excluded
+ * Private servers are excluded, so this is what you want to call to
+ * get a list of servers you can tell a client about
*/
int altservers_getMatching(dnbd3_host_t *host, dnbd3_server_entry_t *output, int size)
{
- if ( host == NULL || host->type == 0 || _num_alts == 0 || output == NULL || size <= 0 ) return 0;
+ if ( host == NULL || host->type == 0 || numAltServers == 0 || output == NULL || size <= 0 ) return 0;
int i, j;
int count = 0;
int distance[size];
- spin_lock( &_alts_lock );
- for (i = 0; i < _num_alts; ++i) {
- if ( host->type != _alt_servers[i].host.type ) continue; // Wrong address family
- if ( _alt_servers[i].isPrivate ) continue; // Do not tell clients about private servers
+ spin_lock( &altServersLock );
+ for (i = 0; i < numAltServers; ++i) {
+ if ( host->type != altServers[i].host.type ) continue; // Wrong address family
+ if ( altServers[i].isPrivate ) continue; // Do not tell clients about private servers
// TODO: Prefer same AF here, but if in the end we got less servers than requested, add
// servers of other AF too (after this loop)
if ( count == 0 ) {
// Trivial - this is the first entry
- output[0].host = _alt_servers[i].host;
+ output[0].host = altServers[i].host;
output[0].failures = 0;
distance[0] = altservers_netCloseness( host, &output[0].host );
count++;
} else {
// Other entries already exist, insert in proper position
- const int dist = altservers_netCloseness( host, &_alt_servers[i].host );
+ const int dist = altservers_netCloseness( host, &altServers[i].host );
for (j = 0; j < size; ++j) {
if ( j < count && dist <= distance[j] ) continue;
if ( j > count ) break; // Should never happen but just in case...
@@ -194,7 +203,7 @@ int altservers_getMatching(dnbd3_host_t *host, dnbd3_server_entry_t *output, int
if ( count < size ) {
count++;
}
- output[j].host = _alt_servers[i].host;
+ output[j].host = altServers[i].host;
output[j].failures = 0;
distance[j] = dist;
break;
@@ -202,39 +211,43 @@ int altservers_getMatching(dnbd3_host_t *host, dnbd3_server_entry_t *output, int
}
}
// TODO: "if count < size then add servers of other address families"
- spin_unlock( &_alts_lock );
+ spin_unlock( &altServersLock );
return count;
}
/**
* Get <size> alt servers. If there are more alt servers than
- * requested, random servers will be picked
+ * requested, random servers will be picked.
+ * This function is suited for finding uplink servers as
+ * it includes private servers and ignores any "client only" servers
*/
int altservers_get(dnbd3_host_t *output, int size)
{
if ( size <= 0 ) return 0;
int count = 0, i;
const time_t now = time( NULL );
- spin_lock( &_alts_lock );
+ spin_lock( &altServersLock );
// Flip first server in list with a random one every time this is called
- if ( _num_alts > 1 ) {
- const dnbd3_alt_server_t tmp = _alt_servers[0];
+ if ( numAltServers > 1 ) {
+ const dnbd3_alt_server_t tmp = altServers[0];
do {
- i = rand() % _num_alts;
+ i = rand() % numAltServers;
} while ( i == 0 );
- _alt_servers[0] = _alt_servers[i];
- _alt_servers[i] = tmp;
+ altServers[0] = altServers[i];
+ altServers[i] = tmp;
}
- for (i = 0; i < _num_alts; ++i) {
- if ( _alt_servers[i].host.type == 0 ) continue;
- if ( _proxyPrivateOnly && !_alt_servers[i].isPrivate ) continue;
- if ( _alt_servers[i].isClientOnly ) continue;
- if ( _alt_servers[i].numFails > SERVER_MAX_UPLINK_FAILS && now - _alt_servers[i].lastFail > SERVER_BAD_UPLINK_IGNORE ) continue;
- _alt_servers[i].numFails = 0;
- output[count++] = _alt_servers[i].host;
+ for (i = 0; i < numAltServers; ++i) {
+ if ( altServers[i].host.type == 0 ) continue; // Slot is empty
+ if ( _proxyPrivateOnly && !altServers[i].isPrivate ) continue; // Config says to consider private alt-servers only? ignore!
+ if ( altServers[i].isClientOnly ) continue;
+ if ( altServers[i].numFails > SERVER_MAX_UPLINK_FAILS // server failed X times in a row
+ && now - altServers[i].lastFail > SERVER_BAD_UPLINK_IGNORE ) continue; // and last fail was not too long ago? ignore!
+ // server seems ok, include in output and reset its fail counter
+ altServers[i].numFails = 0;
+ output[count++] = altServers[i].host;
if ( count >= size ) break;
}
- spin_unlock( &_alts_lock );
+ spin_unlock( &altServersLock );
return count;
}
@@ -245,24 +258,24 @@ static unsigned int altservers_updateRtt(const dnbd3_host_t * const host, const
{
unsigned int avg = rtt;
int i;
- spin_lock( &_alts_lock );
- for (i = 0; i < _num_alts; ++i) {
- if ( !isSameAddressPort( host, &_alt_servers[i].host ) ) continue;
- _alt_servers[i].rtt[++_alt_servers[i].rttIndex % SERVER_RTT_PROBES] = rtt;
+ spin_lock( &altServersLock );
+ for (i = 0; i < numAltServers; ++i) {
+ if ( !isSameAddressPort( host, &altServers[i].host ) ) continue;
+ altServers[i].rtt[++altServers[i].rttIndex % SERVER_RTT_PROBES] = rtt;
#if SERVER_RTT_PROBES == 5
- avg = (_alt_servers[i].rtt[0] + _alt_servers[i].rtt[1] + _alt_servers[i].rtt[2] + _alt_servers[i].rtt[3] + _alt_servers[i].rtt[4])
+ avg = (altServers[i].rtt[0] + altServers[i].rtt[1] + altServers[i].rtt[2] + altServers[i].rtt[3] + altServers[i].rtt[4])
/ SERVER_RTT_PROBES;
#else
#warning You might want to change the code in altservers_update_rtt if you changed SERVER_RTT_PROBES
avg = 0;
for (int j = 0; j < SERVER_RTT_PROBES; ++j) {
- avg += _alt_servers[i].rtt[j];
+ avg += altServers[i].rtt[j];
}
avg /= SERVER_RTT_PROBES;
#endif
break;
}
- spin_unlock( &_alts_lock );
+ spin_unlock( &altServersLock );
return avg;
}
@@ -295,18 +308,29 @@ void altservers_serverFailed(const dnbd3_host_t * const host)
{
int i;
const time_t now = time( NULL );
- spin_lock( &_alts_lock );
- for (i = 0; i < _num_alts; ++i) {
- if ( !isSameAddressPort( host, &_alt_servers[i].host ) ) continue;
- if ( now - _alt_servers[i].lastFail > SERVER_RTT_DELAY_INIT ) {
- _alt_servers[i].numFails++;
- _alt_servers[i].lastFail = now;
+ spin_lock( &altServersLock );
+ for (i = 0; i < numAltServers; ++i) {
+ if ( !isSameAddressPort( host, &altServers[i].host ) ) continue;
+ // Do only increase counter if last fail was not too recent. This is
+ // to prevent the counter from increasing rapidly if many images use the
+ // same uplink. If there's a network hickup, all uplinks will call this
+ // function and would increase the counter too quickly, disabling the server.
+ if ( now - altServers[i].lastFail > SERVER_RTT_DELAY_INIT ) {
+ altServers[i].numFails++;
+ altServers[i].lastFail = now;
}
break;
}
- spin_unlock( &_alts_lock );
+ spin_unlock( &altServersLock );
}
-
+/**
+ * Mainloop of this module. It will wait for requests by uplinks to find a
+ * suitable uplink server for them. If found, it will tell the uplink about
+ * the best server found. Currently the RTT history is kept per server and
+ * not per uplink, so if many images use the same uplink server, the history
+ * will update quite quickly. Needs to be improved some time, ie. by only
+ * updating the rtt if the last update was at least X seconds ago.
+ */
static void *altservers_main(void *data)
{
const int MAXEVENTS = 3;
@@ -324,7 +348,7 @@ static void *altservers_main(void *data)
setThreadName( "altserver-check" );
blockNoncriticalSignals();
// Init spinlock
- spin_init( &pendingLock, PTHREAD_PROCESS_PRIVATE );
+ spin_init( &pendingLockProduce, PTHREAD_PROCESS_PRIVATE );
// Init waiting links queue
for (int i = 0; i < SERVER_MAX_PENDING_ALT_CHECKS; ++i)
pending[i] = NULL;
@@ -374,10 +398,13 @@ static void *altservers_main(void *data)
}
}
// Work your way through the queue
- spin_lock( &pendingLock );
for (itLink = 0; itLink < SERVER_MAX_PENDING_ALT_CHECKS; ++itLink) {
- if ( pending[itLink] == NULL ) continue;
- spin_unlock( &pendingLock );
+ if ( pending[itLink] == NULL ) continue; // Check once before locking, as a mutex is expensive
+ pthread_mutex_lock( &pendingLockConsume );
+ if ( pending[itLink] == NULL ) { // Check again after locking
+ continue;
+ pthread_mutex_unlock( &pendingLockConsume );
+ }
dnbd3_connection_t * const uplink = pending[itLink];
assert( uplink->rttTestResult == RTT_INPROGRESS );
// Now get 4 alt servers
@@ -398,10 +425,10 @@ static void *altservers_main(void *data)
unsigned int bestRtt = 0xfffffff;
unsigned int currentRtt = 0xfffffff;
for (itAlt = 0; itAlt < numAlts; ++itAlt) {
- usleep( 1000 );
+ usleep( 1000 ); // Wait a very short moment for the network to recover (we might be doing lots of measurements...)
// Connect
clock_gettime( CLOCK_MONOTONIC_RAW, &start );
- int sock = sock_connect( &servers[itAlt], 750, 1250 );
+ int sock = sock_connect( &servers[itAlt], 750, _uplinkTimeout );
if ( sock < 0 ) continue;
// Select image ++++++++++++++++++++++++++++++
if ( !dnbd3_select_image( sock, uplink->image->lower_name, uplink->image->rid, FLAGS8_SERVER ) ) {
@@ -429,7 +456,7 @@ static void *altservers_main(void *data)
// Request random block ++++++++++++++++++++++++++++++
fixup_request( request );
if ( !dnbd3_get_block( sock,
- (((uint64_t)start.tv_nsec | (uint64_t)rand()) * DNBD3_BLOCK_SIZE )% uplink->image->filesize,
+ (((uint64_t)start.tv_nsec ^ (uint64_t)rand()) * DNBD3_BLOCK_SIZE )% uplink->image->filesize,
DNBD3_BLOCK_SIZE) ) {
ERROR_GOTO_VA( server_failed, "[ERROR] Could not request random block for %s", uplink->image->lower_name );
}
@@ -453,18 +480,23 @@ static void *altservers_main(void *data)
const unsigned int rtt = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec) / 1000; // µs
const unsigned int avg = altservers_updateRtt( &servers[itAlt], rtt );
if ( uplink->fd != -1 && isSameAddressPort( &servers[itAlt], &uplink->currentServer ) ) {
+ // Was measuring current server
currentRtt = avg;
close( sock );
} else if ( avg < bestRtt ) {
+ // Was another server, update "best"
if ( bestSock != -1 ) close( bestSock );
bestSock = sock;
bestRtt = avg;
bestIndex = itAlt;
} else {
+ // Was too slow, ignore
close( sock );
}
+ // We're done, call continue
continue;
// Jump here if anything went wrong
+ // This will cleanup and continue
server_failed: ;
altservers_serverFailed( &servers[itAlt] );
server_image_not_available: ;
@@ -484,12 +516,10 @@ static void *altservers_main(void *data)
}
// end of loop over all pending uplinks
pending[itLink] = NULL;
- spin_lock( &pendingLock );
+ pthread_mutex_unlock( &pendingLockConsume );
}
- spin_unlock( &pendingLock );
}
cleanup: ;
- spin_destroy( &pendingLock );
if ( fdEpoll != -1 ) close( fdEpoll );
if ( readPipe != -1 ) close( readPipe );
if ( signalPipe != -1 ) close( signalPipe );
diff --git a/src/server/globals.c b/src/server/globals.c
index 4f081c7..3fcb61d 100644
--- a/src/server/globals.c
+++ b/src/server/globals.c
@@ -14,6 +14,7 @@ int _serverPenalty = 0;
int _clientPenalty = 0;
int _isProxy = FALSE;
int _proxyPrivateOnly = FALSE;
+int _uplinkTimeout = 1250;
#define SAVE_TO_VAR_STR(ss, kk) do { if (strcmp(section, #ss) == 0 && strcmp(key, #kk) == 0) { if (_ ## kk != NULL) free(_ ## kk); _ ## kk = strdup(value); } } while (0)
#define SAVE_TO_VAR_BOOL(ss, kk) do { if (strcmp(section, #ss) == 0 && strcmp(key, #kk) == 0) _ ## kk = atoi(value) != 0 || strcmp(value, "true") == 0 || strcmp(value, "True") == 0 || strcmp(value, "TRUE") == 0; } while (0)
@@ -27,6 +28,7 @@ static int ini_handler(void *custom, const char* section, const char* key, const
SAVE_TO_VAR_BOOL( dnbd3, proxyPrivateOnly );
SAVE_TO_VAR_INT( dnbd3, serverPenalty );
SAVE_TO_VAR_INT( dnbd3, clientPenalty );
+ SAVE_TO_VAR_INT( dnbd3, uplinkTimeout );
return TRUE;
}
diff --git a/src/server/globals.h b/src/server/globals.h
index e2de1fb..7d333e0 100644
--- a/src/server/globals.h
+++ b/src/server/globals.h
@@ -163,6 +163,11 @@ extern int _isProxy;
*/
extern int _proxyPrivateOnly;
+/**
+ * Read timeout when waiting for data on an uplink
+ */
+extern int _uplinkTimeout;
+
void globals_loadConfig();
#endif /* GLOBALS_H_ */
diff --git a/src/server/image.c b/src/server/image.c
index 3ebcace..53b8aea 100644
--- a/src/server/image.c
+++ b/src/server/image.c
@@ -184,7 +184,7 @@ int image_saveCacheMap(dnbd3_image_t *image)
strcpy( mapfile, image->path );
strcat( mapfile, ".map" );
- fd = open( mapfile, O_WRONLY | O_CREAT, 0640 );
+ fd = open( mapfile, O_WRONLY | O_CREAT, 0644 );
if ( fd < 0 ) {
spin_lock( &image->lock );
image->users--;
@@ -734,8 +734,8 @@ int image_create(char *image, int revision, uint64_t size)
const int mapsize = IMGSIZE_TO_MAPBYTES(size);
// Write files
int fdImage = -1, fdCache = -1;
- fdImage = open( path, O_RDWR | O_TRUNC | O_CREAT, 0640 );
- fdCache = open( cache, O_RDWR | O_TRUNC | O_CREAT, 0640 );
+ fdImage = open( path, O_RDWR | O_TRUNC | O_CREAT, 0644 );
+ fdCache = open( cache, O_RDWR | O_TRUNC | O_CREAT, 0644 );
if ( fdImage < 0 ) {
memlogf( "[ERROR] Could not open %s for writing.", path );
goto failure_cleanup;
@@ -800,7 +800,7 @@ dnbd3_image_t* image_getOrClone(char *name, uint16_t revision)
|| remoteCloneCache[i].deadline < now
|| strcmp( cmpname, remoteCloneCache[i].name ) != 0 ) continue;
pthread_mutex_unlock( &remoteCloneLock ); // Was recently checked...
- return NULL ;
+ return image_get( name, revision );
}
// Re-check to prevent two clients at the same time triggering this
image = image_get( name, revision );
@@ -823,7 +823,7 @@ dnbd3_image_t* image_getOrClone(char *name, uint16_t revision)
uint16_t remoteVersion, remoteRid;
uint64_t remoteImageSize;
for (i = 0; i < count; ++i) {
- int sock = sock_connect( &servers[i], 500, 1500 );
+ int sock = sock_connect( &servers[i], 750, _uplinkTimeout );
if ( sock < 0 ) continue;
if ( !dnbd3_select_image( sock, name, revision, FLAGS8_SERVER ) ) goto server_fail;
char *remoteName;
@@ -884,7 +884,7 @@ static int image_clone(int sock, char *name, uint16_t revision, uint64_t imageSi
if ( lists_crc != masterCrc ) {
memlogf( "[WARNING] OTF-Clone: Corrupted CRC-32 list. ignored. (%s)", name );
} else {
- int fd = open( crcFile, O_WRONLY | O_CREAT, 0640 );
+ int fd = open( crcFile, O_WRONLY | O_CREAT, 0644 );
write( fd, &lists_crc, sizeof(uint32_t) );
write( fd, crc32list, crc32len );
close( fd );
@@ -940,7 +940,7 @@ int image_generateCrcFile(char *image)
close( fdImage );
return FALSE;
}
- int fdCrc = open( crcFile, O_RDWR | O_CREAT, 0640 );
+ int fdCrc = open( crcFile, O_RDWR | O_CREAT, 0644 );
if ( fdCrc < 0 ) {
printf( "Could not open CRC File %s for writing..\n", crcFile );
close( fdImage );
diff --git a/src/server/net.c b/src/server/net.c
index 3e9383d..1d9d226 100644
--- a/src/server/net.c
+++ b/src/server/net.c
@@ -50,7 +50,7 @@ static inline char recv_request_header(int sock, dnbd3_request_t *request)
int ret, fails = 0;
// Read request header from socket
while ( (ret = recv( sock, request, sizeof(*request), MSG_WAITALL )) != sizeof(*request) ) {
- if ( ret >= 0 || ++fails > 10 ) return FALSE;
+ if ( ret >= 0 || ++fails > SOCKET_TIMEOUT_SERVER_RETRIES ) return FALSE;
const int err = errno;
if ( err == EAGAIN || err == EINTR ) continue;
printf( "[DEBUG] Error receiving request: Could not read message header (%d/%d, e=%d)\n", ret, (int)sizeof(*request), err );