From 3e8cd5b2284fd68206cd7ea9943e892fcce037f8 Mon Sep 17 00:00:00 2001 From: sr Date: Sun, 9 Sep 2012 23:01:00 +0200 Subject: [SERVER] Automatically connect a dnbd3 device for a relayed image [SERVER] Automatically disconnect dnbd3 device if local cache copy is complete [SERVER] Pre-allocate disk space for cache file [KERNEL] Refuse connection if server reports disk size < 4096 bytes --- src/kernel/blk.c | 44 ++++++++++--------- src/kernel/net.c | 17 ++++++-- src/server/ipc.c | 5 ++- src/server/job.c | 114 ++++++++++++++++++++++++++++++++++++++++++++------ src/server/net.c | 17 ++++++-- src/server/saveload.c | 8 +++- 6 files changed, 161 insertions(+), 44 deletions(-) diff --git a/src/kernel/blk.c b/src/kernel/blk.c index 388f8b6..938a0e8 100644 --- a/src/kernel/blk.c +++ b/src/kernel/blk.c @@ -99,34 +99,39 @@ struct block_device_operations dnbd3_blk_ops = int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - int result = 0; + int result = -100; dnbd3_device_t *dev = bdev->bd_disk->private_data; struct request_queue *blk_queue = dev->disk->queue; char *imgname = NULL; - dnbd3_ioctl_t *msg = kmalloc(sizeof(*msg), GFP_KERNEL); - unsigned long irqflags; + dnbd3_ioctl_t *msg = NULL; + //unsigned long irqflags; - if (msg == NULL) return -ENOMEM; - copy_from_user((char *)msg, (char *)arg, 2); - if (msg->len != sizeof(*msg)) + if (arg != 0) { - result = -ENOEXEC; - goto cleanup_return; - } - copy_from_user((char *)msg, (char *)arg, sizeof(*msg)); - if (msg->imgname != NULL && msg->imgnamelen > 0) - { - imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL); - if (imgname == NULL) + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (msg == NULL) return -ENOMEM; + copy_from_user((char *)msg, (char *)arg, 2); + if (msg->len != sizeof(*msg)) { - result = -ENOMEM; + result = -ENOEXEC; goto cleanup_return; } - copy_from_user(imgname, msg->imgname, msg->imgnamelen); - imgname[msg->imgnamelen] = '\0'; - //printk("IOCTL Image name of len %d is %s\n", (int)msg->imgnamelen, imgname); + copy_from_user((char *)msg, (char *)arg, sizeof(*msg)); + if (msg->imgname != NULL && msg->imgnamelen > 0) + { + imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL); + if (imgname == NULL) + { + result = -ENOMEM; + goto cleanup_return; + } + copy_from_user(imgname, msg->imgname, msg->imgnamelen); + imgname[msg->imgnamelen] = '\0'; + //printk("IOCTL Image name of len %d is %s\n", (int)msg->imgnamelen, imgname); + } } + switch (cmd) { case IOCTL_OPEN: @@ -196,7 +201,6 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u } else { - spin_lock_irqsave(&dev->blk_lock, irqflags); if (dev->new_servers_num >= NUMBER_SERVERS) result = -EAGAIN; else @@ -206,11 +210,11 @@ int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, u ++dev->new_servers_num; result = 0; } - spin_unlock_irqrestore(&dev->blk_lock, irqflags); } break; case BLKFLSBUF: + result = 0; break; default: diff --git a/src/kernel/net.c b/src/kernel/net.c index 67a4b17..d6ebb32 100644 --- a/src/kernel/net.c +++ b/src/kernel/net.c @@ -231,6 +231,8 @@ int dnbd3_net_connect(dnbd3_device_t *dev) error_dev_va("FATAL: Server provides rid %d, requested was %d.", (int)rid, (int)dev->rid); dev->rid = rid; dev->reported_size = serializer_get_uint64(&dev->payload_buffer); + if (dev->reported_size < 4096) + error_dev("ERROR: Reported size by server is < 4096"); // store image information set_capacity(dev->disk, dev->reported_size >> 9); /* 512 Byte blocks */ debug_dev_va("INFO: Filesize: %llu.", dev->reported_size); @@ -442,6 +444,9 @@ int dnbd3_net_discover(void *data) continue; dev->discover = 0; + if (dev->reported_size < 4096) + continue; + // Check if the list of alt servers needs to be updated and do so if neccessary if (dev->new_servers_num) { @@ -457,6 +462,7 @@ int dnbd3_net_discover(void *data) { // REMOVE request alt_server->host.type = 0; + debug_dev_va("Removing alt server %pI4", alt_server->host.addr); continue; } // ADD, so just reset fail counter @@ -469,9 +475,8 @@ int dnbd3_net_discover(void *data) if (alt_server == NULL) // All NUMBER_SERVERS slots are taken, ignore entry continue; // Add new server entry - memcpy(alt_server->host.addr, dev->new_servers[i].host.addr, 16); - alt_server->host.type = dev->new_servers[i].host.type; - alt_server->host.port = dev->new_servers[i].host.port; + alt_server->host = dev->new_servers[i].host; + debug_dev_va("Adding alt server %pI4", alt_server->host.addr); alt_server->rtts[0] = alt_server->rtts[1] = alt_server->rtts[2] = alt_server->rtts[3] = RTT_UNREACHABLE; @@ -576,7 +581,11 @@ int dnbd3_net_discover(void *data) // Request block dnbd3_request.cmd = CMD_GET_BLOCK; // Pick random block - if (sizeof(size_t) >= 8) + if (dev->reported_size == 0) + { + dnbd3_request.offset = 0; + } + else if (sizeof(size_t) >= 8) { dnbd3_request.offset = ((((start.tv_usec << 12) ^ start.tv_usec) << 4) % dev->reported_size) & ~(uint64_t)(RTT_BLOCK_SIZE-1); //printk("Random offset 64bit: %lluMiB\n", (unsigned long long)(dnbd3_request.offset >> 20)); diff --git a/src/server/ipc.c b/src/server/ipc.c index c59aa6b..b455c44 100644 --- a/src/server/ipc.c +++ b/src/server/ipc.c @@ -381,7 +381,8 @@ static int ipc_receive(int client_sock) xmlNewProp(tmp_node, BAD_CAST "rid", BAD_CAST strbuffer); sprintf(strbuffer, "%llu", (unsigned long long)image->filesize); xmlNewProp(tmp_node, BAD_CAST "size", BAD_CAST strbuffer); - xmlNewProp(tmp_node, BAD_CAST "file", BAD_CAST image->file); + if (image->file) + xmlNewProp(tmp_node, BAD_CAST "file", BAD_CAST image->file); xmlNewProp(tmp_node, BAD_CAST "servers", BAD_CAST "???"); // TODO if (image->cache_file && image->cache_map) { @@ -391,7 +392,7 @@ static int ipc_receive(int client_sock) if (image->cache_map[i]) complete += 100; sprintf(strbuffer, "%d", complete / size); - xmlNewProp(tmp_node, BAD_CAST "cachefill", BAD_CAST image->cache_file); + xmlNewProp(tmp_node, BAD_CAST "cachefill", BAD_CAST strbuffer); } xmlAddChild(parent_node, tmp_node); } diff --git a/src/server/job.c b/src/server/job.c index 44beb00..b78ef4f 100644 --- a/src/server/job.c +++ b/src/server/job.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -84,8 +85,9 @@ void *dnbd3_job_thread(void *data) devices[j].available = TRUE; ++j; } + num_devices = j; } - memlogf("[INFO] %d available dnbd3 devices for proxy mode", j); + memlogf("[INFO] %d available dnbd3 devices for proxy mode", num_devices); // time_t next_delete_invocation = 0; // @@ -143,6 +145,52 @@ static void connect_proxy_images() pthread_spin_unlock(&_spinlock); break; } + if (image->working && image->cache_map && image->file) + { // Check if cache is complete + int complete = TRUE, j; + const int map_len_bytes = IMGSIZE_TO_MAPBYTES(image->filesize); + for (j = 0; j < map_len_bytes - 1; ++j) + { + if (image->cache_map[j] != 0xFF) + { + complete = FALSE; + break; + } + } + if (complete) + { + const int blocks_in_last_byte = (image->filesize >> 12) & 7; + uint8_t last_byte = 0; + if (blocks_in_last_byte == 0) + last_byte = 0xFF; + else + for (j = 0; j < blocks_in_last_byte; ++j) + last_byte |= (1 << j); + complete = ((image->cache_map[map_len_bytes - 1] & last_byte) == last_byte); + } + if (!complete) + { + pthread_spin_unlock(&_spinlock); + continue; + } + // Image is 100% cached, disconnect dnbd3 device + memlogf("[INFO] Disconnecting %s because local copy of %s is complete.", image->file, image->config_group); + int dh = open(image->file, O_RDONLY); + if (dh < 0) + memlogf("[ERROR] Could not open() device '%s'", image->file); + else + { + if (ioctl(dh, IOCTL_CLOSE, (void*)0) != 0) + memlogf("[ERROR] Could not IOCTL_CLOSE device '%s'", image->file); + else + return_free_device(image->file); + close(dh); + } + free(image->file); + image->file = NULL; + pthread_spin_unlock(&_spinlock); + continue; + } if (image->working || image->file || image->low_name == NULL) { // Nothing to do pthread_spin_unlock(&_spinlock); @@ -160,7 +208,7 @@ static void connect_proxy_images() rid = image->rid; memcpy(servers, image->servers, sizeof(servers[0]) * NUMBER_SERVERS); pthread_spin_unlock(&_spinlock); - int dh = open(devname, O_WRONLY); + int dh = open(devname, O_RDWR); if (dh < 0) continue; for (s = 0; s < NUMBER_SERVERS; ++s) @@ -168,12 +216,14 @@ static void connect_proxy_images() if (servers[s].host.type == 0) continue; // connect device + printf("[DEBUG] Connecting device....\n"); msg.host = servers[s].host; msg.imgname = imagename; msg.imgnamelen = strlen(imagename); msg.rid = rid; if (ioctl(dh, IOCTL_OPEN, &msg) < 0) continue; + printf("[DEBUG] Connected! Adding alt servers...\n"); // connected for (++s; s < NUMBER_SERVERS; ++s) { @@ -182,7 +232,10 @@ static void connect_proxy_images() msg.host = servers[s].host; if (ioctl(dh, IOCTL_ADD_SRV, &msg) < 0) memlogf("[WARNING] Could not add alt server to proxy device"); + else + printf("[DEBUG] Added an alt server\n"); } + printf("[DEBUG] Done, handling file size...\n"); // LOCK + UPDATE pthread_spin_lock(&_spinlock); if (g_slist_find(_dnbd3_images, image) == NULL) @@ -195,16 +248,29 @@ static void connect_proxy_images() else { image->file = strdup(devname); - const off_t off = lseek(dh, 0, SEEK_END); - if (off < 0) - memlogf("[ERROR] Could not get image size from connected device %s", devname); - else if (image->filesize != 0 && image->filesize != off) - memlogf("[ERROR] Remote and local size of image do not match: %llu != %llu for %s", (unsigned long long)off, (unsigned long long)image->filesize, image->low_name); + long long oct = 0; + int t, ret; + for (t = 0; t < 10 && dh >= 0; ++t) + { // For some reason the ioctl might return 0 right after connecting + ret = ioctl(dh, BLKGETSIZE64, &oct); + if (ret == 0 && oct > 0) + break; + close(dh); + usleep(100 * 1000); + dh = open(devname, O_RDONLY); + } + if (dh < 0 || ret != 0) + memlogf("[ERROR] SIZE fail on %s (ret=%d, oct=%lld)", devname, ret, oct); + else if (oct == 0) + memlogf("[ERROR] Reported disk size is 0."); + else if (image->filesize != 0 && image->filesize != oct) + memlogf("[ERROR] Remote and local size of image do not match: %llu != %llu for %s", (unsigned long long)oct, (unsigned long long)image->filesize, image->low_name); else image->working = TRUE; - image->filesize = (uint64_t)off; + image->filesize = (uint64_t)oct; if (image->cache_file != NULL && image->working && image->cache_map == NULL) { + printf("[DEBUG] Image has cache file %s\n", image->cache_file); const int mapsize = IMGSIZE_TO_MAPBYTES(image->filesize); image->cache_map = calloc(mapsize, 1); off_t cachelen = -1; @@ -214,6 +280,19 @@ static void connect_proxy_images() cachelen = lseek(ch, 0, SEEK_END); close(ch); } + else + { + ch = open(image->cache_file, O_WRONLY | O_CREAT, 0600); + if (ch >= 0) + { + // Pre-allocate disk space + printf("[DEBUG] Pre-allocating disk space...\n"); + lseek(ch, image->filesize - 1, SEEK_SET); + write(ch, &ch, 1); + close(ch); + printf("[DEBUG] Allocation complete.\n"); + } + } if (cachelen == image->filesize) { char mapfile[strlen(image->cache_file) + 5]; @@ -225,7 +304,7 @@ static void connect_proxy_images() memlogf("[WARNING] Existing cache map has wrong size."); else { - lseek(cmh, 0, SEEK_CUR); + lseek(cmh, 0, SEEK_SET); read(cmh, image->cache_map, mapsize); printf("[DEBUG] Found existing cache file and map for %s\n", image->low_name); } @@ -233,7 +312,8 @@ static void connect_proxy_images() } } } - memlogf("[INFO] Enabled relayed image %s", image->low_name); + if (image->working) + memlogf("[INFO] Enabled relayed image %s (%lld)", image->low_name, oct); } pthread_spin_unlock(&_spinlock); break; @@ -376,9 +456,11 @@ static void query_servers() NEW_POINTERLIST; char *image = XML_GETPROP(cur, "name"); char *ridstr = XML_GETPROP(cur, "rid"); - if (!image || !ridstr) + char *sizestr = XML_GETPROP(cur, "size"); + if (!image || !ridstr || !sizestr) goto free_current_image; - int rid = atoi(ridstr); + const int rid = atoi(ridstr); + const long long size = atoll(sizestr); if (rid <= 0) { printf("[DEBUG] Ignoring remote image with rid %d\n", rid); @@ -425,7 +507,10 @@ static void query_servers() newimage.config_group = xmlbuffer; newimage.rid = rid; if (_cache_dir) + { newimage.cache_file = create_cache_filename(xmlbuffer, rid, cachefile, 70); + printf("[DEBUG] Cache file is %s\n", newimage.cache_file); + } dnbd3_add_image(&newimage); pthread_spin_lock(&_spinlock); local_image = dnbd3_get_image(xmlbuffer, rid, FALSE); @@ -437,7 +522,10 @@ static void query_servers() { // Image is already KNOWN, add alt server if appropriate // TODO: Check if requested for namespace - add_alt_server(local_image, &server->host); + if (size != local_image->filesize) + printf("[DEBUG] Ignoring remote image '%s' because it has a different size from the local version!\n", local_image->config_group); + else + add_alt_server(local_image, &server->host); pthread_spin_unlock(&_spinlock); } // Cleanup diff --git a/src/server/net.c b/src/server/net.c index 425f23f..a895f85 100644 --- a/src/server/net.c +++ b/src/server/net.c @@ -206,12 +206,15 @@ void *dnbd3_handle_query(void *dnbd3_client) } else { - image_file = open(image->file, O_RDONLY); - if (image_file == -1) + if (image->file) { - image = NULL; + image_file = open(image->file, O_RDONLY); + if (image_file == -1) + { + image = NULL; + } } - else + if (image) { client->image = image; if (!client->is_server) @@ -219,6 +222,10 @@ void *dnbd3_handle_query(void *dnbd3_client) if (image->cache_map && image->cache_file) image_cache = open(image->cache_file, O_RDWR); + else if (image->cache_map) + printf("[BUG] Image has cache_map but no cache file!\n"); + else if (image->cache_file) + printf("[BUG] Image has cache_file but not cache map!\n"); } } } @@ -312,6 +319,8 @@ void *dnbd3_handle_query(void *dnbd3_client) lseek(image_cache, todo_offset, SEEK_SET); if (sendfile(image_cache, image_file, (off_t *) &todo_offset, todo_size) != todo_size) { + if (image->file == NULL) + printf("[ERROR] Device was closed when local copy was incomplete."); printf("[ERROR] sendfile failed (copy to cache 1)\n"); close(client->sock); client->sock = -1; diff --git a/src/server/saveload.c b/src/server/saveload.c index b41bb56..5f67b28 100644 --- a/src/server/saveload.c +++ b/src/server/saveload.c @@ -161,7 +161,6 @@ int dnbd3_add_image(dnbd3_image_t *image) image = NULL; if (newimage) { - memlogf("[INFO] Added image '%s'", newimage->low_name); _dnbd3_images = g_slist_prepend(_dnbd3_images, newimage); } else @@ -375,6 +374,12 @@ static dnbd3_image_t *prepare_image(char *image_name, int rid, char *image_file, image->rid = rid; image->relayed = (image_file == NULL || image_file == '\0'); + if (image_file && strncmp(image_file, "/dev/dnbd", 9) == 0) + { + printf("[BUG BUG BUG] Image file is %s\n", image_file); + image->relayed = TRUE; + } + if (image->relayed) // Image is relayed (this server acts as proxy) { if (strchr(image_name, '/') == NULL) @@ -431,6 +436,7 @@ static dnbd3_image_t *prepare_image(char *image_name, int rid, char *image_file, } else if (image->filesize > 0) { + printf("[DEBUG] Size known %llu for %s\n", (unsigned long long)image->filesize, image->cache_file); const size_t map_len_bytes = IMGSIZE_TO_MAPBYTES(image->filesize); image->cache_map = calloc(map_len_bytes, sizeof(uint8_t)); // read cache map from file -- cgit v1.2.3-55-g7522