/*
* This file is part of the Distributed Network Block Device 3
*
* Copyright(c) 2011-2012 Johann Latocha <johann@latocha.de>
*
* This file may be licensed under the terms of the
* GNU General Public License Version 2 (the ``GPL'').
*
* Software distributed under the License is distributed
* on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
* express or implied. See the GPL for the specific language
* governing rights and limitations.
*
* You should have received a copy of the GPL along with this
* program. If not, go to http://www.gnu.org/licenses/gpl.html
* or write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
#include <dnbd3/config/client.h>
#include "blk.h"
#include "net.h"
#include "sysfs.h"
#include "dnbd3_main.h"
#include <linux/pagemap.h>
/*
 * Request classification helpers.
 *
 * Every expansion is wrapped in parentheses so that the macro behaves as a
 * single boolean operand when a caller combines it with `!`, `&&` or `||`.
 */

/* True if the operation of the request is REQ_OP_READ. */
#define dnbd3_req_read(req) \
	(req_op(req) == REQ_OP_READ)

/* True if the operation of the request is REQ_OP_READ or REQ_OP_WRITE. */
#define dnbd3_req_fs(req) \
	(dnbd3_req_read(req) || req_op(req) == REQ_OP_WRITE)

/* True if blk_rq_is_private() classifies the request as private. */
#define dnbd3_req_special(req) \
	(blk_rq_is_private(req))
/*
 * Disconnect the device and put it back into the "no image opened" state.
 *
 * Pending requests are failed both before and after the network disconnect,
 * the reported capacity is set to zero while the queue is frozen, and the
 * stored image name is released.
 *
 * Returns whatever dnbd3_net_disconnect() returned.
 */
static int dnbd3_close_device(dnbd3_device_t *dev)
{
	int disconnect_result;

	dnbd3_blk_fail_all_requests(dev);
	dev->panic = 0;
	dev->discover = 0;
	disconnect_result = dnbd3_net_disconnect(dev);
	dnbd3_blk_fail_all_requests(dev);

	/* change the capacity only while the queue is frozen */
	blk_mq_freeze_queue(dev->queue);
	set_capacity(dev->disk, 0);
	blk_mq_unfreeze_queue(dev->queue);

	/* kfree(NULL) is a no-op, so no guard is needed */
	kfree(dev->imgname);
	dev->imgname = NULL;

	return disconnect_result;
}
/*
 * ioctl handler of the dnbd3 block device.
 *
 * If arg is non-zero it is treated as a user-space pointer to a
 * dnbd3_ioctl_t. The message is copied into kernel memory, and so is the
 * image name it references (if any), which gets NUL-terminated.
 *
 * Commands:
 *   IOCTL_OPEN     - store image name/rid, register the given hosts as alt
 *                    servers and connect to the first one that answers.
 *                    On success the index of that host is returned.
 *   IOCTL_CLOSE    - close the device; returns the result of
 *                    dnbd3_close_device().
 *   IOCTL_SWITCH   - switch to another, already known, server. Falls back to
 *                    the previous server if the new one cannot be reached.
 *   IOCTL_ADD_SRV,
 *   IOCTL_REM_SRV  - queue a server addition/removal in dev->new_servers.
 *   BLKFLSBUF      - accepted, nothing to do.
 *
 * OPEN, CLOSE and SWITCH are serialized through dev->connection_lock.
 *
 * Returns a value >= 0 on success, otherwise a negative errno:
 *   -ENOMEM        allocation failure
 *   -ENOEXEC       message header unreadable or msg->len mismatch
 *   -ENOENT        copying the message/image name failed, no server could be
 *                  reached (OPEN), or no image is opened (SWITCH/ADD/REM)
 *   -EBUSY         connection lock is held or device is already opened
 *   -EINVAL        missing or invalid arguments, or unknown server (SWITCH)
 *   -ECONNABORTED  switching to the requested server failed
 *   -EAGAIN        the list of pending server changes is full
 *   -EIO           unknown command
 */
static int dnbd3_blk_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
{
	int result = -100;
	dnbd3_device_t *dev = bdev->bd_disk->private_data;
	struct request_queue *blk_queue = dev->disk->queue;
	char *imgname = NULL;
	dnbd3_ioctl_t *msg = NULL;
	dnbd3_server_entry_t server;
	dnbd3_server_t old_server;
	dnbd3_server_t *alt_server;
	unsigned long irqflags;
	int i = 0;
	u8 locked = 0;

	if (arg != 0)
	{
		msg = kmalloc(sizeof(*msg), GFP_KERNEL);
		if (msg == NULL) return -ENOMEM;
		/*
		 * Fetch only the first two bytes and validate msg->len before copying
		 * the whole structure, so a caller built against a different message
		 * layout is rejected with -ENOEXEC.
		 */
		if (copy_from_user((char *)msg, (char *)arg, 2) != 0 || msg->len != sizeof(*msg))
		{
			result = -ENOEXEC;
			goto cleanup_return;
		}
		if (copy_from_user((char *)msg, (char *)arg, sizeof(*msg)) != 0)
		{
			result = -ENOENT;
			goto cleanup_return;
		}
		if (msg->imgname != NULL && msg->imgnamelen > 0)
		{
			/* one extra byte for the terminating NUL added below */
			imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL);
			if (imgname == NULL)
			{
				result = -ENOMEM;
				goto cleanup_return;
			}
			if (copy_from_user(imgname, msg->imgname, msg->imgnamelen) != 0)
			{
				result = -ENOENT;
				goto cleanup_return;
			}
			imgname[msg->imgnamelen] = '\0';
		}
	}

	switch (cmd)
	{
	case IOCTL_OPEN:
		if (atomic_cmpxchg(&dev->connection_lock, 0, 1) != 0)
		{
			result = -EBUSY;
			break;
		}
		locked = 1;
		if (dev->imgname != NULL)
		{
			result = -EBUSY;
		}
		else if (imgname == NULL)
		{
			result = -EINVAL;
		}
		else if (msg == NULL)
		{
			result = -EINVAL;
		}
		else
		{
			if (sizeof(msg->hosts[0]) != sizeof(dev->cur_server.host))
				dev_warn(dnbd3_device_to_dev(dev), "odd size bug triggered in IOCTL\n");

			/* assert that at least one and not too many hosts are given */
			if (msg->hosts_num < 1 || msg->hosts_num > NUMBER_SERVERS) {
				result = -EINVAL;
				break;
			}

			dev->imgname = imgname;
			dev->rid = msg->rid;
			dev->use_server_provided_alts = msg->use_server_provided_alts;

			if (blk_queue->backing_dev_info != NULL) {
				blk_queue->backing_dev_info->ra_pages = (msg->read_ahead_kb * 1024) / PAGE_SIZE;
			}

			/* add specified servers to alt server list */
			for (i = 0; i < msg->hosts_num; i++) {
				/* copy provided host into corresponding alt server slot */
				memset(&dev->alt_servers[i], 0, sizeof(dev->alt_servers[i]));
				memcpy(&dev->alt_servers[i].host, &msg->hosts[i], sizeof(msg->hosts[i]));
				dev->alt_servers[i].failures = 0;

				if (dev->alt_servers[i].host.type == HOST_IP4)
					dev_dbg(dnbd3_device_to_dev(dev), "adding server %pI4\n", dev->alt_servers[i].host.addr);
				else
					dev_dbg(dnbd3_device_to_dev(dev), "adding server [%pI6]\n", dev->alt_servers[i].host.addr);
			}

			/* probe added alt servers in specified order and choose first working server as initial server */
			for (i = 0; i < msg->hosts_num; i++) {
				/* probe added alt server */
				memcpy(&dev->cur_server, &dev->alt_servers[i], sizeof(dev->cur_server));

				if (dnbd3_net_connect(dev) != 0) {
					/* probing server failed, cleanup connection and proceed with next specified server */
					dnbd3_blk_fail_all_requests(dev);
					dnbd3_net_disconnect(dev);
					dnbd3_blk_fail_all_requests(dev);
					result = -ENOENT;
				} else {
					/* probing server succeeds, abort probing of other servers */
					result = i;
					break;
				}
			}

			if (result >= 0)
			{
				/* probing was successful */
				if (dev->cur_server.host.type == HOST_IP4)
					dev_dbg(dnbd3_device_to_dev(dev), "server %pI4 is initial server\n", dev->cur_server.host.addr);
				else
					dev_dbg(dnbd3_device_to_dev(dev), "server [%pI6] is initial server\n", dev->cur_server.host.addr);

				/* ownership moved to dev->imgname; prevent kfree at the end */
				imgname = NULL;
			}
			else
			{
				/* probing failed; the local imgname is freed at the end */
				dev->imgname = NULL;
			}
		}
		break;

	case IOCTL_CLOSE:
		if (atomic_cmpxchg(&dev->connection_lock, 0, 1) != 0)
		{
			result = -EBUSY;
			break;
		}
		locked = 1;
		result = dnbd3_close_device(dev);
		break;

	case IOCTL_SWITCH:
		if (atomic_cmpxchg(&dev->connection_lock, 0, 1) != 0)
		{
			result = -EBUSY;
			break;
		}
		locked = 1;
		if (dev->imgname == NULL)
		{
			result = -ENOENT;
		}
		else if (msg == NULL)
		{
			result = -EINVAL;
		}
		else
		{
			/* convert host to dnbd3-server for switching */
			memcpy(&server.host, &msg->hosts[0], sizeof(server.host));
			server.failures = 0;

			alt_server = get_existing_server(&server, dev);
			if (alt_server == NULL)
			{
				/* specified server is not known, so do not switch */
				result = -EINVAL;
			}
			else
			{
				/* specified server is known, so try to switch to it */
				if (!is_same_server(&dev->cur_server, alt_server))
				{
					/* specified server is not the current one, so switch to it */
					/* save current working server */
					/* lock device to get consistent copy of current working server */
					spin_lock_irqsave(&dev->blk_lock, irqflags);
					memcpy(&old_server, &dev->cur_server, sizeof(old_server));
					spin_unlock_irqrestore(&dev->blk_lock, irqflags);

					/* disconnect old server */
					dnbd3_net_disconnect(dev);

					dev_info(dnbd3_device_to_dev(dev), "switching server ...\n");

					/* connect to new specified server (switching) */
					memcpy(&dev->cur_server, alt_server, sizeof(dev->cur_server));
					result = dnbd3_net_connect(dev);
					if (result != 0)
					{
						/* reconnect with old server if switching has failed */
						memcpy(&dev->cur_server, &old_server, sizeof(dev->cur_server));
						if (dnbd3_net_connect(dev) != 0)
						{
							/* no server reachable any more: report an empty device */
							blk_mq_freeze_queue(dev->queue);
							set_capacity(dev->disk, 0);
							blk_mq_unfreeze_queue(dev->queue);
						}
						result = -ECONNABORTED;
					}
				}
				else
				{
					/* specified server is already the current one, so do not switch */
					result = 0;
				}
			}
		}
		break;

	case IOCTL_ADD_SRV:
	case IOCTL_REM_SRV:
		/*
		 * Bail out on invalid state/arguments before touching the list of
		 * pending changes: msg is dereferenced below and must not be NULL,
		 * and the error code must not be overwritten.
		 */
		if (dev->imgname == NULL)
		{
			result = -ENOENT;
			break;
		}
		if (msg == NULL)
		{
			result = -EINVAL;
			break;
		}

		/* protect access to 'new_servers_num' and 'new_servers' */
		spin_lock_irqsave(&dev->blk_lock, irqflags);
		if (dev->new_servers_num >= NUMBER_SERVERS)
		{
			result = -EAGAIN;
		}
		else
		{
			/* add or remove specified server */
			memcpy(&dev->new_servers[dev->new_servers_num].host, &msg->hosts[0], sizeof(msg->hosts[0]));
			dev->new_servers[dev->new_servers_num].failures = (cmd == IOCTL_ADD_SRV ? 0 : 1); // 0 = ADD, 1 = REM
			++dev->new_servers_num;
			result = 0;
		}
		spin_unlock_irqrestore(&dev->blk_lock, irqflags);
		break;

	case BLKFLSBUF:
		result = 0;
		break;

	default:
		result = -EIO;
		break;
	}

	if (locked)
		atomic_set(&dev->connection_lock, 0);

cleanup_return:
	/* kfree(NULL) is a no-op */
	kfree(msg);
	kfree(imgname);
	return result;
}
/*
 * Block device operations of a dnbd3 disk: only ioctl is implemented,
 * everything else is left at its default.
 */
static const struct block_device_operations dnbd3_blk_ops = {
	.ioctl = dnbd3_blk_ioctl,
	.owner = THIS_MODULE,
};
/*
 * blk-mq queue_rq callback: accept a request for the device.
 *
 * The request is started and then either completed immediately with an error
 * status or appended to dev->request_queue_send, after which the waiters on
 * dev->process_queue_send are woken up.
 *
 * Immediate completions:
 *   BLK_STS_IOERR    no image is opened, or the request is neither a read
 *                    nor a write
 *   BLK_STS_TIMEOUT  PROBE_COUNT_TIMEOUT is enabled and dev->panic_count
 *                    reached it
 *   BLK_STS_NOTSUPP  the request is not a read
 *
 * Always returns BLK_STS_OK, since rejected requests are ended here.
 */
static blk_status_t dnbd3_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd)
{
	struct request *req = bd->rq;
	dnbd3_device_t *dev = req->q->queuedata;
	blk_status_t reject = BLK_STS_OK;
	unsigned long flags;

	blk_mq_start_request(req);

	/* the checks are evaluated in this exact order; first match wins */
	if (dev->imgname == NULL || !(dnbd3_req_fs(req)))
		reject = BLK_STS_IOERR;
	else if (PROBE_COUNT_TIMEOUT > 0 && dev->panic_count >= PROBE_COUNT_TIMEOUT)
		reject = BLK_STS_TIMEOUT;
	else if (!(dnbd3_req_read(req)))
		reject = BLK_STS_NOTSUPP;

	if (reject != BLK_STS_OK)
	{
		blk_mq_end_request(req, reject);
		return BLK_STS_OK;
	}

	/* hand the request over to the send queue */
	spin_lock_irqsave(&dev->blk_lock, flags);
	list_add_tail(&req->queuelist, &dev->request_queue_send);
	spin_unlock_irqrestore(&dev->blk_lock, flags);
	wake_up(&dev->process_queue_send);

	return BLK_STS_OK;
}
/* blk-mq operations of a dnbd3 device: requests are handed to dnbd3_queue_rq(). */
static const struct blk_mq_ops dnbd3_mq_ops = {
.queue_rq = dnbd3_queue_rq,
};
/*
 * Initialize a dnbd3 device and register its disk.
 *
 * Resets the per-device state, allocates the blk-mq tag set and request
 * queue, configures the queue limits, allocates and registers a read-only
 * gendisk named "dnbd<minor>" with capacity 0, and finally sets up sysfs.
 *
 * dev:   device structure to initialize
 * minor: minor number, also used for the disk name
 *
 * Returns 0 on success or a negative errno; on failure everything that was
 * allocated here is released again.
 */
int dnbd3_blk_add_device(dnbd3_device_t *dev, int minor)
{
	const int max_io_bytes = 1048576; /* 1 MiB */
	int err;

	/* wait queues and request lists */
	init_waitqueue_head(&dev->process_queue_send);
	init_waitqueue_head(&dev->process_queue_discover);
	INIT_LIST_HEAD(&dev->request_queue_send);
	INIT_LIST_HEAD(&dev->request_queue_receive);

	/* server bookkeeping */
	memset(&dev->cur_server, 0, sizeof(dev->cur_server));
	memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])*NUMBER_SERVERS);

	/* connection and image state */
	dev->better_sock = NULL;
	dev->imgname = NULL;
	dev->rid = 0;
	dev->update_available = 0;
	dev->reported_size = 0;

	/* worker threads and their control flags */
	dev->thread_send = NULL;
	dev->thread_receive = NULL;
	dev->thread_discover = NULL;
	dev->discover = 0;
	dev->panic = 0;
	dev->panic_count = 0;

	atomic_set(&dev->connection_lock, 0);

	/* spin lock protecting the send and receive request queues */
	spin_lock_init(&dev->blk_lock);

	/* tag set for blk-mq */
	dev->tag_set.ops = &dnbd3_mq_ops;
	dev->tag_set.nr_hw_queues = 1;
	dev->tag_set.queue_depth = 128;
	dev->tag_set.numa_node = NUMA_NO_NODE;
	dev->tag_set.cmd_size = 0;
	dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	dev->tag_set.driver_data = dev;

	err = blk_mq_alloc_tag_set(&dev->tag_set);
	if (err)
	{
		dev_err(dnbd3_device_to_dev(dev), "blk_mq_alloc_tag_set failed\n");
		return err;
	}

	/* request queue */
	dev->queue = blk_mq_init_queue(&dev->tag_set);
	if (IS_ERR(dev->queue))
	{
		err = PTR_ERR(dev->queue);
		dev_err(dnbd3_device_to_dev(dev), "blk_mq_init_queue failed\n");
		goto free_tag_set;
	}
	dev->queue->queuedata = dev;

	/* queue limits and flags */
	blk_queue_logical_block_size(dev->queue, DNBD3_BLOCK_SIZE);
	blk_queue_physical_block_size(dev->queue, DNBD3_BLOCK_SIZE);
	blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dev->queue);
	blk_queue_max_segment_size(dev->queue, max_io_bytes);
	blk_queue_max_segments(dev->queue, 0xffff);
	blk_queue_max_hw_sectors(dev->queue, max_io_bytes / DNBD3_BLOCK_SIZE);
	dev->queue->limits.max_sectors = 256;

	/* disk */
	dev->disk = alloc_disk(1);
	if (dev->disk == NULL)
	{
		dev_err(dnbd3_device_to_dev(dev), "alloc_disk failed\n");
		err = -ENOMEM;
		goto cleanup_queue;
	}

	dev->disk->flags |= GENHD_FL_NO_PART_SCAN;
	dev->disk->major = major;
	dev->disk->first_minor = minor;
	dev->disk->fops = &dnbd3_blk_ops;
	dev->disk->private_data = dev;
	dev->disk->queue = dev->queue;
	sprintf(dev->disk->disk_name, "dnbd%d", minor);

	/* the disk starts out empty and read-only */
	set_capacity(dev->disk, 0);
	set_disk_ro(dev->disk, 1);
	add_disk(dev->disk);

	/* sysfs */
	dnbd3_sysfs_init(dev);

	return 0;

cleanup_queue:
	blk_cleanup_queue(dev->queue);
free_tag_set:
	blk_mq_free_tag_set(&dev->tag_set);
	return err;
}
/*
 * Shut down a dnbd3 device and release its block layer resources.
 *
 * Waits until dev->connection_lock can be taken (calling schedule() between
 * attempts), closes the device, removes its sysfs entries and then tears
 * down disk, queue and tag set. The connection lock is not released again.
 *
 * Always returns 0.
 */
int dnbd3_blk_del_device(dnbd3_device_t *dev)
{
	/* keep retrying until the lock goes from 0 to 1 on our behalf */
	for (;;)
	{
		if (atomic_cmpxchg(&dev->connection_lock, 0, 1) == 0)
			break;
		schedule();
	}

	dnbd3_close_device(dev);
	dnbd3_sysfs_exit(dev);

	del_gendisk(dev->disk);
	blk_cleanup_queue(dev->queue);
	blk_mq_free_tag_set(&dev->tag_set);
	put_disk(dev->disk);

	return 0;
}
/*
 * Unlink every request from src and push it onto the front of dst.
 *
 * A request that is already found on dst is not added a second time; a
 * warning is logged instead. Called with dev->blk_lock held.
 */
static void dnbd3_blk_collect_requests(dnbd3_device_t *dev, struct list_head *src, struct list_head *dst)
{
	struct request *req, *seen;
	int duplicate;

	while (!list_empty(src))
	{
		req = list_first_entry(src, struct request, queuelist);
		list_del_init(&req->queuelist);

		duplicate = 0;
		list_for_each_entry(seen, dst, queuelist)
		{
			if (seen == req)
			{
				dev_warn(dnbd3_device_to_dev(dev), "request is in both lists\n");
				duplicate = 1;
				break;
			}
		}

		if (!duplicate)
			list_add(&req->queuelist, dst);
	}
}

/*
 * Fail all requests currently queued on the device.
 *
 * The receive queue and then the send queue are emptied into a private list
 * under dev->blk_lock. Afterwards each collected request is finished:
 * read/write requests are ended with BLK_STS_IOERR, private requests are
 * released with kfree().
 */
void dnbd3_blk_fail_all_requests(dnbd3_device_t *dev)
{
	struct list_head doomed;
	struct request *req;
	unsigned long irqflags;

	INIT_LIST_HEAD(&doomed);

	/* grab everything in one go so the queues are only locked briefly */
	spin_lock_irqsave(&dev->blk_lock, irqflags);
	dnbd3_blk_collect_requests(dev, &dev->request_queue_receive, &doomed);
	dnbd3_blk_collect_requests(dev, &dev->request_queue_send, &doomed);
	spin_unlock_irqrestore(&dev->blk_lock, irqflags);

	while (!list_empty(&doomed))
	{
		req = list_first_entry(&doomed, struct request, queuelist);
		list_del_init(&req->queuelist);

		if (dnbd3_req_fs(req))
		{
			/*
			 * NOTE(review): blk_lock is held with interrupts disabled while
			 * the request is completed; confirm the completion path can
			 * never re-enter dnbd3_queue_rq(), which takes the same lock.
			 */
			spin_lock_irqsave(&dev->blk_lock, irqflags);
			blk_mq_end_request(req, BLK_STS_IOERR);
			spin_unlock_irqrestore(&dev->blk_lock, irqflags);
		}
		else if (dnbd3_req_special(req))
		{
			kfree(req);
		}
	}
}