/*
* This file is part of the Distributed Network Block Device 3
*
* Copyright(c) 2019 Frederic Robra <frederic@robra.org>
* Parts copyright 2011-2012 Johann Latocha <johann@latocha.de>
*
* This file may be licensed under the terms of the
* GNU General Public License Version 2 (the ``GPL'').
*
* Software distributed under the License is distributed
* on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
* express or implied. See the GPL for the specific language
* governing rights and limitations.
*
* You should have received a copy of the GPL along with this
* program. If not, go to http://www.gnu.org/licenses/gpl.html
* or write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>
#include <linux/uaccess.h>
#include <asm/types.h>
#include "dnbd3.h"
#include "sysfs.h"
#include "clientconfig.h"
#include "net.h"
#include "mq.h"
/* Workqueue of this module; allocated in dnbd3_init(), destroyed in dnbd3_exit(). */
struct workqueue_struct *dnbd3_wq;
/* Index of the created devices; dnbd3_add_device() stores each device under its minor. */
DEFINE_IDR(dnbd3_index_idr);
/* Held by dnbd3_init() and dnbd3_exit() while they work on dnbd3_index_idr. */
DEFINE_MUTEX(dnbd3_index_mutex);
/*
 * Number of devices created at module load.
 * This is a plain int so that it matches the type given to
 * module_param(max_devs, int, ...) and so that the "max_devs < 0" sanity
 * check in dnbd3_init() can actually reject negative values.
 */
static int max_devs = NUMBER_DEVICES;
/* Array of max_devs devices; allocated in dnbd3_init(), freed in dnbd3_exit(). */
static struct dnbd3_device *device;
/* Block major number obtained from register_blkdev() in dnbd3_init(). */
int major;
/**
 * dnbd3_ioctl - the ioctl function of the dnbd3 kernel module
 * @bdev: the block device
 * @mode: open mode of the caller (not used)
 * @cmd: the ioctl command
 * @arg: the user data; if non-zero it is read as the user space address of
 *       a dnbd3_ioctl_t
 *
 * If @arg is non-zero, the message and (if present) the image name it points
 * to are copied into kernel memory before @cmd is dispatched. A message that
 * announces the old length of 40 bytes is accepted; only those 40 bytes are
 * read from user space and the remaining fields stay zeroed.
 *
 * Return: 0 on success, otherwise a negative errno:
 *   -ENOMEM  an allocation failed
 *   -ENOEXEC the message length could not be read or is unexpected
 *   -ENOENT  copying the message or image name failed, or a server was
 *            added/removed while no image is open
 *   -EBUSY   IOCTL_OPEN on a device that already has an image
 *   -EINVAL  missing message/image name, or IOCTL_SWITCH
 *   -EAGAIN  the list of pending server changes is full
 *   -EIO     unhandled command
 * For IOCTL_OPEN and IOCTL_CLOSE the result of dnbd3_net_connect() and
 * dnbd3_net_disconnect() is returned.
 */
static int dnbd3_ioctl(struct block_device *bdev, fmode_t mode,
		unsigned int cmd, unsigned long arg)
{
	/* length of the message in the old ioctl format */
	const unsigned long old_msg_len = 40;
	int result = -EIO;
	bool old_msg = false;
	struct dnbd3_device *dev = bdev->bd_disk->private_data;
	char *imgname = NULL;
	dnbd3_ioctl_t *msg = NULL;
	unsigned long msg_len;

	debug_dev(dev, "ioctl cmd %i, arg %lu", cmd, arg);

	if (arg != 0) {
		msg = kmalloc(sizeof(*msg), GFP_KERNEL);
		if (msg == NULL) {
			return -ENOMEM;
		}
		/*
		 * zero the buffer so that fields an old (shorter) message
		 * does not carry have a defined value
		 */
		memset(msg, 0, sizeof(*msg));

		/* read the leading length field to find out the format */
		if (copy_from_user((char *)msg, (char *)arg, 2) != 0) {
			result = -ENOEXEC;
			goto error;
		}
		if (msg->len == old_msg_len) { /* old msg length was 40 */
			old_msg = true;
		} else if (msg->len != sizeof(*msg)) {
			result = -ENOEXEC;
			goto error;
		}

		/*
		 * never read more than the caller announced: an old message
		 * is only 40 bytes long, reading sizeof(*msg) would run past
		 * the end of the caller's structure
		 */
		msg_len = sizeof(*msg);
		if (old_msg && msg_len > old_msg_len) {
			msg_len = old_msg_len;
		}
		if (copy_from_user((char *)msg, (char *)arg, msg_len) != 0) {
			result = -ENOENT;
			goto error;
		}

		if (msg->imgname != NULL && msg->imgnamelen > 0) {
			imgname = kmalloc(msg->imgnamelen + 1, GFP_KERNEL);
			if (imgname == NULL) {
				result = -ENOMEM;
				goto error;
			}
			if (copy_from_user(imgname, msg->imgname,
					msg->imgnamelen) != 0) {
				result = -ENOENT;
				goto error;
			}
			imgname[msg->imgnamelen] = '\0';
			debug_dev(dev, "ioctl image name of len %i is %s",
					(int)msg->imgnamelen, imgname);
		}
	}

	switch (cmd) {
	case IOCTL_OPEN:
		debug_dev(dev, "ioctl open");
		if (dev->imgname != NULL) {
			result = -EBUSY;
		} else if (imgname == NULL) {
			result = -EINVAL;
		} else if (msg == NULL) {
			result = -EINVAL;
		} else {
			mutex_lock(&dev->device_lock);
			if (sizeof(msg->host) != sizeof(dnbd3_host_t)) {
				warn_dev(dev, "odd size bug#1 triggered in ioctl");
			}
			/* the device owns the image name from here on */
			dev->imgname = imgname;
			imgname = NULL;
			dev->rid = msg->rid;
			dev->use_server_provided_alts =
					msg->use_server_provided_alts;
			/*
			 * forget all alt servers on explicit connect, set first
			 * alt server to initial server
			 */
			memset(dev->alt_servers, 0, sizeof(dev->alt_servers[0])
					* NUMBER_SERVERS);
			memcpy(&dev->alt_servers[0].host, &msg->host,
					sizeof(msg->host));
			if (old_msg || msg->number_connections == 0) {
				dev->number_connections = 4;
			} else {
				dev->number_connections = msg->number_connections;
			}
			mutex_unlock(&dev->device_lock);

			result = dnbd3_net_connect(dev);
			if (result) {
				/* connect failed, the device has no image again */
				kfree(dev->imgname);
				dev->imgname = NULL;
			}
		}
		break;

	case IOCTL_CLOSE:
		debug_dev(dev, "ioctl close");
		result = dnbd3_net_disconnect(dev);
		set_capacity(dev->disk, 0);
		kfree(dev->imgname);
		dev->imgname = NULL;
		dev->rid = 0;
		dev->reported_size = 0;
		break;

	case IOCTL_SWITCH:
		debug_dev(dev, "ioctl switch");
		result = -EINVAL;
		break;

	case IOCTL_ADD_SRV:
	case IOCTL_REM_SRV:
		debug_dev(dev, "ioctl add/rem srv");
		if (dev->imgname == NULL) {
			result = -ENOENT;
			break;
		}
		/*
		 * new_servers_num is used as an array index below, so the
		 * bounds check has to happen under the same lock as the
		 * update; otherwise two concurrent callers could both pass
		 * the check and write past the end of new_servers
		 */
		mutex_lock(&dev->device_lock);
		if (dev->new_servers_num >= NUMBER_SERVERS) {
			result = -EAGAIN;
		} else if (msg == NULL) {
			result = -EINVAL;
		} else {
			memcpy(&dev->new_servers[dev->new_servers_num].host,
					&msg->host, sizeof(msg->host));
			/* 0 = ADD, 1 = REM */
			dev->new_servers[dev->new_servers_num].failures =
					(cmd == IOCTL_ADD_SRV ? 0 : 1);
			dev->new_servers_num++;
			result = 0;
		}
		mutex_unlock(&dev->device_lock);
		break;

	case BLKFLSBUF:
		debug_dev(dev, "ioctl blkflsbuf");
		result = 0;
		break;

	default:
		debug_dev(dev, "ioctl unhandled cmd %d", cmd);
		result = -EIO;
		break;
	}

error:
	/* kfree(NULL) is a no-op, so no guards are needed */
	kfree(msg);
	kfree(imgname);
	return result;
}
/**
 * dnbd3_fops - block device operations of a dnbd3 disk
 *
 * Only ioctl handling is provided. Native and compat ioctl calls are both
 * routed to dnbd3_ioctl().
 * NOTE(review): dnbd3_ioctl() copies a dnbd3_ioctl_t from user space; confirm
 * that its layout is the same for compat (32 bit) callers.
 */
static struct block_device_operations dnbd3_fops = {
	.ioctl = dnbd3_ioctl,
	.compat_ioctl = dnbd3_ioctl,
	.owner = THIS_MODULE,
};
/**
 * dnbd3_add_device - add a dnbd3 device
 * @dev: the device; the memory is owned by the caller and is never freed
 *       here, not even on failure
 * @minor: the minor number of the device
 *
 * Initializes the device lock, allocates the disk, registers @dev under
 * @minor in dnbd3_index_idr, sets up the blk-mq tag set and request queue and
 * finally adds the disk and its sysfs entries.
 *
 * Return: @minor on success, a negative errno on failure (-EEXIST if @minor
 * is already taken in dnbd3_index_idr). On failure everything that was set
 * up so far is released again.
 */
int dnbd3_add_device(struct dnbd3_device *dev, int minor)
{
	struct gendisk *disk;
	struct request_queue *q;
	int err = -ENOMEM;

	debug("adding device %d", minor);
	mutex_init(&dev->device_lock);
	mutex_lock(&dev->device_lock);

	disk = alloc_disk(1);
	if (!disk) {
		error_dev(dev, "allocating disc failed");
		goto out_unlock;
	}

	/* reserve exactly this minor: the range is [minor, minor + 1) */
	err = idr_alloc(&dnbd3_index_idr, dev, minor, minor + 1, GFP_KERNEL);
	if (err == -ENOSPC) {
		error_dev(dev, "idr alloc failed");
		err = -EEXIST;
	}
	if (err < 0) {
		goto out_free_disk;
	}

	dev->minor = minor;
	dev->disk = disk;

	dev->tag_set.ops = &dnbd3_mq_ops;
	/* this can be changed later with blk_mq_update_nr_hw_queues() */
	dev->tag_set.nr_hw_queues = 1;
	dev->tag_set.timeout = 5 * HZ;
	dev->tag_set.queue_depth = 128;
	dev->tag_set.numa_node = NUMA_NO_NODE;
	dev->tag_set.cmd_size = sizeof(struct dnbd3_cmd);
	dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE |
			BLK_MQ_F_BLOCKING;
	dev->tag_set.driver_data = dev;

	err = blk_mq_alloc_tag_set(&dev->tag_set);
	if (err) {
		goto out_free_idr;
	}

	q = blk_mq_init_queue(&dev->tag_set);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_free_tags;
	}
	disk->queue = q;

	/*
	 * Tell the block layer that we are not a rotational device
	 */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
	/* no discard support */
	disk->queue->limits.discard_granularity = 0;
	disk->queue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(disk->queue, 0);
	blk_queue_max_segment_size(disk->queue, UINT_MAX);
	blk_queue_max_segments(disk->queue, USHRT_MAX);
	blk_queue_max_hw_sectors(disk->queue, 65536);
	disk->queue->limits.max_sectors = 256;

	INIT_LIST_HEAD(&dev->list);

	disk->major = major;
	disk->first_minor = minor;
	disk->fops = &dnbd3_fops;
	disk->private_data = dev;
	sprintf(disk->disk_name, "dnbd%i", minor);
	add_disk(disk);
	dnbd3_sysfs_init(dev);

	mutex_unlock(&dev->device_lock);
	return minor;

out_free_tags:
	blk_mq_free_tag_set(&dev->tag_set);
out_free_idr:
	idr_remove(&dnbd3_index_idr, minor);
out_free_disk:
	/* do not leave a pointer to the released disk behind */
	dev->disk = NULL;
	put_disk(disk);
out_unlock:
	/*
	 * dev belongs to the caller (dnbd3_init() passes an element of the
	 * device array), so it must not be freed here; it is also still
	 * needed for the unlock and the log message below
	 */
	mutex_unlock(&dev->device_lock);
	warn_dev(dev, "failed to create device");
	return err;
}
/**
 * dnbd3_init - init the dnbd3 kernel module
 *
 * Allocates the module workqueue and the device array, registers the block
 * major and creates max_devs devices.
 *
 * Return: 0 on success, -EINVAL for a negative max_devs, -ENOMEM if an
 * allocation failed, or the error returned by register_blkdev(). Everything
 * that was allocated before a failure is released again.
 */
static int __init dnbd3_init(void)
{
	int i;
	int err;

	debug("starting kernel module");

	/* validate the parameter before anything is allocated */
	if (max_devs < 0) {
		error("max_devs must be >= 0");
		return -EINVAL;
	}

	/*
	 * allocate a workqueue for this module
	 * WQ_MEM_RECLAIM - has a rescuer thread, so work can still make
	 *                  progress under memory pressure
	 * WQ_FREEZABLE - takes part in the freeze phase of system suspend
	 * WQ_UNBOUND - not bound to a certain CPU
	 */
	dnbd3_wq = alloc_workqueue("kdnbd3",
			WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND, 0);
	if (!dnbd3_wq) {
		error("failed to create dnbd3 workqueue");
		return -ENOMEM;
	}

	device = kcalloc(max_devs, sizeof(*device), GFP_KERNEL);
	if (!device) {
		error("failed to create dnbd3 device");
		err = -ENOMEM;
		goto out_destroy_wq;
	}

	/*
	 * initialize block device; with a requested major of 0 the call
	 * returns the allocated major number, or a negative errno on failure
	 */
	major = register_blkdev(0, "dnbd3");
	if (major < 0) {
		error("register_blkdev failed");
		err = major;
		goto out_free_devices;
	}

	debug("kernel module loaded. Machine type: " ENDIAN_MODE);

	/*
	 * create max_devs devices; a failure is logged by dnbd3_add_device()
	 * itself and does not stop the remaining devices from being created
	 */
	mutex_lock(&dnbd3_index_mutex);
	for (i = 0; i < max_devs; i++) {
		dnbd3_add_device(&device[i], i);
	}
	mutex_unlock(&dnbd3_index_mutex);

	info("init successful (%i devices)", max_devs);
	return 0;

out_free_devices:
	kfree(device);
	device = NULL;
out_destroy_wq:
	destroy_workqueue(dnbd3_wq);
	dnbd3_wq = NULL;
	return err;
}
/**
 * dnbd3_exit_cb - idr_for_each() callback that collects devices in a list
 * @id: the id of the entry (not used)
 * @ptr: the entry, a struct dnbd3_device
 * @data: the callback data, the struct list_head to append the device to
 *
 * Return: always 0
 */
static int dnbd3_exit_cb(int id, void *ptr, void *data)
{
	struct dnbd3_device *entry = ptr;
	struct list_head *head = data;

	list_add_tail(&entry->list, head);

	return 0;
}
/**
 * dnbd3_dev_remove - remove the dnbd3 device
 * @dev: the device to remove
 *
 * Disconnects the device if it is connected, tears down its disk, request
 * queue and tag set, releases the image name and destroys the device lock.
 */
static void dnbd3_dev_remove(struct dnbd3_device *dev)
{
	struct gendisk *gd = dev->disk;

	if (dev->connected)
		dnbd3_net_disconnect(dev);

	if (gd != NULL) {
		/* remember the queue, it is cleaned up after the disk is gone */
		struct request_queue *queue = gd->queue;

		del_gendisk(gd);
		blk_cleanup_queue(queue);
		blk_mq_free_tag_set(&dev->tag_set);
		gd->private_data = NULL;
		put_disk(gd);
	}

	/* kfree() accepts NULL, so this also covers a device without image */
	kfree(dev->imgname);
	dev->imgname = NULL;

	mutex_destroy(&dev->device_lock);
}
/**
 * dnbd3_exit - exit the dnbd3 module
 *
 * Collects all devices registered in dnbd3_index_idr, removes them one by
 * one and then releases the idr, the block major, the device array and the
 * workqueue.
 */
static void __exit dnbd3_exit(void)
{
	struct dnbd3_device *cur;
	LIST_HEAD(pending);

	debug("stopping kernel module");

	/* move every registered device onto a private list */
	mutex_lock(&dnbd3_index_mutex);
	idr_for_each(&dnbd3_index_idr, &dnbd3_exit_cb, &pending);
	mutex_unlock(&dnbd3_index_mutex);

	/* take the devices off the list one at a time and tear them down */
	for (;;) {
		if (list_empty(&pending))
			break;

		cur = list_first_entry(&pending, struct dnbd3_device, list);
		dnbd3_sysfs_exit(cur);
		list_del_init(&cur->list);

		mutex_lock(&dnbd3_index_mutex);
		idr_remove(&dnbd3_index_idr, cur->minor);
		mutex_unlock(&dnbd3_index_mutex);

		dnbd3_dev_remove(cur);
	}

	idr_destroy(&dnbd3_index_idr);
	unregister_blkdev(major, "dnbd3");
	kfree(device);
	destroy_workqueue(dnbd3_wq);

	info("stopped kernel module");
}
/* module entry and exit points */
module_init(dnbd3_init);
module_exit(dnbd3_exit);
MODULE_DESCRIPTION("Distributed Network Block Device 3");
MODULE_LICENSE("GPL");
/*
 * max_devs is given at module load; permission 0444 exposes it read-only.
 * NOTE(review): the description below states a default of 8, while the
 * variable is initialized with NUMBER_DEVICES - confirm that both agree.
 */
module_param(max_devs, int, 0444);
MODULE_PARM_DESC(max_devs, "number of network block devices to initialize (default: 8)");