From dbe5c58f2a44f49572b3346400779fac818fcaea Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 28 Apr 2014 13:23:25 +0200 Subject: block/iscsi: allow fall back to WRITE SAME without UNMAP if the iscsi driver receives a write zeroes request with the BDRV_REQ_MAY_UNMAP flag set it fails with -ENOTSUP if the iscsi target does not support WRITE SAME with UNMAP. However, the BDRV_REQ_MAY_UNMAP is only a hint and writing zeroes with WRITE SAME will still be better than falling back to writing zeroes with WRITE16. Signed-off-by: Peter Lieven Signed-off-by: Paolo Bonzini --- block/iscsi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/iscsi.c b/block/iscsi.c index a636ea4f53..56f54193c8 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -809,13 +809,14 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, return -EINVAL; } - if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) { - /* WRITE SAME without UNMAP is not supported by the target */ - return -ENOTSUP; + if ((flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->lbp.lbpws) { + /* WRITE SAME with UNMAP is not supported by the target, + * fall back and try WRITE SAME without UNMAP */ + flags &= ~BDRV_REQ_MAY_UNMAP; } - if ((flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->lbp.lbpws) { - /* WRITE SAME with UNMAP is not supported by the target */ + if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) { + /* WRITE SAME without UNMAP is not supported by the target */ return -ENOTSUP; } -- cgit v1.2.3-55-g7522 From b03c38057b7ac4ffb60fa98a26dd4c8d5fa9c54c Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 28 Apr 2014 13:11:32 +0200 Subject: block/iscsi: speed up read for unallocated sectors this patch implements a cache that tracks if a page on the iscsi target is allocated or not. The cache is implemented in a way that it allows for false positives (e.g. pretending a page is allocated, but it isn't), but no false negatives. The cached allocation info is then used to speed up the read process for unallocated sectors by issueing a GET_LBA_STATUS request for all sectors that are not yet known to be allocated. If the read request is confirmed to fall into an unallocated range we directly return zeroes and do not transfer the data over the wire. Tests have shown that a relatively small amount of GET_LBA_STATUS requests happens a vServer boot time to fill the allocation cache (all those blocks are not queried again). Not to transfer all the data of unallocated sectors saves a lot of time, bandwidth and storage I/O load during block jobs or storage migration and it saves a lot of bandwidth as well for any big sequential read of the whole disk (e.g. block copy or speed tests) if a significant number of blocks is unallocated. Signed-off-by: Peter Lieven Signed-off-by: Paolo Bonzini --- block/iscsi.c | 300 ++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 198 insertions(+), 102 deletions(-) (limited to 'block') diff --git a/block/iscsi.c b/block/iscsi.c index 56f54193c8..6b8a0a34f7 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -30,6 +30,8 @@ #include "qemu-common.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "qemu/bitops.h" +#include "qemu/bitmap.h" #include "block/block_int.h" #include "trace.h" #include "block/scsi.h" @@ -59,6 +61,8 @@ typedef struct IscsiLun { struct scsi_inquiry_logical_block_provisioning lbp; struct scsi_inquiry_block_limits bl; unsigned char *zeroblock; + unsigned long *allocationmap; + int cluster_sectors; } IscsiLun; typedef struct IscsiTask { @@ -91,6 +95,7 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 #define ISCSI_CMD_RETRIES 5 +#define ISCSI_CHECKALLOC_THRES 63 static void iscsi_bh_cb(void *p) @@ -273,6 +278,32 @@ static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors, return 1; } +static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num, + int nb_sectors) +{ + if (iscsilun->allocationmap == NULL) { + return; + } + bitmap_set(iscsilun->allocationmap, + sector_num / iscsilun->cluster_sectors, + DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors)); +} + +static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num, + int nb_sectors) +{ + int64_t cluster_num, nb_clusters; + if (iscsilun->allocationmap == NULL) { + return; + } + cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors); + nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors + - cluster_num; + if (nb_clusters > 0) { + bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters); + } +} + static int coroutine_fn iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov) @@ -336,9 +367,127 @@ retry: return -EIO; } + iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors); + return 0; } + +static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun, + int64_t sector_num, int nb_sectors) +{ + unsigned long size; + if (iscsilun->allocationmap == NULL) { + return true; + } + size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors); + return !(find_next_bit(iscsilun->allocationmap, size, + sector_num / iscsilun->cluster_sectors) == size); +} + + +#if defined(LIBISCSI_FEATURE_IOVECTOR) + +static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum) +{ + IscsiLun *iscsilun = bs->opaque; + struct scsi_get_lba_status *lbas = NULL; + struct scsi_lba_status_descriptor *lbasd = NULL; + struct IscsiTask iTask; + int64_t ret; + + iscsi_co_init_iscsitask(iscsilun, &iTask); + + if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { + ret = -EINVAL; + goto out; + } + + /* default to all sectors allocated */ + ret = BDRV_BLOCK_DATA; + ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID; + *pnum = nb_sectors; + + /* LUN does not support logical block provisioning */ + if (iscsilun->lbpme == 0) { + goto out; + } + +retry: + if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun, + sector_qemu2lun(sector_num, iscsilun), + 8 + 16, iscsi_co_generic_cb, + &iTask) == NULL) { + ret = -ENOMEM; + goto out; + } + + while (!iTask.complete) { + iscsi_set_events(iscsilun); + qemu_coroutine_yield(); + } + + if (iTask.do_retry) { + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + iTask.task = NULL; + } + iTask.complete = 0; + goto retry; + } + + if (iTask.status != SCSI_STATUS_GOOD) { + /* in case the get_lba_status_callout fails (i.e. + * because the device is busy or the cmd is not + * supported) we pretend all blocks are allocated + * for backwards compatibility */ + goto out; + } + + lbas = scsi_datain_unmarshall(iTask.task); + if (lbas == NULL) { + ret = -EIO; + goto out; + } + + lbasd = &lbas->descriptors[0]; + + if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) { + ret = -EIO; + goto out; + } + + *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun); + + if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || + lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { + ret &= ~BDRV_BLOCK_DATA; + if (iscsilun->lbprz) { + ret |= BDRV_BLOCK_ZERO; + } + } + + if (ret & BDRV_BLOCK_ZERO) { + iscsi_allocationmap_clear(iscsilun, sector_num, *pnum); + } else { + iscsi_allocationmap_set(iscsilun, sector_num, *pnum); + } + + if (*pnum > nb_sectors) { + *pnum = nb_sectors; + } +out: + if (iTask.task != NULL) { + scsi_free_scsi_task(iTask.task); + } + return ret; +} + +#endif /* LIBISCSI_FEATURE_IOVECTOR */ + + static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov) @@ -355,6 +504,22 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, return -EINVAL; } +#if defined(LIBISCSI_FEATURE_IOVECTOR) + if (iscsilun->lbprz && nb_sectors > ISCSI_CHECKALLOC_THRES && + !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) { + int64_t ret; + int pnum; + ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum); + if (ret < 0) { + return ret; + } + if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) { + qemu_iovec_memset(iov, 0, 0x00, iov->size); + return 0; + } + } +#endif + lba = sector_qemu2lun(sector_num, iscsilun); num_sectors = sector_qemu2lun(nb_sectors, iscsilun); @@ -643,101 +808,6 @@ iscsi_getlength(BlockDriverState *bs) return len; } -#if defined(LIBISCSI_FEATURE_IOVECTOR) - -static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum) -{ - IscsiLun *iscsilun = bs->opaque; - struct scsi_get_lba_status *lbas = NULL; - struct scsi_lba_status_descriptor *lbasd = NULL; - struct IscsiTask iTask; - int64_t ret; - - iscsi_co_init_iscsitask(iscsilun, &iTask); - - if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { - ret = -EINVAL; - goto out; - } - - /* default to all sectors allocated */ - ret = BDRV_BLOCK_DATA; - ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID; - *pnum = nb_sectors; - - /* LUN does not support logical block provisioning */ - if (iscsilun->lbpme == 0) { - goto out; - } - -retry: - if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun, - sector_qemu2lun(sector_num, iscsilun), - 8 + 16, iscsi_co_generic_cb, - &iTask) == NULL) { - ret = -ENOMEM; - goto out; - } - - while (!iTask.complete) { - iscsi_set_events(iscsilun); - qemu_coroutine_yield(); - } - - if (iTask.do_retry) { - if (iTask.task != NULL) { - scsi_free_scsi_task(iTask.task); - iTask.task = NULL; - } - iTask.complete = 0; - goto retry; - } - - if (iTask.status != SCSI_STATUS_GOOD) { - /* in case the get_lba_status_callout fails (i.e. - * because the device is busy or the cmd is not - * supported) we pretend all blocks are allocated - * for backwards compatibility */ - goto out; - } - - lbas = scsi_datain_unmarshall(iTask.task); - if (lbas == NULL) { - ret = -EIO; - goto out; - } - - lbasd = &lbas->descriptors[0]; - - if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) { - ret = -EIO; - goto out; - } - - *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun); - if (*pnum > nb_sectors) { - *pnum = nb_sectors; - } - - if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || - lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { - ret &= ~BDRV_BLOCK_DATA; - if (iscsilun->lbprz) { - ret |= BDRV_BLOCK_ZERO; - } - } - -out: - if (iTask.task != NULL) { - scsi_free_scsi_task(iTask.task); - } - return ret; -} - -#endif /* LIBISCSI_FEATURE_IOVECTOR */ - static int coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) @@ -791,6 +861,8 @@ retry: return -EIO; } + iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors); + return 0; } @@ -865,6 +937,12 @@ retry: return -EIO; } + if (flags & BDRV_REQ_MAY_UNMAP) { + iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors); + } else { + iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors); + } + return 0; } @@ -1297,6 +1375,22 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); #endif + /* Guess the internal cluster (page) size of the iscsi target by the means + * of opt_unmap_gran. Transfer the unmap granularity only if it has a + * reasonable size */ + if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 64 * 1024 && + iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) { + iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran * + iscsilun->block_size) >> BDRV_SECTOR_BITS; +#if defined(LIBISCSI_FEATURE_IOVECTOR) + if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) { + iscsilun->allocationmap = + bitmap_new(DIV_ROUND_UP(bs->total_sectors, + iscsilun->cluster_sectors)); + } +#endif + } + out: qemu_opts_del(opts); if (initiator_name != NULL) { @@ -1330,6 +1424,7 @@ static void iscsi_close(BlockDriverState *bs) qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL); iscsi_destroy_context(iscsi); g_free(iscsilun->zeroblock); + g_free(iscsilun->allocationmap); memset(iscsilun, 0, sizeof(IscsiLun)); } @@ -1390,6 +1485,13 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset) return -EINVAL; } + if (iscsilun->allocationmap != NULL) { + g_free(iscsilun->allocationmap); + iscsilun->allocationmap = + bitmap_new(DIV_ROUND_UP(bs->total_sectors, + iscsilun->cluster_sectors)); + } + return 0; } @@ -1452,13 +1554,7 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) IscsiLun *iscsilun = bs->opaque; bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz; bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws; - /* Guess the internal cluster (page) size of the iscsi target by the means - * of opt_unmap_gran. Transfer the unmap granularity only if it has a - * reasonable size for bdi->cluster_size */ - if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 64 * 1024 && - iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) { - bdi->cluster_size = iscsilun->bl.opt_unmap_gran * iscsilun->block_size; - } + bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE; return 0; } -- cgit v1.2.3-55-g7522 From 5917af812e9c4bd6500927b26efe8d3e2f267bd0 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 28 Apr 2014 17:11:33 +0200 Subject: block/iscsi: clarify the meaning of ISCSI_CHECKALLOC_THRES Signed-off-by: Peter Lieven Signed-off-by: Paolo Bonzini --- block/iscsi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/iscsi.c b/block/iscsi.c index 6b8a0a34f7..e8d26bb781 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -95,7 +95,15 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 #define ISCSI_CMD_RETRIES 5 -#define ISCSI_CHECKALLOC_THRES 63 + +/* this threshhold is a trade-off knob to choose between + * the potential additional overhead of an extra GET_LBA_STATUS request + * vs. unnecessarily reading a lot of zero sectors over the wire. + * If a read request is greater or equal than ISCSI_CHECKALLOC_THRES + * sectors we check the allocation status of the area covered by the + * request first if the allocationmap indicates that the area might be + * unallocated. */ +#define ISCSI_CHECKALLOC_THRES 64 static void iscsi_bh_cb(void *p) @@ -505,7 +513,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, } #if defined(LIBISCSI_FEATURE_IOVECTOR) - if (iscsilun->lbprz && nb_sectors > ISCSI_CHECKALLOC_THRES && + if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES && !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) { int64_t ret; int pnum; -- cgit v1.2.3-55-g7522 From 3d2acaa308bfab65329ef983654b302899bfb2b0 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 28 Apr 2014 17:18:32 +0200 Subject: block/iscsi: allow cluster_size of 4K and greater depending on the target the opt_unmap_gran might be as low as 4K. As we know use this also as a knob to activate the allocationmap feature lower the barrier. The limit 4K (and not 512) is choosen to avoid a potentially too big allocationmap. Signed-off-by: Peter Lieven Signed-off-by: Paolo Bonzini --- block/iscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/iscsi.c b/block/iscsi.c index e8d26bb781..84bedfa8fc 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1386,7 +1386,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, /* Guess the internal cluster (page) size of the iscsi target by the means * of opt_unmap_gran. Transfer the unmap granularity only if it has a * reasonable size */ - if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 64 * 1024 && + if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 && iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) { iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran * iscsilun->block_size) >> BDRV_SECTOR_BITS; -- cgit v1.2.3-55-g7522 From 6a86dec61921163b6ab582df988416a6f0ca0ed5 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Sun, 4 May 2014 21:36:08 +0200 Subject: [PATCH] block/iscsi: bump year in copyright notice Signed-off-by: Peter Lieven --- block/iscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/iscsi.c b/block/iscsi.c index 84bedfa8fc..65bf97d674 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2,7 +2,7 @@ * QEMU Block driver for iSCSI images * * Copyright (c) 2010-2011 Ronnie Sahlberg - * Copyright (c) 2012-2013 Peter Lieven + * Copyright (c) 2012-2014 Peter Lieven * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal -- cgit v1.2.3-55-g7522