From 69f47505ee66afaa513305de0c1895a224e52c45 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 8 Apr 2019 19:26:17 +0300 Subject: block: avoid recursive block_status call if possible drv_co_block_status digs bs->file for additional, more accurate search for hole inside region, reported as DATA by bs since 5daa74a6ebc. This accuracy is not free: assume we have qcow2 disk. Actually, qcow2 knows, where are holes and where is data. But every block_status request calls lseek additionally. Assume a big disk, full of data, in any iterative copying block job (or img convert) we'll call lseek(HOLE) on every iteration, and each of these lseeks will have to iterate through all metadata up to the end of file. It's obviously ineffective behavior. And for many scenarios we don't need this lseek at all. However, lseek is needed when we have metadata-preallocated image. So, let's detect metadata-preallocation case and don't dig qcow2's protocol file in other cases. The idea is to compare allocation size in POV of filesystem with allocations size in POV of Qcow2 (by refcounts). If allocation in fs is significantly lower, consider it as metadata-preallocation case. 102 iotest changed, as our detector can't detect shrinked file as metadata-preallocation, which don't seem to be wrong, as with metadata preallocation we always have valid file length. Two other iotests have a slight change in their QMP output sequence: Active 'block-commit' returns earlier because the job coroutine yields earlier on a blocking operation. This operation is loading the refcount blocks in qcow2_detect_metadata_preallocation(). Suggested-by: Denis V. Lunev Signed-off-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- include/block/block.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/block/block.h b/include/block/block.h index 9b083e2bca..531cf595cf 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -156,10 +156,15 @@ typedef struct HDGeometry { * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this * layer, set by block layer * - * Internal flag: + * Internal flags: * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request * that the block layer recompute the answer from the returned * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. + * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for + * zeroes in file child of current block node inside + * returned region. Only valid together with both + * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not + * appear with BDRV_BLOCK_ZERO. * * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the * host offset within the returned BDS that is allocated for the @@ -184,6 +189,7 @@ typedef struct HDGeometry { #define BDRV_BLOCK_RAW 0x08 #define BDRV_BLOCK_ALLOCATED 0x10 #define BDRV_BLOCK_EOF 0x20 +#define BDRV_BLOCK_RECURSE 0x40 #define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; -- cgit v1.2.3-55-g7522 From 2b02fd81ded2f8fefbdacee431fc9fb006f92bc6 Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Sun, 2 Jun 2019 23:17:09 +0300 Subject: block/linux-aio: Drop unused BlockAIOCB submission method Callback-based laio_submit() and laio_cancel() were left after rewriting Linux AIO backend to coroutines in hope that they would be used in other code that could bypass coroutines. They can be safely removed because they have not been used since that time. Signed-off-by: Julia Suvorova Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/linux-aio.c | 72 +++++++------------------------------------------ include/block/raw-aio.h | 3 --- 2 files changed, 10 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/block/linux-aio.c b/block/linux-aio.c index d4b61fb251..27100c2fd1 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -30,7 +30,6 @@ #define MAX_EVENTS 128 struct qemu_laiocb { - BlockAIOCB common; Coroutine *co; LinuxAioState *ctx; struct iocb iocb; @@ -72,7 +71,7 @@ static inline ssize_t io_event_ret(struct io_event *ev) } /* - * Completes an AIO request (calls the callback and frees the ACB). + * Completes an AIO request. */ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) { @@ -94,18 +93,15 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) } laiocb->ret = ret; - if (laiocb->co) { - /* If the coroutine is already entered it must be in ioq_submit() and - * will notice laio->ret has been filled in when it eventually runs - * later. Coroutines cannot be entered recursively so avoid doing - * that! - */ - if (!qemu_coroutine_entered(laiocb->co)) { - aio_co_wake(laiocb->co); - } - } else { - laiocb->common.cb(laiocb->common.opaque, ret); - qemu_aio_unref(laiocb); + + /* + * If the coroutine is already entered it must be in ioq_submit() and + * will notice laio->ret has been filled in when it eventually runs + * later. Coroutines cannot be entered recursively so avoid doing + * that! + */ + if (!qemu_coroutine_entered(laiocb->co)) { + aio_co_wake(laiocb->co); } } @@ -273,30 +269,6 @@ static bool qemu_laio_poll_cb(void *opaque) return true; } -static void laio_cancel(BlockAIOCB *blockacb) -{ - struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb; - struct io_event event; - int ret; - - if (laiocb->ret != -EINPROGRESS) { - return; - } - ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event); - laiocb->ret = -ECANCELED; - if (ret != 0) { - /* iocb is not cancelled, cb will be called by the event loop later */ - return; - } - - laiocb->common.cb(laiocb->common.opaque, laiocb->ret); -} - -static const AIOCBInfo laio_aiocb_info = { - .aiocb_size = sizeof(struct qemu_laiocb), - .cancel_async = laio_cancel, -}; - static void ioq_init(LaioQueue *io_q) { QSIMPLEQ_INIT(&io_q->pending); @@ -431,30 +403,6 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, return laiocb.ret; } -BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type) -{ - struct qemu_laiocb *laiocb; - off_t offset = sector_num * BDRV_SECTOR_SIZE; - int ret; - - laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); - laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE; - laiocb->ctx = s; - laiocb->ret = -EINPROGRESS; - laiocb->is_read = (type == QEMU_AIO_READ); - laiocb->qiov = qiov; - - ret = laio_do_submit(fd, laiocb, offset, type); - if (ret < 0) { - qemu_aio_unref(laiocb); - return NULL; - } - - return &laiocb->common; -} - void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) { aio_set_event_notifier(old_context, &s->e, false, NULL, NULL); diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index ba223dd1f1..0cb7cc74a2 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -50,9 +50,6 @@ LinuxAioState *laio_init(Error **errp); void laio_cleanup(LinuxAioState *s); int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, uint64_t offset, QEMUIOVector *qiov, int type); -BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type); void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); -- cgit v1.2.3-55-g7522 From 3036a626e9efad8ee5dc178b149184cf87bd06b0 Mon Sep 17 00:00:00 2001 From: Kenneth Heitke Date: Mon, 20 May 2019 11:40:30 -0600 Subject: nvme: add Get/Set Feature Timestamp support Signed-off-by: Kenneth Heitke Reviewed-by: Klaus Birkelund Jensen Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++- hw/block/nvme.h | 2 + hw/block/trace-events | 2 + include/block/nvme.h | 2 + 4 files changed, 110 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 63a5b58849..30e50f7a38 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -219,6 +219,30 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, return NVME_INVALID_FIELD | NVME_DNR; } +static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + uint64_t prp1, uint64_t prp2) +{ + QEMUSGList qsg; + QEMUIOVector iov; + uint16_t status = NVME_SUCCESS; + + if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (qsg.nsg > 0) { + if (dma_buf_write(ptr, len, &qsg)) { + status = NVME_INVALID_FIELD | NVME_DNR; + } + qemu_sglist_destroy(&qsg); + } else { + if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { + status = NVME_INVALID_FIELD | NVME_DNR; + } + qemu_iovec_destroy(&iov); + } + return status; +} + static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint64_t prp1, uint64_t prp2) { @@ -678,7 +702,6 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) return ret; } - static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) { NvmeIdentify *c = (NvmeIdentify *)cmd; @@ -696,6 +719,57 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) } } +static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) +{ + trace_nvme_setfeat_timestamp(ts); + + n->host_timestamp = le64_to_cpu(ts); + n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); +} + +static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) +{ + uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms; + + union nvme_timestamp { + struct { + uint64_t timestamp:48; + uint64_t sync:1; + uint64_t origin:3; + uint64_t rsvd1:12; + }; + uint64_t all; + }; + + union nvme_timestamp ts; + ts.all = 0; + + /* + * If the sum of the Timestamp value set by the host and the elapsed + * time exceeds 2^48, the value returned should be reduced modulo 2^48. + */ + ts.timestamp = (n->host_timestamp + elapsed_time) & 0xffffffffffff; + + /* If the host timestamp is non-zero, set the timestamp origin */ + ts.origin = n->host_timestamp ? 0x01 : 0x00; + + trace_nvme_getfeat_timestamp(ts.all); + + return cpu_to_le64(ts.all); +} + +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) +{ + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + uint64_t timestamp = nvme_get_timestamp(n); + + return nvme_dma_read_prp(n, (uint8_t *)×tamp, + sizeof(timestamp), prp1, prp2); +} + static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); @@ -710,6 +784,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); trace_nvme_getfeat_numq(result); break; + case NVME_TIMESTAMP: + return nvme_get_feature_timestamp(n, cmd); + break; default: trace_nvme_err_invalid_getfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -719,6 +796,24 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) +{ + uint16_t ret; + uint64_t timestamp; + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + ret = nvme_dma_write_prp(n, (uint8_t *)×tamp, + sizeof(timestamp), prp1, prp2); + if (ret != NVME_SUCCESS) { + return ret; + } + + nvme_set_timestamp(n, timestamp); + + return NVME_SUCCESS; +} + static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); @@ -735,6 +830,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) req->cqe.result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); break; + + case NVME_TIMESTAMP: + return nvme_set_feature_timestamp(n, cmd); + break; + default: trace_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -907,6 +1007,8 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, NVME_AQA_ASQS(n->bar.aqa) + 1); + nvme_set_timestamp(n, 0ULL); + return 0; } @@ -1270,7 +1372,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); - id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS); + id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP); id->psd[0].mp = cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 56c9d4b4b1..557194ee19 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -79,6 +79,8 @@ typedef struct NvmeCtrl { uint32_t cmbloc; uint8_t *cmbuf; uint64_t irq_status; + uint64_t host_timestamp; /* Timestamp sent by the host */ + uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ char *serial; NvmeNamespace *namespaces; diff --git a/hw/block/trace-events b/hw/block/trace-events index b92039a573..97a17838ed 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -46,6 +46,8 @@ nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" +nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 849a6f3fa3..3ec8efcc43 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -581,6 +581,7 @@ enum NvmeIdCtrlOncs { NVME_ONCS_WRITE_ZEROS = 1 << 3, NVME_ONCS_FEATURES = 1 << 4, NVME_ONCS_RESRVATIONS = 1 << 5, + NVME_ONCS_TIMESTAMP = 1 << 6, }; #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf) @@ -622,6 +623,7 @@ enum NvmeFeatureIds { NVME_INTERRUPT_VECTOR_CONF = 0x9, NVME_WRITE_ATOMICITY = 0xa, NVME_ASYNCHRONOUS_EVENT_CONF = 0xb, + NVME_TIMESTAMP = 0xe, NVME_SOFTWARE_PROGRESS_MARKER = 0x80 }; -- cgit v1.2.3-55-g7522 From 97896a4887a0a29c3314c5f0e9a82e269a6401fc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 2 May 2019 11:10:59 +0200 Subject: block: Add Error to blk_set_aio_context() Add an Error parameter to blk_set_aio_context() and use bdrv_child_try_set_aio_context() internally to check whether all involved nodes can actually support the AioContext switch. Signed-off-by: Kevin Wolf --- block/block-backend.c | 26 ++++++++++++++++---------- hw/block/dataplane/virtio-blk.c | 12 +++++++++--- hw/block/dataplane/xen-block.c | 6 ++++-- hw/scsi/virtio-scsi.c | 10 +++++++--- include/sysemu/block-backend.h | 3 ++- tests/test-bdrv-drain.c | 8 ++++---- tests/test-block-iothread.c | 22 +++++++++++----------- 7 files changed, 53 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/block/block-backend.c b/block/block-backend.c index ad3e1c882d..390fde6f71 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1865,30 +1865,36 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) return blk_get_aio_context(blk_acb->blk); } -static void blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, - bool update_root_node) +static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + bool update_root_node, Error **errp) { BlockDriverState *bs = blk_bs(blk); ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + int ret; if (bs) { + if (update_root_node) { + ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, + errp); + if (ret < 0) { + return ret; + } + } if (tgm->throttle_state) { bdrv_drained_begin(bs); throttle_group_detach_aio_context(tgm); throttle_group_attach_aio_context(tgm, new_context); bdrv_drained_end(bs); } - if (update_root_node) { - GSList *ignore = g_slist_prepend(NULL, blk->root); - bdrv_set_aio_context_ignore(bs, new_context, &ignore); - g_slist_free(ignore); - } } + + return 0; } -void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp) { - blk_do_set_aio_context(blk, new_context, true); + return blk_do_set_aio_context(blk, new_context, true, errp); } static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, @@ -1915,7 +1921,7 @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, GSList **ignore) { BlockBackend *blk = child->opaque; - blk_do_set_aio_context(blk, ctx, false); + blk_do_set_aio_context(blk, ctx, false, &error_abort); } void blk_add_aio_context_notifier(BlockBackend *blk, diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 8c37bd314a..158c78f852 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -173,6 +173,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); unsigned i; unsigned nvqs = s->conf->num_queues; + Error *local_err = NULL; int r; if (vblk->dataplane_started || s->starting) { @@ -212,7 +213,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) vblk->dataplane_started = true; trace_virtio_blk_data_plane_start(s); - blk_set_aio_context(s->conf->conf.blk, s->ctx); + r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); + if (r < 0) { + error_report_err(local_err); + goto fail_guest_notifiers; + } /* Kick right away to begin processing requests already in vring */ for (i = 0; i < nvqs; i++) { @@ -281,8 +286,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) aio_context_acquire(s->ctx); aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); - /* Drain and switch bs back to the QEMU main loop */ - blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context()); + /* Drain and try to switch bs back to the QEMU main loop. If other users + * keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); aio_context_release(s->ctx); diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index bb8f1186e4..f7ad452bbd 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -682,7 +682,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) } aio_context_acquire(dataplane->ctx); - blk_set_aio_context(dataplane->blk, qemu_get_aio_context()); + /* Xen doesn't have multiple users for nodes, so this can't fail */ + blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); aio_context_release(dataplane->ctx); xendev = dataplane->xendev; @@ -811,7 +812,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, } aio_context_acquire(dataplane->ctx); - blk_set_aio_context(dataplane->blk, dataplane->ctx); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); aio_context_release(dataplane->ctx); return; diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 839f120256..01c2b85f90 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -795,6 +795,7 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); VirtIOSCSI *s = VIRTIO_SCSI(vdev); SCSIDevice *sd = SCSI_DEVICE(dev); + int ret; if (s->ctx && !s->dataplane_fenced) { AioContext *ctx; @@ -808,9 +809,11 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, return; } virtio_scsi_acquire(s); - blk_set_aio_context(sd->conf.blk, s->ctx); + ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); virtio_scsi_release(s); - + if (ret < 0) { + return; + } } if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { @@ -839,7 +842,8 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, if (s->ctx) { virtio_scsi_acquire(s); - blk_set_aio_context(sd->conf.blk, qemu_get_aio_context()); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); virtio_scsi_release(s); } diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 938de34fe9..228fb3fb83 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -208,7 +208,8 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason); void blk_op_block_all(BlockBackend *blk, Error *reason); void blk_op_unblock_all(BlockBackend *blk, Error *reason); AioContext *blk_get_aio_context(BlockBackend *blk); -void blk_set_aio_context(BlockBackend *blk, AioContext *new_context); +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp); void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque); diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index 5534c2adf9..e86798923f 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -678,7 +678,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) s = bs->opaque; blk_insert_bs(blk, bs, &error_abort); - blk_set_aio_context(blk, ctx_a); + blk_set_aio_context(blk, ctx_a, &error_abort); aio_context_acquire(ctx_a); s->bh_indirection_ctx = ctx_b; @@ -742,7 +742,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) } aio_context_acquire(ctx_a); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); aio_context_release(ctx_a); bdrv_unref(bs); @@ -903,7 +903,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, if (use_iothread) { iothread = iothread_new(); ctx = iothread_get_aio_context(iothread); - blk_set_aio_context(blk_src, ctx); + blk_set_aio_context(blk_src, ctx, &error_abort); } else { ctx = qemu_get_aio_context(); } @@ -1001,7 +1001,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO)); if (use_iothread) { - blk_set_aio_context(blk_src, qemu_get_aio_context()); + blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); } aio_context_release(ctx); diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index e424d360c8..1d47ea9895 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -342,14 +342,14 @@ static void test_sync_op(const void *opaque) blk_insert_bs(blk, bs, &error_abort); c = QLIST_FIRST(&bs->parents); - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); aio_context_acquire(ctx); t->fn(c); if (t->blkfn) { t->blkfn(blk); } aio_context_release(ctx); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); bdrv_unref(bs); blk_unref(blk); @@ -428,7 +428,7 @@ static void test_attach_blockjob(void) aio_poll(qemu_get_aio_context(), false); } - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); tjob->n = 0; while (tjob->n == 0) { @@ -436,7 +436,7 @@ static void test_attach_blockjob(void) } aio_context_acquire(ctx); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); aio_context_release(ctx); tjob->n = 0; @@ -444,7 +444,7 @@ static void test_attach_blockjob(void) aio_poll(qemu_get_aio_context(), false); } - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); tjob->n = 0; while (tjob->n == 0) { @@ -453,7 +453,7 @@ static void test_attach_blockjob(void) aio_context_acquire(ctx); job_complete_sync(&tjob->common.job, &error_abort); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); aio_context_release(ctx); bdrv_unref(bs); @@ -497,7 +497,7 @@ static void test_propagate_basic(void) bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); /* Switch the AioContext */ - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(bs_a) == ctx); g_assert(bdrv_get_aio_context(bs_verify) == ctx); @@ -505,7 +505,7 @@ static void test_propagate_basic(void) /* Switch the AioContext back */ ctx = qemu_get_aio_context(); - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(bs_a) == ctx); g_assert(bdrv_get_aio_context(bs_verify) == ctx); @@ -565,7 +565,7 @@ static void test_propagate_diamond(void) blk_insert_bs(blk, bs_verify, &error_abort); /* Switch the AioContext */ - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(bs_verify) == ctx); g_assert(bdrv_get_aio_context(bs_a) == ctx); @@ -574,7 +574,7 @@ static void test_propagate_diamond(void) /* Switch the AioContext back */ ctx = qemu_get_aio_context(); - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(bs_verify) == ctx); g_assert(bdrv_get_aio_context(bs_a) == ctx); @@ -654,7 +654,7 @@ static void test_propagate_mirror(void) job_cancel_sync_all(); aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); bdrv_try_set_aio_context(target, main_ctx, &error_abort); aio_context_release(ctx); -- cgit v1.2.3-55-g7522 From d861ab3acf8dcf817e0c2335979b258847b69564 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 25 Apr 2019 14:25:10 +0200 Subject: block: Add BlockBackend.ctx This adds a new parameter to blk_new() which requires its callers to declare from which AioContext this BlockBackend is going to be used (or the locks of which AioContext need to be taken anyway). The given context is only stored and kept up to date when changing AioContexts. Actually applying the stored AioContext to the root node is saved for another commit. Signed-off-by: Kevin Wolf --- block.c | 2 +- block/backup.c | 3 ++- block/block-backend.c | 18 +++++++++++++++--- block/commit.c | 11 +++++++---- block/crypto.c | 3 ++- block/mirror.c | 3 ++- block/parallels.c | 3 ++- block/qcow.c | 3 ++- block/qcow2.c | 6 ++++-- block/qed.c | 3 ++- block/sheepdog.c | 3 ++- block/vdi.c | 3 ++- block/vhdx.c | 3 ++- block/vmdk.c | 3 ++- block/vpc.c | 3 ++- blockdev.c | 4 ++-- blockjob.c | 2 +- hmp.c | 3 ++- hw/block/fdc.c | 2 +- hw/block/xen-block.c | 2 +- hw/core/qdev-properties-system.c | 4 +++- hw/ide/qdev.c | 2 +- hw/scsi/scsi-disk.c | 2 +- include/sysemu/block-backend.h | 2 +- migration/block.c | 3 ++- nbd/server.c | 5 +++-- qemu-img.c | 6 ++++-- tests/test-bdrv-drain.c | 30 +++++++++++++++--------------- tests/test-bdrv-graph-mod.c | 5 +++-- tests/test-block-backend.c | 6 ++++-- tests/test-block-iothread.c | 10 +++++----- tests/test-blockjob.c | 2 +- tests/test-throttle.c | 6 +++--- 33 files changed, 102 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 4c3902508d..69ceac6377 100644 --- a/block.c +++ b/block.c @@ -2884,7 +2884,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, /* Not requesting BLK_PERM_CONSISTENT_READ because we're only * looking at the header to guess the image format. This works even * in cases where a guest would not see a consistent state. */ - file = blk_new(0, BLK_PERM_ALL); + file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); if (local_err) { diff --git a/block/backup.c b/block/backup.c index 00f4f8af53..715e1d3be8 100644 --- a/block/backup.c +++ b/block/backup.c @@ -627,7 +627,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } /* The target must match the source in size, so no resize here either */ - job->target = blk_new(BLK_PERM_WRITE, + job->target = blk_new(job->common.job.aio_context, + BLK_PERM_WRITE, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); ret = blk_insert_bs(job->target, target, errp); diff --git a/block/block-backend.c b/block/block-backend.c index 390fde6f71..f03f14acb0 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -42,6 +42,7 @@ struct BlockBackend { char *name; int refcnt; BdrvChild *root; + AioContext *ctx; DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ @@ -322,12 +323,13 @@ static const BdrvChildRole child_root = { * * Return the new BlockBackend on success, null on failure. */ -BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; blk = g_new0(BlockBackend, 1); blk->refcnt = 1; + blk->ctx = ctx; blk->perm = perm; blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); @@ -347,6 +349,7 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) /* * Creates a new BlockBackend, opens a new BlockDriverState, and connects both. + * The new BlockBackend is in the main AioContext. * * Just as with bdrv_open(), after having called this function the reference to * @options belongs to the block layer (even on failure). @@ -382,7 +385,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, perm |= BLK_PERM_RESIZE; } - blk = blk_new(perm, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); @@ -1856,7 +1859,15 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) AioContext *blk_get_aio_context(BlockBackend *blk) { - return bdrv_get_aio_context(blk_bs(blk)); + BlockDriverState *bs = blk_bs(blk); + + /* FIXME The AioContext of bs and blk can be inconsistent. For the moment, + * we prefer the one of bs for compatibility. */ + if (bs) { + return bdrv_get_aio_context(blk_bs(blk)); + } + + return blk->ctx; } static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) @@ -1888,6 +1899,7 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, } } + blk->ctx = new_context; return 0; } diff --git a/block/commit.c b/block/commit.c index 14e5bb394c..4d519506d6 100644 --- a/block/commit.c +++ b/block/commit.c @@ -338,7 +338,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, goto fail; } - s->base = blk_new(BLK_PERM_CONSISTENT_READ + s->base = blk_new(s->common.job.aio_context, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_CONSISTENT_READ @@ -351,7 +352,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, s->base_bs = base; /* Required permissions are already taken with block_job_add_bdrv() */ - s->top = blk_new(0, BLK_PERM_ALL); + s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL); ret = blk_insert_bs(s->top, top, errp); if (ret < 0) { goto fail; @@ -395,6 +396,7 @@ int bdrv_commit(BlockDriverState *bs) BlockDriverState *backing_file_bs = NULL; BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; + AioContext *ctx; int64_t offset, length, backing_length; int ro; int64_t n; @@ -422,8 +424,9 @@ int bdrv_commit(BlockDriverState *bs) } } - src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); - backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + ctx = bdrv_get_aio_context(bs); + src = blk_new(ctx, BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(src, bs, &local_err); if (ret < 0) { diff --git a/block/crypto.c b/block/crypto.c index 3af46b805f..7351fd479d 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -257,7 +257,8 @@ static int block_crypto_co_create_generic(BlockDriverState *bs, QCryptoBlock *crypto = NULL; struct BlockCryptoCreateData data; - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { diff --git a/block/mirror.c b/block/mirror.c index ec4bd9f404..eb96b52de9 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1584,7 +1584,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, * We can allow anything except resize there.*/ target_is_backing = bdrv_chain_contains(bs, target); target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); - s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE | + s->target = blk_new(s->common.job.aio_context, + BLK_PERM_WRITE | BLK_PERM_RESIZE | (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), BLK_PERM_WRITE_UNCHANGED | (target_is_backing ? BLK_PERM_CONSISTENT_READ | diff --git a/block/parallels.c b/block/parallels.c index 2747400577..00fae125d1 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -554,7 +554,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, return -EIO; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; diff --git a/block/qcow.c b/block/qcow.c index 1bb8fd05e2..6dee5bb792 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -844,7 +844,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, return -EIO; } - qcow_blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + qcow_blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(qcow_blk, bs, errp); if (ret < 0) { goto exit; diff --git a/block/qcow2.c b/block/qcow2.c index 14f914117f..9396d490d5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3174,7 +3174,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) } /* Create BlockBackend to write to the image */ - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; @@ -5006,7 +5007,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (new_size) { - BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { blk_unref(blk); diff --git a/block/qed.c b/block/qed.c index dcdcd62b4a..bb4f5c9863 100644 --- a/block/qed.c +++ b/block/qed.c @@ -649,7 +649,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, return -EIO; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; diff --git a/block/sheepdog.c b/block/sheepdog.c index cbdfe9ab6e..f76d6ddbbc 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1800,7 +1800,8 @@ static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size, void *buf = NULL; int ret; - blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); diff --git a/block/vdi.c b/block/vdi.c index d7ef6628e7..b9845a4cbd 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -803,7 +803,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, goto exit; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs_file), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs_file, errp); if (ret < 0) { goto exit; diff --git a/block/vhdx.c b/block/vhdx.c index a143a57657..d6070b6fa8 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1900,7 +1900,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, return -EIO; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto delete_and_exit; diff --git a/block/vmdk.c b/block/vmdk.c index de8cb859f8..51067c774f 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2356,7 +2356,8 @@ static BlockBackend *vmdk_co_create_cb(int64_t size, int idx, if (!bs) { return NULL; } - blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); if (blk_insert_bs(blk, bs, errp)) { bdrv_unref(bs); diff --git a/block/vpc.c b/block/vpc.c index 0c279b87c8..d4776ee8a5 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -1011,7 +1011,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, return -EIO; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; diff --git a/blockdev.c b/blockdev.c index d88dc115f2..e1ad92c559 100644 --- a/blockdev.c +++ b/blockdev.c @@ -574,7 +574,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; - blk = blk_new(0, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; blk_rs->read_only = read_only; @@ -3154,7 +3154,7 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } - blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; diff --git a/blockjob.c b/blockjob.c index cc5f18e7cd..29517ae162 100644 --- a/blockjob.c +++ b/blockjob.c @@ -392,7 +392,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, job_id = bdrv_get_device_name(bs); } - blk = blk_new(perm, shared_perm); + blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { blk_unref(blk); diff --git a/hmp.c b/hmp.c index 56a3ed7375..be5e345c6f 100644 --- a/hmp.c +++ b/hmp.c @@ -2560,7 +2560,8 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); if (bs) { - blk = local_blk = blk_new(0, BLK_PERM_ALL); + blk = local_blk = blk_new(bdrv_get_aio_context(bs), + 0, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, &err); if (ret < 0) { goto fail; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 6f19f127a5..37ccedc9f7 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -538,7 +538,7 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(0, BLK_PERM_ALL); + dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); } diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index ef635be4c2..31b0f5ccc8 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -609,7 +609,7 @@ static void xen_cdrom_realize(XenBlockDevice *blockdev, Error **errp) int rc; /* Set up an empty drive */ - conf->blk = blk_new(0, BLK_PERM_ALL); + conf->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); rc = blk_attach_dev(conf->blk, DEVICE(blockdev)); if (!rc) { diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index b45a7ef54b..42e048f190 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -80,7 +80,9 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - blk = blk_new(0, BLK_PERM_ALL); + /* BlockBackends of devices start in the main context and are only + * later moved into another context if the device supports that. */ + blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_created = true; ret = blk_insert_bs(blk, bs, errp); diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index 573b022e1e..360cd20bd8 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -168,7 +168,7 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp) return; } else { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(0, BLK_PERM_ALL); + dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, &dev->qdev); assert(ret == 0); } diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index e7e865ab3b..91c5a8b1ac 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2417,7 +2417,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive. As we put it into * dev->conf, qdev takes care of detaching on unplug. */ - dev->conf.blk = blk_new(0, BLK_PERM_ALL); + dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, &dev->qdev); assert(ret == 0); } diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 228fb3fb83..733c4957eb 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -76,7 +76,7 @@ typedef struct BlockBackendPublic { ThrottleGroupMember throttle_group_member; } BlockBackendPublic; -BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm); +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); int blk_get_refcnt(BlockBackend *blk); diff --git a/migration/block.c b/migration/block.c index 83c633fb3f..91f98ef44a 100644 --- a/migration/block.c +++ b/migration/block.c @@ -417,7 +417,8 @@ static int init_blk_migration(QEMUFile *f) } bmds = g_new0(BlkMigDevState, 1); - bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + bmds->blk = blk_new(qemu_get_aio_context(), + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); bmds->blk_name = g_strdup(bdrv_get_device_name(bs)); bmds->bulk_completed = 0; bmds->total_sectors = sectors; diff --git a/nbd/server.c b/nbd/server.c index d1375350bc..aeca3893fe 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1484,8 +1484,9 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { perm |= BLK_PERM_WRITE; } - blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); + blk = blk_new(bdrv_get_aio_context(bs), perm, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto fail; diff --git a/qemu-img.c b/qemu-img.c index b0535919b1..07b6e2a808 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3313,7 +3313,8 @@ static int img_rebase(int argc, char **argv) BlockDriverState *base_bs = backing_bs(bs); if (base_bs) { - blk_old_backing = blk_new(BLK_PERM_CONSISTENT_READ, + blk_old_backing = blk_new(qemu_get_aio_context(), + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); ret = blk_insert_bs(blk_old_backing, base_bs, &local_err); @@ -3360,7 +3361,8 @@ static int img_rebase(int argc, char **argv) prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path); if (prefix_chain_bs) { g_free(out_real_path); - blk_new_backing = blk_new(BLK_PERM_CONSISTENT_READ, + blk_new_backing = blk_new(qemu_get_aio_context(), + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); ret = blk_insert_bs(blk_new_backing, prefix_chain_bs, &local_err); diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index e86798923f..fabbd52d93 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -206,7 +206,7 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive) QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; @@ -290,7 +290,7 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) BlockBackend *blk; BlockDriverState *bs, *backing; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); blk_insert_bs(blk, bs, &error_abort); @@ -353,7 +353,7 @@ static void test_nested(void) BDRVTestState *s, *backing_s; enum drain_type outer, inner; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; @@ -402,13 +402,13 @@ static void test_multiparent(void) BlockDriverState *bs_a, *bs_b, *backing; BDRVTestState *a_s, *b_s, *backing_s; - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; blk_insert_bs(blk_a, bs_a, &error_abort); - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -475,13 +475,13 @@ static void test_graph_change_drain_subtree(void) BlockDriverState *bs_a, *bs_b, *backing; BDRVTestState *a_s, *b_s, *backing_s; - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; blk_insert_bs(blk_a, bs_a, &error_abort); - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -555,7 +555,7 @@ static void test_graph_change_drain_all(void) BDRVTestState *a_s, *b_s; /* Create node A with a BlockBackend */ - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; @@ -571,7 +571,7 @@ static void test_graph_change_drain_all(void) g_assert_cmpint(a_s->drain_count, ==, 1); /* Create node B with a BlockBackend */ - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -672,7 +672,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) goto out; } - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; @@ -883,7 +883,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, bdrv_set_backing_hd(src, src_backing, &error_abort); bdrv_unref(src_backing); - blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk_src, src_overlay, &error_abort); switch (drain_node) { @@ -910,7 +910,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, &error_abort); - blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk_target, target, &error_abort); aio_context_acquire(ctx); @@ -1205,7 +1205,7 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, &error_abort); bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort); - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, bs, &error_abort); /* Referenced by blk now */ @@ -1368,7 +1368,7 @@ static void test_detach_indirect(bool by_parent_cb) c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort); /* blk is a BB for parent-a */ - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, parent_a, &error_abort); bdrv_unref(parent_a); @@ -1460,7 +1460,7 @@ static void test_append_to_drained(void) BlockDriverState *base, *overlay; BDRVTestState *base_s, *overlay_s; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); base_s = base->opaque; blk_insert_bs(blk, base, &error_abort); diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c index 747c0bf8fc..cfeec36566 100644 --- a/tests/test-bdrv-graph-mod.c +++ b/tests/test-bdrv-graph-mod.c @@ -102,7 +102,8 @@ static void test_update_perm_tree(void) { Error *local_err = NULL; - BlockBackend *root = blk_new(BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ, + BlockBackend *root = blk_new(qemu_get_aio_context(), + BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL & ~BLK_PERM_WRITE); BlockDriverState *bs = no_perm_node("node"); BlockDriverState *filter = pass_through_node("filter"); @@ -165,7 +166,7 @@ static void test_update_perm_tree(void) */ static void test_should_update_child(void) { - BlockBackend *root = blk_new(0, BLK_PERM_ALL); + BlockBackend *root = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); BlockDriverState *bs = no_perm_node("node"); BlockDriverState *filter = no_perm_node("filter"); BlockDriverState *target = no_perm_node("target"); diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c index fd59f02bd0..5b5d6845c0 100644 --- a/tests/test-block-backend.c +++ b/tests/test-block-backend.c @@ -37,7 +37,8 @@ static void test_drain_aio_error_flush_cb(void *opaque, int ret) static void test_drain_aio_error(void) { - BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + BlockBackend *blk = blk_new(qemu_get_aio_context(), + BLK_PERM_ALL, BLK_PERM_ALL); BlockAIOCB *acb; bool completed = false; @@ -53,7 +54,8 @@ static void test_drain_aio_error(void) static void test_drain_all_aio_error(void) { - BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + BlockBackend *blk = blk_new(qemu_get_aio_context(), + BLK_PERM_ALL, BLK_PERM_ALL); BlockAIOCB *acb; bool completed = false; diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 1d47ea9895..2200487d76 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -336,7 +336,7 @@ static void test_sync_op(const void *opaque) BlockDriverState *bs; BdrvChild *c; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); bs->total_sectors = 65536 / BDRV_SECTOR_SIZE; blk_insert_bs(blk, bs, &error_abort); @@ -415,7 +415,7 @@ static void test_attach_blockjob(void) BlockDriverState *bs; TestBlockJob *tjob; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); blk_insert_bs(blk, bs, &error_abort); @@ -481,7 +481,7 @@ static void test_propagate_basic(void) QDict *options; /* Create bs_a and its BlockBackend */ - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "bs_a", BDRV_O_RDWR, &error_abort); blk_insert_bs(blk, bs_a, &error_abort); @@ -561,7 +561,7 @@ static void test_propagate_diamond(void) qdict_put_str(options, "raw", "bs_c"); bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, bs_verify, &error_abort); /* Switch the AioContext */ @@ -628,7 +628,7 @@ static void test_propagate_mirror(void) g_assert(bdrv_get_aio_context(filter) == main_ctx); /* With a BlockBackend on src, changing target must fail */ - blk = blk_new(0, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_insert_bs(blk, src, &error_abort); bdrv_try_set_aio_context(target, ctx, &local_err); diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 652d1e8359..8c91980c70 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -68,7 +68,7 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, static BlockBackend *create_blk(const char *name) { /* No I/O is performed on this device */ - BlockBackend *blk = blk_new(0, BLK_PERM_ALL); + BlockBackend *blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); BlockDriverState *bs; bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 948a42c991..5644cf95ca 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -675,9 +675,9 @@ static void test_groups(void) ThrottleGroupMember *tgm1, *tgm2, *tgm3; /* No actual I/O is performed on these devices */ - blk1 = blk_new(0, BLK_PERM_ALL); - blk2 = blk_new(0, BLK_PERM_ALL); - blk3 = blk_new(0, BLK_PERM_ALL); + blk1 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + blk2 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + blk3 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blkp1 = blk_get_public(blk1); blkp2 = blk_get_public(blk2); -- cgit v1.2.3-55-g7522 From 307a5f60ebe0506a5b90f323fdcabb4333405484 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 29 Apr 2019 17:40:14 +0200 Subject: block: Add qdev_prop_drive_iothread property type Some qdev block devices have support for iothreads and take care of the AioContext they are running in, but most devices don't know about any of this. For the latter category, the qdev drive property must make sure that their BlockBackend is in the main AioContext. Unfortunately, while the current code just does the same thing for devices that do support iothreads, this is not correct and it would show as soon as we actually try to keep a consistent AioContext assignment across all nodes and users of a block graph subtree: If a node is already in a non-default AioContext because of one of its users, attaching a new device should still be possible if that device can work in the same AioContext. Switching the node back to the main context first and only then into the device AioContext causes failure (because the existing user wouldn't allow the switch to the main context). So devices that support iothreads need a different kind of drive property that leaves the node in its current AioContext, but by using this type, the device promises to check later that it can work with this context. This patch adds the qdev infrastructure that allows devices to signal that they handle iothreads and qdev should leave the AioContext alone. Signed-off-by: Kevin Wolf --- hw/core/qdev-properties-system.c | 43 +++++++++++++++++++++++++++++++++++----- include/hw/block/block.h | 7 +++++-- include/hw/qdev-properties.h | 3 +++ 3 files changed, 46 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 42e048f190..ba412dd2ca 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -69,8 +69,8 @@ static void set_pointer(Object *obj, Visitor *v, Property *prop, /* --- drive --- */ -static void parse_drive(DeviceState *dev, const char *str, void **ptr, - const char *propname, Error **errp) +static void do_parse_drive(DeviceState *dev, const char *str, void **ptr, + const char *propname, bool iothread, Error **errp) { BlockBackend *blk; bool blk_created = false; @@ -80,9 +80,16 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - /* BlockBackends of devices start in the main context and are only - * later moved into another context if the device supports that. */ - blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + /* + * If the device supports iothreads, it will make sure to move the + * block node to the right AioContext if necessary (or fail if this + * isn't possible because of other users). Devices that are not + * aware of iothreads require their BlockBackends to be in the main + * AioContext. + */ + AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : + qemu_get_aio_context(); + blk = blk_new(ctx, 0, BLK_PERM_ALL); blk_created = true; ret = blk_insert_bs(blk, bs, errp); @@ -120,6 +127,18 @@ fail: } } +static void parse_drive(DeviceState *dev, const char *str, void **ptr, + const char *propname, Error **errp) +{ + do_parse_drive(dev, str, ptr, propname, false, errp); +} + +static void parse_drive_iothread(DeviceState *dev, const char *str, void **ptr, + const char *propname, Error **errp) +{ + do_parse_drive(dev, str, ptr, propname, true, errp); +} + static void release_drive(Object *obj, const char *name, void *opaque) { DeviceState *dev = DEVICE(obj); @@ -162,6 +181,12 @@ static void set_drive(Object *obj, Visitor *v, const char *name, void *opaque, set_pointer(obj, v, opaque, parse_drive, name, errp); } +static void set_drive_iothread(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + set_pointer(obj, v, opaque, parse_drive_iothread, name, errp); +} + const PropertyInfo qdev_prop_drive = { .name = "str", .description = "Node name or ID of a block device to use as a backend", @@ -170,6 +195,14 @@ const PropertyInfo qdev_prop_drive = { .release = release_drive, }; +const PropertyInfo qdev_prop_drive_iothread = { + .name = "str", + .description = "Node name or ID of a block device to use as a backend", + .get = get_drive, + .set = set_drive_iothread, + .release = release_drive, +}; + /* --- character device --- */ static void get_chr(Object *obj, Visitor *v, const char *name, void *opaque, diff --git a/include/hw/block/block.h b/include/hw/block/block.h index d06f25aa0f..607539057a 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -45,8 +45,7 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) return exp; } -#define DEFINE_BLOCK_PROPERTIES(_state, _conf) \ - DEFINE_PROP_DRIVE("drive", _state, _conf.blk), \ +#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \ DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ _conf.logical_block_size), \ DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ @@ -59,6 +58,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) ON_OFF_AUTO_AUTO), \ DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false) +#define DEFINE_BLOCK_PROPERTIES(_state, _conf) \ + DEFINE_PROP_DRIVE("drive", _state, _conf.blk), \ + DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) + #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \ DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \ DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0), \ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index b6758c852e..1eae5ab056 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -28,6 +28,7 @@ extern const PropertyInfo qdev_prop_blockdev_on_error; extern const PropertyInfo qdev_prop_bios_chs_trans; extern const PropertyInfo qdev_prop_fdc_drive_type; extern const PropertyInfo qdev_prop_drive; +extern const PropertyInfo qdev_prop_drive_iothread; extern const PropertyInfo qdev_prop_netdev; extern const PropertyInfo qdev_prop_pci_devfn; extern const PropertyInfo qdev_prop_blocksize; @@ -198,6 +199,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width; DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NICPeers) #define DEFINE_PROP_DRIVE(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *) +#define DEFINE_PROP_DRIVE_IOTHREAD(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_drive_iothread, BlockBackend *) #define DEFINE_PROP_MACADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr) #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \ -- cgit v1.2.3-55-g7522 From 4f71fb436a6fbec7a7b2360cdba741deeb24be67 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 26 Apr 2019 19:29:47 +0200 Subject: scsi-disk: Use qdev_prop_drive_iothread This makes use of qdev_prop_drive_iothread for scsi-disk so that the disk can be attached to a node that is already in the target AioContext. We need to check that the HBA actually supports iothreads, otherwise scsi-disk must make sure that the node is already in the main AioContext. This changes the error message for conflicting iothread settings. Previously, virtio-scsi produced the error message, now it comes from blk_set_aio_context(). Update a test case accordingly. Signed-off-by: Kevin Wolf --- hw/scsi/scsi-disk.c | 22 +++++++++++++++------- hw/scsi/virtio-scsi.c | 15 ++++++++------- include/hw/scsi/scsi.h | 1 + tests/qemu-iotests/240.out | 2 +- 4 files changed, 25 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 91c5a8b1ac..7b89ac798b 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2336,6 +2336,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } + if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() && + !s->qdev.hba_supports_iothread) + { + error_setg(errp, "HBA does not support iothreads"); + return; + } + if (dev->type == TYPE_DISK) { if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) { return; @@ -2929,13 +2936,14 @@ static const TypeInfo scsi_disk_base_info = { .abstract = true, }; -#define DEFINE_SCSI_DISK_PROPERTIES() \ - DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf), \ - DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf), \ - DEFINE_PROP_STRING("ver", SCSIDiskState, version), \ - DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \ - DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \ - DEFINE_PROP_STRING("product", SCSIDiskState, product), \ +#define DEFINE_SCSI_DISK_PROPERTIES() \ + DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk), \ + DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf), \ + DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf), \ + DEFINE_PROP_STRING("ver", SCSIDiskState, version), \ + DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \ + DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \ + DEFINE_PROP_STRING("product", SCSIDiskState, product), \ DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 01c2b85f90..2994f0738f 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -789,6 +789,13 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) } } +static void virtio_scsi_pre_hotplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + SCSIDevice *sd = SCSI_DEVICE(dev); + sd->hba_supports_iothread = true; +} + static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -798,16 +805,9 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, int ret; if (s->ctx && !s->dataplane_fenced) { - AioContext *ctx; if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; } - ctx = blk_get_aio_context(sd->conf.blk); - if (ctx != s->ctx && ctx != qemu_get_aio_context()) { - error_setg(errp, "Cannot attach a blockdev that is using " - "a different iothread"); - return; - } virtio_scsi_acquire(s); ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); virtio_scsi_release(s); @@ -990,6 +990,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data) vdc->reset = virtio_scsi_reset; vdc->start_ioeventfd = virtio_scsi_dataplane_start; vdc->stop_ioeventfd = virtio_scsi_dataplane_stop; + hc->pre_plug = virtio_scsi_pre_hotplug; hc->plug = virtio_scsi_hotplug; hc->unplug = virtio_scsi_hotunplug; } diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index acef25faa4..426566a5c6 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -88,6 +88,7 @@ struct SCSIDevice int scsi_version; int default_scsi_version; bool needs_vpd_bl_emulation; + bool hba_supports_iothread; }; extern const VMStateDescription vmstate_scsi_device; diff --git a/tests/qemu-iotests/240.out b/tests/qemu-iotests/240.out index 84e0a43ce5..d00df50297 100644 --- a/tests/qemu-iotests/240.out +++ b/tests/qemu-iotests/240.out @@ -43,7 +43,7 @@ QMP_VERSION {"return": {}} {"return": {}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Cannot attach a blockdev that is using a different iothread"}} +{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}} {"return": {}} {"return": {}} {"return": {}} -- cgit v1.2.3-55-g7522 From 132ada80c4a6fea7b67e8bb0a5fd299994d927c6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 24 Apr 2019 17:41:46 +0200 Subject: block: Adjust AioContexts when attaching nodes So far, we only made sure that updating the AioContext of a node affected the whole subtree. However, if a node is newly attached to a new parent, we also need to make sure that both the subtree of the node and the parent are in the same AioContext. This tries to move the new child node to the parent AioContext and returns an error if this isn't possible. BlockBackends now actually apply their AioContext to their root node. Signed-off-by: Kevin Wolf --- block.c | 35 ++++++++++++++++++++++++++++++++++- block/block-backend.c | 9 ++++----- blockjob.c | 10 ++++++++-- include/block/block_int.h | 1 + tests/test-bdrv-drain.c | 6 ++++-- 5 files changed, 51 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 69ceac6377..7d9ed1a724 100644 --- a/block.c +++ b/block.c @@ -2249,14 +2249,19 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) * * On failure NULL is returned, errp is set and the reference to * child_bs is also dropped. + * + * The caller must hold the AioContext lock @child_bs, but not that of @ctx + * (unless @child_bs is already in @ctx). */ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, + AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp) { BdrvChild *child; + Error *local_err = NULL; int ret; ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); @@ -2276,6 +2281,31 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, .opaque = opaque, }; + /* If the AioContexts don't match, first try to move the subtree of + * child_bs into the AioContext of the new parent. If this doesn't work, + * try moving the parent into the AioContext of child_bs instead. */ + if (bdrv_get_aio_context(child_bs) != ctx) { + ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); + if (ret < 0 && child_role->can_set_aio_ctx) { + GSList *ignore = g_slist_prepend(NULL, child);; + ctx = bdrv_get_aio_context(child_bs); + if (child_role->can_set_aio_ctx(child, ctx, &ignore, NULL)) { + error_free(local_err); + ret = 0; + g_slist_free(ignore); + ignore = g_slist_prepend(NULL, child);; + child_role->set_aio_ctx(child, ctx, &ignore); + } + g_slist_free(ignore); + } + if (ret < 0) { + error_propagate(errp, local_err); + g_free(child); + bdrv_abort_perm_update(child_bs); + return NULL; + } + } + /* This performs the matching bdrv_set_perm() for the above check. */ bdrv_replace_child(child, child_bs); @@ -2289,6 +2319,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, * * On failure NULL is returned, errp is set and the reference to * child_bs is also dropped. + * + * If @parent_bs and @child_bs are in different AioContexts, the caller must + * hold the AioContext lock for @child_bs, but not for @parent_bs. */ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, @@ -2302,11 +2335,11 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); assert(parent_bs->drv); - assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_role, + bdrv_get_aio_context(parent_bs), perm, shared_perm, parent_bs, errp); if (child == NULL) { return NULL; diff --git a/block/block-backend.c b/block/block-backend.c index f03f14acb0..f5d9407d20 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -392,7 +392,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, return NULL; } - blk->root = bdrv_root_attach_child(bs, "root", &child_root, + blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx, perm, BLK_PERM_ALL, blk, errp); if (!blk->root) { blk_unref(blk); @@ -803,7 +803,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; bdrv_ref(bs); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, + blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx, blk->perm, blk->shared_perm, blk, errp); if (blk->root == NULL) { return -EPERM; @@ -1861,10 +1861,9 @@ AioContext *blk_get_aio_context(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); - /* FIXME The AioContext of bs and blk can be inconsistent. For the moment, - * we prefer the one of bs for compatibility. */ if (bs) { - return bdrv_get_aio_context(blk_bs(blk)); + AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); + assert(ctx == blk->ctx); } return blk->ctx; diff --git a/blockjob.c b/blockjob.c index 29517ae162..931d675c0c 100644 --- a/blockjob.c +++ b/blockjob.c @@ -205,8 +205,14 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, BdrvChild *c; bdrv_ref(bs); - c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm, - job, errp); + if (job->job.aio_context != qemu_get_aio_context()) { + aio_context_release(job->job.aio_context); + } + c = bdrv_root_attach_child(bs, name, &child_job, job->job.aio_context, + perm, shared_perm, job, errp); + if (job->job.aio_context != qemu_get_aio_context()) { + aio_context_acquire(job->job.aio_context); + } if (c == NULL) { return -EPERM; } diff --git a/include/block/block_int.h b/include/block/block_int.h index 1eebc7c8f3..06df2bda1b 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -1163,6 +1163,7 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr); BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, + AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index fabbd52d93..16ea2b813f 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -912,6 +912,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, &error_abort); blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk_target, target, &error_abort); + blk_set_allow_aio_context_change(blk_target, true); aio_context_acquire(ctx); tjob = block_job_create("job0", &test_job_driver, NULL, src, @@ -972,7 +973,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_false(job->job.paused); g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ - do_drain_begin(drain_type, target); + do_drain_begin_unlocked(drain_type, target); if (drain_type == BDRV_DRAIN_ALL) { /* bdrv_drain_all() drains both src and target */ @@ -983,7 +984,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_true(job->job.paused); g_assert_false(job->job.busy); /* The job is paused */ - do_drain_end(drain_type, target); + do_drain_end_unlocked(drain_type, target); if (use_iothread) { /* paused is reset in the I/O thread, wait for it */ @@ -1002,6 +1003,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, if (use_iothread) { blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); + blk_set_aio_context(blk_target, qemu_get_aio_context(), &error_abort); } aio_context_release(ctx); -- cgit v1.2.3-55-g7522 From 42a65f02f9b380bd8074882d5844d4ea033389cc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 7 May 2019 18:31:38 +0200 Subject: block: Remove bdrv_set_aio_context() All callers of bdrv_set_aio_context() are eliminated now, they have moved to bdrv_try_set_aio_context() and related safe functions. Remove bdrv_set_aio_context(). With this, we can now know that the .set_aio_ctx callback must be present in bdrv_set_aio_context_ignore() because bdrv_can_set_aio_context() would have returned false previously, so instead of checking the condition, we can assert it. Signed-off-by: Kevin Wolf --- block.c | 30 ++++++++++++++---------------- docs/devel/multiple-iothreads.txt | 4 ++-- include/block/block.h | 9 --------- 3 files changed, 16 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index ddfae15d9b..e3e77feee0 100644 --- a/block.c +++ b/block.c @@ -5789,8 +5789,17 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, bs->walking_aio_notifiers = false; } -/* @ignore will accumulate all visited BdrvChild object. The caller is - * responsible for freeing the list afterwards. */ +/* + * Changes the AioContext used for fd handlers, timers, and BHs by this + * BlockDriverState and all its children and parents. + * + * The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for new_context (unless new_context is the + * same as the current context of bs). + * + * @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. + */ void bdrv_set_aio_context_ignore(BlockDriverState *bs, AioContext *new_context, GSList **ignore) { @@ -5813,10 +5822,9 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, if (g_slist_find(*ignore, child)) { continue; } - if (child->role->set_aio_ctx) { - *ignore = g_slist_prepend(*ignore, child); - child->role->set_aio_ctx(child, new_context, ignore); - } + assert(child->role->set_aio_ctx); + *ignore = g_slist_prepend(*ignore, child); + child->role->set_aio_ctx(child, new_context, ignore); } bdrv_detach_aio_context(bs); @@ -5830,16 +5838,6 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, aio_context_release(new_context); } -/* The caller must own the AioContext lock for the old AioContext of bs, but it - * must not own the AioContext lock for new_context (unless new_context is - * the same as the current context of bs). */ -void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) -{ - GSList *ignore_list = NULL; - bdrv_set_aio_context_ignore(bs, new_context, &ignore_list); - g_slist_free(ignore_list); -} - static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp) { diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt index 4f9012d154..aeb997bed5 100644 --- a/docs/devel/multiple-iothreads.txt +++ b/docs/devel/multiple-iothreads.txt @@ -109,7 +109,7 @@ The AioContext originates from the QEMU block layer, even though nowadays AioContext is a generic event loop that can be used by any QEMU subsystem. The block layer has support for AioContext integrated. Each BlockDriverState -is associated with an AioContext using bdrv_set_aio_context() and +is associated with an AioContext using bdrv_try_set_aio_context() and bdrv_get_aio_context(). This allows block layer code to process I/O inside the right AioContext. Other subsystems may wish to follow a similar approach. @@ -134,5 +134,5 @@ Long-running jobs (usually in the form of coroutines) are best scheduled in the BlockDriverState's AioContext to avoid the need to acquire/release around each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier, or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends, -can be used to get a notification whenever bdrv_set_aio_context() moves a +can be used to get a notification whenever bdrv_try_set_aio_context() moves a BlockDriverState to a different AioContext. diff --git a/include/block/block.h b/include/block/block.h index 531cf595cf..13ea050a5b 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -583,15 +583,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs); */ void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); -/** - * bdrv_set_aio_context: - * - * Changes the #AioContext used for fd handlers, timers, and BHs by this - * BlockDriverState and all its children. - * - * This function must be called with iothread lock held. - */ -void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); void bdrv_set_aio_context_ignore(BlockDriverState *bs, AioContext *new_context, GSList **ignore); int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, -- cgit v1.2.3-55-g7522 From d93e57268892d555d7c71ec30b25276b0d8132b6 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 23 Apr 2019 15:57:05 +0300 Subject: block/io: bdrv_pdiscard: support int64_t bytes parameter This fixes at least one overflow in qcow2_process_discards, which passes 64bit region length to bdrv_pdiscard where bytes (or sectors in the past) parameter is int since its introduction in 0b919fae. Signed-off-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/io.c | 16 ++++++++-------- include/block/block.h | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block/io.c b/block/io.c index 0f6ebd001c..9ba1bada36 100644 --- a/block/io.c +++ b/block/io.c @@ -2632,7 +2632,7 @@ int bdrv_flush(BlockDriverState *bs) typedef struct DiscardCo { BdrvChild *child; int64_t offset; - int bytes; + int64_t bytes; int ret; } DiscardCo; static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque) @@ -2643,14 +2643,15 @@ static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque) aio_wait_kick(); } -int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes) +int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes) { BdrvTrackedRequest req; int max_pdiscard, ret; int head, tail, align; BlockDriverState *bs = child->bs; - if (!bs || !bs->drv) { + if (!bs || !bs->drv || !bdrv_is_inserted(bs)) { return -ENOMEDIUM; } @@ -2658,9 +2659,8 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes) return -EPERM; } - ret = bdrv_check_byte_request(bs, offset, bytes); - if (ret < 0) { - return ret; + if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) { + return -EIO; } /* Do nothing if disabled. */ @@ -2695,7 +2695,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes) assert(max_pdiscard >= bs->bl.request_alignment); while (bytes > 0) { - int num = bytes; + int64_t num = bytes; if (head) { /* Make small requests to get to alignment boundaries. */ @@ -2757,7 +2757,7 @@ out: return ret; } -int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes) +int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) { Coroutine *co; DiscardCo rwco = { diff --git a/include/block/block.h b/include/block/block.h index 13ea050a5b..f9415ed740 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -434,8 +434,8 @@ void bdrv_drain_all(void); AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ cond); }) -int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes); -int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes); +int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); +int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); -- cgit v1.2.3-55-g7522