From 4085f5c7a239567a292876f46cb59d9b19bcf6ac Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 22 Sep 2016 21:45:50 -0400 Subject: block: reintroduce bdrv_flush_all Commit fe1a9cbc moved the flush_all routine from the bdrv layer to the block-backend layer. In doing so, however, the semantics of the routine changed slightly such that flush_all now used blk_flush instead of bdrv_flush. blk_flush can fail if the attached device model reports that it is not "available," (i.e. the tray is open.) This changed the semantics of flush_all such that it can now fail for e.g. open CDROM drives. Reintroduce bdrv_flush_all to regain the old semantics without having to alter the behavior of blk_flush or blk_flush_all, which are already 'doing the right thing.' Signed-off-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng Signed-off-by: Kevin Wolf --- include/block/block.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/block/block.h b/include/block/block.h index e18233afe0..811b060f41 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -333,6 +333,7 @@ int bdrv_inactivate_all(void); /* Ensure contents are flushed to disk. */ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); +int bdrv_flush_all(void); void bdrv_close_all(void); void bdrv_drain(BlockDriverState *bs); void coroutine_fn bdrv_co_drain(BlockDriverState *bs); -- cgit v1.2.3-55-g7522 From 49137bf6845eaecad51a047fc06dd11c56118460 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 22 Sep 2016 21:45:52 -0400 Subject: block-backend: remove blk_flush_all We can teach Xen to drain and flush each device as it needs to, instead of trying to flush ALL devices. This removes the last user of blk_flush_all. 
The function is therefore removed under the premise that any new uses of blk_flush_all would be the wrong paradigm: either flush the single device that requires flushing, or use an appropriate flush_all mechanism from outside of the BlkBackend layer. Signed-off-by: John Snow Reviewed-by: Max Reitz Acked-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/block-backend.c | 22 ---------------------- hw/i386/xen/xen_platform.c | 2 -- hw/ide/piix.c | 4 ++++ include/sysemu/block-backend.h | 1 - 4 files changed, 4 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/block/block-backend.c b/block/block-backend.c index 0bd19abdfb..f34bad5840 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1640,28 +1640,6 @@ int blk_commit_all(void) return 0; } -int blk_flush_all(void) -{ - BlockBackend *blk = NULL; - int result = 0; - - while ((blk = blk_all_next(blk)) != NULL) { - AioContext *aio_context = blk_get_aio_context(blk); - int ret; - - aio_context_acquire(aio_context); - if (blk_is_inserted(blk)) { - ret = blk_flush(blk); - if (ret < 0 && !result) { - result = ret; - } - } - aio_context_release(aio_context); - } - - return result; -} - /* throttling disk I/O limits */ void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index aa7839324c..f85635cc9a 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -134,8 +134,6 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v devices, and bit 2 the non-primary-master IDE devices. 
*/ if (val & UNPLUG_ALL_IDE_DISKS) { DPRINTF("unplug disks\n"); - blk_drain_all(); - blk_flush_all(); pci_unplug_disks(pci_dev->bus); } if (val & UNPLUG_ALL_NICS) { diff --git a/hw/ide/piix.c b/hw/ide/piix.c index c190fcaa3c..d5777fd0b3 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -179,6 +179,10 @@ int pci_piix3_xen_ide_unplug(DeviceState *dev) if (di != NULL && !di->media_cd) { BlockBackend *blk = blk_by_legacy_dinfo(di); DeviceState *ds = blk_get_attached_dev(blk); + + blk_drain(blk); + blk_flush(blk); + if (ds) { blk_detach_dev(blk, ds); } diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 3b29317349..24d1d85399 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -152,7 +152,6 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count); int blk_co_flush(BlockBackend *blk); int blk_flush(BlockBackend *blk); -int blk_flush_all(void); int blk_commit_all(void); void blk_drain(BlockBackend *blk); void blk_drain_all(void); -- cgit v1.2.3-55-g7522 From b85114f8cfbede8b153db68875973ef0790bf296 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 12 Sep 2016 19:08:31 +0200 Subject: block: Use 'detect-zeroes' option for 'blockdev-change-medium' Instead of modifying the new BDS after it has been opened, use the newly supported 'detect-zeroes' option in bdrv_open_common() so that all requirements are checked (detect-zeroes=unmap requires discard=unmap). 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block/block-backend.c | 9 ++++----- blockdev.c | 9 ++++++--- include/sysemu/block-backend.h | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/block/block-backend.c b/block/block-backend.c index f34bad5840..639294b8e6 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1592,13 +1592,12 @@ void blk_update_root_state(BlockBackend *blk) } /* - * Applies the information in the root state to the given BlockDriverState. This - * does not include the flags which have to be specified for bdrv_open(), use - * blk_get_open_flags_from_root_state() to inquire them. + * Returns the detect-zeroes setting to be used for bdrv_open() of a + * BlockDriverState which is supposed to inherit the root state. */ -void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs) +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) { - bs->detect_zeroes = blk->root_state.detect_zeroes; + return blk->root_state.detect_zeroes; } /* diff --git a/blockdev.c b/blockdev.c index 511260ce93..7b87bd8a71 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2546,6 +2546,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, BlockBackend *blk; BlockDriverState *medium_bs = NULL; int bdrv_flags; + bool detect_zeroes; int rc; QDict *options = NULL; Error *err = NULL; @@ -2585,8 +2586,12 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, abort(); } + options = qdict_new(); + detect_zeroes = blk_get_detect_zeroes_from_root_state(blk); + qdict_put(options, "detect-zeroes", + qstring_from_str(detect_zeroes ? 
"on" : "off")); + if (has_format) { - options = qdict_new(); qdict_put(options, "driver", qstring_from_str(format)); } @@ -2623,8 +2628,6 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, goto fail; } - blk_apply_root_state(blk, medium_bs); - qmp_blockdev_close_tray(has_device, device, has_id, id, errp); fail: diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 24d1d85399..a7993afcda 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -198,7 +198,7 @@ void blk_io_unplug(BlockBackend *blk); BlockAcctStats *blk_get_stats(BlockBackend *blk); BlockBackendRootState *blk_get_root_state(BlockBackend *blk); void blk_update_root_state(BlockBackend *blk); -void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, -- cgit v1.2.3-55-g7522 From 818584a43ab0ef52c131865128ef110f867726cd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 12 Sep 2016 18:03:18 +0200 Subject: block: Move 'discard' option to bdrv_open_common() This enables its use for nested child nodes. The compatibility between the 'discard' and 'detect-zeroes' setting is checked in bdrv_open_common() now as the former setting isn't available before calling bdrv_open() any more. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block.c | 17 ++++++++++++++++- blockdev.c | 25 ------------------------- include/block/block.h | 1 + 3 files changed, 17 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/block.c b/block.c index 1f1045738d..bb1f1ec957 100644 --- a/block.c +++ b/block.c @@ -765,7 +765,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, /* Our block drivers take care to send flushes and respect unmap policy, * so we can default to enable both on lower layers regardless of the * corresponding parent options. */ - flags |= BDRV_O_UNMAP; + qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); /* Clear flags that only apply to the top layer */ flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ | @@ -960,6 +960,11 @@ static QemuOptsList bdrv_runtime_opts = { .type = QEMU_OPT_STRING, .help = "try to optimize zero writes (off, on, unmap)", }, + { + .name = "discard", + .type = QEMU_OPT_STRING, + .help = "discard operation (ignore/off, unmap/on)", + }, { /* end of list */ } }, }; @@ -976,6 +981,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, const char *filename; const char *driver_name = NULL; const char *node_name = NULL; + const char *discard; const char *detect_zeroes; QemuOpts *opts; BlockDriver *drv; @@ -1045,6 +1051,15 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, } } + discard = qemu_opt_get(opts, "discard"); + if (discard != NULL) { + if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { + error_setg(errp, "Invalid discard option"); + ret = -EINVAL; + goto fail_opts; + } + } + detect_zeroes = qemu_opt_get(opts, "detect-zeroes"); if (detect_zeroes) { BlockdevDetectZeroesOptions value = diff --git a/blockdev.c b/blockdev.c index 7b87bd8a71..e2ace04346 100644 --- a/blockdev.c +++ b/blockdev.c @@ -356,7 +356,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int 
*bdrv_flags, const char **throttling_group, ThrottleConfig *throttle_cfg, BlockdevDetectZeroesOptions *detect_zeroes, Error **errp) { - const char *discard; Error *local_error = NULL; const char *aio; @@ -365,13 +364,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, *bdrv_flags |= BDRV_O_COPY_ON_READ; } - if ((discard = qemu_opt_get(opts, "discard")) != NULL) { - if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) { - error_setg(errp, "Invalid discard option"); - return; - } - } - if ((aio = qemu_opt_get(opts, "aio")) != NULL) { if (!strcmp(aio, "native")) { *bdrv_flags |= BDRV_O_NATIVE_AIO; @@ -449,15 +441,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, error_propagate(errp, local_error); return; } - - if (bdrv_flags && - *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && - !(*bdrv_flags & BDRV_O_UNMAP)) - { - error_setg(errp, "setting detect-zeroes to unmap is not allowed " - "without setting discard operation to unmap"); - return; - } } } @@ -3989,10 +3972,6 @@ QemuOptsList qemu_common_drive_opts = { .name = "snapshot", .type = QEMU_OPT_BOOL, .help = "enable/disable snapshot mode", - },{ - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", },{ .name = "aio", .type = QEMU_OPT_STRING, @@ -4125,10 +4104,6 @@ static QemuOptsList qemu_root_bds_opts = { .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head), .desc = { { - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", - },{ .name = "aio", .type = QEMU_OPT_STRING, .help = "host AIO implementation (threads, native)", diff --git a/include/block/block.h b/include/block/block.h index 811b060f41..107c603605 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -108,6 +108,7 @@ typedef struct HDGeometry { #define BDRV_OPT_CACHE_DIRECT "cache.direct" #define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" #define BDRV_OPT_READ_ONLY "read-only" 
+#define BDRV_OPT_DISCARD "discard" #define BDRV_SECTOR_BITS 9 -- cgit v1.2.3-55-g7522 From 8737d9e0c4017aaa5ab1fcf1356c8ee4f7caf1df Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 27 Sep 2016 11:58:40 +0200 Subject: oslib-posix: add helpers for stack alloc and free the allocated stack will be adjusted to the minimum supported stack size by the OS and rounded up to be a multiple of the system pagesize. Additionally an architecture dependent guard page is added to the stack to catch stack overflows. Signed-off-by: Peter Lieven Signed-off-by: Kevin Wolf --- include/sysemu/os-posix.h | 27 +++++++++++++++++++++++++++ util/oslib-posix.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) (limited to 'include') diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 9c7dfdfbec..3cfedbc28b 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -60,4 +60,31 @@ int qemu_utimens(const char *path, const qemu_timespec *times); bool is_daemonized(void); +/** + * qemu_alloc_stack: + * @sz: pointer to a size_t holding the requested usable stack size + * + * Allocate memory that can be used as a stack, for instance for + * coroutines. If the memory cannot be allocated, this function + * will abort (like g_malloc()). This function also inserts an + * additional guard page to catch a potential stack overflow. + * Note that the memory required for the guard page and alignment + * and minimal stack size restrictions will increase the value of sz. + * + * The allocated stack must be freed with qemu_free_stack(). + * + * Returns: pointer to (the lowest address of) the stack memory. + */ +void *qemu_alloc_stack(size_t *sz); + +/** + * qemu_free_stack: + * @stack: stack to free + * @sz: size of stack in bytes + * + * Free a stack allocated via qemu_alloc_stack(). Note that sz must + * be exactly the adjusted stack size returned by qemu_alloc_stack. 
+ */ +void qemu_free_stack(void *stack, size_t sz); + +#endif diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f2d4e9e592..d950c347e2 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -499,3 +499,45 @@ pid_t qemu_fork(Error **errp) } return pid; } + +void *qemu_alloc_stack(size_t *sz) +{ + void *ptr, *guardpage; + size_t pagesz = getpagesize(); +#ifdef _SC_THREAD_STACK_MIN + /* avoid stacks smaller than _SC_THREAD_STACK_MIN */ + long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN); + *sz = MAX(MAX(min_stack_sz, 0), *sz); +#endif + /* adjust stack size to a multiple of the page size */ + *sz = ROUND_UP(*sz, pagesz); + /* allocate one extra page for the guard page */ + *sz += pagesz; + + ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + abort(); + } + +#if defined(HOST_IA64) + /* separate register stack */ + guardpage = ptr + (((*sz - pagesz) / 2) & ~(pagesz - 1)); +#elif defined(HOST_HPPA) + /* stack grows up */ + guardpage = ptr + *sz - pagesz; +#else + /* stack grows down */ + guardpage = ptr; +#endif + if (mprotect(guardpage, pagesz, PROT_NONE) != 0) { + abort(); + } + + return ptr; +} + +void qemu_free_stack(void *stack, size_t sz) +{ + munmap(stack, sz); +} -- cgit v1.2.3-55-g7522 From 8adcd6fb6d14ff7fbb47179384dcddbd6dfd95a3 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 27 Sep 2016 11:58:42 +0200 Subject: coroutine: add a macro for the coroutine stack size Signed-off-by: Peter Lieven Reviewed-by: Paolo Bonzini Reviewed-by: Richard Henderson Signed-off-by: Kevin Wolf --- include/qemu/coroutine_int.h | 2 ++ util/coroutine-sigaltstack.c | 2 +- util/coroutine-ucontext.c | 2 +- util/coroutine-win32.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h index 6df9d33352..14d4f1d1f2 100644 --- a/include/qemu/coroutine_int.h +++ b/include/qemu/coroutine_int.h @@ -28,6 +28,8 @@ #include
"qemu/queue.h" #include "qemu/coroutine.h" +#define COROUTINE_STACK_SIZE (1 << 20) + typedef enum { COROUTINE_YIELD = 1, COROUTINE_TERMINATE = 2, diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c index 171cd44b7f..a5bcb7e19e 100644 --- a/util/coroutine-sigaltstack.c +++ b/util/coroutine-sigaltstack.c @@ -143,7 +143,7 @@ static void coroutine_trampoline(int signal) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; + const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineSigAltStack *co; CoroutineThreadState *coTS; struct sigaction sa; diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 2bb7e10d4b..31254abd4c 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -82,7 +82,7 @@ static void coroutine_trampoline(int i0, int i1) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; + const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineUContext *co; ucontext_t old_uc, uc; sigjmp_buf old_env; diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c index 02e28e825f..de6bd4fd3e 100644 --- a/util/coroutine-win32.c +++ b/util/coroutine-win32.c @@ -71,7 +71,7 @@ static void CALLBACK coroutine_trampoline(void *co_) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; + const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineWin32 *co; co = g_malloc0(sizeof(*co)); -- cgit v1.2.3-55-g7522