diff options
94 files changed, 3399 insertions, 1878 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index ce7c351afa..15503f41d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1403,6 +1403,13 @@ S: Odd Fixes W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/ F: hw/display/cirrus* +EDID Generator +M: Gerd Hoffmann <kraxel@redhat.com> +S: Maintained +F: hw/display/edid* +F: include/hw/display/edid.h +F: qemu-edid.c + Subsystems ---------- Audio @@ -543,6 +543,8 @@ qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS) qemu-keymap$(EXESUF): qemu-keymap.o ui/input-keymap.o $(COMMON_LDADDS) +qemu-edid$(EXESUF): qemu-edid.o hw/display/edid-generate.o $(COMMON_LDADDS) + fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o $(COMMON_LDADDS) fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap @@ -978,7 +980,7 @@ txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \ qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ - qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \ + qemu-deprecated.texi qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \ qemu-monitor-info.texi docs/qemu-block-drivers.texi \ docs/qemu-cpu-models.texi diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 898c3bb3d1..9ffbbc2fbd 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1282,8 +1282,7 @@ void tb_flush(CPUState *cpu) */ #ifdef CONFIG_USER_ONLY -static void -do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp) +static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp) { TranslationBlock *tb = p; target_ulong addr = *(target_ulong *)userp; @@ -1304,8 +1303,7 @@ static void tb_invalidate_check(target_ulong address) qht_iter(&tb_ctx.htable, do_tb_invalidate_check, &address); } -static void -do_tb_page_check(struct qht *ht, void *p, uint32_t hash, void *userp) +static void do_tb_page_check(void *p, 
uint32_t hash, void *userp) { TranslationBlock *tb = p; int flags1, flags2; @@ -764,6 +764,31 @@ static void bdrv_join_options(BlockDriverState *bs, QDict *options, } } +static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, + int open_flags, + Error **errp) +{ + Error *local_err = NULL; + char *value = qemu_opt_get_del(opts, "detect-zeroes"); + BlockdevDetectZeroesOptions detect_zeroes = + qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); + g_free(value); + if (local_err) { + error_propagate(errp, local_err); + return detect_zeroes; + } + + if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + !(open_flags & BDRV_O_UNMAP)) + { + error_setg(errp, "setting detect-zeroes to unmap is not allowed " + "without setting discard operation to unmap"); + } + + return detect_zeroes; +} + /** * Set open flags for a given discard mode * @@ -1094,19 +1119,19 @@ static void update_flags_from_options(int *flags, QemuOpts *opts) *flags &= ~BDRV_O_CACHE_MASK; assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH)); - if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { + if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { *flags |= BDRV_O_NO_FLUSH; } assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT)); - if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) { + if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) { *flags |= BDRV_O_NOCACHE; } *flags &= ~BDRV_O_RDWR; assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY)); - if (!qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false)) { + if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) { *flags |= BDRV_O_RDWR; } @@ -1328,7 +1353,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, const char *driver_name = NULL; const char *node_name = NULL; const char *discard; - const char *detect_zeroes; QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; @@ -1417,29 +1441,12 @@ static int 
bdrv_open_common(BlockDriverState *bs, BlockBackend *file, } } - detect_zeroes = qemu_opt_get(opts, "detect-zeroes"); - if (detect_zeroes) { - BlockdevDetectZeroesOptions value = - qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, - detect_zeroes, - BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, - &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail_opts; - } - - if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && - !(bs->open_flags & BDRV_O_UNMAP)) - { - error_setg(errp, "setting detect-zeroes to unmap is not allowed " - "without setting discard operation to unmap"); - ret = -EINVAL; - goto fail_opts; - } - - bs->detect_zeroes = value; + bs->detect_zeroes = + bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; } if (filename != NULL) { @@ -2763,12 +2770,15 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, } } - /* Remove all children options from bs->options and bs->explicit_options */ + /* Remove all children options and references + * from bs->options and bs->explicit_options */ QLIST_FOREACH(child, &bs->children, next) { char *child_key_dot; child_key_dot = g_strdup_printf("%s.", child->name); qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot); qdict_extract_subqdict(bs->options, NULL, child_key_dot); + qdict_del(bs->explicit_options, child->name); + qdict_del(bs->options, child->name); g_free(child_key_dot); } @@ -3153,7 +3163,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, BlockDriver *drv; QemuOpts *opts; QDict *orig_reopen_opts; - const char *value; + char *discard = NULL; bool read_only; assert(reopen_state != NULL); @@ -3176,18 +3186,28 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, update_flags_from_options(&reopen_state->flags, opts); - /* node-name and driver must be unchanged. 
Put them back into the QDict, so - * that they are checked at the end of this function. */ - value = qemu_opt_get(opts, "node-name"); - if (value) { - qdict_put_str(reopen_state->options, "node-name", value); + discard = qemu_opt_get_del(opts, "discard"); + if (discard != NULL) { + if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) { + error_setg(errp, "Invalid discard option"); + ret = -EINVAL; + goto error; + } } - value = qemu_opt_get(opts, "driver"); - if (value) { - qdict_put_str(reopen_state->options, "driver", value); + reopen_state->detect_zeroes = + bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto error; } + /* All other options (including node-name and driver) must be unchanged. + * Put them back into the QDict, so that they are checked at the end + * of this function. */ + qemu_opts_to_qdict(opts, reopen_state->options); + /* If we are to stay read-only, do not allow permission change * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is * not set, or if the BDS still has copy_on_read enabled */ @@ -3239,6 +3259,24 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, QObject *new = entry->value; QObject *old = qdict_get(reopen_state->bs->options, entry->key); + /* Allow child references (child_name=node_name) as long as they + * point to the current child (i.e. everything stays the same). 
*/ + if (qobject_type(new) == QTYPE_QSTRING) { + BdrvChild *child; + QLIST_FOREACH(child, &reopen_state->bs->children, next) { + if (!strcmp(child->name, entry->key)) { + break; + } + } + + if (child) { + const char *str = qobject_get_try_str(new); + if (!strcmp(child->bs->node_name, str)) { + continue; /* Found child with this name, skip option */ + } + } + } + /* * TODO: When using -drive to specify blockdev options, all values * will be strings; however, when using -blockdev, blockdev-add or @@ -3278,6 +3316,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, error: qemu_opts_del(opts); qobject_unref(orig_reopen_opts); + g_free(discard); return ret; } @@ -3290,6 +3329,7 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) { BlockDriver *drv; BlockDriverState *bs; + BdrvChild *child; bool old_can_write, new_can_write; assert(reopen_state != NULL); @@ -3313,6 +3353,14 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) bs->options = reopen_state->options; bs->open_flags = reopen_state->flags; bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); + bs->detect_zeroes = reopen_state->detect_zeroes; + + /* Remove child references from bs->options and bs->explicit_options. 
+ * Child options were already removed in bdrv_reopen_queue_child() */ + QLIST_FOREACH(child, &bs->children, next) { + qdict_del(bs->explicit_options, child->name); + qdict_del(bs->options, child->name); + } bdrv_refresh_limits(bs, NULL); @@ -5139,23 +5187,12 @@ static bool append_open_options(QDict *d, BlockDriverState *bs) { const QDictEntry *entry; QemuOptDesc *desc; - BdrvChild *child; bool found_any = false; for (entry = qdict_first(bs->options); entry; entry = qdict_next(bs->options, entry)) { - /* Exclude node-name references to children */ - QLIST_FOREACH(child, &bs->children, next) { - if (!strcmp(entry->key, child->name)) { - break; - } - } - if (child) { - continue; - } - - /* And exclude all non-driver-specific options */ + /* Exclude all non-driver-specific options */ for (desc = bdrv_runtime_opts.desc; desc->name; desc++) { if (!strcmp(qdict_entry_key(entry), desc->name)) { break; diff --git a/block/blkreplay.c b/block/blkreplay.c index b5d9efdeca..b5d9efdeca 100755..100644 --- a/block/blkreplay.c +++ b/block/blkreplay.c diff --git a/block/block-backend.c b/block/block-backend.c index 7b1ec5071b..dc0cd57724 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -325,6 +325,9 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); + blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; + blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; + block_acct_init(&blk->stats); notifier_list_init(&blk->remove_bs_notifiers); diff --git a/block/file-posix.c b/block/file-posix.c index fe83cbf0eb..2da3a76355 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -741,8 +741,6 @@ static int raw_check_lock_bytes(int fd, uint64_t perm, uint64_t shared_perm, "Failed to get \"%s\" lock", perm_name); g_free(perm_name); - error_append_hint(errp, - "Is another process using the image?\n"); return ret; } } @@ -758,8 +756,6 @@ static int raw_check_lock_bytes(int fd, uint64_t perm, uint64_t 
shared_perm, "Failed to get shared \"%s\" lock", perm_name); g_free(perm_name); - error_append_hint(errp, - "Is another process using the image?\n"); return ret; } } @@ -796,6 +792,9 @@ static int raw_handle_perm_lock(BlockDriverState *bs, if (!ret) { return 0; } + error_append_hint(errp, + "Is another process using the image [%s]?\n", + bs->filename); } op = RAW_PL_ABORT; /* fall through to unlock bytes. */ @@ -850,8 +849,13 @@ static int raw_reopen_prepare(BDRVReopenState *state, goto out; } - rs->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped", - s->check_cache_dropped); + rs->check_cache_dropped = + qemu_opt_get_bool_del(opts, "x-check-cache-dropped", false); + + /* This driver's reopen function doesn't currently allow changing + * other options, so let's put them back in the original QDict and + * bdrv_reopen_prepare() will detect changes and complain. */ + qemu_opts_to_qdict(opts, state->options); if (s->type == FTYPE_CD) { rs->open_flags |= O_NONBLOCK; @@ -2217,6 +2221,9 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) /* Step two: Check that nobody else has taken conflicting locks */ result = raw_check_lock_bytes(fd, perm, shared, errp); if (result < 0) { + error_append_hint(errp, + "Is another process using the image [%s]?\n", + file_opts->filename); goto out_unlock; } diff --git a/block/qcow2.c b/block/qcow2.c index c13153735a..7277feda13 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -777,29 +777,35 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, uint64_t *refcount_cache_size, Error **errp) { BDRVQcow2State *s = bs->opaque; - uint64_t combined_cache_size; + uint64_t combined_cache_size, l2_cache_max_setting; bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; + uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; + uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); 
combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); - *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0); + l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, + DEFAULT_L2_CACHE_MAX_SIZE); *refcount_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); *l2_cache_entry_size = qemu_opt_get_size( opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); + *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting); + if (combined_cache_size_set) { if (l2_cache_size_set && refcount_cache_size_set) { error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " "at the same time"); return; - } else if (*l2_cache_size > combined_cache_size) { + } else if (l2_cache_size_set && + (l2_cache_max_setting > combined_cache_size)) { error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed " QCOW2_OPT_CACHE_SIZE); return; @@ -814,9 +820,6 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, } else if (refcount_cache_size_set) { *l2_cache_size = combined_cache_size - *refcount_cache_size; } else { - uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; - uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); - /* Assign as much memory as possible to the L2 cache, and * use the remainder for the refcount cache */ if (combined_cache_size >= max_l2_cache + min_refcount_cache) { @@ -828,16 +831,9 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, *l2_cache_size = combined_cache_size - *refcount_cache_size; } } - } else { - if (!l2_cache_size_set) { - *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, - (uint64_t)DEFAULT_L2_CACHE_CLUSTERS - * s->cluster_size); - } - if 
(!refcount_cache_size_set) { - *refcount_cache_size = min_refcount_cache; - } } + /* l2_cache_size and refcount_cache_size are ensured to have at least + * their minimum values in qcow2_update_options_prepare() */ if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || *l2_cache_entry_size > s->cluster_size || @@ -948,7 +944,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, /* New interval for cache cleanup timer */ r->cache_clean_interval = qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, - s->cache_clean_interval); + DEFAULT_CACHE_CLEAN_INTERVAL); #ifndef CONFIG_LINUX if (r->cache_clean_interval != 0) { error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL @@ -1328,7 +1324,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, /* 2^(s->refcount_order - 3) is the refcount width in bytes */ s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); s->refcount_block_size = 1 << s->refcount_block_bits; - bs->total_sectors = header.size / 512; + bs->total_sectors = header.size / BDRV_SECTOR_SIZE; s->csize_shift = (62 - (s->cluster_bits - 8)); s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; s->cluster_offset_mask = (1LL << s->csize_shift) - 1; @@ -3422,6 +3418,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, uint64_t old_length; int64_t new_l1_size; int ret; + QDict *options; if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) @@ -3453,7 +3450,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, goto fail; } - old_length = bs->total_sectors * 512; + old_length = bs->total_sectors * BDRV_SECTOR_SIZE; new_l1_size = size_to_l1(s, offset); if (offset < old_length) { @@ -3646,6 +3643,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, } } + bs->total_sectors = offset / BDRV_SECTOR_SIZE; + /* write updated header.size */ offset = 
cpu_to_be64(offset); ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), @@ -3656,6 +3655,14 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, } s->l1_vm_state_index = new_l1_size; + + /* Update cache sizes */ + options = qdict_clone_shallow(bs->options); + ret = qcow2_update_options(bs, options, s->flags, errp); + qobject_unref(options); + if (ret < 0) { + goto fail; + } ret = 0; fail: qemu_co_mutex_unlock(&s->lock); diff --git a/block/qcow2.h b/block/qcow2.h index 81b844e936..ba430316b9 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -27,6 +27,7 @@ #include "crypto/block.h" #include "qemu/coroutine.h" +#include "qemu/units.h" //#define DEBUG_ALLOC //#define DEBUG_ALLOC2 @@ -43,11 +44,11 @@ /* 8 MB refcount table is enough for 2 PB images at 64k cluster size * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */ -#define QCOW_MAX_REFTABLE_SIZE 0x800000 +#define QCOW_MAX_REFTABLE_SIZE S_8MiB /* 32 MB L1 table is enough for 2 PB images at 64k cluster size * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */ -#define QCOW_MAX_L1_SIZE 0x2000000 +#define QCOW_MAX_L1_SIZE S_32MiB /* Allow for an average of 1k per snapshot table entry, should be plenty of * space for snapshot names and IDs */ @@ -73,12 +74,16 @@ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ -/* Whichever is more */ -#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */ -#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */ - -#define DEFAULT_CLUSTER_SIZE 65536 +#ifdef CONFIG_LINUX +#define DEFAULT_L2_CACHE_MAX_SIZE S_32MiB +#define DEFAULT_CACHE_CLEAN_INTERVAL 600 /* seconds */ +#else +#define DEFAULT_L2_CACHE_MAX_SIZE S_8MiB +/* Cache clean interval is currently available only on Linux, so must be 0 */ +#define DEFAULT_CACHE_CLEAN_INTERVAL 0 +#endif +#define DEFAULT_CLUSTER_SIZE S_64KiB #define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts" #define QCOW2_OPT_DISCARD_REQUEST 
"pass-discard-request" diff --git a/block/vmdk.c b/block/vmdk.c index a9d0084e36..2c9e86d98f 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1698,6 +1698,27 @@ static int coroutine_fn vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov) { + if (bytes == 0) { + /* The caller will write bytes 0 to signal EOF. + * When receive it, we align EOF to a sector boundary. */ + BDRVVmdkState *s = bs->opaque; + int i, ret; + int64_t length; + + for (i = 0; i < s->num_extents; i++) { + length = bdrv_getlength(s->extents[i].file->bs); + if (length < 0) { + return length; + } + length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); + ret = bdrv_truncate(s->extents[i].file, length, + PREALLOC_MODE_OFF, NULL); + if (ret < 0) { + return ret; + } + } + return 0; + } return vmdk_co_pwritev(bs, offset, bytes, qiov, 0); } @@ -5714,7 +5714,7 @@ fi tools="" if test "$want_tools" = "yes" ; then - tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools" + tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) qemu-edid\$(EXESUF) $tools" if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then tools="qemu-nbd\$(EXESUF) $tools" fi diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index d7c7dcda8f..70cfb9ce7d 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -104,7 +104,7 @@ Primary side. COLO Proxy: Delivers packets to Primary and Seconday, and then compare the responses from both side. Then decide whether to start a checkpoint according to some rules. -Please refer to docs/colo-proxy.txt for more informations. +Please refer to docs/colo-proxy.txt for more information. 
Note: HeartBeat has not been implemented yet, so you need to trigger failover process diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index f59667f498..c2194711d9 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -666,12 +666,12 @@ Master message types Equivalent ioctl: VHOST_SET_VRING_ENDIAN Master payload: vring state description - Set the endianess of a VQ for legacy devices. Little-endian is indicated + Set the endianness of a VQ for legacy devices. Little-endian is indicated with state.num set to 0 and big-endian is indicated with state.num set to 1. Other values are invalid. This request should be sent only when VHOST_USER_PROTOCOL_F_CROSS_ENDIAN has been negotiated. - Backends that negotiated this feature should handle both endianesses + Backends that negotiated this feature should handle both endiannesses and expect this message once (per VQ) during device configuration (ie. before the master starts the VQ). diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index 8a09a5cc5f..c459bf5dd3 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -79,14 +79,14 @@ Choosing the right cache sizes In order to choose the cache sizes we need to know how they relate to the amount of allocated space. 
-The amount of virtual disk that can be mapped by the L2 and refcount +The part of the virtual disk that can be mapped by the L2 and refcount caches (in bytes) is: disk_size = l2_cache_size * cluster_size / 8 disk_size = refcount_cache_size * cluster_size * 8 / refcount_bits With the default values for cluster_size (64KB) and refcount_bits -(16), that is +(16), this becomes: disk_size = l2_cache_size * 8192 disk_size = refcount_cache_size * 32768 @@ -97,12 +97,16 @@ need: l2_cache_size = disk_size_GB * 131072 refcount_cache_size = disk_size_GB * 32768 -QEMU has a default L2 cache of 1MB (1048576 bytes) and a refcount -cache of 256KB (262144 bytes), so using the formulas we've just seen -we have +For example, 1MB of L2 cache is needed to cover every 8 GB of the virtual +image size (given that the default cluster size is used): - 1048576 / 131072 = 8 GB of virtual disk covered by that cache - 262144 / 32768 = 8 GB + 8 GB / 8192 = 1 MB + +The refcount cache is 4 times the cluster size by default. With the default +cluster size of 64 KB, it is 256 KB (262144 bytes). This is sufficient for +8 GB of image size: + + 262144 * 32768 = 8 GB How to configure the cache sizes @@ -121,8 +125,15 @@ There are a few things that need to be taken into account: - Both caches must have a size that is a multiple of the cluster size (or the cache entry size: see "Using smaller cache sizes" below). - - The default L2 cache size is 8 clusters or 1MB (whichever is more), - and the minimum is 2 clusters (or 2 cache entries, see below). + - The maximum L2 cache size is 32 MB by default on Linux platforms (enough + for full coverage of 256 GB images, with the default cluster size). This + value can be modified using the "l2-cache-size" option. QEMU will not use + more memory than needed to hold all of the image's L2 tables, regardless + of this max. value. 
+ On non-Linux platforms the maximal value is smaller by default (8 MB) and + this difference stems from the fact that on Linux the cache can be cleared + periodically if needed, using the "cache-clean-interval" option (see below). + The minimal L2 cache size is 2 clusters (or 2 cache entries, see below). - The default (and minimum) refcount cache size is 4 clusters. @@ -130,6 +141,9 @@ There are a few things that need to be taken into account: memory as possible to the L2 cache before increasing the refcount cache size. + - At most two of "l2-cache-size", "refcount-cache-size", and "cache-size" + can be set simultaneously. + Unlike L2 tables, refcount blocks are not used during normal I/O but only during allocations and internal snapshots. In most cases they are accessed sequentially (even during random guest I/O) so increasing the @@ -177,9 +191,10 @@ Some things to take into account: always uses the cluster size as the entry size. - If the L2 cache is big enough to hold all of the image's L2 tables - (as explained in the "Choosing the right cache sizes" section - earlier in this document) then none of this is necessary and you - can omit the "l2-cache-entry-size" parameter altogether. + (as explained in the "Choosing the right cache sizes" and "How to + configure the cache sizes" sections in this document) then none of + this is necessary and you can omit the "l2-cache-entry-size" + parameter altogether. Reducing the memory usage @@ -187,18 +202,18 @@ Reducing the memory usage It is possible to clean unused cache entries in order to reduce the memory usage during periods of low I/O activity. -The parameter "cache-clean-interval" defines an interval (in seconds). -All cache entries that haven't been accessed during that interval are -removed from memory. +The parameter "cache-clean-interval" defines an interval (in seconds), +after which all the cache entries that haven't been accessed during the +interval are removed from memory. 
Setting this parameter to 0 disables this +feature. -This example removes all unused cache entries every 15 minutes: +The following example removes all unused cache entries every 15 minutes: -drive file=hd.qcow2,cache-clean-interval=900 -If unset, the default value for this parameter is 0 and it disables -this feature. +If unset, the default value for this parameter is 600 on platforms which +support this functionality, and is 0 (disabled) on other platforms. -Note that this functionality currently relies on the MADV_DONTNEED -argument for madvise() to actually free the memory. This is a -Linux-specific feature, so cache-clean-interval is not supported in -other systems. +This functionality currently relies on the MADV_DONTNEED argument for +madvise() to actually free the memory. This is a Linux-specific feature, +so cache-clean-interval is not supported on other systems. diff --git a/docs/replay.txt b/docs/replay.txt index 2e21e9ccb0..3497585f5a 100644 --- a/docs/replay.txt +++ b/docs/replay.txt @@ -320,7 +320,7 @@ Here is the list of events that are written into the log: async event id from the following list: - REPLAY_ASYNC_EVENT_BH. Bottom-half callback. This event synchronizes callbacks that affect virtual machine state, but normally called - asyncronously. + asynchronously. Argument: 8-byte operation id. - REPLAY_ASYNC_EVENT_INPUT. Input device event. Contains parameters of keyboard and mouse input operations diff --git a/docs/specs/standard-vga.txt b/docs/specs/standard-vga.txt index 19d2a74509..18f75f1b30 100644 --- a/docs/specs/standard-vga.txt +++ b/docs/specs/standard-vga.txt @@ -61,7 +61,7 @@ MMIO area spec Likewise applies to the pci variant only for obvious reasons. -0000 - 03ff : reserved, for possible virtio extension. +0000 - 03ff : edid data blob. 0400 - 041f : vga ioports (0x3c0 -> 0x3df), remapped 1:1. 
word access is supported, bytes are written in little endia order (aka index port first), @@ -271,6 +271,19 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->xbzrle_cache->overflow); } + if (info->has_compression) { + monitor_printf(mon, "compression pages: %" PRIu64 " pages\n", + info->compression->pages); + monitor_printf(mon, "compression busy: %" PRIu64 "\n", + info->compression->busy); + monitor_printf(mon, "compression busy rate: %0.2f\n", + info->compression->busy_rate); + monitor_printf(mon, "compressed size: %" PRIu64 "\n", + info->compression->compressed_size); + monitor_printf(mon, "compression rate: %0.2f\n", + info->compression->compression_rate); + } + if (info->has_cpu_throttle_percentage) { monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n", info->cpu_throttle_percentage); diff --git a/hw/core/machine.c b/hw/core/machine.c index 6b68e1218f..1987557833 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -324,6 +324,9 @@ static void machine_set_enforce_config_section(Object *obj, bool value, { MachineState *ms = MACHINE(obj); + warn_report("enforce-config-section is deprecated, please use " + "-global migration.send-configuration=on|off instead"); + ms->enforce_config_section = value; } diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 36b788a66b..046d8f1f76 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -643,7 +643,7 @@ static void qdev_get_legacy_property(Object *obj, Visitor *v, * the string depends on the property type. Legacy properties are only * needed for "info qtree". * - * Do not use this is new code! QOM Properties added through this interface + * Do not use this in new code! QOM Properties added through this interface * will be given names in the "legacy" namespace. 
*/ static void qdev_property_add_legacy(DeviceState *dev, Property *prop, diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs index a606fb7404..780a76b9f0 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,3 +1,5 @@ +common-obj-y += edid-generate.o + common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o @@ -13,6 +15,7 @@ common-obj-$(CONFIG_XEN) += xenfb.o common-obj-$(CONFIG_VGA_PCI) += vga-pci.o common-obj-$(CONFIG_VGA_PCI) += bochs-display.o +common-obj-$(CONFIG_VGA_PCI) += edid-region.o common-obj-$(CONFIG_VGA_ISA) += vga-isa.o common-obj-$(CONFIG_VGA_ISA_MM) += vga-isa-mm.o common-obj-$(CONFIG_VMWARE_VGA) += vmware_vga.o diff --git a/hw/display/edid-generate.c b/hw/display/edid-generate.c new file mode 100644 index 0000000000..c80397ea96 --- /dev/null +++ b/hw/display/edid-generate.c @@ -0,0 +1,439 @@ +/* + * QEMU EDID generator. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/bswap.h" +#include "hw/display/edid.h" + +static const struct edid_mode { + uint32_t xres; + uint32_t yres; + uint32_t byte; + uint32_t xtra3; + uint32_t bit; + uint32_t dta; +} modes[] = { + /* dea/dta extension timings (all @ 50 Hz) */ + { .xres = 5120, .yres = 2160, .dta = 125 }, + { .xres = 4096, .yres = 2160, .dta = 101 }, + { .xres = 3840, .yres = 2160, .dta = 96 }, + { .xres = 2560, .yres = 1080, .dta = 89 }, + { .xres = 2048, .yres = 1152 }, + { .xres = 1920, .yres = 1080, .dta = 31 }, + + /* additional standard timings 3 (all @ 60Hz) */ + { .xres = 1920, .yres = 1440, .xtra3 = 11, .bit = 5 }, + { .xres = 1920, .yres = 1200, .xtra3 = 10, .bit = 0 }, + { .xres = 1856, .yres = 1392, .xtra3 = 10, .bit = 3 }, + { .xres = 1792, .yres = 1344, .xtra3 = 10, .bit = 5 }, + { .xres = 1600, .yres = 1200, .xtra3 = 9, .bit = 2 }, + { .xres = 1680, .yres = 1050, .xtra3 = 9, .bit = 5 }, + { .xres = 1440, .yres = 1050, .xtra3 = 8, .bit = 1 }, + { .xres = 1440, .yres = 900, .xtra3 = 8, .bit = 5 }, + { .xres = 1360, .yres = 768, .xtra3 = 8, .bit = 7 }, + { .xres = 1280, .yres = 1024, .xtra3 = 7, .bit = 1 }, + { .xres = 1280, .yres = 960, .xtra3 = 7, .bit = 3 }, + { .xres = 1280, .yres = 768, .xtra3 = 7, .bit = 6 }, + + /* established timings (all @ 60Hz) */ + { .xres = 1024, .yres = 768, .byte = 36, .bit = 3 }, + { .xres = 800, .yres = 600, .byte = 35, .bit = 0 }, + { .xres = 640, .yres = 480, .byte = 35, .bit = 5 }, +}; + +static void edid_ext_dta(uint8_t *dta) +{ + dta[0] = 0x02; + dta[1] = 0x03; + dta[2] = 0x05; + dta[3] = 0x00; + + /* video data block */ + dta[4] = 0x40; +} + +static void edid_ext_dta_mode(uint8_t *dta, uint8_t nr) +{ + dta[dta[2]] = nr; + dta[2]++; + dta[4]++; +} + +static int edid_std_mode(uint8_t *mode, uint32_t xres, uint32_t yres) +{ + uint32_t aspect; + + if (xres == 0 || yres == 0) { + mode[0] = 0x01; + mode[1] = 0x01; + return 0; + + } else if (xres * 10 == yres * 16) { + 
aspect = 0; + } else if (xres * 3 == yres * 4) { + aspect = 1; + } else if (xres * 4 == yres * 5) { + aspect = 2; + } else if (xres * 9 == yres * 16) { + aspect = 3; + } else { + return -1; + } + + if ((xres / 8) - 31 > 255) { + return -1; + } + + mode[0] = (xres / 8) - 31; + mode[1] = ((aspect << 6) | (60 - 60)); + return 0; +} + +static void edid_fill_modes(uint8_t *edid, uint8_t *xtra3, uint8_t *dta, + uint32_t maxx, uint32_t maxy) +{ + const struct edid_mode *mode; + int std = 38; + int rc, i; + + for (i = 0; i < ARRAY_SIZE(modes); i++) { + mode = modes + i; + + if ((maxx && mode->xres > maxx) || + (maxy && mode->yres > maxy)) { + continue; + } + + if (mode->byte) { + edid[mode->byte] |= (1 << mode->bit); + } else if (mode->xtra3 && xtra3) { + xtra3[mode->xtra3] |= (1 << mode->bit); + } else if (std < 54) { + rc = edid_std_mode(edid + std, mode->xres, mode->yres); + if (rc == 0) { + std += 2; + } + } + + if (dta && mode->dta) { + edid_ext_dta_mode(dta, mode->dta); + } + } + + while (std < 54) { + edid_std_mode(edid + std, 0, 0); + std += 2; + } +} + +static void edid_checksum(uint8_t *edid) +{ + uint32_t sum = 0; + int i; + + for (i = 0; i < 127; i++) { + sum += edid[i]; + } + sum &= 0xff; + if (sum) { + edid[127] = 0x100 - sum; + } +} + +static void edid_desc_type(uint8_t *desc, uint8_t type) +{ + desc[0] = 0; + desc[1] = 0; + desc[2] = 0; + desc[3] = type; + desc[4] = 0; +} + +static void edid_desc_text(uint8_t *desc, uint8_t type, + const char *text) +{ + size_t len; + + edid_desc_type(desc, type); + memset(desc + 5, ' ', 13); + + len = strlen(text); + if (len > 12) { + len = 12; + } + strncpy((char *)(desc + 5), text, len); + desc[5 + len] = '\n'; +} + +static void edid_desc_ranges(uint8_t *desc) +{ + edid_desc_type(desc, 0xfd); + + /* vertical (50 -> 125 Hz) */ + desc[5] = 50; + desc[6] = 125; + + /* horizontal (30 -> 160 kHz) */ + desc[7] = 30; + desc[8] = 160; + + /* max dot clock (1200 MHz) */ + desc[9] = 1200 / 10; + + /* no extended timing information 
*/ + desc[10] = 0x01; + + /* padding */ + desc[11] = '\n'; + memset(desc + 12, ' ', 6); +} + +/* additional standard timings 3 */ +static void edid_desc_xtra3_std(uint8_t *desc) +{ + edid_desc_type(desc, 0xf7); + desc[5] = 10; +} + +static void edid_desc_dummy(uint8_t *desc) +{ + edid_desc_type(desc, 0x10); +} + +static void edid_desc_timing(uint8_t *desc, + uint32_t xres, uint32_t yres, + uint32_t dpi) +{ + /* physical display size */ + uint32_t xmm = xres * dpi / 254; + uint32_t ymm = yres * dpi / 254; + + /* pull some realistic looking timings out of thin air */ + uint32_t xfront = xres * 25 / 100; + uint32_t xsync = xres * 3 / 100; + uint32_t xblank = xres * 35 / 100; + + uint32_t yfront = yres * 5 / 1000; + uint32_t ysync = yres * 5 / 1000; + uint32_t yblank = yres * 35 / 1000; + + uint32_t clock = 75 * (xres + xblank) * (yres + yblank); + + *(uint32_t *)(desc) = cpu_to_le32(clock / 10000); + + desc[2] = xres & 0xff; + desc[3] = xblank & 0xff; + desc[4] = (((xres & 0xf00) >> 4) | + ((xblank & 0xf00) >> 8)); + + desc[5] = yres & 0xff; + desc[6] = yblank & 0xff; + desc[7] = (((yres & 0xf00) >> 4) | + ((yblank & 0xf00) >> 8)); + + desc[8] = xfront & 0xff; + desc[9] = xsync & 0xff; + + desc[10] = (((yfront & 0x00f) << 4) | + ((ysync & 0x00f) << 0)); + desc[11] = (((xfront & 0x300) >> 2) | + ((xsync & 0x300) >> 4) | + ((yfront & 0x030) >> 2) | + ((ysync & 0x030) >> 4)); + + desc[12] = xmm & 0xff; + desc[13] = ymm & 0xff; + desc[14] = (((xmm & 0xf00) >> 4) | + ((ymm & 0xf00) >> 8)); + + desc[17] = 0x18; +} + +static uint32_t edid_to_10bit(float value) +{ + return (uint32_t)(value * 1024 + 0.5); +} + +static void edid_colorspace(uint8_t *edid, + float rx, float ry, + float gx, float gy, + float bx, float by, + float wx, float wy) +{ + uint32_t red_x = edid_to_10bit(rx); + uint32_t red_y = edid_to_10bit(ry); + uint32_t green_x = edid_to_10bit(gx); + uint32_t green_y = edid_to_10bit(gy); + uint32_t blue_x = edid_to_10bit(bx); + uint32_t blue_y = edid_to_10bit(by); + 
uint32_t white_x = edid_to_10bit(wx); + uint32_t white_y = edid_to_10bit(wy); + + edid[25] = (((red_x & 0x03) << 6) | + ((red_y & 0x03) << 4) | + ((green_x & 0x03) << 2) | + ((green_y & 0x03) << 0)); + edid[26] = (((blue_x & 0x03) << 6) | + ((blue_y & 0x03) << 4) | + ((white_x & 0x03) << 2) | + ((white_y & 0x03) << 0)); + edid[27] = red_x >> 2; + edid[28] = red_y >> 2; + edid[29] = green_x >> 2; + edid[30] = green_y >> 2; + edid[31] = blue_x >> 2; + edid[32] = blue_y >> 2; + edid[33] = white_x >> 2; + edid[34] = white_y >> 2; +} + +void qemu_edid_generate(uint8_t *edid, size_t size, + qemu_edid_info *info) +{ + uint32_t desc = 54; + uint8_t *xtra3 = NULL; + uint8_t *dta = NULL; + + /* =============== set defaults =============== */ + + if (!info->vendor || strlen(info->vendor) != 3) { + info->vendor = "EMU"; + } + if (!info->name) { + info->name = "QEMU Monitor"; + } + if (!info->dpi) { + info->dpi = 100; + } + if (!info->prefx) { + info->prefx = 1024; + } + if (!info->prefy) { + info->prefy = 768; + } + + /* =============== extensions =============== */ + + if (size >= 256) { + dta = edid + 128; + edid[126]++; + edid_ext_dta(dta); + } + + /* =============== header information =============== */ + + /* fixed */ + edid[0] = 0x00; + edid[1] = 0xff; + edid[2] = 0xff; + edid[3] = 0xff; + edid[4] = 0xff; + edid[5] = 0xff; + edid[6] = 0xff; + edid[7] = 0x00; + + /* manufacturer id, product code, serial number */ + uint16_t vendor_id = ((((info->vendor[0] - '@') & 0x1f) << 10) | + (((info->vendor[1] - '@') & 0x1f) << 5) | + (((info->vendor[2] - '@') & 0x1f) << 0)); + uint16_t model_nr = 0x1234; + uint32_t serial_nr = info->serial ? 
atoi(info->serial) : 0; + *(uint16_t *)(edid + 8) = cpu_to_be16(vendor_id); + *(uint16_t *)(edid + 10) = cpu_to_le16(model_nr); + *(uint32_t *)(edid + 12) = cpu_to_le32(serial_nr); + + /* manufacture week and year */ + edid[16] = 42; + edid[17] = 2014 - 1990; + + /* edid version */ + edid[18] = 1; + edid[19] = 4; + + + /* =============== basic display parameters =============== */ + + /* video input: digital, 8bpc, displayport */ + edid[20] = 0xa5; + + /* screen size: undefined */ + edid[21] = info->prefx * info->dpi / 2540; + edid[22] = info->prefy * info->dpi / 2540; + + /* display gamma: 2.2 */ + edid[23] = 220 - 100; + + /* supported features bitmap: std sRGB, preferred timing */ + edid[24] = 0x06; + + + /* =============== chromaticity coordinates =============== */ + + /* standard sRGB colorspace */ + edid_colorspace(edid, + 0.6400, 0.3300, /* red */ + 0.3000, 0.6000, /* green */ + 0.1500, 0.0600, /* blue */ + 0.3127, 0.3290); /* white point */ + + /* =============== established timing bitmap =============== */ + /* =============== standard timing information =============== */ + + /* both filled by edid_fill_modes() */ + + + /* =============== descriptor blocks =============== */ + + edid_desc_timing(edid + desc, info->prefx, info->prefy, info->dpi); + desc += 18; + + edid_desc_ranges(edid + desc); + desc += 18; + + if (info->name) { + edid_desc_text(edid + desc, 0xfc, info->name); + desc += 18; + } + + if (info->serial) { + edid_desc_text(edid + desc, 0xff, info->serial); + desc += 18; + } + + if (desc < 126) { + xtra3 = edid + desc; + edid_desc_xtra3_std(xtra3); + desc += 18; + } + + while (desc < 126) { + edid_desc_dummy(edid + desc); + desc += 18; + } + + /* =============== finish up =============== */ + + edid_fill_modes(edid, xtra3, dta, info->maxx, info->maxy); + edid_checksum(edid); + if (dta) { + edid_checksum(dta); + } +} + +size_t qemu_edid_size(uint8_t *edid) +{ + uint32_t exts; + + if (edid[0] != 0x00 || + edid[1] != 0xff) { + /* doesn't look 
like a valid edid block */ + return 0; + } + + exts = edid[126]; + return 128 * (exts + 1); +} diff --git a/hw/display/edid-region.c b/hw/display/edid-region.c new file mode 100644 index 0000000000..9a15734d3a --- /dev/null +++ b/hw/display/edid-region.c @@ -0,0 +1,33 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "hw/display/edid.h" + +static uint64_t edid_region_read(void *ptr, hwaddr addr, unsigned size) +{ + uint8_t *edid = ptr; + + return edid[addr]; +} + +static void edid_region_write(void *ptr, hwaddr addr, + uint64_t val, unsigned size) +{ + /* read only */ +} + +static const MemoryRegionOps edid_region_ops = { + .read = edid_region_read, + .write = edid_region_write, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .impl.min_access_size = 1, + .impl.max_access_size = 1, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +void qemu_edid_region_io(MemoryRegion *region, Object *owner, + uint8_t *edid, size_t size) +{ + memory_region_init_io(region, owner, &edid_region_ops, + edid, "edid", size); +} diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c index c62b9a5e75..14ad2b352d 100644 --- a/hw/display/qxl-render.c +++ b/hw/display/qxl-render.c @@ -98,6 +98,8 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl) { VGACommonState *vga = &qxl->vga; DisplaySurface *surface; + int width = qxl->guest_head0_width ?: qxl->guest_primary.surface.width; + int height = qxl->guest_head0_height ?: qxl->guest_primary.surface.height; int i; if (qxl->guest_primary.resized) { @@ -111,8 +113,8 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl) qxl_set_rect_to_surface(qxl, &qxl->dirty[0]); qxl->num_dirty_rects = 1; trace_qxl_render_guest_primary_resized( - qxl->guest_primary.surface.width, - qxl->guest_primary.surface.height, + width, + height, qxl->guest_primary.qxl_stride, qxl->guest_primary.bytes_pp, qxl->guest_primary.bits_pp); @@ -120,15 +122,15 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl) 
pixman_format_code_t format = qemu_default_pixman_format(qxl->guest_primary.bits_pp, true); surface = qemu_create_displaysurface_from - (qxl->guest_primary.surface.width, - qxl->guest_primary.surface.height, + (width, + height, format, qxl->guest_primary.abs_stride, qxl->guest_primary.data); } else { surface = qemu_create_displaysurface - (qxl->guest_primary.surface.width, - qxl->guest_primary.surface.height); + (width, + height); } dpy_gfx_replace_surface(vga->con, surface); } @@ -144,8 +146,8 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl) qxl->dirty[i].top < 0 || qxl->dirty[i].left > qxl->dirty[i].right || qxl->dirty[i].top > qxl->dirty[i].bottom || - qxl->dirty[i].right > qxl->guest_primary.surface.width || - qxl->dirty[i].bottom > qxl->guest_primary.surface.height) { + qxl->dirty[i].right > width || + qxl->dirty[i].bottom > height) { continue; } qxl_blit(qxl, qxl->dirty+i); @@ -234,12 +236,28 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor, uint32_t group_id) { QEMUCursor *c; + uint8_t *and_mask, *xor_mask; size_t size; c = cursor_alloc(cursor->header.width, cursor->header.height); c->hot_x = cursor->header.hot_spot_x; c->hot_y = cursor->header.hot_spot_y; switch (cursor->header.type) { + case SPICE_CURSOR_TYPE_MONO: + /* Assume that the full cursor is available in a single chunk. 
*/ + size = 2 * cursor_get_mono_bpl(c) * c->height; + if (size != cursor->data_size) { + fprintf(stderr, "%s: bad monochrome cursor %ux%u with size %u\n", + __func__, c->width, c->height, cursor->data_size); + goto fail; + } + and_mask = cursor->chunk.data; + xor_mask = and_mask + cursor_get_mono_bpl(c) * c->height; + cursor_set_mono(c, 0xffffff, 0x000000, xor_mask, 1, and_mask); + if (qxl->debug > 2) { + cursor_print_ascii_art(c, "qxl/mono"); + } + break; case SPICE_CURSOR_TYPE_ALPHA: size = sizeof(uint32_t) * cursor->header.width * cursor->header.height; qxl_unpack_chunks(c->data, size, qxl, &cursor->chunk, group_id); diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 8e9135d9c6..747986478f 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -259,6 +259,8 @@ static void qxl_spice_destroy_surfaces(PCIQXLDevice *qxl, qxl_async_io async) static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay) { + QXLMonitorsConfig *cfg; + trace_qxl_spice_monitors_config(qxl->id); if (replay) { /* @@ -286,6 +288,16 @@ static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay) (uintptr_t)qxl_cookie_new(QXL_COOKIE_TYPE_IO, QXL_IO_MONITORS_CONFIG_ASYNC)); } + + cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST); + if (cfg->count == 1) { + qxl->guest_primary.resized = 1; + qxl->guest_head0_width = cfg->heads[0].width; + qxl->guest_head0_height = cfg->heads[0].height; + } else { + qxl->guest_head0_width = 0; + qxl->guest_head0_height = 0; + } } void qxl_spice_reset_image_cache(PCIQXLDevice *qxl) diff --git a/hw/display/qxl.h b/hw/display/qxl.h index 6eacba080d..dd9c0522b7 100644 --- a/hw/display/qxl.h +++ b/hw/display/qxl.h @@ -78,6 +78,8 @@ typedef struct PCIQXLDevice { QXLPHYSICAL guest_cursor; QXLPHYSICAL guest_monitors_config; + uint32_t guest_head0_width; + uint32_t guest_head0_height; QemuMutex track_lock; diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c index e9e62eac70..24ca1b3e1f 100644 --- 
a/hw/display/vga-pci.c +++ b/hw/display/vga-pci.c @@ -30,18 +30,22 @@ #include "ui/pixel_ops.h" #include "qemu/timer.h" #include "hw/loader.h" +#include "hw/display/edid.h" enum vga_pci_flags { PCI_VGA_FLAG_ENABLE_MMIO = 1, PCI_VGA_FLAG_ENABLE_QEXT = 2, + PCI_VGA_FLAG_ENABLE_EDID = 3, }; typedef struct PCIVGAState { PCIDevice dev; VGACommonState vga; uint32_t flags; + qemu_edid_info edid_info; MemoryRegion mmio; - MemoryRegion mrs[3]; + MemoryRegion mrs[4]; + uint8_t edid[256]; } PCIVGAState; #define TYPE_PCI_VGA "pci-vga" @@ -195,8 +199,10 @@ void pci_std_vga_mmio_region_init(VGACommonState *s, Object *owner, MemoryRegion *parent, MemoryRegion *subs, - bool qext) + bool qext, bool edid) { + PCIVGAState *d = container_of(s, PCIVGAState, vga); + memory_region_init_io(&subs[0], owner, &pci_vga_ioport_ops, s, "vga ioports remapped", PCI_VGA_IOPORT_SIZE); memory_region_add_subregion(parent, PCI_VGA_IOPORT_OFFSET, @@ -213,6 +219,12 @@ void pci_std_vga_mmio_region_init(VGACommonState *s, memory_region_add_subregion(parent, PCI_VGA_QEXT_OFFSET, &subs[2]); } + + if (edid) { + qemu_edid_generate(d->edid, sizeof(d->edid), &d->edid_info); + qemu_edid_region_io(&subs[3], owner, d->edid, sizeof(d->edid)); + memory_region_add_subregion(parent, 0, &subs[3]); + } } static void pci_std_vga_realize(PCIDevice *dev, Error **errp) @@ -220,6 +232,7 @@ static void pci_std_vga_realize(PCIDevice *dev, Error **errp) PCIVGAState *d = PCI_VGA(dev); VGACommonState *s = &d->vga; bool qext = false; + bool edid = false; /* vga + console init */ vga_common_init(s, OBJECT(dev)); @@ -240,7 +253,11 @@ static void pci_std_vga_realize(PCIDevice *dev, Error **errp) qext = true; pci_set_byte(&d->dev.config[PCI_REVISION_ID], 2); } - pci_std_vga_mmio_region_init(s, OBJECT(dev), &d->mmio, d->mrs, qext); + if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_EDID)) { + edid = true; + } + pci_std_vga_mmio_region_init(s, OBJECT(dev), &d->mmio, d->mrs, + qext, edid); pci_register_bar(&d->dev, 2, 
PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio); } @@ -263,6 +280,7 @@ static void pci_secondary_vga_realize(PCIDevice *dev, Error **errp) PCIVGAState *d = PCI_VGA(dev); VGACommonState *s = &d->vga; bool qext = false; + bool edid = false; /* vga + console init */ vga_common_init(s, OBJECT(dev)); @@ -276,7 +294,10 @@ static void pci_secondary_vga_realize(PCIDevice *dev, Error **errp) qext = true; pci_set_byte(&d->dev.config[PCI_REVISION_ID], 2); } - pci_std_vga_mmio_region_init(s, OBJECT(dev), &d->mmio, d->mrs, qext); + if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_EDID)) { + edid = true; + } + pci_std_vga_mmio_region_init(s, OBJECT(dev), &d->mmio, d->mrs, qext, edid); pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->vram); pci_register_bar(&d->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio); @@ -308,6 +329,9 @@ static Property vga_pci_properties[] = { DEFINE_PROP_BIT("mmio", PCIVGAState, flags, PCI_VGA_FLAG_ENABLE_MMIO, true), DEFINE_PROP_BIT("qemu-extended-regs", PCIVGAState, flags, PCI_VGA_FLAG_ENABLE_QEXT, true), + DEFINE_PROP_BIT("edid", + PCIVGAState, flags, PCI_VGA_FLAG_ENABLE_EDID, false), + DEFINE_EDID_PROPERTIES(PCIVGAState, edid_info), DEFINE_PROP_BOOL("global-vmstate", PCIVGAState, vga.global_vmstate, false), DEFINE_PROP_END_OF_LIST(), }; @@ -316,6 +340,9 @@ static Property secondary_pci_properties[] = { DEFINE_PROP_UINT32("vgamem_mb", PCIVGAState, vga.vram_size_mb, 16), DEFINE_PROP_BIT("qemu-extended-regs", PCIVGAState, flags, PCI_VGA_FLAG_ENABLE_QEXT, true), + DEFINE_PROP_BIT("edid", + PCIVGAState, flags, PCI_VGA_FLAG_ENABLE_EDID, false), + DEFINE_EDID_PROPERTIES(PCIVGAState, edid_info), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h index 339661bc01..6e4fa48a79 100644 --- a/hw/display/vga_int.h +++ b/hw/display/vga_int.h @@ -197,6 +197,6 @@ void pci_std_vga_mmio_region_init(VGACommonState *s, Object *owner, MemoryRegion *parent, MemoryRegion *subs, - bool qext); + bool qext, bool edid); #endif diff --git 
a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index 1e601c1a3b..ab2e369b28 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -153,7 +153,7 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) /* add stdvga mmio regions */ pci_std_vga_mmio_region_init(vga, OBJECT(vvga), &vpci_dev->modern_bar, - vvga->vga_mrs, true); + vvga->vga_mrs, true, false); vga->con = g->scanout[0].con; graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga); diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c index 5d9743ef93..dc368179d1 100644 --- a/hw/usb/dev-hub.c +++ b/hw/usb/dev-hub.c @@ -191,6 +191,10 @@ static void usb_hub_detach(USBPort *port1) port->wPortStatus &= ~PORT_STAT_ENABLE; port->wPortChange |= PORT_STAT_C_ENABLE; } + if (port->wPortStatus & PORT_STAT_SUSPEND) { + port->wPortStatus &= ~PORT_STAT_SUSPEND; + port->wPortChange |= PORT_STAT_C_SUSPEND; + } usb_wakeup(s->intr, 0); } diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index 3fdc4b0da1..00a3691bae 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -1568,6 +1568,7 @@ static void usb_mtp_handle_control(USBDevice *dev, USBPacket *p, if (s->write_pending) { g_free(s->dataset.filename); s->write_pending = false; + s->dataset.size = 0; } usb_mtp_data_free(s->data_out); s->data_out = NULL; @@ -1665,13 +1666,14 @@ static void usb_mtp_write_data(MTPState *s) goto success; } - rc = write_retry(d->fd, d->data, s->dataset.size); - if (!rc) { + rc = write_retry(d->fd, d->data, d->offset); + if (rc != d->offset) { usb_mtp_queue_result(s, RES_STORE_FULL, d->trans, 0, 0, 0, 0); goto done; } - if (rc != s->dataset.size) { + /* Only for < 4G file sizes */ + if (s->dataset.size != 0xFFFFFFFF && rc != s->dataset.size) { usb_mtp_queue_result(s, RES_INCOMPLETE_TRANSFER, d->trans, 0, 0, 0, 0); goto done; @@ -1692,6 +1694,7 @@ done: } free: g_free(s->dataset.filename); + s->dataset.size = 0; g_free(path); s->write_pending = false; } diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 
98da5f0f04..66656a1133 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -1253,12 +1253,12 @@ static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion) /* set a timer for EOF */ static void ohci_eof_timer(OHCIState *ohci) { - ohci->sof_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); timer_mod(ohci->eof_timer, ohci->sof_time + usb_frame_time); } /* Set a timer for EOF and generate a SOF event */ static void ohci_sof(OHCIState *ohci) { + ohci->sof_time += usb_frame_time; ohci_eof_timer(ohci); ohci_set_interrupt(ohci, OHCI_INTR_SF); } @@ -1362,6 +1362,7 @@ static int ohci_bus_start(OHCIState *ohci) * can meet some race conditions */ + ohci->sof_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); ohci_eof_timer(ohci); return 1; @@ -1476,6 +1477,9 @@ static uint32_t ohci_get_frame_remaining(OHCIState *ohci) * set already. */ tks = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - ohci->sof_time; + if (tks < 0) { + tks = 0; + } /* avoid muldiv if possible */ if (tks >= usb_frame_time) diff --git a/include/block/block.h b/include/block/block.h index 4edc1e8afa..b189cf422e 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -184,6 +184,7 @@ typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; typedef struct BDRVReopenState { BlockDriverState *bs; int flags; + BlockdevDetectZeroesOptions detect_zeroes; uint64_t perm, shared_perm; QDict *options; QDict *explicit_options; diff --git a/include/hw/display/edid.h b/include/hw/display/edid.h new file mode 100644 index 0000000000..bd51d26916 --- /dev/null +++ b/include/hw/display/edid.h @@ -0,0 +1,27 @@ +#ifndef EDID_H +#define EDID_H + +#include "hw/hw.h" + +typedef struct qemu_edid_info { + const char *vendor; + const char *name; + const char *serial; + uint32_t dpi; + uint32_t prefx; + uint32_t prefy; + uint32_t maxx; + uint32_t maxy; +} qemu_edid_info; + +void qemu_edid_generate(uint8_t *edid, size_t size, + qemu_edid_info *info); +size_t qemu_edid_size(uint8_t *edid); 
+void qemu_edid_region_io(MemoryRegion *region, Object *owner, + uint8_t *edid, size_t size); + +#define DEFINE_EDID_PROPERTIES(_state, _edid_info) \ + DEFINE_PROP_UINT32("xres", _state, _edid_info.prefx, 0), \ + DEFINE_PROP_UINT32("yres", _state, _edid_info.prefy, 0) + +#endif /* EDID_H */ diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index f1fd0f8736..a24d0dd566 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -51,8 +51,9 @@ struct VMStateDescription; * Devices are constructed in two stages, * 1) object instantiation via object_initialize() and * 2) device realization via #DeviceState:realized property. - * The former may not fail (it might assert or exit), the latter may return - * error information to the caller and must be re-entrant. + * The former may not fail (and must not abort or exit, since it is called + * during device introspection already), and the latter may return error + * information to the caller and must be re-entrant. * Trivial field initializations should go into #TypeInfo.instance_init. * Operations depending on @props static properties should go into @realize. * After successful realization, setting static properties will fail. diff --git a/include/qemu/qht.h b/include/qemu/qht.h index c9a11cc29a..758c7ac6c8 100644 --- a/include/qemu/qht.h +++ b/include/qemu/qht.h @@ -43,7 +43,8 @@ struct qht_stats { }; typedef bool (*qht_lookup_func_t)(const void *obj, const void *userp); -typedef void (*qht_iter_func_t)(struct qht *ht, void *p, uint32_t h, void *up); +typedef void (*qht_iter_func_t)(void *p, uint32_t h, void *up); +typedef bool (*qht_iter_bool_func_t)(void *p, uint32_t h, void *up); #define QHT_MODE_AUTO_RESIZE 0x1 /* auto-resize when heavily loaded */ #define QHT_MODE_RAW_MUTEXES 0x2 /* bypass the profiler (QSP) */ @@ -103,7 +104,7 @@ bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing); * Returns the corresponding pointer when a match is found. * Returns NULL otherwise. 
*/ -void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash, +void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash, qht_lookup_func_t func); /** @@ -114,7 +115,7 @@ void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash, * * Calls qht_lookup_custom() using @ht's default comparison function. */ -void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash); +void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash); /** * qht_remove - remove a pointer from the hash table @@ -179,10 +180,27 @@ bool qht_resize(struct qht *ht, size_t n_elems); * * Each time it is called, user-provided @func is passed a pointer-hash pair, * plus @userp. + * + * Note: @ht cannot be accessed from @func + * See also: qht_iter_remove() */ void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp); /** + * qht_iter_remove - Iterate over a QHT, optionally removing entries + * @ht: QHT to be iterated over + * @func: function to be called for each entry in QHT + * @userp: additional pointer to be passed to @func + * + * Each time it is called, user-provided @func is passed a pointer-hash pair, + * plus @userp. If @func returns true, the pointer-hash pair is removed. + * + * Note: @ht cannot be accessed from @func + * See also: qht_iter() + */ +void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp); + +/** * qht_statistics_init - Gather statistics from a QHT * @ht: QHT to gather statistics from * @stats: pointer to a &struct qht_stats to be filled in @@ -193,7 +211,7 @@ void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp); * When done with @stats, pass the struct to qht_statistics_destroy(). * Failing to do this will leak memory. 
*/ -void qht_statistics_init(struct qht *ht, struct qht_stats *stats); +void qht_statistics_init(const struct qht *ht, struct qht_stats *stats); /** * qht_statistics_destroy - Destroy a &struct qht_stats diff --git a/include/qemu/units.h b/include/qemu/units.h index 692db3fbb2..68a7758650 100644 --- a/include/qemu/units.h +++ b/include/qemu/units.h @@ -17,4 +17,59 @@ #define PiB (INT64_C(1) << 50) #define EiB (INT64_C(1) << 60) +#define S_1KiB 1024 +#define S_2KiB 2048 +#define S_4KiB 4096 +#define S_8KiB 8192 +#define S_16KiB 16384 +#define S_32KiB 32768 +#define S_64KiB 65536 +#define S_128KiB 131072 +#define S_256KiB 262144 +#define S_512KiB 524288 +#define S_1MiB 1048576 +#define S_2MiB 2097152 +#define S_4MiB 4194304 +#define S_8MiB 8388608 +#define S_16MiB 16777216 +#define S_32MiB 33554432 +#define S_64MiB 67108864 +#define S_128MiB 134217728 +#define S_256MiB 268435456 +#define S_512MiB 536870912 +#define S_1GiB 1073741824 +#define S_2GiB 2147483648 +#define S_4GiB 4294967296 +#define S_8GiB 8589934592 +#define S_16GiB 17179869184 +#define S_32GiB 34359738368 +#define S_64GiB 68719476736 +#define S_128GiB 137438953472 +#define S_256GiB 274877906944 +#define S_512GiB 549755813888 +#define S_1TiB 1099511627776 +#define S_2TiB 2199023255552 +#define S_4TiB 4398046511104 +#define S_8TiB 8796093022208 +#define S_16TiB 17592186044416 +#define S_32TiB 35184372088832 +#define S_64TiB 70368744177664 +#define S_128TiB 140737488355328 +#define S_256TiB 281474976710656 +#define S_512TiB 562949953421312 +#define S_1PiB 1125899906842624 +#define S_2PiB 2251799813685248 +#define S_4PiB 4503599627370496 +#define S_8PiB 9007199254740992 +#define S_16PiB 18014398509481984 +#define S_32PiB 36028797018963968 +#define S_64PiB 72057594037927936 +#define S_128PiB 144115188075855872 +#define S_256PiB 288230376151711744 +#define S_512PiB 576460752303423488 +#define S_1EiB 1152921504606846976 +#define S_2EiB 2305843009213693952 +#define S_4EiB 4611686018427387904 +#define S_8EiB 
9223372036854775808 + #endif diff --git a/linux-user/Makefile.objs b/linux-user/Makefile.objs index b5dfb71f25..769b8d8336 100644 --- a/linux-user/Makefile.objs +++ b/linux-user/Makefile.objs @@ -1,7 +1,7 @@ obj-y = main.o syscall.o strace.o mmap.o signal.o \ elfload.o linuxload.o uaccess.o uname.o \ safe-syscall.o $(TARGET_ABI_DIR)/signal.o \ - $(TARGET_ABI_DIR)/cpu_loop.o exit.o + $(TARGET_ABI_DIR)/cpu_loop.o exit.o fd-trans.o obj-$(TARGET_HAS_BFLT) += flatload.o obj-$(TARGET_I386) += vm86.o diff --git a/linux-user/elfload.c b/linux-user/elfload.c index e97c4cde49..10bca65b99 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1439,7 +1439,10 @@ struct exec #define QMAGIC 0314 /* Necessary parameters */ -#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE +#define TARGET_ELF_EXEC_PAGESIZE \ + (((eppnt->p_align & ~qemu_host_page_mask) != 0) ? \ + TARGET_PAGE_SIZE : MAX(qemu_host_page_size, TARGET_PAGE_SIZE)) +#define TARGET_ELF_PAGELENGTH(_v) ROUND_UP((_v), TARGET_ELF_EXEC_PAGESIZE) #define TARGET_ELF_PAGESTART(_v) ((_v) & \ ~(abi_ulong)(TARGET_ELF_EXEC_PAGESIZE-1)) #define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1)) @@ -2281,7 +2284,7 @@ static void load_elf_image(const char *image_name, int image_fd, for (i = 0; i < ehdr->e_phnum; i++) { struct elf_phdr *eppnt = phdr + i; if (eppnt->p_type == PT_LOAD) { - abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em; + abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em, vaddr_len; int elf_prot = 0; if (eppnt->p_flags & PF_R) elf_prot = PROT_READ; @@ -2291,8 +2294,9 @@ static void load_elf_image(const char *image_name, int image_fd, vaddr = load_bias + eppnt->p_vaddr; vaddr_po = TARGET_ELF_PAGEOFFSET(vaddr); vaddr_ps = TARGET_ELF_PAGESTART(vaddr); + vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po); - error = target_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po, + error = target_mmap(vaddr_ps, vaddr_len, elf_prot, MAP_PRIVATE | MAP_FIXED, image_fd, eppnt->p_offset - 
vaddr_po); if (error == -1) { diff --git a/linux-user/fd-trans.c b/linux-user/fd-trans.c new file mode 100644 index 0000000000..216b9f0614 --- /dev/null +++ b/linux-user/fd-trans.c @@ -0,0 +1,1409 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" + +#include <sys/signalfd.h> +#include <linux/unistd.h> +#include <linux/audit.h> +#ifdef CONFIG_INOTIFY +#include <sys/inotify.h> +#endif +#include <linux/netlink.h> +#ifdef CONFIG_RTNETLINK +#include <linux/rtnetlink.h> +#include <linux/if_bridge.h> +#endif +#include "qemu.h" +#include "fd-trans.h" + +enum { + QEMU_IFLA_BR_UNSPEC, + QEMU_IFLA_BR_FORWARD_DELAY, + QEMU_IFLA_BR_HELLO_TIME, + QEMU_IFLA_BR_MAX_AGE, + QEMU_IFLA_BR_AGEING_TIME, + QEMU_IFLA_BR_STP_STATE, + QEMU_IFLA_BR_PRIORITY, + QEMU_IFLA_BR_VLAN_FILTERING, + QEMU_IFLA_BR_VLAN_PROTOCOL, + QEMU_IFLA_BR_GROUP_FWD_MASK, + QEMU_IFLA_BR_ROOT_ID, + QEMU_IFLA_BR_BRIDGE_ID, + QEMU_IFLA_BR_ROOT_PORT, + QEMU_IFLA_BR_ROOT_PATH_COST, + QEMU_IFLA_BR_TOPOLOGY_CHANGE, + QEMU_IFLA_BR_TOPOLOGY_CHANGE_DETECTED, + QEMU_IFLA_BR_HELLO_TIMER, + QEMU_IFLA_BR_TCN_TIMER, + QEMU_IFLA_BR_TOPOLOGY_CHANGE_TIMER, + QEMU_IFLA_BR_GC_TIMER, + QEMU_IFLA_BR_GROUP_ADDR, + QEMU_IFLA_BR_FDB_FLUSH, + QEMU_IFLA_BR_MCAST_ROUTER, + QEMU_IFLA_BR_MCAST_SNOOPING, + QEMU_IFLA_BR_MCAST_QUERY_USE_IFADDR, + QEMU_IFLA_BR_MCAST_QUERIER, + QEMU_IFLA_BR_MCAST_HASH_ELASTICITY, + 
QEMU_IFLA_BR_MCAST_HASH_MAX, + QEMU_IFLA_BR_MCAST_LAST_MEMBER_CNT, + QEMU_IFLA_BR_MCAST_STARTUP_QUERY_CNT, + QEMU_IFLA_BR_MCAST_LAST_MEMBER_INTVL, + QEMU_IFLA_BR_MCAST_MEMBERSHIP_INTVL, + QEMU_IFLA_BR_MCAST_QUERIER_INTVL, + QEMU_IFLA_BR_MCAST_QUERY_INTVL, + QEMU_IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, + QEMU_IFLA_BR_MCAST_STARTUP_QUERY_INTVL, + QEMU_IFLA_BR_NF_CALL_IPTABLES, + QEMU_IFLA_BR_NF_CALL_IP6TABLES, + QEMU_IFLA_BR_NF_CALL_ARPTABLES, + QEMU_IFLA_BR_VLAN_DEFAULT_PVID, + QEMU_IFLA_BR_PAD, + QEMU_IFLA_BR_VLAN_STATS_ENABLED, + QEMU_IFLA_BR_MCAST_STATS_ENABLED, + QEMU_IFLA_BR_MCAST_IGMP_VERSION, + QEMU_IFLA_BR_MCAST_MLD_VERSION, + QEMU___IFLA_BR_MAX, +}; + +enum { + QEMU_IFLA_UNSPEC, + QEMU_IFLA_ADDRESS, + QEMU_IFLA_BROADCAST, + QEMU_IFLA_IFNAME, + QEMU_IFLA_MTU, + QEMU_IFLA_LINK, + QEMU_IFLA_QDISC, + QEMU_IFLA_STATS, + QEMU_IFLA_COST, + QEMU_IFLA_PRIORITY, + QEMU_IFLA_MASTER, + QEMU_IFLA_WIRELESS, + QEMU_IFLA_PROTINFO, + QEMU_IFLA_TXQLEN, + QEMU_IFLA_MAP, + QEMU_IFLA_WEIGHT, + QEMU_IFLA_OPERSTATE, + QEMU_IFLA_LINKMODE, + QEMU_IFLA_LINKINFO, + QEMU_IFLA_NET_NS_PID, + QEMU_IFLA_IFALIAS, + QEMU_IFLA_NUM_VF, + QEMU_IFLA_VFINFO_LIST, + QEMU_IFLA_STATS64, + QEMU_IFLA_VF_PORTS, + QEMU_IFLA_PORT_SELF, + QEMU_IFLA_AF_SPEC, + QEMU_IFLA_GROUP, + QEMU_IFLA_NET_NS_FD, + QEMU_IFLA_EXT_MASK, + QEMU_IFLA_PROMISCUITY, + QEMU_IFLA_NUM_TX_QUEUES, + QEMU_IFLA_NUM_RX_QUEUES, + QEMU_IFLA_CARRIER, + QEMU_IFLA_PHYS_PORT_ID, + QEMU_IFLA_CARRIER_CHANGES, + QEMU_IFLA_PHYS_SWITCH_ID, + QEMU_IFLA_LINK_NETNSID, + QEMU_IFLA_PHYS_PORT_NAME, + QEMU_IFLA_PROTO_DOWN, + QEMU_IFLA_GSO_MAX_SEGS, + QEMU_IFLA_GSO_MAX_SIZE, + QEMU_IFLA_PAD, + QEMU_IFLA_XDP, + QEMU_IFLA_EVENT, + QEMU_IFLA_NEW_NETNSID, + QEMU_IFLA_IF_NETNSID, + QEMU_IFLA_CARRIER_UP_COUNT, + QEMU_IFLA_CARRIER_DOWN_COUNT, + QEMU_IFLA_NEW_IFINDEX, + QEMU___IFLA_MAX +}; + +enum { + QEMU_IFLA_BRPORT_UNSPEC, + QEMU_IFLA_BRPORT_STATE, + QEMU_IFLA_BRPORT_PRIORITY, + QEMU_IFLA_BRPORT_COST, + QEMU_IFLA_BRPORT_MODE, + QEMU_IFLA_BRPORT_GUARD, + 
/*
 * Attribute codes switched on by host_to_target_data_tun_nlattr().
 * Values are implicit: consecutive from 0 in declaration order, so the
 * order of the entries is significant.
 */
enum {
    QEMU_IFLA_TUN_UNSPEC,
    QEMU_IFLA_TUN_OWNER,
    QEMU_IFLA_TUN_GROUP,
    QEMU_IFLA_TUN_TYPE,
    QEMU_IFLA_TUN_PI,
    QEMU_IFLA_TUN_VNET_HDR,
    QEMU_IFLA_TUN_PERSIST,
    QEMU_IFLA_TUN_MULTI_QUEUE,
    QEMU_IFLA_TUN_NUM_QUEUES,
    QEMU_IFLA_TUN_NUM_DISABLED_QUEUES,
    QEMU___IFLA_TUN_MAX,
};

/*
 * Attribute codes switched on by host_to_target_data_linkinfo_nlattr().
 * The KIND / SLAVE_KIND strings select which converter is applied to the
 * nested DATA / SLAVE_DATA attributes.
 */
enum {
    QEMU_IFLA_INFO_UNSPEC,
    QEMU_IFLA_INFO_KIND,
    QEMU_IFLA_INFO_DATA,
    QEMU_IFLA_INFO_XSTATS,
    QEMU_IFLA_INFO_SLAVE_KIND,
    QEMU_IFLA_INFO_SLAVE_DATA,
    QEMU___IFLA_INFO_MAX,
};

/* Attribute codes switched on by host_to_target_data_inet_nlattr(). */
enum {
    QEMU_IFLA_INET_UNSPEC,
    QEMU_IFLA_INET_CONF,
    QEMU___IFLA_INET_MAX,
};

/* Attribute codes switched on by host_to_target_data_inet6_nlattr(). */
enum {
    QEMU_IFLA_INET6_UNSPEC,
    QEMU_IFLA_INET6_FLAGS,
    QEMU_IFLA_INET6_CONF,
    QEMU_IFLA_INET6_STATS,
    QEMU_IFLA_INET6_MCAST,
    QEMU_IFLA_INET6_CACHEINFO,
    QEMU_IFLA_INET6_ICMP6STATS,
    QEMU_IFLA_INET6_TOKEN,
    QEMU_IFLA_INET6_ADDR_GEN_MODE,
    QEMU___IFLA_INET6_MAX
};

/*
 * Attribute codes switched on by host_to_target_data_xdp_nlattr(); that
 * function only handles ATTACHED and PROG_ID, the others fall into its
 * "unknown" branch.
 */
enum {
    QEMU_IFLA_XDP_UNSPEC,
    QEMU_IFLA_XDP_FD,
    QEMU_IFLA_XDP_ATTACHED,
    QEMU_IFLA_XDP_FLAGS,
    QEMU_IFLA_XDP_PROG_ID,
    QEMU___IFLA_XDP_MAX,
};
QEMU_RTA_GATEWAY, + QEMU_RTA_PRIORITY, + QEMU_RTA_PREFSRC, + QEMU_RTA_METRICS, + QEMU_RTA_MULTIPATH, + QEMU_RTA_PROTOINFO, /* no longer used */ + QEMU_RTA_FLOW, + QEMU_RTA_CACHEINFO, + QEMU_RTA_SESSION, /* no longer used */ + QEMU_RTA_MP_ALGO, /* no longer used */ + QEMU_RTA_TABLE, + QEMU_RTA_MARK, + QEMU_RTA_MFC_STATS, + QEMU_RTA_VIA, + QEMU_RTA_NEWDST, + QEMU_RTA_PREF, + QEMU_RTA_ENCAP_TYPE, + QEMU_RTA_ENCAP, + QEMU_RTA_EXPIRES, + QEMU_RTA_PAD, + QEMU_RTA_UID, + QEMU_RTA_TTL_PROPAGATE, + QEMU_RTA_IP_PROTO, + QEMU_RTA_SPORT, + QEMU_RTA_DPORT, + QEMU___RTA_MAX +}; + +TargetFdTrans **target_fd_trans; +unsigned int target_fd_max; + +static void tswap_nlmsghdr(struct nlmsghdr *nlh) +{ + nlh->nlmsg_len = tswap32(nlh->nlmsg_len); + nlh->nlmsg_type = tswap16(nlh->nlmsg_type); + nlh->nlmsg_flags = tswap16(nlh->nlmsg_flags); + nlh->nlmsg_seq = tswap32(nlh->nlmsg_seq); + nlh->nlmsg_pid = tswap32(nlh->nlmsg_pid); +} + +static abi_long host_to_target_for_each_nlmsg(struct nlmsghdr *nlh, + size_t len, + abi_long (*host_to_target_nlmsg) + (struct nlmsghdr *)) +{ + uint32_t nlmsg_len; + abi_long ret; + + while (len > sizeof(struct nlmsghdr)) { + + nlmsg_len = nlh->nlmsg_len; + if (nlmsg_len < sizeof(struct nlmsghdr) || + nlmsg_len > len) { + break; + } + + switch (nlh->nlmsg_type) { + case NLMSG_DONE: + tswap_nlmsghdr(nlh); + return 0; + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + { + struct nlmsgerr *e = NLMSG_DATA(nlh); + e->error = tswap32(e->error); + tswap_nlmsghdr(&e->msg); + tswap_nlmsghdr(nlh); + return 0; + } + default: + ret = host_to_target_nlmsg(nlh); + if (ret < 0) { + tswap_nlmsghdr(nlh); + return ret; + } + break; + } + tswap_nlmsghdr(nlh); + len -= NLMSG_ALIGN(nlmsg_len); + nlh = (struct nlmsghdr *)(((char*)nlh) + NLMSG_ALIGN(nlmsg_len)); + } + return 0; +} + +static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh, + size_t len, + abi_long (*target_to_host_nlmsg) + (struct nlmsghdr *)) +{ + int ret; + + while (len > sizeof(struct nlmsghdr)) { + if 
(tswap32(nlh->nlmsg_len) < sizeof(struct nlmsghdr) || + tswap32(nlh->nlmsg_len) > len) { + break; + } + tswap_nlmsghdr(nlh); + switch (nlh->nlmsg_type) { + case NLMSG_DONE: + return 0; + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + { + struct nlmsgerr *e = NLMSG_DATA(nlh); + e->error = tswap32(e->error); + tswap_nlmsghdr(&e->msg); + return 0; + } + default: + ret = target_to_host_nlmsg(nlh); + if (ret < 0) { + return ret; + } + } + len -= NLMSG_ALIGN(nlh->nlmsg_len); + nlh = (struct nlmsghdr *)(((char *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); + } + return 0; +} + +#ifdef CONFIG_RTNETLINK +static abi_long host_to_target_for_each_nlattr(struct nlattr *nlattr, + size_t len, void *context, + abi_long (*host_to_target_nlattr) + (struct nlattr *, + void *context)) +{ + unsigned short nla_len; + abi_long ret; + + while (len > sizeof(struct nlattr)) { + nla_len = nlattr->nla_len; + if (nla_len < sizeof(struct nlattr) || + nla_len > len) { + break; + } + ret = host_to_target_nlattr(nlattr, context); + nlattr->nla_len = tswap16(nlattr->nla_len); + nlattr->nla_type = tswap16(nlattr->nla_type); + if (ret < 0) { + return ret; + } + len -= NLA_ALIGN(nla_len); + nlattr = (struct nlattr *)(((char *)nlattr) + NLA_ALIGN(nla_len)); + } + return 0; +} + +static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr, + size_t len, + abi_long (*host_to_target_rtattr) + (struct rtattr *)) +{ + unsigned short rta_len; + abi_long ret; + + while (len > sizeof(struct rtattr)) { + rta_len = rtattr->rta_len; + if (rta_len < sizeof(struct rtattr) || + rta_len > len) { + break; + } + ret = host_to_target_rtattr(rtattr); + rtattr->rta_len = tswap16(rtattr->rta_len); + rtattr->rta_type = tswap16(rtattr->rta_type); + if (ret < 0) { + return ret; + } + len -= RTA_ALIGN(rta_len); + rtattr = (struct rtattr *)(((char *)rtattr) + RTA_ALIGN(rta_len)); + } + return 0; +} + +#define NLA_DATA(nla) ((void *)((char *)(nla)) + NLA_HDRLEN) + +static abi_long host_to_target_data_bridge_nlattr(struct 
nlattr *nlattr, + void *context) +{ + uint16_t *u16; + uint32_t *u32; + uint64_t *u64; + + switch (nlattr->nla_type) { + /* no data */ + case QEMU_IFLA_BR_FDB_FLUSH: + break; + /* binary */ + case QEMU_IFLA_BR_GROUP_ADDR: + break; + /* uint8_t */ + case QEMU_IFLA_BR_VLAN_FILTERING: + case QEMU_IFLA_BR_TOPOLOGY_CHANGE: + case QEMU_IFLA_BR_TOPOLOGY_CHANGE_DETECTED: + case QEMU_IFLA_BR_MCAST_ROUTER: + case QEMU_IFLA_BR_MCAST_SNOOPING: + case QEMU_IFLA_BR_MCAST_QUERY_USE_IFADDR: + case QEMU_IFLA_BR_MCAST_QUERIER: + case QEMU_IFLA_BR_NF_CALL_IPTABLES: + case QEMU_IFLA_BR_NF_CALL_IP6TABLES: + case QEMU_IFLA_BR_NF_CALL_ARPTABLES: + case QEMU_IFLA_BR_VLAN_STATS_ENABLED: + case QEMU_IFLA_BR_MCAST_STATS_ENABLED: + case QEMU_IFLA_BR_MCAST_IGMP_VERSION: + case QEMU_IFLA_BR_MCAST_MLD_VERSION: + break; + /* uint16_t */ + case QEMU_IFLA_BR_PRIORITY: + case QEMU_IFLA_BR_VLAN_PROTOCOL: + case QEMU_IFLA_BR_GROUP_FWD_MASK: + case QEMU_IFLA_BR_ROOT_PORT: + case QEMU_IFLA_BR_VLAN_DEFAULT_PVID: + u16 = NLA_DATA(nlattr); + *u16 = tswap16(*u16); + break; + /* uint32_t */ + case QEMU_IFLA_BR_FORWARD_DELAY: + case QEMU_IFLA_BR_HELLO_TIME: + case QEMU_IFLA_BR_MAX_AGE: + case QEMU_IFLA_BR_AGEING_TIME: + case QEMU_IFLA_BR_STP_STATE: + case QEMU_IFLA_BR_ROOT_PATH_COST: + case QEMU_IFLA_BR_MCAST_HASH_ELASTICITY: + case QEMU_IFLA_BR_MCAST_HASH_MAX: + case QEMU_IFLA_BR_MCAST_LAST_MEMBER_CNT: + case QEMU_IFLA_BR_MCAST_STARTUP_QUERY_CNT: + u32 = NLA_DATA(nlattr); + *u32 = tswap32(*u32); + break; + /* uint64_t */ + case QEMU_IFLA_BR_HELLO_TIMER: + case QEMU_IFLA_BR_TCN_TIMER: + case QEMU_IFLA_BR_GC_TIMER: + case QEMU_IFLA_BR_TOPOLOGY_CHANGE_TIMER: + case QEMU_IFLA_BR_MCAST_LAST_MEMBER_INTVL: + case QEMU_IFLA_BR_MCAST_MEMBERSHIP_INTVL: + case QEMU_IFLA_BR_MCAST_QUERIER_INTVL: + case QEMU_IFLA_BR_MCAST_QUERY_INTVL: + case QEMU_IFLA_BR_MCAST_QUERY_RESPONSE_INTVL: + case QEMU_IFLA_BR_MCAST_STARTUP_QUERY_INTVL: + u64 = NLA_DATA(nlattr); + *u64 = tswap64(*u64); + break; + /* ifla_bridge_id: uin8_t[] */ + 
case QEMU_IFLA_BR_ROOT_ID: + case QEMU_IFLA_BR_BRIDGE_ID: + break; + default: + gemu_log("Unknown QEMU_IFLA_BR type %d\n", nlattr->nla_type); + break; + } + return 0; +} + +static abi_long host_to_target_slave_data_bridge_nlattr(struct nlattr *nlattr, + void *context) +{ + uint16_t *u16; + uint32_t *u32; + uint64_t *u64; + + switch (nlattr->nla_type) { + /* uint8_t */ + case QEMU_IFLA_BRPORT_STATE: + case QEMU_IFLA_BRPORT_MODE: + case QEMU_IFLA_BRPORT_GUARD: + case QEMU_IFLA_BRPORT_PROTECT: + case QEMU_IFLA_BRPORT_FAST_LEAVE: + case QEMU_IFLA_BRPORT_LEARNING: + case QEMU_IFLA_BRPORT_UNICAST_FLOOD: + case QEMU_IFLA_BRPORT_PROXYARP: + case QEMU_IFLA_BRPORT_LEARNING_SYNC: + case QEMU_IFLA_BRPORT_PROXYARP_WIFI: + case QEMU_IFLA_BRPORT_TOPOLOGY_CHANGE_ACK: + case QEMU_IFLA_BRPORT_CONFIG_PENDING: + case QEMU_IFLA_BRPORT_MULTICAST_ROUTER: + case QEMU_IFLA_BRPORT_MCAST_FLOOD: + case QEMU_IFLA_BRPORT_MCAST_TO_UCAST: + case QEMU_IFLA_BRPORT_VLAN_TUNNEL: + case QEMU_IFLA_BRPORT_BCAST_FLOOD: + case QEMU_IFLA_BRPORT_NEIGH_SUPPRESS: + break; + /* uint16_t */ + case QEMU_IFLA_BRPORT_PRIORITY: + case QEMU_IFLA_BRPORT_DESIGNATED_PORT: + case QEMU_IFLA_BRPORT_DESIGNATED_COST: + case QEMU_IFLA_BRPORT_ID: + case QEMU_IFLA_BRPORT_NO: + case QEMU_IFLA_BRPORT_GROUP_FWD_MASK: + u16 = NLA_DATA(nlattr); + *u16 = tswap16(*u16); + break; + /* uin32_t */ + case QEMU_IFLA_BRPORT_COST: + u32 = NLA_DATA(nlattr); + *u32 = tswap32(*u32); + break; + /* uint64_t */ + case QEMU_IFLA_BRPORT_MESSAGE_AGE_TIMER: + case QEMU_IFLA_BRPORT_FORWARD_DELAY_TIMER: + case QEMU_IFLA_BRPORT_HOLD_TIMER: + u64 = NLA_DATA(nlattr); + *u64 = tswap64(*u64); + break; + /* ifla_bridge_id: uint8_t[] */ + case QEMU_IFLA_BRPORT_ROOT_ID: + case QEMU_IFLA_BRPORT_BRIDGE_ID: + break; + default: + gemu_log("Unknown QEMU_IFLA_BRPORT type %d\n", nlattr->nla_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_tun_nlattr(struct nlattr *nlattr, + void *context) +{ + uint32_t *u32; + + switch (nlattr->nla_type) { + 
/* uint8_t */ + case QEMU_IFLA_TUN_TYPE: + case QEMU_IFLA_TUN_PI: + case QEMU_IFLA_TUN_VNET_HDR: + case QEMU_IFLA_TUN_PERSIST: + case QEMU_IFLA_TUN_MULTI_QUEUE: + break; + /* uint32_t */ + case QEMU_IFLA_TUN_NUM_QUEUES: + case QEMU_IFLA_TUN_NUM_DISABLED_QUEUES: + case QEMU_IFLA_TUN_OWNER: + case QEMU_IFLA_TUN_GROUP: + u32 = NLA_DATA(nlattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown QEMU_IFLA_TUN type %d\n", nlattr->nla_type); + break; + } + return 0; +} + +struct linkinfo_context { + int len; + char *name; + int slave_len; + char *slave_name; +}; + +static abi_long host_to_target_data_linkinfo_nlattr(struct nlattr *nlattr, + void *context) +{ + struct linkinfo_context *li_context = context; + + switch (nlattr->nla_type) { + /* string */ + case QEMU_IFLA_INFO_KIND: + li_context->name = NLA_DATA(nlattr); + li_context->len = nlattr->nla_len - NLA_HDRLEN; + break; + case QEMU_IFLA_INFO_SLAVE_KIND: + li_context->slave_name = NLA_DATA(nlattr); + li_context->slave_len = nlattr->nla_len - NLA_HDRLEN; + break; + /* stats */ + case QEMU_IFLA_INFO_XSTATS: + /* FIXME: only used by CAN */ + break; + /* nested */ + case QEMU_IFLA_INFO_DATA: + if (strncmp(li_context->name, "bridge", + li_context->len) == 0) { + return host_to_target_for_each_nlattr(NLA_DATA(nlattr), + nlattr->nla_len, + NULL, + host_to_target_data_bridge_nlattr); + } else if (strncmp(li_context->name, "tun", + li_context->len) == 0) { + return host_to_target_for_each_nlattr(NLA_DATA(nlattr), + nlattr->nla_len, + NULL, + host_to_target_data_tun_nlattr); + } else { + gemu_log("Unknown QEMU_IFLA_INFO_KIND %s\n", li_context->name); + } + break; + case QEMU_IFLA_INFO_SLAVE_DATA: + if (strncmp(li_context->slave_name, "bridge", + li_context->slave_len) == 0) { + return host_to_target_for_each_nlattr(NLA_DATA(nlattr), + nlattr->nla_len, + NULL, + host_to_target_slave_data_bridge_nlattr); + } else { + gemu_log("Unknown QEMU_IFLA_INFO_SLAVE_KIND %s\n", + li_context->slave_name); + } + break; + 
default: + gemu_log("Unknown host QEMU_IFLA_INFO type: %d\n", nlattr->nla_type); + break; + } + + return 0; +} + +static abi_long host_to_target_data_inet_nlattr(struct nlattr *nlattr, + void *context) +{ + uint32_t *u32; + int i; + + switch (nlattr->nla_type) { + case QEMU_IFLA_INET_CONF: + u32 = NLA_DATA(nlattr); + for (i = 0; i < (nlattr->nla_len - NLA_HDRLEN) / sizeof(*u32); + i++) { + u32[i] = tswap32(u32[i]); + } + break; + default: + gemu_log("Unknown host AF_INET type: %d\n", nlattr->nla_type); + } + return 0; +} + +static abi_long host_to_target_data_inet6_nlattr(struct nlattr *nlattr, + void *context) +{ + uint32_t *u32; + uint64_t *u64; + struct ifla_cacheinfo *ci; + int i; + + switch (nlattr->nla_type) { + /* binaries */ + case QEMU_IFLA_INET6_TOKEN: + break; + /* uint8_t */ + case QEMU_IFLA_INET6_ADDR_GEN_MODE: + break; + /* uint32_t */ + case QEMU_IFLA_INET6_FLAGS: + u32 = NLA_DATA(nlattr); + *u32 = tswap32(*u32); + break; + /* uint32_t[] */ + case QEMU_IFLA_INET6_CONF: + u32 = NLA_DATA(nlattr); + for (i = 0; i < (nlattr->nla_len - NLA_HDRLEN) / sizeof(*u32); + i++) { + u32[i] = tswap32(u32[i]); + } + break; + /* ifla_cacheinfo */ + case QEMU_IFLA_INET6_CACHEINFO: + ci = NLA_DATA(nlattr); + ci->max_reasm_len = tswap32(ci->max_reasm_len); + ci->tstamp = tswap32(ci->tstamp); + ci->reachable_time = tswap32(ci->reachable_time); + ci->retrans_time = tswap32(ci->retrans_time); + break; + /* uint64_t[] */ + case QEMU_IFLA_INET6_STATS: + case QEMU_IFLA_INET6_ICMP6STATS: + u64 = NLA_DATA(nlattr); + for (i = 0; i < (nlattr->nla_len - NLA_HDRLEN) / sizeof(*u64); + i++) { + u64[i] = tswap64(u64[i]); + } + break; + default: + gemu_log("Unknown host AF_INET6 type: %d\n", nlattr->nla_type); + } + return 0; +} + +static abi_long host_to_target_data_spec_nlattr(struct nlattr *nlattr, + void *context) +{ + switch (nlattr->nla_type) { + case AF_INET: + return host_to_target_for_each_nlattr(NLA_DATA(nlattr), nlattr->nla_len, + NULL, + host_to_target_data_inet_nlattr); + 
case AF_INET6: + return host_to_target_for_each_nlattr(NLA_DATA(nlattr), nlattr->nla_len, + NULL, + host_to_target_data_inet6_nlattr); + default: + gemu_log("Unknown host AF_SPEC type: %d\n", nlattr->nla_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_xdp_nlattr(struct nlattr *nlattr, + void *context) +{ + uint32_t *u32; + + switch (nlattr->nla_type) { + /* uint8_t */ + case QEMU_IFLA_XDP_ATTACHED: + break; + /* uint32_t */ + case QEMU_IFLA_XDP_PROG_ID: + u32 = NLA_DATA(nlattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown host XDP type: %d\n", nlattr->nla_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_link_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct rtnl_link_stats *st; + struct rtnl_link_stats64 *st64; + struct rtnl_link_ifmap *map; + struct linkinfo_context li_context; + + switch (rtattr->rta_type) { + /* binary stream */ + case QEMU_IFLA_ADDRESS: + case QEMU_IFLA_BROADCAST: + /* string */ + case QEMU_IFLA_IFNAME: + case QEMU_IFLA_QDISC: + break; + /* uin8_t */ + case QEMU_IFLA_OPERSTATE: + case QEMU_IFLA_LINKMODE: + case QEMU_IFLA_CARRIER: + case QEMU_IFLA_PROTO_DOWN: + break; + /* uint32_t */ + case QEMU_IFLA_MTU: + case QEMU_IFLA_LINK: + case QEMU_IFLA_WEIGHT: + case QEMU_IFLA_TXQLEN: + case QEMU_IFLA_CARRIER_CHANGES: + case QEMU_IFLA_NUM_RX_QUEUES: + case QEMU_IFLA_NUM_TX_QUEUES: + case QEMU_IFLA_PROMISCUITY: + case QEMU_IFLA_EXT_MASK: + case QEMU_IFLA_LINK_NETNSID: + case QEMU_IFLA_GROUP: + case QEMU_IFLA_MASTER: + case QEMU_IFLA_NUM_VF: + case QEMU_IFLA_GSO_MAX_SEGS: + case QEMU_IFLA_GSO_MAX_SIZE: + case QEMU_IFLA_CARRIER_UP_COUNT: + case QEMU_IFLA_CARRIER_DOWN_COUNT: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct rtnl_link_stats */ + case QEMU_IFLA_STATS: + st = RTA_DATA(rtattr); + st->rx_packets = tswap32(st->rx_packets); + st->tx_packets = tswap32(st->tx_packets); + st->rx_bytes = tswap32(st->rx_bytes); + st->tx_bytes = tswap32(st->tx_bytes); + 
st->rx_errors = tswap32(st->rx_errors); + st->tx_errors = tswap32(st->tx_errors); + st->rx_dropped = tswap32(st->rx_dropped); + st->tx_dropped = tswap32(st->tx_dropped); + st->multicast = tswap32(st->multicast); + st->collisions = tswap32(st->collisions); + + /* detailed rx_errors: */ + st->rx_length_errors = tswap32(st->rx_length_errors); + st->rx_over_errors = tswap32(st->rx_over_errors); + st->rx_crc_errors = tswap32(st->rx_crc_errors); + st->rx_frame_errors = tswap32(st->rx_frame_errors); + st->rx_fifo_errors = tswap32(st->rx_fifo_errors); + st->rx_missed_errors = tswap32(st->rx_missed_errors); + + /* detailed tx_errors */ + st->tx_aborted_errors = tswap32(st->tx_aborted_errors); + st->tx_carrier_errors = tswap32(st->tx_carrier_errors); + st->tx_fifo_errors = tswap32(st->tx_fifo_errors); + st->tx_heartbeat_errors = tswap32(st->tx_heartbeat_errors); + st->tx_window_errors = tswap32(st->tx_window_errors); + + /* for cslip etc */ + st->rx_compressed = tswap32(st->rx_compressed); + st->tx_compressed = tswap32(st->tx_compressed); + break; + /* struct rtnl_link_stats64 */ + case QEMU_IFLA_STATS64: + st64 = RTA_DATA(rtattr); + st64->rx_packets = tswap64(st64->rx_packets); + st64->tx_packets = tswap64(st64->tx_packets); + st64->rx_bytes = tswap64(st64->rx_bytes); + st64->tx_bytes = tswap64(st64->tx_bytes); + st64->rx_errors = tswap64(st64->rx_errors); + st64->tx_errors = tswap64(st64->tx_errors); + st64->rx_dropped = tswap64(st64->rx_dropped); + st64->tx_dropped = tswap64(st64->tx_dropped); + st64->multicast = tswap64(st64->multicast); + st64->collisions = tswap64(st64->collisions); + + /* detailed rx_errors: */ + st64->rx_length_errors = tswap64(st64->rx_length_errors); + st64->rx_over_errors = tswap64(st64->rx_over_errors); + st64->rx_crc_errors = tswap64(st64->rx_crc_errors); + st64->rx_frame_errors = tswap64(st64->rx_frame_errors); + st64->rx_fifo_errors = tswap64(st64->rx_fifo_errors); + st64->rx_missed_errors = tswap64(st64->rx_missed_errors); + + /* detailed 
tx_errors */ + st64->tx_aborted_errors = tswap64(st64->tx_aborted_errors); + st64->tx_carrier_errors = tswap64(st64->tx_carrier_errors); + st64->tx_fifo_errors = tswap64(st64->tx_fifo_errors); + st64->tx_heartbeat_errors = tswap64(st64->tx_heartbeat_errors); + st64->tx_window_errors = tswap64(st64->tx_window_errors); + + /* for cslip etc */ + st64->rx_compressed = tswap64(st64->rx_compressed); + st64->tx_compressed = tswap64(st64->tx_compressed); + break; + /* struct rtnl_link_ifmap */ + case QEMU_IFLA_MAP: + map = RTA_DATA(rtattr); + map->mem_start = tswap64(map->mem_start); + map->mem_end = tswap64(map->mem_end); + map->base_addr = tswap64(map->base_addr); + map->irq = tswap16(map->irq); + break; + /* nested */ + case QEMU_IFLA_LINKINFO: + memset(&li_context, 0, sizeof(li_context)); + return host_to_target_for_each_nlattr(RTA_DATA(rtattr), rtattr->rta_len, + &li_context, + host_to_target_data_linkinfo_nlattr); + case QEMU_IFLA_AF_SPEC: + return host_to_target_for_each_nlattr(RTA_DATA(rtattr), rtattr->rta_len, + NULL, + host_to_target_data_spec_nlattr); + case QEMU_IFLA_XDP: + return host_to_target_for_each_nlattr(RTA_DATA(rtattr), rtattr->rta_len, + NULL, + host_to_target_data_xdp_nlattr); + default: + gemu_log("Unknown host QEMU_IFLA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct ifa_cacheinfo *ci; + + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case IFA_ADDRESS: + case IFA_LOCAL: + break; + /* string */ + case IFA_LABEL: + break; + /* u32 */ + case IFA_FLAGS: + case IFA_BROADCAST: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct ifa_cacheinfo */ + case IFA_CACHEINFO: + ci = RTA_DATA(rtattr); + ci->ifa_prefered = tswap32(ci->ifa_prefered); + ci->ifa_valid = tswap32(ci->ifa_valid); + ci->cstamp = tswap32(ci->cstamp); + ci->tstamp = tswap32(ci->tstamp); + break; + default: + gemu_log("Unknown host IFA 
type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct rta_cacheinfo *ci; + + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case QEMU_RTA_GATEWAY: + case QEMU_RTA_DST: + case QEMU_RTA_PREFSRC: + break; + /* u8 */ + case QEMU_RTA_PREF: + break; + /* u32 */ + case QEMU_RTA_PRIORITY: + case QEMU_RTA_TABLE: + case QEMU_RTA_OIF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct rta_cacheinfo */ + case QEMU_RTA_CACHEINFO: + ci = RTA_DATA(rtattr); + ci->rta_clntref = tswap32(ci->rta_clntref); + ci->rta_lastuse = tswap32(ci->rta_lastuse); + ci->rta_expires = tswap32(ci->rta_expires); + ci->rta_error = tswap32(ci->rta_error); + ci->rta_used = tswap32(ci->rta_used); +#if defined(RTNETLINK_HAVE_PEERINFO) + ci->rta_id = tswap32(ci->rta_id); + ci->rta_ts = tswap32(ci->rta_ts); + ci->rta_tsage = tswap32(ci->rta_tsage); +#endif + break; + default: + gemu_log("Unknown host RTA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_link_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_link_rtattr); +} + +static abi_long host_to_target_addr_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_addr_rtattr); +} + +static abi_long host_to_target_route_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_route_rtattr); +} + +static abi_long host_to_target_data_route(struct nlmsghdr *nlh) +{ + uint32_t nlmsg_len; + struct ifinfomsg *ifi; + struct ifaddrmsg *ifa; + struct rtmsg *rtm; + + nlmsg_len = nlh->nlmsg_len; + switch (nlh->nlmsg_type) { + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_GETLINK: + if (nlh->nlmsg_len >= 
NLMSG_LENGTH(sizeof(*ifi))) { + ifi = NLMSG_DATA(nlh); + ifi->ifi_type = tswap16(ifi->ifi_type); + ifi->ifi_index = tswap32(ifi->ifi_index); + ifi->ifi_flags = tswap32(ifi->ifi_flags); + ifi->ifi_change = tswap32(ifi->ifi_change); + host_to_target_link_rtattr(IFLA_RTA(ifi), + nlmsg_len - NLMSG_LENGTH(sizeof(*ifi))); + } + break; + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_GETADDR: + if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*ifa))) { + ifa = NLMSG_DATA(nlh); + ifa->ifa_index = tswap32(ifa->ifa_index); + host_to_target_addr_rtattr(IFA_RTA(ifa), + nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); + } + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_GETROUTE: + if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*rtm))) { + rtm = NLMSG_DATA(nlh); + rtm->rtm_flags = tswap32(rtm->rtm_flags); + host_to_target_route_rtattr(RTM_RTA(rtm), + nlmsg_len - NLMSG_LENGTH(sizeof(*rtm))); + } + break; + default: + return -TARGET_EINVAL; + } + return 0; +} + +static inline abi_long host_to_target_nlmsg_route(struct nlmsghdr *nlh, + size_t len) +{ + return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_route); +} + +static abi_long target_to_host_for_each_rtattr(struct rtattr *rtattr, + size_t len, + abi_long (*target_to_host_rtattr) + (struct rtattr *)) +{ + abi_long ret; + + while (len >= sizeof(struct rtattr)) { + if (tswap16(rtattr->rta_len) < sizeof(struct rtattr) || + tswap16(rtattr->rta_len) > len) { + break; + } + rtattr->rta_len = tswap16(rtattr->rta_len); + rtattr->rta_type = tswap16(rtattr->rta_type); + ret = target_to_host_rtattr(rtattr); + if (ret < 0) { + return ret; + } + len -= RTA_ALIGN(rtattr->rta_len); + rtattr = (struct rtattr *)(((char *)rtattr) + + RTA_ALIGN(rtattr->rta_len)); + } + return 0; +} + +static abi_long target_to_host_data_link_rtattr(struct rtattr *rtattr) +{ + switch (rtattr->rta_type) { + default: + gemu_log("Unknown target QEMU_IFLA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long 
target_to_host_data_addr_rtattr(struct rtattr *rtattr) +{ + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case IFA_LOCAL: + case IFA_ADDRESS: + break; + default: + gemu_log("Unknown target IFA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long target_to_host_data_route_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case QEMU_RTA_DST: + case QEMU_RTA_SRC: + case QEMU_RTA_GATEWAY: + break; + /* u32 */ + case QEMU_RTA_PRIORITY: + case QEMU_RTA_OIF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown target RTA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static void target_to_host_link_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_link_rtattr); +} + +static void target_to_host_addr_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_addr_rtattr); +} + +static void target_to_host_route_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_route_rtattr); +} + +static abi_long target_to_host_data_route(struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifi; + struct ifaddrmsg *ifa; + struct rtmsg *rtm; + + switch (nlh->nlmsg_type) { + case RTM_GETLINK: + break; + case RTM_NEWLINK: + case RTM_DELLINK: + if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*ifi))) { + ifi = NLMSG_DATA(nlh); + ifi->ifi_type = tswap16(ifi->ifi_type); + ifi->ifi_index = tswap32(ifi->ifi_index); + ifi->ifi_flags = tswap32(ifi->ifi_flags); + ifi->ifi_change = tswap32(ifi->ifi_change); + target_to_host_link_rtattr(IFLA_RTA(ifi), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*ifi))); + } + break; + case RTM_GETADDR: + case RTM_NEWADDR: + case RTM_DELADDR: + if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*ifa))) 
{ + ifa = NLMSG_DATA(nlh); + ifa->ifa_index = tswap32(ifa->ifa_index); + target_to_host_addr_rtattr(IFA_RTA(ifa), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*ifa))); + } + break; + case RTM_GETROUTE: + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*rtm))) { + rtm = NLMSG_DATA(nlh); + rtm->rtm_flags = tswap32(rtm->rtm_flags); + target_to_host_route_rtattr(RTM_RTA(rtm), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*rtm))); + } + break; + default: + return -TARGET_EOPNOTSUPP; + } + return 0; +} + +static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len) +{ + return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route); +} +#endif /* CONFIG_RTNETLINK */ + +static abi_long host_to_target_data_audit(struct nlmsghdr *nlh) +{ + switch (nlh->nlmsg_type) { + default: + gemu_log("Unknown host audit message type %d\n", + nlh->nlmsg_type); + return -TARGET_EINVAL; + } + return 0; +} + +static inline abi_long host_to_target_nlmsg_audit(struct nlmsghdr *nlh, + size_t len) +{ + return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_audit); +} + +static abi_long target_to_host_data_audit(struct nlmsghdr *nlh) +{ + switch (nlh->nlmsg_type) { + case AUDIT_USER: + case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: + case AUDIT_FIRST_USER_MSG2 ... 
AUDIT_LAST_USER_MSG2: + break; + default: + gemu_log("Unknown target audit message type %d\n", + nlh->nlmsg_type); + return -TARGET_EINVAL; + } + + return 0; +} + +static abi_long target_to_host_nlmsg_audit(struct nlmsghdr *nlh, size_t len) +{ + return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_audit); +} + +static abi_long packet_target_to_host_sockaddr(void *host_addr, + abi_ulong target_addr, + socklen_t len) +{ + struct sockaddr *addr = host_addr; + struct target_sockaddr *target_saddr; + + target_saddr = lock_user(VERIFY_READ, target_addr, len, 1); + if (!target_saddr) { + return -TARGET_EFAULT; + } + + memcpy(addr, target_saddr, len); + addr->sa_family = tswap16(target_saddr->sa_family); + /* spkt_protocol is big-endian */ + + unlock_user(target_saddr, target_addr, 0); + return 0; +} + +TargetFdTrans target_packet_trans = { + .target_to_host_addr = packet_target_to_host_sockaddr, +}; + +#ifdef CONFIG_RTNETLINK +static abi_long netlink_route_target_to_host(void *buf, size_t len) +{ + abi_long ret; + + ret = target_to_host_nlmsg_route(buf, len); + if (ret < 0) { + return ret; + } + + return len; +} + +static abi_long netlink_route_host_to_target(void *buf, size_t len) +{ + abi_long ret; + + ret = host_to_target_nlmsg_route(buf, len); + if (ret < 0) { + return ret; + } + + return len; +} + +TargetFdTrans target_netlink_route_trans = { + .target_to_host_data = netlink_route_target_to_host, + .host_to_target_data = netlink_route_host_to_target, +}; +#endif /* CONFIG_RTNETLINK */ + +static abi_long netlink_audit_target_to_host(void *buf, size_t len) +{ + abi_long ret; + + ret = target_to_host_nlmsg_audit(buf, len); + if (ret < 0) { + return ret; + } + + return len; +} + +static abi_long netlink_audit_host_to_target(void *buf, size_t len) +{ + abi_long ret; + + ret = host_to_target_nlmsg_audit(buf, len); + if (ret < 0) { + return ret; + } + + return len; +} + +TargetFdTrans target_netlink_audit_trans = { + .target_to_host_data = 
netlink_audit_target_to_host, + .host_to_target_data = netlink_audit_host_to_target, +}; + +/* signalfd siginfo conversion */ + +static void +host_to_target_signalfd_siginfo(struct signalfd_siginfo *tinfo, + const struct signalfd_siginfo *info) +{ + int sig = host_to_target_signal(info->ssi_signo); + + /* linux/signalfd.h defines a ssi_addr_lsb + * not defined in sys/signalfd.h but used by some kernels + */ + +#ifdef BUS_MCEERR_AO + if (tinfo->ssi_signo == SIGBUS && + (tinfo->ssi_code == BUS_MCEERR_AR || + tinfo->ssi_code == BUS_MCEERR_AO)) { + uint16_t *ssi_addr_lsb = (uint16_t *)(&info->ssi_addr + 1); + uint16_t *tssi_addr_lsb = (uint16_t *)(&tinfo->ssi_addr + 1); + *tssi_addr_lsb = tswap16(*ssi_addr_lsb); + } +#endif + + tinfo->ssi_signo = tswap32(sig); + tinfo->ssi_errno = tswap32(tinfo->ssi_errno); + tinfo->ssi_code = tswap32(info->ssi_code); + tinfo->ssi_pid = tswap32(info->ssi_pid); + tinfo->ssi_uid = tswap32(info->ssi_uid); + tinfo->ssi_fd = tswap32(info->ssi_fd); + tinfo->ssi_tid = tswap32(info->ssi_tid); + tinfo->ssi_band = tswap32(info->ssi_band); + tinfo->ssi_overrun = tswap32(info->ssi_overrun); + tinfo->ssi_trapno = tswap32(info->ssi_trapno); + tinfo->ssi_status = tswap32(info->ssi_status); + tinfo->ssi_int = tswap32(info->ssi_int); + tinfo->ssi_ptr = tswap64(info->ssi_ptr); + tinfo->ssi_utime = tswap64(info->ssi_utime); + tinfo->ssi_stime = tswap64(info->ssi_stime); + tinfo->ssi_addr = tswap64(info->ssi_addr); +} + +static abi_long host_to_target_data_signalfd(void *buf, size_t len) +{ + int i; + + for (i = 0; i < len; i += sizeof(struct signalfd_siginfo)) { + host_to_target_signalfd_siginfo(buf + i, buf + i); + } + + return len; +} + +TargetFdTrans target_signalfd_trans = { + .host_to_target_data = host_to_target_data_signalfd, +}; + +static abi_long swap_data_eventfd(void *buf, size_t len) +{ + uint64_t *counter = buf; + int i; + + if (len < sizeof(uint64_t)) { + return -EINVAL; + } + + for (i = 0; i < len; i += sizeof(uint64_t)) { + *counter = 
tswap64(*counter); + counter++; + } + + return len; +} + +TargetFdTrans target_eventfd_trans = { + .host_to_target_data = swap_data_eventfd, + .target_to_host_data = swap_data_eventfd, +}; + +#if (defined(TARGET_NR_inotify_init) && defined(__NR_inotify_init)) || \ + (defined(CONFIG_INOTIFY1) && defined(TARGET_NR_inotify_init1) && \ + defined(__NR_inotify_init1)) +static abi_long host_to_target_data_inotify(void *buf, size_t len) +{ + struct inotify_event *ev; + int i; + uint32_t name_len; + + for (i = 0; i < len; i += sizeof(struct inotify_event) + name_len) { + ev = (struct inotify_event *)((char *)buf + i); + name_len = ev->len; + + ev->wd = tswap32(ev->wd); + ev->mask = tswap32(ev->mask); + ev->cookie = tswap32(ev->cookie); + ev->len = tswap32(name_len); + } + + return len; +} + +TargetFdTrans target_inotify_trans = { + .host_to_target_data = host_to_target_data_inotify, +}; +#endif diff --git a/linux-user/fd-trans.h b/linux-user/fd-trans.h new file mode 100644 index 0000000000..a3fcdaabc7 --- /dev/null +++ b/linux-user/fd-trans.h @@ -0,0 +1,97 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef FD_TRANS_H +#define FD_TRANS_H + +typedef abi_long (*TargetFdDataFunc)(void *, size_t); +typedef abi_long (*TargetFdAddrFunc)(void *, abi_ulong, socklen_t); +typedef struct TargetFdTrans { + TargetFdDataFunc host_to_target_data; + TargetFdDataFunc target_to_host_data; + TargetFdAddrFunc target_to_host_addr; +} TargetFdTrans; + +extern TargetFdTrans **target_fd_trans; + +extern unsigned int target_fd_max; + +static inline TargetFdDataFunc fd_trans_target_to_host_data(int fd) +{ + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { + return target_fd_trans[fd]->target_to_host_data; + } + return NULL; +} + +static inline TargetFdDataFunc fd_trans_host_to_target_data(int fd) +{ + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { + return target_fd_trans[fd]->host_to_target_data; + } + return NULL; +} + +static inline TargetFdAddrFunc fd_trans_target_to_host_addr(int fd) +{ + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { + return target_fd_trans[fd]->target_to_host_addr; + } + return NULL; +} + +static inline void fd_trans_register(int fd, TargetFdTrans *trans) +{ + unsigned int oldmax; + + if (fd >= target_fd_max) { + oldmax = target_fd_max; + target_fd_max = ((fd >> 6) + 1) << 6; /* by slice of 64 entries */ + target_fd_trans = g_renew(TargetFdTrans *, + target_fd_trans, target_fd_max); + memset((void *)(target_fd_trans + oldmax), 0, + (target_fd_max - oldmax) * sizeof(TargetFdTrans *)); + } + target_fd_trans[fd] = trans; +} + +static inline void fd_trans_unregister(int fd) +{ + if (fd >= 0 && fd < target_fd_max) { + target_fd_trans[fd] = NULL; + } +} + +static inline void fd_trans_dup(int oldfd, int newfd) +{ + fd_trans_unregister(newfd); + if (oldfd < target_fd_max && target_fd_trans[oldfd]) { + fd_trans_register(newfd, target_fd_trans[oldfd]); + } +} + +extern TargetFdTrans target_packet_trans; +#ifdef CONFIG_RTNETLINK +extern TargetFdTrans target_netlink_route_trans; +#endif +extern TargetFdTrans 
target_netlink_audit_trans; +extern TargetFdTrans target_signalfd_trans; +extern TargetFdTrans target_eventfd_trans; +#if (defined(TARGET_NR_inotify_init) && defined(__NR_inotify_init)) || \ + (defined(CONFIG_INOTIFY1) && defined(TARGET_NR_inotify_init1) && \ + defined(__NR_inotify_init1)) +extern TargetFdTrans target_inotify_trans; +#endif +#endif diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 850b72a0c7..ae3c0dfef7 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -102,15 +102,11 @@ #include <linux/blkpg.h> #include <netpacket/packet.h> #include <linux/netlink.h> -#ifdef CONFIG_RTNETLINK -#include <linux/rtnetlink.h> -#include <linux/if_bridge.h> -#endif -#include <linux/audit.h> #include "linux_loop.h" #include "uname.h" #include "qemu.h" +#include "fd-trans.h" #ifndef CLONE_IO #define CLONE_IO 0x80000000 /* Clone io context */ @@ -360,298 +356,6 @@ static bitmask_transtbl fcntl_flags_tbl[] = { { 0, 0, 0, 0 } }; -enum { - QEMU_IFLA_BR_UNSPEC, - QEMU_IFLA_BR_FORWARD_DELAY, - QEMU_IFLA_BR_HELLO_TIME, - QEMU_IFLA_BR_MAX_AGE, - QEMU_IFLA_BR_AGEING_TIME, - QEMU_IFLA_BR_STP_STATE, - QEMU_IFLA_BR_PRIORITY, - QEMU_IFLA_BR_VLAN_FILTERING, - QEMU_IFLA_BR_VLAN_PROTOCOL, - QEMU_IFLA_BR_GROUP_FWD_MASK, - QEMU_IFLA_BR_ROOT_ID, - QEMU_IFLA_BR_BRIDGE_ID, - QEMU_IFLA_BR_ROOT_PORT, - QEMU_IFLA_BR_ROOT_PATH_COST, - QEMU_IFLA_BR_TOPOLOGY_CHANGE, - QEMU_IFLA_BR_TOPOLOGY_CHANGE_DETECTED, - QEMU_IFLA_BR_HELLO_TIMER, - QEMU_IFLA_BR_TCN_TIMER, - QEMU_IFLA_BR_TOPOLOGY_CHANGE_TIMER, - QEMU_IFLA_BR_GC_TIMER, - QEMU_IFLA_BR_GROUP_ADDR, - QEMU_IFLA_BR_FDB_FLUSH, - QEMU_IFLA_BR_MCAST_ROUTER, - QEMU_IFLA_BR_MCAST_SNOOPING, - QEMU_IFLA_BR_MCAST_QUERY_USE_IFADDR, - QEMU_IFLA_BR_MCAST_QUERIER, - QEMU_IFLA_BR_MCAST_HASH_ELASTICITY, - QEMU_IFLA_BR_MCAST_HASH_MAX, - QEMU_IFLA_BR_MCAST_LAST_MEMBER_CNT, - QEMU_IFLA_BR_MCAST_STARTUP_QUERY_CNT, - QEMU_IFLA_BR_MCAST_LAST_MEMBER_INTVL, - QEMU_IFLA_BR_MCAST_MEMBERSHIP_INTVL, - QEMU_IFLA_BR_MCAST_QUERIER_INTVL, - 
QEMU_IFLA_BR_MCAST_QUERY_INTVL, - QEMU_IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, - QEMU_IFLA_BR_MCAST_STARTUP_QUERY_INTVL, - QEMU_IFLA_BR_NF_CALL_IPTABLES, - QEMU_IFLA_BR_NF_CALL_IP6TABLES, - QEMU_IFLA_BR_NF_CALL_ARPTABLES, - QEMU_IFLA_BR_VLAN_DEFAULT_PVID, - QEMU_IFLA_BR_PAD, - QEMU_IFLA_BR_VLAN_STATS_ENABLED, - QEMU_IFLA_BR_MCAST_STATS_ENABLED, - QEMU_IFLA_BR_MCAST_IGMP_VERSION, - QEMU_IFLA_BR_MCAST_MLD_VERSION, - QEMU___IFLA_BR_MAX, -}; - -enum { - QEMU_IFLA_UNSPEC, - QEMU_IFLA_ADDRESS, - QEMU_IFLA_BROADCAST, - QEMU_IFLA_IFNAME, - QEMU_IFLA_MTU, - QEMU_IFLA_LINK, - QEMU_IFLA_QDISC, - QEMU_IFLA_STATS, - QEMU_IFLA_COST, - QEMU_IFLA_PRIORITY, - QEMU_IFLA_MASTER, - QEMU_IFLA_WIRELESS, - QEMU_IFLA_PROTINFO, - QEMU_IFLA_TXQLEN, - QEMU_IFLA_MAP, - QEMU_IFLA_WEIGHT, - QEMU_IFLA_OPERSTATE, - QEMU_IFLA_LINKMODE, - QEMU_IFLA_LINKINFO, - QEMU_IFLA_NET_NS_PID, - QEMU_IFLA_IFALIAS, - QEMU_IFLA_NUM_VF, - QEMU_IFLA_VFINFO_LIST, - QEMU_IFLA_STATS64, - QEMU_IFLA_VF_PORTS, - QEMU_IFLA_PORT_SELF, - QEMU_IFLA_AF_SPEC, - QEMU_IFLA_GROUP, - QEMU_IFLA_NET_NS_FD, - QEMU_IFLA_EXT_MASK, - QEMU_IFLA_PROMISCUITY, - QEMU_IFLA_NUM_TX_QUEUES, - QEMU_IFLA_NUM_RX_QUEUES, - QEMU_IFLA_CARRIER, - QEMU_IFLA_PHYS_PORT_ID, - QEMU_IFLA_CARRIER_CHANGES, - QEMU_IFLA_PHYS_SWITCH_ID, - QEMU_IFLA_LINK_NETNSID, - QEMU_IFLA_PHYS_PORT_NAME, - QEMU_IFLA_PROTO_DOWN, - QEMU_IFLA_GSO_MAX_SEGS, - QEMU_IFLA_GSO_MAX_SIZE, - QEMU_IFLA_PAD, - QEMU_IFLA_XDP, - QEMU_IFLA_EVENT, - QEMU_IFLA_NEW_NETNSID, - QEMU_IFLA_IF_NETNSID, - QEMU_IFLA_CARRIER_UP_COUNT, - QEMU_IFLA_CARRIER_DOWN_COUNT, - QEMU_IFLA_NEW_IFINDEX, - QEMU___IFLA_MAX -}; - -enum { - QEMU_IFLA_BRPORT_UNSPEC, - QEMU_IFLA_BRPORT_STATE, - QEMU_IFLA_BRPORT_PRIORITY, - QEMU_IFLA_BRPORT_COST, - QEMU_IFLA_BRPORT_MODE, - QEMU_IFLA_BRPORT_GUARD, - QEMU_IFLA_BRPORT_PROTECT, - QEMU_IFLA_BRPORT_FAST_LEAVE, - QEMU_IFLA_BRPORT_LEARNING, - QEMU_IFLA_BRPORT_UNICAST_FLOOD, - QEMU_IFLA_BRPORT_PROXYARP, - QEMU_IFLA_BRPORT_LEARNING_SYNC, - QEMU_IFLA_BRPORT_PROXYARP_WIFI, - 
QEMU_IFLA_BRPORT_ROOT_ID, - QEMU_IFLA_BRPORT_BRIDGE_ID, - QEMU_IFLA_BRPORT_DESIGNATED_PORT, - QEMU_IFLA_BRPORT_DESIGNATED_COST, - QEMU_IFLA_BRPORT_ID, - QEMU_IFLA_BRPORT_NO, - QEMU_IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, - QEMU_IFLA_BRPORT_CONFIG_PENDING, - QEMU_IFLA_BRPORT_MESSAGE_AGE_TIMER, - QEMU_IFLA_BRPORT_FORWARD_DELAY_TIMER, - QEMU_IFLA_BRPORT_HOLD_TIMER, - QEMU_IFLA_BRPORT_FLUSH, - QEMU_IFLA_BRPORT_MULTICAST_ROUTER, - QEMU_IFLA_BRPORT_PAD, - QEMU_IFLA_BRPORT_MCAST_FLOOD, - QEMU_IFLA_BRPORT_MCAST_TO_UCAST, - QEMU_IFLA_BRPORT_VLAN_TUNNEL, - QEMU_IFLA_BRPORT_BCAST_FLOOD, - QEMU_IFLA_BRPORT_GROUP_FWD_MASK, - QEMU_IFLA_BRPORT_NEIGH_SUPPRESS, - QEMU___IFLA_BRPORT_MAX -}; - -enum { - QEMU_IFLA_TUN_UNSPEC, - QEMU_IFLA_TUN_OWNER, - QEMU_IFLA_TUN_GROUP, - QEMU_IFLA_TUN_TYPE, - QEMU_IFLA_TUN_PI, - QEMU_IFLA_TUN_VNET_HDR, - QEMU_IFLA_TUN_PERSIST, - QEMU_IFLA_TUN_MULTI_QUEUE, - QEMU_IFLA_TUN_NUM_QUEUES, - QEMU_IFLA_TUN_NUM_DISABLED_QUEUES, - QEMU___IFLA_TUN_MAX, -}; - -enum { - QEMU_IFLA_INFO_UNSPEC, - QEMU_IFLA_INFO_KIND, - QEMU_IFLA_INFO_DATA, - QEMU_IFLA_INFO_XSTATS, - QEMU_IFLA_INFO_SLAVE_KIND, - QEMU_IFLA_INFO_SLAVE_DATA, - QEMU___IFLA_INFO_MAX, -}; - -enum { - QEMU_IFLA_INET_UNSPEC, - QEMU_IFLA_INET_CONF, - QEMU___IFLA_INET_MAX, -}; - -enum { - QEMU_IFLA_INET6_UNSPEC, - QEMU_IFLA_INET6_FLAGS, - QEMU_IFLA_INET6_CONF, - QEMU_IFLA_INET6_STATS, - QEMU_IFLA_INET6_MCAST, - QEMU_IFLA_INET6_CACHEINFO, - QEMU_IFLA_INET6_ICMP6STATS, - QEMU_IFLA_INET6_TOKEN, - QEMU_IFLA_INET6_ADDR_GEN_MODE, - QEMU___IFLA_INET6_MAX -}; - -enum { - QEMU_IFLA_XDP_UNSPEC, - QEMU_IFLA_XDP_FD, - QEMU_IFLA_XDP_ATTACHED, - QEMU_IFLA_XDP_FLAGS, - QEMU_IFLA_XDP_PROG_ID, - QEMU___IFLA_XDP_MAX, -}; - -enum { - QEMU_RTA_UNSPEC, - QEMU_RTA_DST, - QEMU_RTA_SRC, - QEMU_RTA_IIF, - QEMU_RTA_OIF, - QEMU_RTA_GATEWAY, - QEMU_RTA_PRIORITY, - QEMU_RTA_PREFSRC, - QEMU_RTA_METRICS, - QEMU_RTA_MULTIPATH, - QEMU_RTA_PROTOINFO, /* no longer used */ - QEMU_RTA_FLOW, - QEMU_RTA_CACHEINFO, - QEMU_RTA_SESSION, /* no longer used 
*/ - QEMU_RTA_MP_ALGO, /* no longer used */ - QEMU_RTA_TABLE, - QEMU_RTA_MARK, - QEMU_RTA_MFC_STATS, - QEMU_RTA_VIA, - QEMU_RTA_NEWDST, - QEMU_RTA_PREF, - QEMU_RTA_ENCAP_TYPE, - QEMU_RTA_ENCAP, - QEMU_RTA_EXPIRES, - QEMU_RTA_PAD, - QEMU_RTA_UID, - QEMU_RTA_TTL_PROPAGATE, - QEMU_RTA_IP_PROTO, - QEMU_RTA_SPORT, - QEMU_RTA_DPORT, - QEMU___RTA_MAX -}; - -typedef abi_long (*TargetFdDataFunc)(void *, size_t); -typedef abi_long (*TargetFdAddrFunc)(void *, abi_ulong, socklen_t); -typedef struct TargetFdTrans { - TargetFdDataFunc host_to_target_data; - TargetFdDataFunc target_to_host_data; - TargetFdAddrFunc target_to_host_addr; -} TargetFdTrans; - -static TargetFdTrans **target_fd_trans; - -static unsigned int target_fd_max; - -static TargetFdDataFunc fd_trans_target_to_host_data(int fd) -{ - if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { - return target_fd_trans[fd]->target_to_host_data; - } - return NULL; -} - -static TargetFdDataFunc fd_trans_host_to_target_data(int fd) -{ - if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { - return target_fd_trans[fd]->host_to_target_data; - } - return NULL; -} - -static TargetFdAddrFunc fd_trans_target_to_host_addr(int fd) -{ - if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { - return target_fd_trans[fd]->target_to_host_addr; - } - return NULL; -} - -static void fd_trans_register(int fd, TargetFdTrans *trans) -{ - unsigned int oldmax; - - if (fd >= target_fd_max) { - oldmax = target_fd_max; - target_fd_max = ((fd >> 6) + 1) << 6; /* by slice of 64 entries */ - target_fd_trans = g_renew(TargetFdTrans *, - target_fd_trans, target_fd_max); - memset((void *)(target_fd_trans + oldmax), 0, - (target_fd_max - oldmax) * sizeof(TargetFdTrans *)); - } - target_fd_trans[fd] = trans; -} - -static void fd_trans_unregister(int fd) -{ - if (fd >= 0 && fd < target_fd_max) { - target_fd_trans[fd] = NULL; - } -} - -static void fd_trans_dup(int oldfd, int newfd) -{ - fd_trans_unregister(newfd); - if (oldfd < 
target_fd_max && target_fd_trans[oldfd]) { - fd_trans_register(newfd, target_fd_trans[oldfd]); - } -} - static int sys_getcwd1(char *buf, size_t size) { if (getcwd(buf, size) == NULL) { @@ -2076,968 +1780,6 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, return 0; } -static void tswap_nlmsghdr(struct nlmsghdr *nlh) -{ - nlh->nlmsg_len = tswap32(nlh->nlmsg_len); - nlh->nlmsg_type = tswap16(nlh->nlmsg_type); - nlh->nlmsg_flags = tswap16(nlh->nlmsg_flags); - nlh->nlmsg_seq = tswap32(nlh->nlmsg_seq); - nlh->nlmsg_pid = tswap32(nlh->nlmsg_pid); -} - -static abi_long host_to_target_for_each_nlmsg(struct nlmsghdr *nlh, - size_t len, - abi_long (*host_to_target_nlmsg) - (struct nlmsghdr *)) -{ - uint32_t nlmsg_len; - abi_long ret; - - while (len > sizeof(struct nlmsghdr)) { - - nlmsg_len = nlh->nlmsg_len; - if (nlmsg_len < sizeof(struct nlmsghdr) || - nlmsg_len > len) { - break; - } - - switch (nlh->nlmsg_type) { - case NLMSG_DONE: - tswap_nlmsghdr(nlh); - return 0; - case NLMSG_NOOP: - break; - case NLMSG_ERROR: - { - struct nlmsgerr *e = NLMSG_DATA(nlh); - e->error = tswap32(e->error); - tswap_nlmsghdr(&e->msg); - tswap_nlmsghdr(nlh); - return 0; - } - default: - ret = host_to_target_nlmsg(nlh); - if (ret < 0) { - tswap_nlmsghdr(nlh); - return ret; - } - break; - } - tswap_nlmsghdr(nlh); - len -= NLMSG_ALIGN(nlmsg_len); - nlh = (struct nlmsghdr *)(((char*)nlh) + NLMSG_ALIGN(nlmsg_len)); - } - return 0; -} - -static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh, - size_t len, - abi_long (*target_to_host_nlmsg) - (struct nlmsghdr *)) -{ - int ret; - - while (len > sizeof(struct nlmsghdr)) { - if (tswap32(nlh->nlmsg_len) < sizeof(struct nlmsghdr) || - tswap32(nlh->nlmsg_len) > len) { - break; - } - tswap_nlmsghdr(nlh); - switch (nlh->nlmsg_type) { - case NLMSG_DONE: - return 0; - case NLMSG_NOOP: - break; - case NLMSG_ERROR: - { - struct nlmsgerr *e = NLMSG_DATA(nlh); - e->error = tswap32(e->error); - tswap_nlmsghdr(&e->msg); - 
return 0; - } - default: - ret = target_to_host_nlmsg(nlh); - if (ret < 0) { - return ret; - } - } - len -= NLMSG_ALIGN(nlh->nlmsg_len); - nlh = (struct nlmsghdr *)(((char *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); - } - return 0; -} - -#ifdef CONFIG_RTNETLINK -static abi_long host_to_target_for_each_nlattr(struct nlattr *nlattr, - size_t len, void *context, - abi_long (*host_to_target_nlattr) - (struct nlattr *, - void *context)) -{ - unsigned short nla_len; - abi_long ret; - - while (len > sizeof(struct nlattr)) { - nla_len = nlattr->nla_len; - if (nla_len < sizeof(struct nlattr) || - nla_len > len) { - break; - } - ret = host_to_target_nlattr(nlattr, context); - nlattr->nla_len = tswap16(nlattr->nla_len); - nlattr->nla_type = tswap16(nlattr->nla_type); - if (ret < 0) { - return ret; - } - len -= NLA_ALIGN(nla_len); - nlattr = (struct nlattr *)(((char *)nlattr) + NLA_ALIGN(nla_len)); - } - return 0; -} - -static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr, - size_t len, - abi_long (*host_to_target_rtattr) - (struct rtattr *)) -{ - unsigned short rta_len; - abi_long ret; - - while (len > sizeof(struct rtattr)) { - rta_len = rtattr->rta_len; - if (rta_len < sizeof(struct rtattr) || - rta_len > len) { - break; - } - ret = host_to_target_rtattr(rtattr); - rtattr->rta_len = tswap16(rtattr->rta_len); - rtattr->rta_type = tswap16(rtattr->rta_type); - if (ret < 0) { - return ret; - } - len -= RTA_ALIGN(rta_len); - rtattr = (struct rtattr *)(((char *)rtattr) + RTA_ALIGN(rta_len)); - } - return 0; -} - -#define NLA_DATA(nla) ((void *)((char *)(nla)) + NLA_HDRLEN) - -static abi_long host_to_target_data_bridge_nlattr(struct nlattr *nlattr, - void *context) -{ - uint16_t *u16; - uint32_t *u32; - uint64_t *u64; - - switch (nlattr->nla_type) { - /* no data */ - case QEMU_IFLA_BR_FDB_FLUSH: - break; - /* binary */ - case QEMU_IFLA_BR_GROUP_ADDR: - break; - /* uint8_t */ - case QEMU_IFLA_BR_VLAN_FILTERING: - case QEMU_IFLA_BR_TOPOLOGY_CHANGE: - case 
QEMU_IFLA_BR_TOPOLOGY_CHANGE_DETECTED: - case QEMU_IFLA_BR_MCAST_ROUTER: - case QEMU_IFLA_BR_MCAST_SNOOPING: - case QEMU_IFLA_BR_MCAST_QUERY_USE_IFADDR: - case QEMU_IFLA_BR_MCAST_QUERIER: - case QEMU_IFLA_BR_NF_CALL_IPTABLES: - case QEMU_IFLA_BR_NF_CALL_IP6TABLES: - case QEMU_IFLA_BR_NF_CALL_ARPTABLES: - case QEMU_IFLA_BR_VLAN_STATS_ENABLED: - case QEMU_IFLA_BR_MCAST_STATS_ENABLED: - case QEMU_IFLA_BR_MCAST_IGMP_VERSION: - case QEMU_IFLA_BR_MCAST_MLD_VERSION: - break; - /* uint16_t */ - case QEMU_IFLA_BR_PRIORITY: - case QEMU_IFLA_BR_VLAN_PROTOCOL: - case QEMU_IFLA_BR_GROUP_FWD_MASK: - case QEMU_IFLA_BR_ROOT_PORT: - case QEMU_IFLA_BR_VLAN_DEFAULT_PVID: - u16 = NLA_DATA(nlattr); - *u16 = tswap16(*u16); - break; - /* uint32_t */ - case QEMU_IFLA_BR_FORWARD_DELAY: - case QEMU_IFLA_BR_HELLO_TIME: - case QEMU_IFLA_BR_MAX_AGE: - case QEMU_IFLA_BR_AGEING_TIME: - case QEMU_IFLA_BR_STP_STATE: - case QEMU_IFLA_BR_ROOT_PATH_COST: - case QEMU_IFLA_BR_MCAST_HASH_ELASTICITY: - case QEMU_IFLA_BR_MCAST_HASH_MAX: - case QEMU_IFLA_BR_MCAST_LAST_MEMBER_CNT: - case QEMU_IFLA_BR_MCAST_STARTUP_QUERY_CNT: - u32 = NLA_DATA(nlattr); - *u32 = tswap32(*u32); - break; - /* uint64_t */ - case QEMU_IFLA_BR_HELLO_TIMER: - case QEMU_IFLA_BR_TCN_TIMER: - case QEMU_IFLA_BR_GC_TIMER: - case QEMU_IFLA_BR_TOPOLOGY_CHANGE_TIMER: - case QEMU_IFLA_BR_MCAST_LAST_MEMBER_INTVL: - case QEMU_IFLA_BR_MCAST_MEMBERSHIP_INTVL: - case QEMU_IFLA_BR_MCAST_QUERIER_INTVL: - case QEMU_IFLA_BR_MCAST_QUERY_INTVL: - case QEMU_IFLA_BR_MCAST_QUERY_RESPONSE_INTVL: - case QEMU_IFLA_BR_MCAST_STARTUP_QUERY_INTVL: - u64 = NLA_DATA(nlattr); - *u64 = tswap64(*u64); - break; - /* ifla_bridge_id: uin8_t[] */ - case QEMU_IFLA_BR_ROOT_ID: - case QEMU_IFLA_BR_BRIDGE_ID: - break; - default: - gemu_log("Unknown QEMU_IFLA_BR type %d\n", nlattr->nla_type); - break; - } - return 0; -} - -static abi_long host_to_target_slave_data_bridge_nlattr(struct nlattr *nlattr, - void *context) -{ - uint16_t *u16; - uint32_t *u32; - uint64_t *u64; - - 
switch (nlattr->nla_type) { - /* uint8_t */ - case QEMU_IFLA_BRPORT_STATE: - case QEMU_IFLA_BRPORT_MODE: - case QEMU_IFLA_BRPORT_GUARD: - case QEMU_IFLA_BRPORT_PROTECT: - case QEMU_IFLA_BRPORT_FAST_LEAVE: - case QEMU_IFLA_BRPORT_LEARNING: - case QEMU_IFLA_BRPORT_UNICAST_FLOOD: - case QEMU_IFLA_BRPORT_PROXYARP: - case QEMU_IFLA_BRPORT_LEARNING_SYNC: - case QEMU_IFLA_BRPORT_PROXYARP_WIFI: - case QEMU_IFLA_BRPORT_TOPOLOGY_CHANGE_ACK: - case QEMU_IFLA_BRPORT_CONFIG_PENDING: - case QEMU_IFLA_BRPORT_MULTICAST_ROUTER: - case QEMU_IFLA_BRPORT_MCAST_FLOOD: - case QEMU_IFLA_BRPORT_MCAST_TO_UCAST: - case QEMU_IFLA_BRPORT_VLAN_TUNNEL: - case QEMU_IFLA_BRPORT_BCAST_FLOOD: - case QEMU_IFLA_BRPORT_NEIGH_SUPPRESS: - break; - /* uint16_t */ - case QEMU_IFLA_BRPORT_PRIORITY: - case QEMU_IFLA_BRPORT_DESIGNATED_PORT: - case QEMU_IFLA_BRPORT_DESIGNATED_COST: - case QEMU_IFLA_BRPORT_ID: - case QEMU_IFLA_BRPORT_NO: - case QEMU_IFLA_BRPORT_GROUP_FWD_MASK: - u16 = NLA_DATA(nlattr); - *u16 = tswap16(*u16); - break; - /* uin32_t */ - case QEMU_IFLA_BRPORT_COST: - u32 = NLA_DATA(nlattr); - *u32 = tswap32(*u32); - break; - /* uint64_t */ - case QEMU_IFLA_BRPORT_MESSAGE_AGE_TIMER: - case QEMU_IFLA_BRPORT_FORWARD_DELAY_TIMER: - case QEMU_IFLA_BRPORT_HOLD_TIMER: - u64 = NLA_DATA(nlattr); - *u64 = tswap64(*u64); - break; - /* ifla_bridge_id: uint8_t[] */ - case QEMU_IFLA_BRPORT_ROOT_ID: - case QEMU_IFLA_BRPORT_BRIDGE_ID: - break; - default: - gemu_log("Unknown QEMU_IFLA_BRPORT type %d\n", nlattr->nla_type); - break; - } - return 0; -} - -static abi_long host_to_target_data_tun_nlattr(struct nlattr *nlattr, - void *context) -{ - uint32_t *u32; - - switch (nlattr->nla_type) { - /* uint8_t */ - case QEMU_IFLA_TUN_TYPE: - case QEMU_IFLA_TUN_PI: - case QEMU_IFLA_TUN_VNET_HDR: - case QEMU_IFLA_TUN_PERSIST: - case QEMU_IFLA_TUN_MULTI_QUEUE: - break; - /* uint32_t */ - case QEMU_IFLA_TUN_NUM_QUEUES: - case QEMU_IFLA_TUN_NUM_DISABLED_QUEUES: - case QEMU_IFLA_TUN_OWNER: - case QEMU_IFLA_TUN_GROUP: - u32 = 
NLA_DATA(nlattr); - *u32 = tswap32(*u32); - break; - default: - gemu_log("Unknown QEMU_IFLA_TUN type %d\n", nlattr->nla_type); - break; - } - return 0; -} - -struct linkinfo_context { - int len; - char *name; - int slave_len; - char *slave_name; -}; - -static abi_long host_to_target_data_linkinfo_nlattr(struct nlattr *nlattr, - void *context) -{ - struct linkinfo_context *li_context = context; - - switch (nlattr->nla_type) { - /* string */ - case QEMU_IFLA_INFO_KIND: - li_context->name = NLA_DATA(nlattr); - li_context->len = nlattr->nla_len - NLA_HDRLEN; - break; - case QEMU_IFLA_INFO_SLAVE_KIND: - li_context->slave_name = NLA_DATA(nlattr); - li_context->slave_len = nlattr->nla_len - NLA_HDRLEN; - break; - /* stats */ - case QEMU_IFLA_INFO_XSTATS: - /* FIXME: only used by CAN */ - break; - /* nested */ - case QEMU_IFLA_INFO_DATA: - if (strncmp(li_context->name, "bridge", - li_context->len) == 0) { - return host_to_target_for_each_nlattr(NLA_DATA(nlattr), - nlattr->nla_len, - NULL, - host_to_target_data_bridge_nlattr); - } else if (strncmp(li_context->name, "tun", - li_context->len) == 0) { - return host_to_target_for_each_nlattr(NLA_DATA(nlattr), - nlattr->nla_len, - NULL, - host_to_target_data_tun_nlattr); - } else { - gemu_log("Unknown QEMU_IFLA_INFO_KIND %s\n", li_context->name); - } - break; - case QEMU_IFLA_INFO_SLAVE_DATA: - if (strncmp(li_context->slave_name, "bridge", - li_context->slave_len) == 0) { - return host_to_target_for_each_nlattr(NLA_DATA(nlattr), - nlattr->nla_len, - NULL, - host_to_target_slave_data_bridge_nlattr); - } else { - gemu_log("Unknown QEMU_IFLA_INFO_SLAVE_KIND %s\n", - li_context->slave_name); - } - break; - default: - gemu_log("Unknown host QEMU_IFLA_INFO type: %d\n", nlattr->nla_type); - break; - } - - return 0; -} - -static abi_long host_to_target_data_inet_nlattr(struct nlattr *nlattr, - void *context) -{ - uint32_t *u32; - int i; - - switch (nlattr->nla_type) { - case QEMU_IFLA_INET_CONF: - u32 = NLA_DATA(nlattr); - for (i = 0; i 
< (nlattr->nla_len - NLA_HDRLEN) / sizeof(*u32); - i++) { - u32[i] = tswap32(u32[i]); - } - break; - default: - gemu_log("Unknown host AF_INET type: %d\n", nlattr->nla_type); - } - return 0; -} - -static abi_long host_to_target_data_inet6_nlattr(struct nlattr *nlattr, - void *context) -{ - uint32_t *u32; - uint64_t *u64; - struct ifla_cacheinfo *ci; - int i; - - switch (nlattr->nla_type) { - /* binaries */ - case QEMU_IFLA_INET6_TOKEN: - break; - /* uint8_t */ - case QEMU_IFLA_INET6_ADDR_GEN_MODE: - break; - /* uint32_t */ - case QEMU_IFLA_INET6_FLAGS: - u32 = NLA_DATA(nlattr); - *u32 = tswap32(*u32); - break; - /* uint32_t[] */ - case QEMU_IFLA_INET6_CONF: - u32 = NLA_DATA(nlattr); - for (i = 0; i < (nlattr->nla_len - NLA_HDRLEN) / sizeof(*u32); - i++) { - u32[i] = tswap32(u32[i]); - } - break; - /* ifla_cacheinfo */ - case QEMU_IFLA_INET6_CACHEINFO: - ci = NLA_DATA(nlattr); - ci->max_reasm_len = tswap32(ci->max_reasm_len); - ci->tstamp = tswap32(ci->tstamp); - ci->reachable_time = tswap32(ci->reachable_time); - ci->retrans_time = tswap32(ci->retrans_time); - break; - /* uint64_t[] */ - case QEMU_IFLA_INET6_STATS: - case QEMU_IFLA_INET6_ICMP6STATS: - u64 = NLA_DATA(nlattr); - for (i = 0; i < (nlattr->nla_len - NLA_HDRLEN) / sizeof(*u64); - i++) { - u64[i] = tswap64(u64[i]); - } - break; - default: - gemu_log("Unknown host AF_INET6 type: %d\n", nlattr->nla_type); - } - return 0; -} - -static abi_long host_to_target_data_spec_nlattr(struct nlattr *nlattr, - void *context) -{ - switch (nlattr->nla_type) { - case AF_INET: - return host_to_target_for_each_nlattr(NLA_DATA(nlattr), nlattr->nla_len, - NULL, - host_to_target_data_inet_nlattr); - case AF_INET6: - return host_to_target_for_each_nlattr(NLA_DATA(nlattr), nlattr->nla_len, - NULL, - host_to_target_data_inet6_nlattr); - default: - gemu_log("Unknown host AF_SPEC type: %d\n", nlattr->nla_type); - break; - } - return 0; -} - -static abi_long host_to_target_data_xdp_nlattr(struct nlattr *nlattr, - void *context) -{ - 
uint32_t *u32; - - switch (nlattr->nla_type) { - /* uint8_t */ - case QEMU_IFLA_XDP_ATTACHED: - break; - /* uint32_t */ - case QEMU_IFLA_XDP_PROG_ID: - u32 = NLA_DATA(nlattr); - *u32 = tswap32(*u32); - break; - default: - gemu_log("Unknown host XDP type: %d\n", nlattr->nla_type); - break; - } - return 0; -} - -static abi_long host_to_target_data_link_rtattr(struct rtattr *rtattr) -{ - uint32_t *u32; - struct rtnl_link_stats *st; - struct rtnl_link_stats64 *st64; - struct rtnl_link_ifmap *map; - struct linkinfo_context li_context; - - switch (rtattr->rta_type) { - /* binary stream */ - case QEMU_IFLA_ADDRESS: - case QEMU_IFLA_BROADCAST: - /* string */ - case QEMU_IFLA_IFNAME: - case QEMU_IFLA_QDISC: - break; - /* uin8_t */ - case QEMU_IFLA_OPERSTATE: - case QEMU_IFLA_LINKMODE: - case QEMU_IFLA_CARRIER: - case QEMU_IFLA_PROTO_DOWN: - break; - /* uint32_t */ - case QEMU_IFLA_MTU: - case QEMU_IFLA_LINK: - case QEMU_IFLA_WEIGHT: - case QEMU_IFLA_TXQLEN: - case QEMU_IFLA_CARRIER_CHANGES: - case QEMU_IFLA_NUM_RX_QUEUES: - case QEMU_IFLA_NUM_TX_QUEUES: - case QEMU_IFLA_PROMISCUITY: - case QEMU_IFLA_EXT_MASK: - case QEMU_IFLA_LINK_NETNSID: - case QEMU_IFLA_GROUP: - case QEMU_IFLA_MASTER: - case QEMU_IFLA_NUM_VF: - case QEMU_IFLA_GSO_MAX_SEGS: - case QEMU_IFLA_GSO_MAX_SIZE: - case QEMU_IFLA_CARRIER_UP_COUNT: - case QEMU_IFLA_CARRIER_DOWN_COUNT: - u32 = RTA_DATA(rtattr); - *u32 = tswap32(*u32); - break; - /* struct rtnl_link_stats */ - case QEMU_IFLA_STATS: - st = RTA_DATA(rtattr); - st->rx_packets = tswap32(st->rx_packets); - st->tx_packets = tswap32(st->tx_packets); - st->rx_bytes = tswap32(st->rx_bytes); - st->tx_bytes = tswap32(st->tx_bytes); - st->rx_errors = tswap32(st->rx_errors); - st->tx_errors = tswap32(st->tx_errors); - st->rx_dropped = tswap32(st->rx_dropped); - st->tx_dropped = tswap32(st->tx_dropped); - st->multicast = tswap32(st->multicast); - st->collisions = tswap32(st->collisions); - - /* detailed rx_errors: */ - st->rx_length_errors = 
tswap32(st->rx_length_errors); - st->rx_over_errors = tswap32(st->rx_over_errors); - st->rx_crc_errors = tswap32(st->rx_crc_errors); - st->rx_frame_errors = tswap32(st->rx_frame_errors); - st->rx_fifo_errors = tswap32(st->rx_fifo_errors); - st->rx_missed_errors = tswap32(st->rx_missed_errors); - - /* detailed tx_errors */ - st->tx_aborted_errors = tswap32(st->tx_aborted_errors); - st->tx_carrier_errors = tswap32(st->tx_carrier_errors); - st->tx_fifo_errors = tswap32(st->tx_fifo_errors); - st->tx_heartbeat_errors = tswap32(st->tx_heartbeat_errors); - st->tx_window_errors = tswap32(st->tx_window_errors); - - /* for cslip etc */ - st->rx_compressed = tswap32(st->rx_compressed); - st->tx_compressed = tswap32(st->tx_compressed); - break; - /* struct rtnl_link_stats64 */ - case QEMU_IFLA_STATS64: - st64 = RTA_DATA(rtattr); - st64->rx_packets = tswap64(st64->rx_packets); - st64->tx_packets = tswap64(st64->tx_packets); - st64->rx_bytes = tswap64(st64->rx_bytes); - st64->tx_bytes = tswap64(st64->tx_bytes); - st64->rx_errors = tswap64(st64->rx_errors); - st64->tx_errors = tswap64(st64->tx_errors); - st64->rx_dropped = tswap64(st64->rx_dropped); - st64->tx_dropped = tswap64(st64->tx_dropped); - st64->multicast = tswap64(st64->multicast); - st64->collisions = tswap64(st64->collisions); - - /* detailed rx_errors: */ - st64->rx_length_errors = tswap64(st64->rx_length_errors); - st64->rx_over_errors = tswap64(st64->rx_over_errors); - st64->rx_crc_errors = tswap64(st64->rx_crc_errors); - st64->rx_frame_errors = tswap64(st64->rx_frame_errors); - st64->rx_fifo_errors = tswap64(st64->rx_fifo_errors); - st64->rx_missed_errors = tswap64(st64->rx_missed_errors); - - /* detailed tx_errors */ - st64->tx_aborted_errors = tswap64(st64->tx_aborted_errors); - st64->tx_carrier_errors = tswap64(st64->tx_carrier_errors); - st64->tx_fifo_errors = tswap64(st64->tx_fifo_errors); - st64->tx_heartbeat_errors = tswap64(st64->tx_heartbeat_errors); - st64->tx_window_errors = 
tswap64(st64->tx_window_errors); - - /* for cslip etc */ - st64->rx_compressed = tswap64(st64->rx_compressed); - st64->tx_compressed = tswap64(st64->tx_compressed); - break; - /* struct rtnl_link_ifmap */ - case QEMU_IFLA_MAP: - map = RTA_DATA(rtattr); - map->mem_start = tswap64(map->mem_start); - map->mem_end = tswap64(map->mem_end); - map->base_addr = tswap64(map->base_addr); - map->irq = tswap16(map->irq); - break; - /* nested */ - case QEMU_IFLA_LINKINFO: - memset(&li_context, 0, sizeof(li_context)); - return host_to_target_for_each_nlattr(RTA_DATA(rtattr), rtattr->rta_len, - &li_context, - host_to_target_data_linkinfo_nlattr); - case QEMU_IFLA_AF_SPEC: - return host_to_target_for_each_nlattr(RTA_DATA(rtattr), rtattr->rta_len, - NULL, - host_to_target_data_spec_nlattr); - case QEMU_IFLA_XDP: - return host_to_target_for_each_nlattr(RTA_DATA(rtattr), rtattr->rta_len, - NULL, - host_to_target_data_xdp_nlattr); - default: - gemu_log("Unknown host QEMU_IFLA type: %d\n", rtattr->rta_type); - break; - } - return 0; -} - -static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr) -{ - uint32_t *u32; - struct ifa_cacheinfo *ci; - - switch (rtattr->rta_type) { - /* binary: depends on family type */ - case IFA_ADDRESS: - case IFA_LOCAL: - break; - /* string */ - case IFA_LABEL: - break; - /* u32 */ - case IFA_FLAGS: - case IFA_BROADCAST: - u32 = RTA_DATA(rtattr); - *u32 = tswap32(*u32); - break; - /* struct ifa_cacheinfo */ - case IFA_CACHEINFO: - ci = RTA_DATA(rtattr); - ci->ifa_prefered = tswap32(ci->ifa_prefered); - ci->ifa_valid = tswap32(ci->ifa_valid); - ci->cstamp = tswap32(ci->cstamp); - ci->tstamp = tswap32(ci->tstamp); - break; - default: - gemu_log("Unknown host IFA type: %d\n", rtattr->rta_type); - break; - } - return 0; -} - -static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr) -{ - uint32_t *u32; - struct rta_cacheinfo *ci; - - switch (rtattr->rta_type) { - /* binary: depends on family type */ - case QEMU_RTA_GATEWAY: - case 
QEMU_RTA_DST: - case QEMU_RTA_PREFSRC: - break; - /* u8 */ - case QEMU_RTA_PREF: - break; - /* u32 */ - case QEMU_RTA_PRIORITY: - case QEMU_RTA_TABLE: - case QEMU_RTA_OIF: - u32 = RTA_DATA(rtattr); - *u32 = tswap32(*u32); - break; - /* struct rta_cacheinfo */ - case QEMU_RTA_CACHEINFO: - ci = RTA_DATA(rtattr); - ci->rta_clntref = tswap32(ci->rta_clntref); - ci->rta_lastuse = tswap32(ci->rta_lastuse); - ci->rta_expires = tswap32(ci->rta_expires); - ci->rta_error = tswap32(ci->rta_error); - ci->rta_used = tswap32(ci->rta_used); -#if defined(RTNETLINK_HAVE_PEERINFO) - ci->rta_id = tswap32(ci->rta_id); - ci->rta_ts = tswap32(ci->rta_ts); - ci->rta_tsage = tswap32(ci->rta_tsage); -#endif - break; - default: - gemu_log("Unknown host RTA type: %d\n", rtattr->rta_type); - break; - } - return 0; -} - -static abi_long host_to_target_link_rtattr(struct rtattr *rtattr, - uint32_t rtattr_len) -{ - return host_to_target_for_each_rtattr(rtattr, rtattr_len, - host_to_target_data_link_rtattr); -} - -static abi_long host_to_target_addr_rtattr(struct rtattr *rtattr, - uint32_t rtattr_len) -{ - return host_to_target_for_each_rtattr(rtattr, rtattr_len, - host_to_target_data_addr_rtattr); -} - -static abi_long host_to_target_route_rtattr(struct rtattr *rtattr, - uint32_t rtattr_len) -{ - return host_to_target_for_each_rtattr(rtattr, rtattr_len, - host_to_target_data_route_rtattr); -} - -static abi_long host_to_target_data_route(struct nlmsghdr *nlh) -{ - uint32_t nlmsg_len; - struct ifinfomsg *ifi; - struct ifaddrmsg *ifa; - struct rtmsg *rtm; - - nlmsg_len = nlh->nlmsg_len; - switch (nlh->nlmsg_type) { - case RTM_NEWLINK: - case RTM_DELLINK: - case RTM_GETLINK: - if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*ifi))) { - ifi = NLMSG_DATA(nlh); - ifi->ifi_type = tswap16(ifi->ifi_type); - ifi->ifi_index = tswap32(ifi->ifi_index); - ifi->ifi_flags = tswap32(ifi->ifi_flags); - ifi->ifi_change = tswap32(ifi->ifi_change); - host_to_target_link_rtattr(IFLA_RTA(ifi), - nlmsg_len - 
NLMSG_LENGTH(sizeof(*ifi))); - } - break; - case RTM_NEWADDR: - case RTM_DELADDR: - case RTM_GETADDR: - if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*ifa))) { - ifa = NLMSG_DATA(nlh); - ifa->ifa_index = tswap32(ifa->ifa_index); - host_to_target_addr_rtattr(IFA_RTA(ifa), - nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); - } - break; - case RTM_NEWROUTE: - case RTM_DELROUTE: - case RTM_GETROUTE: - if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*rtm))) { - rtm = NLMSG_DATA(nlh); - rtm->rtm_flags = tswap32(rtm->rtm_flags); - host_to_target_route_rtattr(RTM_RTA(rtm), - nlmsg_len - NLMSG_LENGTH(sizeof(*rtm))); - } - break; - default: - return -TARGET_EINVAL; - } - return 0; -} - -static inline abi_long host_to_target_nlmsg_route(struct nlmsghdr *nlh, - size_t len) -{ - return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_route); -} - -static abi_long target_to_host_for_each_rtattr(struct rtattr *rtattr, - size_t len, - abi_long (*target_to_host_rtattr) - (struct rtattr *)) -{ - abi_long ret; - - while (len >= sizeof(struct rtattr)) { - if (tswap16(rtattr->rta_len) < sizeof(struct rtattr) || - tswap16(rtattr->rta_len) > len) { - break; - } - rtattr->rta_len = tswap16(rtattr->rta_len); - rtattr->rta_type = tswap16(rtattr->rta_type); - ret = target_to_host_rtattr(rtattr); - if (ret < 0) { - return ret; - } - len -= RTA_ALIGN(rtattr->rta_len); - rtattr = (struct rtattr *)(((char *)rtattr) + - RTA_ALIGN(rtattr->rta_len)); - } - return 0; -} - -static abi_long target_to_host_data_link_rtattr(struct rtattr *rtattr) -{ - switch (rtattr->rta_type) { - default: - gemu_log("Unknown target QEMU_IFLA type: %d\n", rtattr->rta_type); - break; - } - return 0; -} - -static abi_long target_to_host_data_addr_rtattr(struct rtattr *rtattr) -{ - switch (rtattr->rta_type) { - /* binary: depends on family type */ - case IFA_LOCAL: - case IFA_ADDRESS: - break; - default: - gemu_log("Unknown target IFA type: %d\n", rtattr->rta_type); - break; - } - return 0; -} - -static abi_long 
target_to_host_data_route_rtattr(struct rtattr *rtattr) -{ - uint32_t *u32; - switch (rtattr->rta_type) { - /* binary: depends on family type */ - case QEMU_RTA_DST: - case QEMU_RTA_SRC: - case QEMU_RTA_GATEWAY: - break; - /* u32 */ - case QEMU_RTA_PRIORITY: - case QEMU_RTA_OIF: - u32 = RTA_DATA(rtattr); - *u32 = tswap32(*u32); - break; - default: - gemu_log("Unknown target RTA type: %d\n", rtattr->rta_type); - break; - } - return 0; -} - -static void target_to_host_link_rtattr(struct rtattr *rtattr, - uint32_t rtattr_len) -{ - target_to_host_for_each_rtattr(rtattr, rtattr_len, - target_to_host_data_link_rtattr); -} - -static void target_to_host_addr_rtattr(struct rtattr *rtattr, - uint32_t rtattr_len) -{ - target_to_host_for_each_rtattr(rtattr, rtattr_len, - target_to_host_data_addr_rtattr); -} - -static void target_to_host_route_rtattr(struct rtattr *rtattr, - uint32_t rtattr_len) -{ - target_to_host_for_each_rtattr(rtattr, rtattr_len, - target_to_host_data_route_rtattr); -} - -static abi_long target_to_host_data_route(struct nlmsghdr *nlh) -{ - struct ifinfomsg *ifi; - struct ifaddrmsg *ifa; - struct rtmsg *rtm; - - switch (nlh->nlmsg_type) { - case RTM_GETLINK: - break; - case RTM_NEWLINK: - case RTM_DELLINK: - if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*ifi))) { - ifi = NLMSG_DATA(nlh); - ifi->ifi_type = tswap16(ifi->ifi_type); - ifi->ifi_index = tswap32(ifi->ifi_index); - ifi->ifi_flags = tswap32(ifi->ifi_flags); - ifi->ifi_change = tswap32(ifi->ifi_change); - target_to_host_link_rtattr(IFLA_RTA(ifi), nlh->nlmsg_len - - NLMSG_LENGTH(sizeof(*ifi))); - } - break; - case RTM_GETADDR: - case RTM_NEWADDR: - case RTM_DELADDR: - if (nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(*ifa))) { - ifa = NLMSG_DATA(nlh); - ifa->ifa_index = tswap32(ifa->ifa_index); - target_to_host_addr_rtattr(IFA_RTA(ifa), nlh->nlmsg_len - - NLMSG_LENGTH(sizeof(*ifa))); - } - break; - case RTM_GETROUTE: - break; - case RTM_NEWROUTE: - case RTM_DELROUTE: - if (nlh->nlmsg_len >= 
NLMSG_LENGTH(sizeof(*rtm))) { - rtm = NLMSG_DATA(nlh); - rtm->rtm_flags = tswap32(rtm->rtm_flags); - target_to_host_route_rtattr(RTM_RTA(rtm), nlh->nlmsg_len - - NLMSG_LENGTH(sizeof(*rtm))); - } - break; - default: - return -TARGET_EOPNOTSUPP; - } - return 0; -} - -static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len) -{ - return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route); -} -#endif /* CONFIG_RTNETLINK */ - -static abi_long host_to_target_data_audit(struct nlmsghdr *nlh) -{ - switch (nlh->nlmsg_type) { - default: - gemu_log("Unknown host audit message type %d\n", - nlh->nlmsg_type); - return -TARGET_EINVAL; - } - return 0; -} - -static inline abi_long host_to_target_nlmsg_audit(struct nlmsghdr *nlh, - size_t len) -{ - return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_audit); -} - -static abi_long target_to_host_data_audit(struct nlmsghdr *nlh) -{ - switch (nlh->nlmsg_type) { - case AUDIT_USER: - case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: - case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: - break; - default: - gemu_log("Unknown target audit message type %d\n", - nlh->nlmsg_type); - return -TARGET_EINVAL; - } - - return 0; -} - -static abi_long target_to_host_nlmsg_audit(struct nlmsghdr *nlh, size_t len) -{ - return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_audit); -} - /* do_setsockopt() Must return target values and target errnos. 
*/ static abi_long do_setsockopt(int sockfd, int level, int optname, abi_ulong optval_addr, socklen_t optlen) @@ -3290,6 +2032,24 @@ set_timeout: unlock_user (dev_ifname, optval_addr, 0); return ret; } + case TARGET_SO_LINGER: + { + struct linger lg; + struct target_linger *tlg; + + if (optlen != sizeof(struct target_linger)) { + return -TARGET_EINVAL; + } + if (!lock_user_struct(VERIFY_READ, tlg, optval_addr, 1)) { + return -TARGET_EFAULT; + } + __get_user(lg.l_onoff, &tlg->l_onoff); + __get_user(lg.l_linger, &tlg->l_linger); + ret = get_errno(setsockopt(sockfd, SOL_SOCKET, SO_LINGER, + &lg, sizeof(lg))); + unlock_user_struct(tlg, optval_addr, 0); + return ret; + } /* Options with 'int' argument. */ case TARGET_SO_DEBUG: optname = SO_DEBUG; @@ -3381,7 +2141,6 @@ static abi_long do_getsockopt(int sockfd, int level, int optname, level = SOL_SOCKET; switch (optname) { /* These don't just return a single integer */ - case TARGET_SO_LINGER: case TARGET_SO_RCVTIMEO: case TARGET_SO_SNDTIMEO: case TARGET_SO_PEERNAME: @@ -3419,6 +2178,39 @@ static abi_long do_getsockopt(int sockfd, int level, int optname, } break; } + case TARGET_SO_LINGER: + { + struct linger lg; + socklen_t lglen; + struct target_linger *tlg; + + if (get_user_u32(len, optlen)) { + return -TARGET_EFAULT; + } + if (len < 0) { + return -TARGET_EINVAL; + } + + lglen = sizeof(lg); + ret = get_errno(getsockopt(sockfd, level, SO_LINGER, + &lg, &lglen)); + if (ret < 0) { + return ret; + } + if (len > lglen) { + len = lglen; + } + if (!lock_user_struct(VERIFY_WRITE, tlg, optval_addr, 0)) { + return -TARGET_EFAULT; + } + __put_user(lg.l_onoff, &tlg->l_onoff); + __put_user(lg.l_linger, &tlg->l_linger); + unlock_user_struct(tlg, optval_addr, 1); + if (put_user_u32(len, optlen)) { + return -TARGET_EFAULT; + } + break; + } /* Options with 'int' argument. 
*/ case TARGET_SO_DEBUG: optname = SO_DEBUG; @@ -3733,90 +2525,6 @@ static int sock_flags_fixup(int fd, int target_type) return fd; } -static abi_long packet_target_to_host_sockaddr(void *host_addr, - abi_ulong target_addr, - socklen_t len) -{ - struct sockaddr *addr = host_addr; - struct target_sockaddr *target_saddr; - - target_saddr = lock_user(VERIFY_READ, target_addr, len, 1); - if (!target_saddr) { - return -TARGET_EFAULT; - } - - memcpy(addr, target_saddr, len); - addr->sa_family = tswap16(target_saddr->sa_family); - /* spkt_protocol is big-endian */ - - unlock_user(target_saddr, target_addr, 0); - return 0; -} - -static TargetFdTrans target_packet_trans = { - .target_to_host_addr = packet_target_to_host_sockaddr, -}; - -#ifdef CONFIG_RTNETLINK -static abi_long netlink_route_target_to_host(void *buf, size_t len) -{ - abi_long ret; - - ret = target_to_host_nlmsg_route(buf, len); - if (ret < 0) { - return ret; - } - - return len; -} - -static abi_long netlink_route_host_to_target(void *buf, size_t len) -{ - abi_long ret; - - ret = host_to_target_nlmsg_route(buf, len); - if (ret < 0) { - return ret; - } - - return len; -} - -static TargetFdTrans target_netlink_route_trans = { - .target_to_host_data = netlink_route_target_to_host, - .host_to_target_data = netlink_route_host_to_target, -}; -#endif /* CONFIG_RTNETLINK */ - -static abi_long netlink_audit_target_to_host(void *buf, size_t len) -{ - abi_long ret; - - ret = target_to_host_nlmsg_audit(buf, len); - if (ret < 0) { - return ret; - } - - return len; -} - -static abi_long netlink_audit_host_to_target(void *buf, size_t len) -{ - abi_long ret; - - ret = host_to_target_nlmsg_audit(buf, len); - if (ret < 0) { - return ret; - } - - return len; -} - -static TargetFdTrans target_netlink_audit_trans = { - .target_to_host_data = netlink_audit_target_to_host, - .host_to_target_data = netlink_audit_host_to_target, -}; - /* do_socket() Must return target values and target errnos. 
*/ static abi_long do_socket(int domain, int type, int protocol) { @@ -7596,61 +6304,6 @@ static abi_long do_open_by_handle_at(abi_long mount_fd, abi_long handle, #if defined(TARGET_NR_signalfd) || defined(TARGET_NR_signalfd4) -/* signalfd siginfo conversion */ - -static void -host_to_target_signalfd_siginfo(struct signalfd_siginfo *tinfo, - const struct signalfd_siginfo *info) -{ - int sig = host_to_target_signal(info->ssi_signo); - - /* linux/signalfd.h defines a ssi_addr_lsb - * not defined in sys/signalfd.h but used by some kernels - */ - -#ifdef BUS_MCEERR_AO - if (tinfo->ssi_signo == SIGBUS && - (tinfo->ssi_code == BUS_MCEERR_AR || - tinfo->ssi_code == BUS_MCEERR_AO)) { - uint16_t *ssi_addr_lsb = (uint16_t *)(&info->ssi_addr + 1); - uint16_t *tssi_addr_lsb = (uint16_t *)(&tinfo->ssi_addr + 1); - *tssi_addr_lsb = tswap16(*ssi_addr_lsb); - } -#endif - - tinfo->ssi_signo = tswap32(sig); - tinfo->ssi_errno = tswap32(tinfo->ssi_errno); - tinfo->ssi_code = tswap32(info->ssi_code); - tinfo->ssi_pid = tswap32(info->ssi_pid); - tinfo->ssi_uid = tswap32(info->ssi_uid); - tinfo->ssi_fd = tswap32(info->ssi_fd); - tinfo->ssi_tid = tswap32(info->ssi_tid); - tinfo->ssi_band = tswap32(info->ssi_band); - tinfo->ssi_overrun = tswap32(info->ssi_overrun); - tinfo->ssi_trapno = tswap32(info->ssi_trapno); - tinfo->ssi_status = tswap32(info->ssi_status); - tinfo->ssi_int = tswap32(info->ssi_int); - tinfo->ssi_ptr = tswap64(info->ssi_ptr); - tinfo->ssi_utime = tswap64(info->ssi_utime); - tinfo->ssi_stime = tswap64(info->ssi_stime); - tinfo->ssi_addr = tswap64(info->ssi_addr); -} - -static abi_long host_to_target_data_signalfd(void *buf, size_t len) -{ - int i; - - for (i = 0; i < len; i += sizeof(struct signalfd_siginfo)) { - host_to_target_signalfd_siginfo(buf + i, buf + i); - } - - return len; -} - -static TargetFdTrans target_signalfd_trans = { - .host_to_target_data = host_to_target_data_signalfd, -}; - static abi_long do_signalfd4(int fd, abi_long mask, int flags) { int 
host_flags; @@ -7976,55 +6629,6 @@ static target_timer_t get_timer_id(abi_long arg) return timerid; } -static abi_long swap_data_eventfd(void *buf, size_t len) -{ - uint64_t *counter = buf; - int i; - - if (len < sizeof(uint64_t)) { - return -EINVAL; - } - - for (i = 0; i < len; i += sizeof(uint64_t)) { - *counter = tswap64(*counter); - counter++; - } - - return len; -} - -static TargetFdTrans target_eventfd_trans = { - .host_to_target_data = swap_data_eventfd, - .target_to_host_data = swap_data_eventfd, -}; - -#if (defined(TARGET_NR_inotify_init) && defined(__NR_inotify_init)) || \ - (defined(CONFIG_INOTIFY1) && defined(TARGET_NR_inotify_init1) && \ - defined(__NR_inotify_init1)) -static abi_long host_to_target_data_inotify(void *buf, size_t len) -{ - struct inotify_event *ev; - int i; - uint32_t name_len; - - for (i = 0; i < len; i += sizeof(struct inotify_event) + name_len) { - ev = (struct inotify_event *)((char *)buf + i); - name_len = ev->len; - - ev->wd = tswap32(ev->wd); - ev->mask = tswap32(ev->mask); - ev->cookie = tswap32(ev->cookie); - ev->len = tswap32(name_len); - } - - return len; -} - -static TargetFdTrans target_inotify_trans = { - .host_to_target_data = host_to_target_data_inotify, -}; -#endif - static int target_to_host_cpu_mask(unsigned long *host_mask, size_t host_size, abi_ulong target_addr, @@ -8168,6 +6772,9 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, } return ret; case TARGET_NR_write: + if (arg2 == 0 && arg3 == 0) { + return get_errno(safe_write(arg1, 0, 0)); + } if (!(p = lock_user(VERIFY_READ, arg2, arg3, 1))) return -TARGET_EFAULT; if (fd_trans_target_to_host_data(arg1)) { @@ -9272,7 +7879,21 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, rlim.rlim_cur = target_to_host_rlim(target_rlim->rlim_cur); rlim.rlim_max = target_to_host_rlim(target_rlim->rlim_max); unlock_user_struct(target_rlim, arg2, 0); - return get_errno(setrlimit(resource, &rlim)); + /* + * If we just passed through resource 
limit settings for memory then + * they would also apply to QEMU's own allocations, and QEMU will + * crash or hang or die if its allocations fail. Ideally we would + * track the guest allocations in QEMU and apply the limits ourselves. + * For now, just tell the guest the call succeeded but don't actually + * limit anything. + */ + if (resource != RLIMIT_AS && + resource != RLIMIT_DATA && + resource != RLIMIT_STACK) { + return get_errno(setrlimit(resource, &rlim)); + } else { + return 0; + } } #endif #ifdef TARGET_NR_getrlimit diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 40bb60ef4c..18d434d6dc 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -203,6 +203,11 @@ struct target_ip_mreq_source { uint32_t imr_sourceaddr; }; +struct target_linger { + abi_int l_onoff; /* Linger active */ + abi_int l_linger; /* How long to linger for */ +}; + struct target_timeval { abi_long tv_sec; abi_long tv_usec; diff --git a/migration/migration.c b/migration/migration.c index 05d0a7296a..d6ae879dc8 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -758,6 +758,18 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->xbzrle_cache->overflow = xbzrle_counters.overflow; } + if (migrate_use_compression()) { + info->has_compression = true; + info->compression = g_malloc0(sizeof(*info->compression)); + info->compression->pages = compression_counters.pages; + info->compression->busy = compression_counters.busy; + info->compression->busy_rate = compression_counters.busy_rate; + info->compression->compressed_size = + compression_counters.compressed_size; + info->compression->compression_rate = + compression_counters.compression_rate; + } + if (cpu_throttle_active()) { info->has_cpu_throttle_percentage = true; info->cpu_throttle_percentage = cpu_throttle_get_percentage(); @@ -2268,7 +2280,10 @@ out: */ if (postcopy_pause_return_path_thread(ms)) { /* Reload rp, reset the rest */ - rp = 
ms->rp_state.from_dst_file; + if (rp != ms->rp_state.from_dst_file) { + qemu_fclose(rp); + rp = ms->rp_state.from_dst_file; + } ms->rp_state.error = false; goto retry; } diff --git a/migration/ram.c b/migration/ram.c index f6fd8e5e09..bc38d98cc3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -301,10 +301,19 @@ struct RAMState { uint64_t num_dirty_pages_period; /* xbzrle misses since the beginning of the period */ uint64_t xbzrle_cache_miss_prev; - /* number of iterations at the beginning of period */ - uint64_t iterations_prev; - /* Iterations since start */ - uint64_t iterations; + + /* compression statistics since the beginning of the period */ + /* amount of count that no free thread to compress data */ + uint64_t compress_thread_busy_prev; + /* amount bytes after compression */ + uint64_t compressed_size_prev; + /* amount of compressed pages */ + uint64_t compress_pages_prev; + + /* total handled target pages at the beginning of period */ + uint64_t target_page_count_prev; + /* total handled target pages since start */ + uint64_t target_page_count; /* number of dirty bits in the bitmap */ uint64_t migration_dirty_pages; /* protects modification of the bitmap */ @@ -338,6 +347,8 @@ struct PageSearchStatus { }; typedef struct PageSearchStatus PageSearchStatus; +CompressionStats compression_counters; + struct CompressParam { bool done; bool quit; @@ -420,28 +431,14 @@ static void *do_data_compress(void *opaque) return NULL; } -static inline void terminate_compression_threads(void) -{ - int idx, thread_count; - - thread_count = migrate_compress_threads(); - - for (idx = 0; idx < thread_count; idx++) { - qemu_mutex_lock(&comp_param[idx].mutex); - comp_param[idx].quit = true; - qemu_cond_signal(&comp_param[idx].cond); - qemu_mutex_unlock(&comp_param[idx].mutex); - } -} - static void compress_threads_save_cleanup(void) { int i, thread_count; - if (!migrate_use_compression()) { + if (!migrate_use_compression() || !comp_param) { return; } - 
terminate_compression_threads(); + thread_count = migrate_compress_threads(); for (i = 0; i < thread_count; i++) { /* @@ -451,6 +448,12 @@ static void compress_threads_save_cleanup(void) if (!comp_param[i].file) { break; } + + qemu_mutex_lock(&comp_param[i].mutex); + comp_param[i].quit = true; + qemu_cond_signal(&comp_param[i].cond); + qemu_mutex_unlock(&comp_param[i].mutex); + qemu_thread_join(compress_threads + i); qemu_mutex_destroy(&comp_param[i].mutex); qemu_cond_destroy(&comp_param[i].cond); @@ -648,8 +651,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) return -1; } - be32_to_cpus(&msg.magic); - be32_to_cpus(&msg.version); + msg.magic = be32_to_cpu(msg.magic); + msg.version = be32_to_cpu(msg.version); if (msg.magic != MULTIFD_MAGIC) { error_setg(errp, "multifd: received packet magic %x " @@ -734,7 +737,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) RAMBlock *block; int i; - be32_to_cpus(&packet->magic); + packet->magic = be32_to_cpu(packet->magic); if (packet->magic != MULTIFD_MAGIC) { error_setg(errp, "multifd: received packet " "magic %x and expected magic %x", @@ -742,7 +745,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) return -1; } - be32_to_cpus(&packet->version); + packet->version = be32_to_cpu(packet->version); if (packet->version != MULTIFD_VERSION) { error_setg(errp, "multifd: received packet " "version %d and expected version %d", @@ -752,7 +755,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) p->flags = be32_to_cpu(packet->flags); - be32_to_cpus(&packet->size); + packet->size = be32_to_cpu(packet->size); if (packet->size > migrate_multifd_page_count()) { error_setg(errp, "multifd: received packet " "with size %d and expected maximum size %d", @@ -1592,21 +1595,42 @@ uint64_t ram_pagesize_summary(void) static void migration_update_rates(RAMState *rs, int64_t end_time) { - uint64_t iter_count = rs->iterations - rs->iterations_prev; 
+ uint64_t page_count = rs->target_page_count - rs->target_page_count_prev; + double compressed_size; /* calculate period counters */ ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000 / (end_time - rs->time_last_bitmap_sync); - if (!iter_count) { + if (!page_count) { return; } if (migrate_use_xbzrle()) { xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - - rs->xbzrle_cache_miss_prev) / iter_count; + rs->xbzrle_cache_miss_prev) / page_count; rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; } + + if (migrate_use_compression()) { + compression_counters.busy_rate = (double)(compression_counters.busy - + rs->compress_thread_busy_prev) / page_count; + rs->compress_thread_busy_prev = compression_counters.busy; + + compressed_size = compression_counters.compressed_size - + rs->compressed_size_prev; + if (compressed_size) { + double uncompressed_size = (compression_counters.pages - + rs->compress_pages_prev) * TARGET_PAGE_SIZE; + + /* Compression-Ratio = Uncompressed-size / Compressed-size */ + compression_counters.compression_rate = + uncompressed_size / compressed_size; + + rs->compress_pages_prev = compression_counters.pages; + rs->compressed_size_prev = compression_counters.compressed_size; + } + } } static void migration_bitmap_sync(RAMState *rs) @@ -1662,7 +1686,7 @@ static void migration_bitmap_sync(RAMState *rs) migration_update_rates(rs, end_time); - rs->iterations_prev = rs->iterations; + rs->target_page_count_prev = rs->target_page_count; /* reset period counters */ rs->time_last_bitmap_sync = end_time; @@ -1888,17 +1912,25 @@ exit: static void update_compress_thread_counts(const CompressParam *param, int bytes_xmit) { + ram_counters.transferred += bytes_xmit; + if (param->zero_page) { ram_counters.duplicate++; + return; } - ram_counters.transferred += bytes_xmit; + + /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. 
*/ + compression_counters.compressed_size += bytes_xmit - 8; + compression_counters.pages++; } +static bool save_page_use_compression(RAMState *rs); + static void flush_compressed_data(RAMState *rs) { int idx, len, thread_count; - if (!migrate_use_compression()) { + if (!save_page_use_compression(rs)) { return; } thread_count = migrate_compress_threads(); @@ -1996,17 +2028,22 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { + /* + * If memory migration starts over, we will meet a dirtied page + * which may still exists in compression threads's ring, so we + * should flush the compressed data to make sure the new page + * is not overwritten by the old one in the destination. + * + * Also If xbzrle is on, stop using the data compression at this + * point. In theory, xbzrle can do better than compression. + */ + flush_compressed_data(rs); + /* Hit the end of the list */ pss->block = QLIST_FIRST_RCU(&ram_list.blocks); /* Flag that we've looped */ pss->complete_round = true; rs->ram_bulk_stage = false; - if (migrate_use_xbzrle()) { - /* If xbzrle is on, stop using the data compression at this - * point. In theory, xbzrle can do better than compression. - */ - flush_compressed_data(rs); - } } /* Didn't find anything this time, but try again on the new block */ *again = true; @@ -2259,6 +2296,7 @@ static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) return true; } + compression_counters.busy++; return false; } @@ -2372,7 +2410,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, * * Called within an RCU critical section. 
* - * Returns the number of pages written where zero means no dirty pages + * Returns the number of pages written where zero means no dirty pages, + * or negative on error * * @rs: current RAM state * @last_stage: if we are at the completion stage @@ -3196,7 +3235,13 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) done = 1; break; } - rs->iterations++; + + if (pages < 0) { + qemu_file_set_error(f, pages); + break; + } + + rs->target_page_count += pages; /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. @@ -3212,7 +3257,6 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } i++; } - flush_compressed_data(rs); rcu_read_unlock(); /* @@ -3238,7 +3282,7 @@ out: /** * ram_save_complete: function called to send the remaining amount of ram * - * Returns zero to indicate success + * Returns zero to indicate success or negative on error * * Called with iothread lock * @@ -3249,6 +3293,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) { RAMState **temp = opaque; RAMState *rs = *temp; + int ret = 0; rcu_read_lock(); @@ -3269,6 +3314,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque) if (pages == 0) { break; } + if (pages < 0) { + ret = pages; + break; + } } flush_compressed_data(rs); @@ -3280,7 +3329,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); - return 0; + return ret; } static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, diff --git a/migration/ram.h b/migration/ram.h index 457bf54b8c..a139066846 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -36,6 +36,7 @@ extern MigrationStats ram_counters; extern XBZRLECacheStats xbzrle_counters; +extern CompressionStats compression_counters; int xbzrle_cache_resize(int64_t new_size, Error **errp); uint64_t ram_bytes_remaining(void); diff --git a/migration/rdma.c b/migration/rdma.c index ae07515e83..9b2e7e10aa 100644 --- a/migration/rdma.c +++ 
b/migration/rdma.c @@ -4012,7 +4012,7 @@ static void rdma_accept_incoming_migration(void *opaque) void rdma_start_incoming_migration(const char *host_port, Error **errp) { int ret; - RDMAContext *rdma, *rdma_return_path; + RDMAContext *rdma, *rdma_return_path = NULL; Error *local_err = NULL; trace_rdma_start_incoming_migration(); diff --git a/migration/savevm.c b/migration/savevm.c index 13e51f0e34..2d10e45582 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1679,6 +1679,7 @@ static void *postcopy_ram_listen_thread(void *opaque) qemu_loadvm_state_cleanup(); rcu_unregister_thread(); + mis->have_listen_thread = false; return NULL; } @@ -2078,7 +2079,9 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) /* Find savevm section */ se = find_se(idstr, instance_id); if (se == NULL) { - error_report("Unknown savevm section or instance '%s' %d", + error_report("Unknown savevm section or instance '%s' %d. " + "Make sure that your current VM setup matches your " + "saved VM setup, including any hotplugged devices", idstr, instance_id); return -EINVAL; } @@ -2330,11 +2333,13 @@ int qemu_loadvm_state(QEMUFile *f) if (migrate_get_current()->send_configuration) { if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { error_report("Configuration section missing"); + qemu_loadvm_state_cleanup(); return -EINVAL; } ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); if (ret) { + qemu_loadvm_state_cleanup(); return ret; } } diff --git a/nbd/server.c b/nbd/server.c index ea5fe0eb33..c3dd402b45 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1844,37 +1844,68 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client, return ret; } -static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, NBDExtent *extent) +/* + * Populate @extents from block status. 
Update @bytes to be the actual + * length encoded (which may be smaller than the original), and update + * @nb_extents to the number of extents used. + * + * Returns zero on success and -errno on bdrv_block_status_above failure. + */ +static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset, + uint64_t *bytes, NBDExtent *extents, + unsigned int *nb_extents) { - uint64_t remaining_bytes = bytes; + uint64_t remaining_bytes = *bytes; + NBDExtent *extent = extents, *extents_end = extents + *nb_extents; + bool first_extent = true; + assert(*nb_extents); while (remaining_bytes) { uint32_t flags; int64_t num; int ret = bdrv_block_status_above(bs, NULL, offset, remaining_bytes, &num, NULL, NULL); + if (ret < 0) { return ret; } flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) | (ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0); + offset += num; + remaining_bytes -= num; - if (remaining_bytes == bytes) { + if (first_extent) { extent->flags = flags; + extent->length = num; + first_extent = false; + continue; } - if (flags != extent->flags) { - break; + if (flags == extent->flags) { + /* extend current extent */ + extent->length += num; + } else { + if (extent + 1 == extents_end) { + break; + } + + /* start new extent */ + extent++; + extent->flags = flags; + extent->length = num; } + } - offset += num; - remaining_bytes -= num; + extents_end = extent + 1; + + for (extent = extents; extent < extents_end; extent++) { + cpu_to_be32s(&extent->flags); + cpu_to_be32s(&extent->length); } - cpu_to_be32s(&extent->flags); - extent->length = cpu_to_be32(bytes - remaining_bytes); + *bytes -= remaining_bytes; + *nb_extents = extents_end - extents; return 0; } @@ -1910,21 +1941,29 @@ static int nbd_co_send_extents(NBDClient *client, uint64_t handle, /* Get block status from the exported device and send it to the client */ static int nbd_co_send_block_status(NBDClient *client, uint64_t handle, BlockDriverState *bs, uint64_t offset, - uint32_t length, bool last, - uint32_t 
context_id, Error **errp) + uint32_t length, bool dont_fragment, + bool last, uint32_t context_id, + Error **errp) { int ret; - NBDExtent extent; + unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS; + NBDExtent *extents = g_new(NBDExtent, nb_extents); + uint64_t final_length = length; - ret = blockstatus_to_extent_be(bs, offset, length, &extent); + ret = blockstatus_to_extents(bs, offset, &final_length, extents, + &nb_extents); if (ret < 0) { + g_free(extents); return nbd_co_send_structured_error( client, handle, -ret, "can't get block status", errp); } - return nbd_co_send_extents(client, handle, &extent, 1, - be32_to_cpu(extent.length), last, - context_id, errp); + ret = nbd_co_send_extents(client, handle, extents, nb_extents, + final_length, last, context_id, errp); + + g_free(extents); + + return ret; } /* @@ -1951,6 +1990,8 @@ static unsigned int bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset, assert(begin < overall_end && nb_extents); while (begin < overall_end && i < nb_extents) { + bool next_dirty = !dirty; + if (dirty) { end = bdrv_dirty_bitmap_next_zero(bitmap, begin); } else { @@ -1962,6 +2003,7 @@ static unsigned int bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset, end = MIN(bdrv_dirty_bitmap_size(bitmap), begin + UINT32_MAX + 1 - bdrv_dirty_bitmap_granularity(bitmap)); + next_dirty = dirty; } if (dont_fragment && end > overall_end) { end = overall_end; @@ -1971,7 +2013,7 @@ static unsigned int bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset, extents[i].flags = cpu_to_be32(dirty ? 
NBD_STATE_DIRTY : 0); i++; begin = end; - dirty = !dirty; + dirty = next_dirty; } bdrv_dirty_iter_free(it); @@ -2228,10 +2270,12 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, (client->export_meta.base_allocation || client->export_meta.bitmap)) { + bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE; + if (client->export_meta.base_allocation) { ret = nbd_co_send_block_status(client, request->handle, blk_bs(exp->blk), request->from, - request->len, + request->len, dont_fragment, !client->export_meta.bitmap, NBD_META_ID_BASE_ALLOCATION, errp); @@ -2244,7 +2288,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, ret = nbd_co_send_bitmap(client, request->handle, client->exp->export_bitmap, request->from, request->len, - request->flags & NBD_CMD_FLAG_REQ_ONE, + dont_fragment, true, NBD_META_ID_DIRTY_BITMAP, errp); if (ret < 0) { return ret; @@ -984,6 +984,10 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) /* missing optional values have been initialized to "all bits zero" */ name = net->has_id ? 
net->id : net->name; + if (net->has_name) { + warn_report("The 'name' parameter is deprecated, use 'id' instead"); + } + /* Map the old options to the new flat type */ switch (opts->type) { case NET_LEGACY_OPTIONS_TYPE_NONE: diff --git a/net/slirp.c b/net/slirp.c index c18060f778..c93b64dd91 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -404,6 +404,8 @@ static SlirpState *slirp_lookup(Monitor *mon, const char *hub_id, monitor_printf(mon, "unrecognized (hub-id, stackname) pair\n"); return NULL; } + warn_report("Using 'hub-id' is deprecated, specify the netdev id " + "directly instead"); } else { nc = qemu_find_netdev(name); if (!nc) { diff --git a/pc-bios/hppa-firmware.img b/pc-bios/hppa-firmware.img Binary files differindex 4ec0dbfc4a..4ec0dbfc4a 100755..100644 --- a/pc-bios/hppa-firmware.img +++ b/pc-bios/hppa-firmware.img diff --git a/pc-bios/palcode-clipper b/pc-bios/palcode-clipper Binary files differindex 1df377a0fd..1df377a0fd 100755..100644 --- a/pc-bios/palcode-clipper +++ b/pc-bios/palcode-clipper diff --git a/pc-bios/u-boot-sam460-20100605.bin b/pc-bios/u-boot-sam460-20100605.bin Binary files differindex e17de77c19..e17de77c19 100755..100644 --- a/pc-bios/u-boot-sam460-20100605.bin +++ b/pc-bios/u-boot-sam460-20100605.bin diff --git a/pc-bios/u-boot.e500 b/pc-bios/u-boot.e500 Binary files differindex 25537f8fe3..25537f8fe3 100755..100644 --- a/pc-bios/u-boot.e500 +++ b/pc-bios/u-boot.e500 diff --git a/qapi/block-core.json b/qapi/block-core.json index ac3b48ee54..cfb37f8c1d 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1935,6 +1935,8 @@ ## # @x-block-dirty-bitmap-merge: # +# FIXME: Rename @src_name and @dst_name to src-name and dst-name. +# # Merge @src_name dirty bitmap to @dst_name dirty bitmap. @src_name dirty # bitmap is unchanged. On error, @dst_name is unchanged. # @@ -2895,7 +2897,9 @@ # # @cache-clean-interval: clean unused entries in the L2 and refcount # caches. The interval is in seconds. 
The default value -# is 0 and it disables this feature (since 2.5) +# is 600 on supporting platforms, and 0 on other +# platforms. 0 disables this feature. (since 2.5) +# # @encrypt: Image decryption options. Mandatory for # encrypted images, except when doing a metadata-only # probe of the image. (since 2.10) diff --git a/qapi/migration.json b/qapi/migration.json index f62d3f9a4b..6e8c21258a 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -76,6 +76,27 @@ 'overflow': 'int' } } ## +# @CompressionStats: +# +# Detailed migration compression statistics +# +# @pages: amount of pages compressed and transferred to the target VM +# +# @busy: count of times that no free thread was available to compress data +# +# @busy-rate: rate of thread busy +# +# @compressed-size: amount of bytes after compression +# +# @compression-rate: rate of compressed size +# +# Since: 3.1 +## +{ 'struct': 'CompressionStats', + 'data': {'pages': 'int', 'busy': 'int', 'busy-rate': 'number', + 'compressed-size': 'int', 'compression-rate': 'number' } } + +## # @MigrationStatus: # # An enumeration of migration status. @@ -172,6 +193,8 @@ # only present when the postcopy-blocktime migration capability # is enabled. (Since 3.0) # +# @compression: migration compression statistics, only returned if compression +# feature is on and status is 'active' or 'completed' (Since 3.1) # # Since: 0.14.0 ## @@ -186,7 +209,8 @@ '*cpu-throttle-percentage': 'int', '*error-desc': 'str', '*postcopy-blocktime' : 'uint32', - '*postcopy-vcpu-blocktime': ['uint32']} } + '*postcopy-vcpu-blocktime': ['uint32'], + '*compression': 'CompressionStats'} } ## # @query-migrate: diff --git a/qapi/ui.json b/qapi/ui.json index 4ca91bb45a..bf9e157d5a 100644 --- a/qapi/ui.json +++ b/qapi/ui.json @@ -1022,12 +1022,20 @@ # GTK display options. # # @grab-on-hover: Grab keyboard input on mouse hover. +# @zoom-to-fit: Zoom guest display to fit into the host window. When +# turned off the host window will be resized instead. 
+# In case the display device can notify the guest on +# window resizes (virtio-gpu) this will default to "on", +# assuming the guest will resize the display to match +# the window size then. Otherwise it defaults to "off". +# Since 3.1 # # Since: 2.12 # ## { 'struct' : 'DisplayGTK', - 'data' : { '*grab-on-hover' : 'bool' } } + 'data' : { '*grab-on-hover' : 'bool', + '*zoom-to-fit' : 'bool' } } ## # @DisplayGLMode: diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi index 2283fc52c3..16ff946b55 100644 --- a/qemu-deprecated.texi +++ b/qemu-deprecated.texi @@ -35,6 +35,11 @@ which is the default. @section System emulator command line arguments +@subsection -machine enforce-config-section=on|off (since 3.1) + +The @option{enforce-config-section} parameter is replaced by the +@option{-global migration.send-configuration=@var{on|off}} option. + @subsection -no-kvm (since 1.3.0) The ``-no-kvm'' argument is now a synonym for setting @@ -83,6 +88,11 @@ The 'file' driver for drives is no longer appropriate for character or host devices and will only accept regular files (S_IFREG). The correct driver for these file types is 'host_cdrom' or 'host_device' as appropriate. +@subsection -net ...,name=@var{name} (since 3.1) + +The @option{name} parameter of the @option{-net} option is a synonym +for the @option{id} parameter, which should now be used instead. + @section QEMU Machine Protocol (QMP) commands @subsection block-dirty-bitmap-add "autoload" parameter (since 2.12.0) @@ -99,6 +109,13 @@ The ``query-cpus'' command is replaced by the ``query-cpus-fast'' command. The ``arch'' output member of the ``query-cpus-fast'' command is replaced by the ``target'' output member. +@section System emulator human monitor commands + +@subsection The hub_id parameter of 'hostfwd_add' / 'hostfwd_remove' (since 3.1) + +The @option{[hub_id name]} parameter tuple of the 'hostfwd_add' and +'hostfwd_remove' HMP commands has been replaced by @option{netdev_id}. 
+ @section System emulator devices @subsection ivshmem (since 2.6.0) diff --git a/qemu-doc.texi b/qemu-doc.texi index 7bd449f398..f7ad1dfe4b 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -943,7 +943,7 @@ for details Bulk-only transport storage device, see @url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} for details here, too -@item usb-mtp,x-root=@var{dir} +@item usb-mtp,rootdir=@var{dir} Media transfer protocol device, using @var{dir} as root of the file tree that is presented to the guest. @item usb-host,hostbus=@var{bus},hostaddr=@var{addr} diff --git a/qemu-edid.c b/qemu-edid.c new file mode 100644 index 0000000000..ae8b8a6d9b --- /dev/null +++ b/qemu-edid.c @@ -0,0 +1,120 @@ +/* + * QEMU EDID test tool. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/bswap.h" +#include "qemu/cutils.h" +#include "hw/display/edid.h" + +static qemu_edid_info info; + +static void usage(FILE *out) +{ + fprintf(out, + "\n" + "This is a test tool for the qemu edid generator.\n" + "\n" + "Typically you'll pipe the output into edid-decode\n" + "to check if the generator works correctly.\n" + "\n" + "usage: qemu-edid <options>\n" + "options:\n" + " -h print this text\n" + " -o <file> set output file (stdout by default)\n" + " -v <vendor> set monitor vendor (three letters)\n" + " -n <name> set monitor name\n" + " -s <serial> set monitor serial\n" + " -d <dpi> set display resolution\n" + " -x <prefx> set preferred width\n" + " -y <prefy> set preferred height\n" + " -X <maxx> set maximum width\n" + " -Y <maxy> set maximum height\n" + "\n"); +} + +int main(int argc, char *argv[]) +{ + FILE *outfile = NULL; + uint8_t blob[256]; + int rc; + + for (;;) { + rc = getopt(argc, argv, "ho:x:y:X:Y:d:v:n:s:"); + if (rc == -1) { + break; + } + switch (rc) { + case 'o': + if (outfile) { + 
fprintf(stderr, "outfile specified twice\n"); + exit(1); + } + outfile = fopen(optarg, "w"); + if (outfile == NULL) { + fprintf(stderr, "open %s: %s\n", optarg, strerror(errno)); + exit(1); + } + break; + case 'x': + if (qemu_strtoui(optarg, NULL, 10, &info.prefx) < 0) { + fprintf(stderr, "not a number: %s\n", optarg); + exit(1); + } + break; + case 'y': + if (qemu_strtoui(optarg, NULL, 10, &info.prefy) < 0) { + fprintf(stderr, "not a number: %s\n", optarg); + exit(1); + } + break; + case 'X': + if (qemu_strtoui(optarg, NULL, 10, &info.maxx) < 0) { + fprintf(stderr, "not a number: %s\n", optarg); + exit(1); + } + break; + case 'Y': + if (qemu_strtoui(optarg, NULL, 10, &info.maxy) < 0) { + fprintf(stderr, "not a number: %s\n", optarg); + exit(1); + } + break; + case 'd': + if (qemu_strtoui(optarg, NULL, 10, &info.dpi) < 0) { + fprintf(stderr, "not a number: %s\n", optarg); + exit(1); + } + break; + case 'v': + info.vendor = optarg; + break; + case 'n': + info.name = optarg; + break; + case 's': + info.serial = optarg; + break; + case 'h': + usage(stdout); + exit(0); + default: + usage(stderr); + exit(1); + } + } + + if (outfile == NULL) { + outfile = stdout; + } + + memset(blob, 0, sizeof(blob)); + qemu_edid_generate(blob, sizeof(blob), &info); + fwrite(blob, sizeof(blob), 1, outfile); + fflush(outfile); + + exit(0); +} diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 5bf5f28178..db0b3ee5ef 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -2025,7 +2025,7 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) return -EINVAL; } - if (writethrough != blk_enable_write_cache(blk) && + if (!writethrough != blk_enable_write_cache(blk) && blk_get_attached_dev(blk)) { error_report("Cannot change cache.writeback: Device attached"); diff --git a/qemu-options.hx b/qemu-options.hx index a642ad297f..f139459e80 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -732,19 +732,23 @@ image file) @item cache-size The maximum total size of the L2 table and refcount 
block caches in bytes -(default: 1048576 bytes or 8 clusters, whichever is larger) +(default: the sum of l2-cache-size and refcount-cache-size) @item l2-cache-size The maximum size of the L2 table cache in bytes -(default: 4/5 of the total cache size) +(default: if cache-size is not specified - 32M on Linux platforms, and 8M on +non-Linux platforms; otherwise, as large as possible within the cache-size, +while permitting the requested or the minimal refcount cache size) @item refcount-cache-size The maximum size of the refcount block cache in bytes -(default: 1/5 of the total cache size) +(default: 4 times the cluster size; or if cache-size is specified, the part of +it which is not used for the L2 cache) @item cache-clean-interval Clean unused entries in the L2 and refcount caches. The interval is in seconds. -The default value is 0 and it disables this feature. +The default value is 600 on supporting platforms, and 0 on other platforms. +Setting it to 0 disables this feature. @item pass-discard-request Whether discard requests to the qcow2 device should be forwarded to the data diff --git a/qemu-seccomp.c b/qemu-seccomp.c index 4729eb107f..1baa5c69ed 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -282,7 +282,24 @@ static QemuOptsList qemu_sandbox_opts = { static void seccomp_register(void) { - qemu_add_opts(&qemu_sandbox_opts); + bool add = false; + + /* FIXME: use seccomp_api_get() >= 2 check when released */ + +#if defined(SECCOMP_FILTER_FLAG_TSYNC) + int check; + + /* check host TSYNC capability, it returns errno == ENOSYS if unavailable */ + check = qemu_seccomp(SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_TSYNC, NULL); + if (check < 0 && errno == EFAULT) { + add = true; + } +#endif + + if (add) { + qemu_add_opts(&qemu_sandbox_opts); + } } opts_init(seccomp_register); #endif diff --git a/replay/replay-char.c b/replay/replay-char.c index 736cc8c2e6..736cc8c2e6 100755..100644 --- a/replay/replay-char.c +++ b/replay/replay-char.c diff --git 
a/scripts/device-crash-test b/scripts/device-crash-test index e6c233e9bf..7045594bd4 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -125,7 +125,7 @@ ERROR_WHITELIST = [ {'device':'tpm-tis', 'expected':True}, # tpm_tis: backend driver with id (null) could not be found {'device':'unimplemented-device', 'expected':True}, # property 'size' not specified or zero {'device':'usb-braille', 'expected':True}, # Property chardev is required - {'device':'usb-mtp', 'expected':True}, # x-root property must be configured + {'device':'usb-mtp', 'expected':True}, # rootdir property must be configured {'device':'usb-redir', 'expected':True}, # Parameter 'chardev' is missing {'device':'usb-serial', 'expected':True}, # Property chardev is required {'device':'usb-storage', 'expected':True}, # drive property not set diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index a91e4f1313..436195894b 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -302,11 +302,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, return 0; } -#if TCG_TARGET_REG_BITS == 64 # define LOWREGMASK(x) ((x) & 7) -#else -# define LOWREGMASK(x) (x) -#endif #define P_EXT 0x100 /* 0x0f opcode prefix */ #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ diff --git a/tests/Makefile.include b/tests/Makefile.include index 87c81d1dcc..d0c0a92e67 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -255,12 +255,8 @@ check-qtest-pci-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF) gcov-files-pci-$(CONFIG_IVSHMEM_DEVICE) += hw/misc/ivshmem.c check-qtest-pci-y += tests/megasas-test$(EXESUF) gcov-files-pci-y += hw/scsi/megasas.c -check-qtest-$(CONFIG_VMXNET3_PCI) += tests/vmxnet3-test$(EXESUF) -gcov-files-$(CONFIG_VMXNET3_PCI) += hw/net/vmxnet3.c -check-qtest-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) -check-qtest-$(CONFIG_WDT_IB700) += tests/wdt_ib700-test$(EXESUF) -gcov-files-$(CONFIG_WDT_IB700) += 
hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c +check-qtest-i386-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) check-qtest-i386-y += tests/fdc-test$(EXESUF) gcov-files-i386-y = hw/block/fdc.c check-qtest-i386-y += tests/ide-test$(EXESUF) @@ -277,9 +273,13 @@ check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) check-qtest-i386-y += tests/i440fx-test$(EXESUF) check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) check-qtest-i386-y += tests/drive_del-test$(EXESUF) +check-qtest-i386-$(CONFIG_WDT_IB700) += tests/wdt_ib700-test$(EXESUF) +gcov-files-i386-$(CONFIG_WDT_IB700) += hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c check-qtest-i386-y += tests/tco-test$(EXESUF) check-qtest-i386-y += $(check-qtest-pci-y) gcov-files-i386-y += $(gcov-files-pci-y) +check-qtest-i386-$(CONFIG_VMXNET3_PCI) += tests/vmxnet3-test$(EXESUF) +gcov-files-i386-$(CONFIG_VMXNET3_PCI) += hw/net/vmxnet3.c gcov-files-i386-y += hw/net/net_rx_pkt.c gcov-files-i386-y += hw/net/net_tx_pkt.c check-qtest-i386-$(CONFIG_PVPANIC) += tests/pvpanic-test$(EXESUF) @@ -332,8 +332,15 @@ check-qtest-m68k-y = tests/boot-serial-test$(EXESUF) check-qtest-microblaze-y = tests/boot-serial-test$(EXESUF) +check-qtest-mips-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) + +check-qtest-mips64-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) + +check-qtest-mips64el-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) + check-qtest-moxie-y = tests/boot-serial-test$(EXESUF) +check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) check-qtest-ppc-y += tests/boot-order-test$(EXESUF) check-qtest-ppc-y += tests/prom-env-test$(EXESUF) check-qtest-ppc-y += tests/drive_del-test$(EXESUF) @@ -366,11 +373,16 @@ check-qtest-ppc64-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF) gcov-files-ppc64-$(CONFIG_IVSHMEM_DEVICE) += hw/misc/ivshmem.c check-qtest-ppc64-y += tests/cpu-plug-test$(EXESUF) +check-qtest-sh4-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) + 
+check-qtest-sh4eb-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) + check-qtest-sparc-y = tests/prom-env-test$(EXESUF) check-qtest-sparc-y += tests/m48t59-test$(EXESUF) gcov-files-sparc-y = hw/timer/m48t59.c check-qtest-sparc-y += tests/boot-serial-test$(EXESUF) +check-qtest-sparc64-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) check-qtest-sparc64-y += tests/boot-serial-test$(EXESUF) diff --git a/tests/migration-test.c b/tests/migration-test.c index 0e687b7512..20f38f1930 100644 --- a/tests/migration-test.c +++ b/tests/migration-test.c @@ -21,11 +21,13 @@ #include "chardev/char.h" #include "sysemu/sysemu.h" +#include "migration/migration-test.h" + /* TODO actually test the results and get rid of this */ #define qtest_qmp_discard_response(...) qobject_unref(qtest_qmp(__VA_ARGS__)) -const unsigned start_address = 1024 * 1024; -const unsigned end_address = 100 * 1024 * 1024; +unsigned start_address; +unsigned end_address; bool got_stop; static bool uffd_feature_thread_id; @@ -80,10 +82,10 @@ static bool ufd_version_check(void) static const char *tmpfs; -/* A simple PC boot sector that modifies memory (1-100MB) quickly - * outputting a 'B' every so often if it's still running. +/* The boot file modifies memory area in [start_address, end_address) + * repeatedly. It outputs a 'B' at a fixed rate while it's still running. */ -#include "tests/migration/x86-a-b-bootblock.h" +#include "tests/migration/i386/a-b-bootblock.h" static void init_bootfile_x86(const char *bootpath) { @@ -270,11 +272,11 @@ static void wait_for_migration_pass(QTestState *who) static void check_guests_ram(QTestState *who) { /* Our ASM test will have been incrementing one byte from each page from - * 1MB to <100MB in order. 
- * This gives us a constraint that any page's byte should be equal or less - * than the previous pages byte (mod 256); and they should all be equal - * except for one transition at the point where we meet the incrementer. - * (We're running this with the guest stopped). + * start_address to < end_address in order. This gives us a constraint + * that any page's byte should be equal or less than the previous pages + * byte (mod 256); and they should all be equal except for one transition + * at the point where we meet the incrementer. (We're running this with + * the guest stopped). */ unsigned address; uint8_t first_byte; @@ -285,7 +287,8 @@ static void check_guests_ram(QTestState *who) qtest_memread(who, start_address, &first_byte, 1); last_byte = first_byte; - for (address = start_address + 4096; address < end_address; address += 4096) + for (address = start_address + TEST_MEM_PAGE_SIZE; address < end_address; + address += TEST_MEM_PAGE_SIZE) { uint8_t b; qtest_memread(who, address, &b, 1); @@ -437,12 +440,14 @@ static int test_migrate_start(QTestState **from, QTestState **to, " -drive file=%s,format=raw" " -incoming %s", accel, tmpfs, bootpath, uri); + start_address = X86_TEST_MEM_START; + end_address = X86_TEST_MEM_END; } else if (strcmp(arch, "ppc64") == 0) { - cmd_src = g_strdup_printf("-machine accel=%s -m 256M" + cmd_src = g_strdup_printf("-machine accel=%s -m 256M -nodefaults" " -name source,debug-threads=on" " -serial file:%s/src_serial" - " -prom-env '" - "boot-command=hex .\" _\" begin %x %x " + " -prom-env 'use-nvramrc?=true' -prom-env " + "'nvramrc=hex .\" _\" begin %x %x " "do i c@ 1 + i c! 
1000 +loop .\" B\" 0 " "until'", accel, tmpfs, end_address, start_address); @@ -451,6 +456,9 @@ static int test_migrate_start(QTestState **from, QTestState **to, " -serial file:%s/dest_serial" " -incoming %s", accel, tmpfs, uri); + + start_address = PPC_TEST_MEM_START; + end_address = PPC_TEST_MEM_END; } else { g_assert_not_reached(); } diff --git a/tests/migration/Makefile b/tests/migration/Makefile new file mode 100644 index 0000000000..dc3b551976 --- /dev/null +++ b/tests/migration/Makefile @@ -0,0 +1,35 @@ +# +# Copyright (c) 2018 Red Hat, Inc. and/or its affiliates +# +# This work is licensed under the terms of the GNU GPL, version 2 or later. +# See the COPYING file in the top-level directory. +# + +TARGET_LIST = i386 + +SRC_PATH = ../.. + +override define __note +/* This file is automatically generated from the assembly file in + * tests/migration/$@. Edit that file and then run "make all" + * inside tests/migration to update, and then remember to send both + * the header and the assembler differences in your patch submission. 
+ */ +endef +export __note + +find-arch-cross-cc = $(lastword $(shell grep -h "CROSS_CC_GUEST=" $(wildcard $(SRC_PATH)/$(patsubst i386,*86*,$(1))-softmmu/config-target.mak) /dev/null)) +parse-cross-prefix = $(subst gcc,,$(patsubst cc,gcc,$(patsubst CROSS_CC_GUEST="%",%,$(call find-arch-cross-cc,$(1))))) +gen-cross-prefix = $(patsubst %-,CROSS_PREFIX=%-,$(call parse-cross-prefix,$(1))) + +.PHONY: all $(TARGET_LIST) + +all: $(TARGET_LIST) + +$(TARGET_LIST): + $(MAKE) -C $@ $(call gen-cross-prefix,$@) + +clean: + for target in $(TARGET_LIST); do \ + $(MAKE) -C $$target clean; \ + done diff --git a/tests/migration/i386/Makefile b/tests/migration/i386/Makefile new file mode 100644 index 0000000000..5c0324134a --- /dev/null +++ b/tests/migration/i386/Makefile @@ -0,0 +1,22 @@ +# To specify cross compiler prefix, use CROSS_PREFIX= +# $ make CROSS_PREFIX=x86_64-linux-gnu- + +.PHONY: all clean +all: a-b-bootblock.h + +a-b-bootblock.h: x86.bootsect + echo "$$__note" > header.tmp + xxd -i $< | sed -e 's/.*int.*//' >> header.tmp + mv header.tmp $@ + +x86.bootsect: x86.boot + dd if=$< of=$@ bs=256 count=2 skip=124 + +x86.boot: x86.o + $(CROSS_PREFIX)objcopy -O binary $< $@ + +x86.o: a-b-bootblock.S + $(CROSS_PREFIX)gcc -m32 -march=i486 -c $< -o $@ + +clean: + @rm -rf *.boot *.o *.bootsect diff --git a/tests/migration/x86-a-b-bootblock.s b/tests/migration/i386/a-b-bootblock.S index b1642641a7..3f97f28023 100644 --- a/tests/migration/x86-a-b-bootblock.s +++ b/tests/migration/i386/a-b-bootblock.S @@ -3,10 +3,6 @@ # range. # Outputs an initial 'A' on serial followed by repeated 'B's # -# run tests/migration/rebuild-x86-bootblock.sh -# to regenerate the hex, and remember to include both the .h and .s -# in any patches. -# # Copyright (c) 2016 Red Hat, Inc. and/or its affiliates # This work is licensed under the terms of the GNU GPL, version 2 or later. # See the COPYING file in the top-level directory. 
diff --git a/tests/migration/x86-a-b-bootblock.h b/tests/migration/i386/a-b-bootblock.h index 78a151fe2a..7d459d4fde 100644 --- a/tests/migration/x86-a-b-bootblock.h +++ b/tests/migration/i386/a-b-bootblock.h @@ -1,7 +1,7 @@ -/* This file is automatically generated from - * tests/migration/x86-a-b-bootblock.s, edit that and then run - * tests/migration/rebuild-x86-bootblock.sh to update, - * and then remember to send both in your patch submission. +/* This file is automatically generated from the assembly file in + * tests/migration/i386. Edit that file and then run "make all" + * inside tests/migration to update, and then remember to send both + * the header and the assembler differences in your patch submission. */ unsigned char x86_bootsect[] = { 0xfa, 0x0f, 0x01, 0x16, 0x74, 0x7c, 0x66, 0xb8, 0x01, 0x00, 0x00, 0x00, diff --git a/tests/migration/migration-test.h b/tests/migration/migration-test.h new file mode 100644 index 0000000000..c4c0c526b6 --- /dev/null +++ b/tests/migration/migration-test.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2018 Red Hat, Inc. and/or its affiliates + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef _TEST_MIGRATION_H_ +#define _TEST_MIGRATION_H_ + +/* Common */ +#define TEST_MEM_PAGE_SIZE 4096 + +/* x86 */ +#define X86_TEST_MEM_START (1 * 1024 * 1024) +#define X86_TEST_MEM_END (100 * 1024 * 1024) + +/* PPC */ +#define PPC_TEST_MEM_START (1 * 1024 * 1024) +#define PPC_TEST_MEM_END (100 * 1024 * 1024) + +#endif /* _TEST_MIGRATION_H_ */ diff --git a/tests/migration/rebuild-x86-bootblock.sh b/tests/migration/rebuild-x86-bootblock.sh deleted file mode 100755 index 86cec5d284..0000000000 --- a/tests/migration/rebuild-x86-bootblock.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/sh -# Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates -# This work is licensed under the terms of the GNU GPL, version 2 or later. 
-# See the COPYING file in the top-level directory. -# -# Author: dgilbert@redhat.com - -ASMFILE=$PWD/tests/migration/x86-a-b-bootblock.s -HEADER=$PWD/tests/migration/x86-a-b-bootblock.h - -if [ ! -e "$ASMFILE" ] -then - echo "Couldn't find $ASMFILE" >&2 - exit 1 -fi - -ASM_WORK_DIR=$(mktemp -d --tmpdir X86BB.XXXXXX) -cd "$ASM_WORK_DIR" && -as --32 -march=i486 "$ASMFILE" -o x86.o && -objcopy -O binary x86.o x86.boot && -dd if=x86.boot of=x86.bootsect bs=256 count=2 skip=124 && -xxd -i x86.bootsect | -sed -e 's/.*int.*//' > x86.hex && -cat - x86.hex <<HERE > "$HEADER" -/* This file is automatically generated from - * tests/migration/x86-a-b-bootblock.s, edit that and then run - * tests/migration/rebuild-x86-bootblock.sh to update, - * and then remember to send both in your patch submission. - */ -HERE - -rm x86.hex x86.bootsect x86.boot x86.o -cd .. && rmdir "$ASM_WORK_DIR" diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out index 2e71cff3ce..b10c71db03 100644 --- a/tests/qemu-iotests/067.out +++ b/tests/qemu-iotests/067.out @@ -385,6 +385,7 @@ Testing: -device virtio-scsi -device scsi-cd,id=cd0 { "return": [ { + "io-status": "ok", "device": "", "locked": false, "removable": true, diff --git a/tests/qemu-iotests/137 b/tests/qemu-iotests/137 index 87965625d8..19e8597306 100755 --- a/tests/qemu-iotests/137 +++ b/tests/qemu-iotests/137 @@ -109,7 +109,6 @@ $QEMU_IO \ -c "reopen -o cache-size=1M,l2-cache-size=64k,refcount-cache-size=64k" \ -c "reopen -o cache-size=1M,l2-cache-size=2M" \ -c "reopen -o cache-size=1M,refcount-cache-size=2M" \ - -c "reopen -o l2-cache-size=256T" \ -c "reopen -o l2-cache-entry-size=33k" \ -c "reopen -o l2-cache-entry-size=128k" \ -c "reopen -o refcount-cache-size=256T" \ @@ -119,6 +118,13 @@ $QEMU_IO \ -c "reopen -o cache-clean-interval=-1" \ "$TEST_IMG" | _filter_qemu_io +IMGOPTS="cluster_size=256k" _make_test_img 32P +$QEMU_IO \ + -c "reopen -o l2-cache-entry-size=512,l2-cache-size=1T" \ + "$TEST_IMG" | _filter_qemu_io + 
+_make_test_img 64M + echo echo === Test transaction semantics === echo diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out index 6a2ffc71fd..2c080b72f3 100644 --- a/tests/qemu-iotests/137.out +++ b/tests/qemu-iotests/137.out @@ -19,7 +19,6 @@ Parameter 'lazy-refcounts' expects 'on' or 'off' cache-size, l2-cache-size and refcount-cache-size may not be set at the same time l2-cache-size may not exceed cache-size refcount-cache-size may not exceed cache-size -L2 cache size too big L2 cache entry size must be a power of two between 512 and the cluster size (65536) L2 cache entry size must be a power of two between 512 and the cluster size (65536) Refcount cache size too big @@ -27,6 +26,9 @@ Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-c Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all Cache clean interval too big +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=36028797018963968 +L2 cache size too big +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 === Test transaction semantics === diff --git a/tests/qemu-iotests/153.out b/tests/qemu-iotests/153.out index 93eaf10486..884254868c 100644 --- a/tests/qemu-iotests/153.out +++ b/tests/qemu-iotests/153.out @@ -12,11 +12,11 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t == Launching another QEMU, opts: '' == QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Launching another QEMU, opts: 'read-only=on' == QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,read-only=on: Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? 
== Launching another QEMU, opts: 'read-only=on,force-share=on' == @@ -24,77 +24,77 @@ Is another process using the image? _qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? no file open, try 'help open' _qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? no file open, try 'help open' _qemu_img_wrapper info TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper check TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper map TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? 
_qemu_img_wrapper amend -o TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper commit TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper resize TEST_DIR/t.qcow2 32M qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? 
+Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper create -f qcow2 TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? file format: IMGFMT == Running utility commands -U == @@ -132,7 +132,7 @@ Try 'qemu-img --help' for more information _qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 @@ -157,7 +157,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t == Launching another QEMU, opts: '' == QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Launching another QEMU, opts: 'read-only=on' == @@ -167,13 +167,13 @@ Is another process using the image? _qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 _qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? no file open, try 'help open' _qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 @@ -188,19 +188,19 @@ _qemu_img_wrapper map TEST_DIR/t.qcow2 _qemu_img_wrapper amend -o TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? 
_qemu_img_wrapper commit TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper resize TEST_DIR/t.qcow2 32M qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 @@ -212,11 +212,11 @@ _qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 _qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper create -f qcow2 TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? file format: IMGFMT == Running utility commands -U == @@ -254,7 +254,7 @@ Try 'qemu-img --help' for more information _qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? _qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 @@ -372,17 +372,17 @@ Round done == Two devices with the same image (read-only=off - read-only=off) == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2,read-only=off: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? 
== Two devices with the same image (read-only=off - read-only=on) == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2,read-only=on: Failed to get shared "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Two devices with the same image (read-only=off - read-only=on,force-share=on) == == Two devices with the same image (read-only=on - read-only=off) == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2,read-only=off: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Two devices with the same image (read-only=on - read-only=on) == @@ -403,13 +403,13 @@ Formatting 'TEST_DIR/t.IMGFMT.c', fmt=IMGFMT size=33554432 backing_file=TEST_DIR == Backing image also as an active device == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Backing image also as an active device (ro) == == Symbolic link == QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? == Active commit to intermediate layer should work when base in use == {"return": {}} @@ -420,7 +420,7 @@ Adding drive _qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? Creating overlay with qemu-img when the guest is running should be allowed _qemu_img_wrapper create -f qcow2 -b TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.overlay @@ -433,7 +433,7 @@ _qemu_img_wrapper info TEST_DIR/t.qcow2 _qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 can't open device TEST_DIR/t.qcow2: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? 
Closing the other _qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out index 23a4dbf809..f1463c8862 100644 --- a/tests/qemu-iotests/182.out +++ b/tests/qemu-iotests/182.out @@ -4,5 +4,5 @@ Starting QEMU Starting a second QEMU using the same image should fail QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,id=drive0,file.locking=on: Failed to get "write" lock -Is another process using the image? +Is another process using the image [TEST_DIR/t.qcow2]? *** done diff --git a/tests/qht-bench.c b/tests/qht-bench.c index f492b3a20a..2089e2bed1 100644 --- a/tests/qht-bench.c +++ b/tests/qht-bench.c @@ -53,6 +53,7 @@ static unsigned long resize_delay = 1000; static double resize_rate; /* 0.0 to 1.0 */ static unsigned int n_rz_threads = 1; static QemuThread *rz_threads; +static bool precompute_hash; static double update_rate; /* 0.0 to 1.0 */ static uint64_t update_threshold; @@ -101,11 +102,18 @@ static bool is_equal(const void *ap, const void *bp) return *a == *b; } -static inline uint32_t h(unsigned long v) +static uint32_t h(unsigned long v) { return tb_hash_func7(v, 0, 0, 0, 0); } +static uint32_t hval(unsigned long v) +{ + return v; +} + +static uint32_t (*hfunc)(unsigned long v) = h; + /* * From: https://en.wikipedia.org/wiki/Xorshift * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only @@ -149,7 +157,7 @@ static void do_rw(struct thread_info *info) bool read; p = &keys[info->r & (lookup_range - 1)]; - hash = h(*p); + hash = hfunc(*p); read = qht_lookup(&ht, p, hash); if (read) { stats->rd++; @@ -158,7 +166,7 @@ static void do_rw(struct thread_info *info) } } else { p = &keys[info->r & (update_range - 1)]; - hash = h(*p); + hash = hfunc(*p); if (info->write_op) { bool written = false; @@ -289,7 +297,9 @@ static void htable_init(void) /* avoid allocating memory later by allocating all the keys now */ keys = g_malloc(sizeof(*keys) * n); for (i = 0; i < n; i++) { - keys[i] = 
populate_offset + i; + long val = populate_offset + i; + + keys[i] = precompute_hash ? h(val) : hval(val); } /* some sanity checks */ @@ -321,7 +331,7 @@ static void htable_init(void) r = xorshift64star(r); p = &keys[r & (init_range - 1)]; - hash = h(*p); + hash = hfunc(*p); if (qht_insert(&ht, p, hash, NULL)) { break; } @@ -412,7 +422,7 @@ static void parse_args(int argc, char *argv[]) int c; for (;;) { - c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:r:Rs:S:u:"); + c = getopt(argc, argv, "d:D:g:k:K:l:hn:N:o:pr:Rs:S:u:"); if (c < 0) { break; } @@ -451,6 +461,10 @@ static void parse_args(int argc, char *argv[]) case 'o': populate_offset = atol(optarg); break; + case 'p': + precompute_hash = true; + hfunc = hval; + break; case 'r': update_range = pow2ceil(atol(optarg)); break; diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index c9f29c8b10..ee1740ff06 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -694,6 +694,8 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) s->bh_indirection_ctx = ctx_b; aio_ret = -EINPROGRESS; + qemu_event_reset(&done_event); + if (drain_thread == 0) { acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret); } else { @@ -723,7 +725,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) * but the drain in this thread can continue immediately after * bdrv_dec_in_flight() and aio_ret might be assigned only slightly * later. 
*/ - qemu_event_reset(&done_event); do_drain_begin(drain_type, bs); g_assert_cmpint(bs->in_flight, ==, 0); @@ -743,7 +744,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) } break; case 1: - qemu_event_reset(&done_event); aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data); qemu_event_wait(&done_event); break; diff --git a/tests/test-qht.c b/tests/test-qht.c index dda6a067be..4d23cefab6 100644 --- a/tests/test-qht.c +++ b/tests/test-qht.c @@ -41,7 +41,7 @@ static void insert(int a, int b) } } -static void rm(int init, int end) +static void do_rm(int init, int end, bool exist) { int i; @@ -49,10 +49,24 @@ static void rm(int init, int end) uint32_t hash; hash = arr[i]; - g_assert_true(qht_remove(&ht, &arr[i], hash)); + if (exist) { + g_assert_true(qht_remove(&ht, &arr[i], hash)); + } else { + g_assert_false(qht_remove(&ht, &arr[i], hash)); + } } } +static void rm(int init, int end) +{ + do_rm(init, end, true); +} + +static void rm_nonexist(int init, int end) +{ + do_rm(init, end, false); +} + static void check(int a, int b, bool expected) { struct qht_stats stats; @@ -84,7 +98,7 @@ static void check(int a, int b, bool expected) qht_statistics_destroy(&stats); } -static void count_func(struct qht *ht, void *p, uint32_t hash, void *userp) +static void count_func(void *p, uint32_t hash, void *userp) { unsigned int *curr = userp; @@ -108,14 +122,79 @@ static void iter_check(unsigned int count) g_assert_cmpuint(curr, ==, count); } +static void sum_func(void *p, uint32_t hash, void *userp) +{ + uint32_t *sum = userp; + uint32_t a = *(uint32_t *)p; + + *sum += a; +} + +static void iter_sum_check(unsigned int expected) +{ + unsigned int sum = 0; + + qht_iter(&ht, sum_func, &sum); + g_assert_cmpuint(sum, ==, expected); +} + +static bool rm_mod_func(void *p, uint32_t hash, void *userp) +{ + uint32_t a = *(uint32_t *)p; + unsigned int mod = *(unsigned int *)userp; + + return a % mod == 0; +} + +static void iter_rm_mod(unsigned int 
mod) +{ + qht_iter_remove(&ht, rm_mod_func, &mod); +} + +static void iter_rm_mod_check(unsigned int mod) +{ + unsigned int expected = 0; + unsigned int i; + + for (i = 0; i < N; i++) { + if (i % mod == 0) { + continue; + } + expected += i; + } + iter_sum_check(expected); +} + static void qht_do_test(unsigned int mode, size_t init_entries) { /* under KVM we might fetch stats from an uninitialized qht */ check_n(0); qht_init(&ht, is_equal, 0, mode); + rm_nonexist(0, 4); + /* + * Test that we successfully delete the last element in a bucket. + * This is a hard-to-reach code path when resizing is on, but without + * resizing we can easily hit it if init_entries <= 1. + * Given that the number of elements per bucket can be 4 or 6 depending on + * the host's pointer size, test the removal of the 4th and 6th elements. + */ + insert(0, 4); + rm_nonexist(5, 6); + rm(3, 4); + check_n(3); + insert(3, 6); + rm(5, 6); + check_n(5); + rm_nonexist(7, 8); + iter_rm_mod(1); + + if (!(mode & QHT_MODE_AUTO_RESIZE)) { + qht_resize(&ht, init_entries * 4 + 4); + } check_n(0); + rm_nonexist(0, 10); insert(0, N); check(0, N, true); check_n(N); @@ -138,8 +217,12 @@ static void qht_do_test(unsigned int mode, size_t init_entries) insert(10, 150); check_n(N); - rm(1, 2); - check_n(N - 1); + qht_reset(&ht); + insert(0, N); + rm_nonexist(N, N + 32); + iter_rm_mod(10); + iter_rm_mod_check(10); + check_n(N * 9 / 10); qht_reset_size(&ht, 0); check_n(0); check(0, N, false); diff --git a/tests/test-replication.c b/tests/test-replication.c index c8165ae954..f085d1993a 100644 --- a/tests/test-replication.c +++ b/tests/test-replication.c @@ -207,13 +207,17 @@ static BlockBackend *start_primary(void) static void teardown_primary(void) { BlockBackend *blk; + AioContext *ctx; /* remove P_ID */ blk = blk_by_name(P_ID); assert(blk); + ctx = blk_get_aio_context(blk); + aio_context_acquire(ctx); monitor_remove_blk(blk); blk_unref(blk); + aio_context_release(ctx); } static void test_primary_read(void) @@ 
-365,20 +369,27 @@ static void teardown_secondary(void) { /* only need to destroy two BBs */ BlockBackend *blk; + AioContext *ctx; /* remove S_LOCAL_DISK_ID */ blk = blk_by_name(S_LOCAL_DISK_ID); assert(blk); + ctx = blk_get_aio_context(blk); + aio_context_acquire(ctx); monitor_remove_blk(blk); blk_unref(blk); + aio_context_release(ctx); /* remove S_ID */ blk = blk_by_name(S_ID); assert(blk); + ctx = blk_get_aio_context(blk); + aio_context_acquire(ctx); monitor_remove_blk(blk); blk_unref(blk); + aio_context_release(ctx); } static void test_secondary_read(void) diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py index 7e58d9e0ca..cafbc6b3a5 100755 --- a/tests/vm/basevm.py +++ b/tests/vm/basevm.py @@ -65,6 +65,7 @@ class BaseVM(object): self._stdout = self._devnull self._args = [ \ "-nodefaults", "-m", "4G", + "-cpu", "max", "-netdev", "user,id=vnet,hostfwd=:127.0.0.1:0-:22", "-device", "virtio-net-pci,netdev=vnet", "-vnc", "127.0.0.1:0,to=20", @@ -72,11 +73,9 @@ class BaseVM(object): if vcpus: self._args += ["-smp", str(vcpus)] if os.access("/dev/kvm", os.R_OK | os.W_OK): - self._args += ["-cpu", "host"] self._args += ["-enable-kvm"] else: logging.info("KVM not available, not using -enable-kvm") - self._args += ["-cpu", "max"] self._data_args = [] def _download_with_cache(self, url, sha256sum=None): diff --git a/ui/cursor.c b/ui/cursor.c index f3da0cee79..26ce69fe5e 100644 --- a/ui/cursor.c +++ b/ui/cursor.c @@ -128,13 +128,25 @@ void cursor_set_mono(QEMUCursor *c, uint32_t *data = c->data; uint8_t bit; int x,y,bpl; - + bool expand_bitmap_only = image == mask; + bool has_inverted_colors = false; + const uint32_t inverted = 0x80000000; + + /* + * Converts a monochrome bitmap with XOR mask 'image' and AND mask 'mask': + * https://docs.microsoft.com/en-us/windows-hardware/drivers/display/drawing-monochrome-pointers + */ bpl = cursor_get_mono_bpl(c); for (y = 0; y < c->height; y++) { bit = 0x80; for (x = 0; x < c->width; x++, data++) { if (transparent && mask[x/8] & 
bit) { - *data = 0x00000000; + if (!expand_bitmap_only && image[x / 8] & bit) { + *data = inverted; + has_inverted_colors = true; + } else { + *data = 0x00000000; + } } else if (!transparent && !(mask[x/8] & bit)) { *data = 0x00000000; } else if (image[x/8] & bit) { @@ -150,6 +162,32 @@ void cursor_set_mono(QEMUCursor *c, mask += bpl; image += bpl; } + + /* + * If there are any pixels with inverted colors, create an outline (fill + * transparent neighbors with the background color) and use the foreground + * color as "inverted" color. + */ + if (has_inverted_colors) { + data = c->data; + for (y = 0; y < c->height; y++) { + for (x = 0; x < c->width; x++, data++) { + if (*data == 0 /* transparent */ && + ((x > 0 && data[-1] == inverted) || + (x + 1 < c->width && data[1] == inverted) || + (y > 0 && data[-c->width] == inverted) || + (y + 1 < c->height && data[c->width] == inverted))) { + *data = 0xff000000 | background; + } + } + } + data = c->data; + for (x = 0; x < c->width * c->height; x++, data++) { + if (*data == inverted) { + *data = 0xff000000 | foreground; + } + } + } } void cursor_get_mono_image(QEMUCursor *c, int foreground, uint8_t *image) @@ -2136,6 +2136,8 @@ static GSList *gd_vc_gfx_init(GtkDisplayState *s, VirtualConsole *vc, QemuConsole *con, int idx, GSList *group, GtkWidget *view_menu) { + bool zoom_to_fit; + vc->label = qemu_console_get_label(con); vc->s = s; vc->gfx.scale_x = 1.0; @@ -2199,6 +2201,12 @@ static GSList *gd_vc_gfx_init(GtkDisplayState *s, VirtualConsole *vc, group = gd_vc_menu_init(s, vc, idx, group, view_menu); if (dpy_ui_info_supported(vc->gfx.dcl.con)) { + zoom_to_fit = true; + } + if (s->opts->u.gtk.has_zoom_to_fit) { + zoom_to_fit = s->opts->u.gtk.zoom_to_fit; + } + if (zoom_to_fit) { gtk_menu_item_activate(GTK_MENU_ITEM(s->zoom_fit_item)); s->free_scale = true; } @@ -806,7 +806,8 @@ static void sdl2_display_init(DisplayState *ds, DisplayOptions *o) for (i = 0; i < sdl2_num_outputs; i++) { QemuConsole *con = 
qemu_console_lookup_by_index(i); assert(con != NULL); - if (!qemu_console_is_graphic(con)) { + if (!qemu_console_is_graphic(con) && + qemu_console_get_index(con) != 0) { sdl2_console[i].hidden = true; } sdl2_console[i].idx = i; @@ -3821,9 +3821,6 @@ void vnc_display_open(const char *id, Error **errp) bool reverse = false; const char *credid; bool sasl = false; -#ifdef CONFIG_VNC_SASL - int saslErr; -#endif int acl = 0; int lock_key_sync = 1; int key_delay_ms; @@ -3963,10 +3960,14 @@ void vnc_display_open(const char *id, Error **errp) trace_vnc_auth_init(vd, 1, vd->ws_auth, vd->ws_subauth); #ifdef CONFIG_VNC_SASL - if ((saslErr = sasl_server_init(NULL, "qemu")) != SASL_OK) { - error_setg(errp, "Failed to initialize SASL auth: %s", - sasl_errstring(saslErr, NULL, NULL)); - goto fail; + if (sasl) { + int saslErr = sasl_server_init(NULL, "qemu"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", + sasl_errstring(saslErr, NULL, NULL)); + goto fail; + } } #endif vd->lock_key_sync = lock_key_sync; diff --git a/util/aio-posix.c b/util/aio-posix.c index 131ba6b4a8..621b3025d8 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -211,6 +211,7 @@ void aio_set_fd_handler(AioContext *ctx, AioHandler *node; bool is_new = false; bool deleted = false; + int poll_disable_change; qemu_lockcnt_lock(&ctx->list_lock); @@ -244,11 +245,9 @@ void aio_set_fd_handler(AioContext *ctx, QLIST_REMOVE(node, node); deleted = true; } - - if (!node->io_poll) { - ctx->poll_disable_cnt--; - } + poll_disable_change = -!node->io_poll; } else { + poll_disable_change = !io_poll - (node && !node->io_poll); if (node == NULL) { /* Alloc and insert if it's not already there */ node = g_new0(AioHandler, 1); @@ -257,10 +256,6 @@ void aio_set_fd_handler(AioContext *ctx, g_source_add_poll(&ctx->source, &node->pfd); is_new = true; - - ctx->poll_disable_cnt += !io_poll; - } else { - ctx->poll_disable_cnt += !io_poll - !node->io_poll; } /* Update handler with latest 
information */ @@ -274,6 +269,15 @@ void aio_set_fd_handler(AioContext *ctx, node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0); } + /* No need to order poll_disable_cnt writes against other updates; + * the counter is only used to avoid wasting time and latency on + * iterated polling when the system call will be ultimately necessary. + * Changing handlers is a rare event, and a little wasted polling until + * the aio_notify below is not an issue. + */ + atomic_set(&ctx->poll_disable_cnt, + atomic_read(&ctx->poll_disable_cnt) + poll_disable_change); + aio_epoll_update(ctx, node, is_new); qemu_lockcnt_unlock(&ctx->list_lock); aio_notify(ctx); @@ -486,7 +490,7 @@ static void add_pollfd(AioHandler *node) npfd++; } -static bool run_poll_handlers_once(AioContext *ctx) +static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout) { bool progress = false; AioHandler *node; @@ -494,9 +498,11 @@ static bool run_poll_handlers_once(AioContext *ctx) QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { if (!node->deleted && node->io_poll && aio_node_check(ctx, node->is_external) && - node->io_poll(node->opaque) && - node->opaque != &ctx->notifier) { - progress = true; + node->io_poll(node->opaque)) { + *timeout = 0; + if (node->opaque != &ctx->notifier) { + progress = true; + } } /* Caller handles freeing deleted nodes. Don't do it here. 
*/ @@ -518,31 +524,38 @@ static bool run_poll_handlers_once(AioContext *ctx) * * Returns: true if progress was made, false otherwise */ -static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) +static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) { bool progress; - int64_t end_time; + int64_t start_time, elapsed_time; assert(ctx->notify_me); assert(qemu_lockcnt_count(&ctx->list_lock) > 0); - assert(ctx->poll_disable_cnt == 0); - - trace_run_poll_handlers_begin(ctx, max_ns); - end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns; + trace_run_poll_handlers_begin(ctx, max_ns, *timeout); + start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); do { - progress = run_poll_handlers_once(ctx); - } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time); + progress = run_poll_handlers_once(ctx, timeout); + elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time; + } while (!progress && elapsed_time < max_ns + && !atomic_read(&ctx->poll_disable_cnt)); - trace_run_poll_handlers_end(ctx, progress); + /* If time has passed with no successful polling, adjust *timeout to + * keep the same ending time. + */ + if (*timeout != -1) { + *timeout -= MIN(*timeout, elapsed_time); + } + trace_run_poll_handlers_end(ctx, progress, *timeout); return progress; } /* try_poll_mode: * @ctx: the AioContext - * @blocking: busy polling is only attempted when blocking is true + * @timeout: timeout for blocking wait, computed by the caller and updated if + * polling succeeds. * * ctx->notify_me must be non-zero so this function can detect aio_notify(). 
* @@ -550,19 +563,16 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) * * Returns: true if progress was made, false otherwise */ -static bool try_poll_mode(AioContext *ctx, bool blocking) +static bool try_poll_mode(AioContext *ctx, int64_t *timeout) { - if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) { - /* See qemu_soonest_timeout() uint64_t hack */ - int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx), - (uint64_t)ctx->poll_ns); + /* See qemu_soonest_timeout() uint64_t hack */ + int64_t max_ns = MIN((uint64_t)*timeout, (uint64_t)ctx->poll_ns); - if (max_ns) { - poll_set_started(ctx, true); + if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) { + poll_set_started(ctx, true); - if (run_poll_handlers(ctx, max_ns)) { - return true; - } + if (run_poll_handlers(ctx, max_ns, timeout)) { + return true; } } @@ -571,7 +581,7 @@ static bool try_poll_mode(AioContext *ctx, bool blocking) /* Even if we don't run busy polling, try polling once in case it can make * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2). */ - return run_poll_handlers_once(ctx); + return run_poll_handlers_once(ctx, timeout); } bool aio_poll(AioContext *ctx, bool blocking) @@ -601,8 +611,14 @@ bool aio_poll(AioContext *ctx, bool blocking) start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } - progress = try_poll_mode(ctx, blocking); - if (!progress) { + timeout = blocking ? aio_compute_timeout(ctx) : 0; + progress = try_poll_mode(ctx, &timeout); + assert(!(timeout && progress)); + + /* If polling is allowed, non-blocking aio_poll does not need the + * system call---a single round of run_poll_handlers_once suffices. + */ + if (timeout || atomic_read(&ctx->poll_disable_cnt)) { assert(npfd == 0); /* fill pollfds */ @@ -616,8 +632,6 @@ bool aio_poll(AioContext *ctx, bool blocking) } } - timeout = blocking ? 
aio_compute_timeout(ctx) : 0; - /* wait until next event */ if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) { AioHandler epoll_handler; diff --git a/util/memfd.c b/util/memfd.c index d248a53c3c..6287946b61 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -187,6 +187,7 @@ bool qemu_memfd_alloc_check(void) int fd; void *ptr; + fd = -1; ptr = qemu_memfd_alloc("test", 4096, 0, &fd, NULL); memfd_check = ptr ? MEMFD_OK : MEMFD_KO; qemu_memfd_free(ptr, 4096, fd); diff --git a/util/qht.c b/util/qht.c index 1e3a072e25..aa51be3c52 100644 --- a/util/qht.c +++ b/util/qht.c @@ -89,6 +89,19 @@ #define QHT_BUCKET_ENTRIES 4 #endif +enum qht_iter_type { + QHT_ITER_VOID, /* do nothing; use retvoid */ + QHT_ITER_RM, /* remove element if retbool returns true */ +}; + +struct qht_iter { + union { + qht_iter_func_t retvoid; + qht_iter_bool_func_t retbool; + } f; + enum qht_iter_type type; +}; + /* * Do _not_ use qemu_mutex_[try]lock directly! Use these macros, otherwise * the profiler (QSP) will deadlock. @@ -223,7 +236,7 @@ static inline void qht_head_init(struct qht_bucket *b) } static inline -struct qht_bucket *qht_map_to_bucket(struct qht_map *map, uint32_t hash) +struct qht_bucket *qht_map_to_bucket(const struct qht_map *map, uint32_t hash) { return &map->buckets[hash & (map->n_buckets - 1)]; } @@ -255,7 +268,8 @@ static void qht_map_unlock_buckets(struct qht_map *map) * Call with at least a bucket lock held. * @map should be the value read before acquiring the lock (or locks). 
*/ -static inline bool qht_map_is_stale__locked(struct qht *ht, struct qht_map *map) +static inline bool qht_map_is_stale__locked(const struct qht *ht, + const struct qht_map *map) { return map != ht->map; } @@ -324,12 +338,12 @@ struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash, return b; } -static inline bool qht_map_needs_resize(struct qht_map *map) +static inline bool qht_map_needs_resize(const struct qht_map *map) { return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold; } -static inline void qht_chain_destroy(struct qht_bucket *head) +static inline void qht_chain_destroy(const struct qht_bucket *head) { struct qht_bucket *curr = head->next; struct qht_bucket *prev; @@ -469,10 +483,10 @@ bool qht_reset_size(struct qht *ht, size_t n_elems) } static inline -void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func, +void *qht_do_lookup(const struct qht_bucket *head, qht_lookup_func_t func, const void *userp, uint32_t hash) { - struct qht_bucket *b = head; + const struct qht_bucket *b = head; int i; do { @@ -496,7 +510,7 @@ void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func, } static __attribute__((noinline)) -void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func, +void *qht_lookup__slowpath(const struct qht_bucket *b, qht_lookup_func_t func, const void *userp, uint32_t hash) { unsigned int version; @@ -509,11 +523,11 @@ void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func, return ret; } -void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash, +void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash, qht_lookup_func_t func) { - struct qht_bucket *b; - struct qht_map *map; + const struct qht_bucket *b; + const struct qht_map *map; unsigned int version; void *ret; @@ -532,13 +546,16 @@ void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash, return qht_lookup__slowpath(b, func, userp, hash); } -void 
*qht_lookup(struct qht *ht, const void *userp, uint32_t hash) +void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash) { return qht_lookup_custom(ht, userp, hash, ht->cmp); } -/* call with head->lock held */ -static void *qht_insert__locked(struct qht *ht, struct qht_map *map, +/* + * call with head->lock held + * @ht is const since it is only used for ht->cmp() + */ +static void *qht_insert__locked(const struct qht *ht, struct qht_map *map, struct qht_bucket *head, void *p, uint32_t hash, bool *needs_resize) { @@ -632,7 +649,7 @@ bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing) return false; } -static inline bool qht_entry_is_last(struct qht_bucket *b, int pos) +static inline bool qht_entry_is_last(const struct qht_bucket *b, int pos) { if (pos == QHT_BUCKET_ENTRIES - 1) { if (b->next == NULL) { @@ -658,7 +675,7 @@ qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j) } /* - * Find the last valid entry in @head, and swap it with @orig[pos], which has + * Find the last valid entry in @orig, and swap it with @orig[pos], which has * just been invalidated. 
*/ static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos) @@ -692,8 +709,7 @@ static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos) /* call with b->lock held */ static inline -bool qht_remove__locked(struct qht_map *map, struct qht_bucket *head, - const void *p, uint32_t hash) +bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash) { struct qht_bucket *b = head; int i; @@ -728,15 +744,16 @@ bool qht_remove(struct qht *ht, const void *p, uint32_t hash) qht_debug_assert(p); b = qht_bucket_lock__no_stale(ht, hash, &map); - ret = qht_remove__locked(map, b, p, hash); + ret = qht_remove__locked(b, p, hash); qht_bucket_debug__locked(b); qemu_spin_unlock(&b->lock); return ret; } -static inline void qht_bucket_iter(struct qht *ht, struct qht_bucket *b, - qht_iter_func_t func, void *userp) +static inline void qht_bucket_iter(struct qht_bucket *head, + const struct qht_iter *iter, void *userp) { + struct qht_bucket *b = head; int i; do { @@ -744,37 +761,83 @@ static inline void qht_bucket_iter(struct qht *ht, struct qht_bucket *b, if (b->pointers[i] == NULL) { return; } - func(ht, b->pointers[i], b->hashes[i], userp); + switch (iter->type) { + case QHT_ITER_VOID: + iter->f.retvoid(b->pointers[i], b->hashes[i], userp); + break; + case QHT_ITER_RM: + if (iter->f.retbool(b->pointers[i], b->hashes[i], userp)) { + /* replace i with the last valid element in the bucket */ + seqlock_write_begin(&head->sequence); + qht_bucket_remove_entry(b, i); + seqlock_write_end(&head->sequence); + qht_bucket_debug__locked(b); + /* reevaluate i, since it just got replaced */ + i--; + continue; + } + break; + default: + g_assert_not_reached(); + } } b = b->next; } while (b); } /* call with all of the map's locks held */ -static inline void qht_map_iter__all_locked(struct qht *ht, struct qht_map *map, - qht_iter_func_t func, void *userp) +static inline void qht_map_iter__all_locked(struct qht_map *map, + const struct qht_iter 
*iter, + void *userp) { size_t i; for (i = 0; i < map->n_buckets; i++) { - qht_bucket_iter(ht, &map->buckets[i], func, userp); + qht_bucket_iter(&map->buckets[i], iter, userp); } } -void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp) +static inline void +do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp) { struct qht_map *map; map = atomic_rcu_read(&ht->map); qht_map_lock_buckets(map); - /* Note: ht here is merely for carrying ht->mode; ht->map won't be read */ - qht_map_iter__all_locked(ht, map, func, userp); + qht_map_iter__all_locked(map, iter, userp); qht_map_unlock_buckets(map); } -static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp) +void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp) +{ + const struct qht_iter iter = { + .f.retvoid = func, + .type = QHT_ITER_VOID, + }; + + do_qht_iter(ht, &iter, userp); +} + +void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp) +{ + const struct qht_iter iter = { + .f.retbool = func, + .type = QHT_ITER_RM, + }; + + do_qht_iter(ht, &iter, userp); +} + +struct qht_map_copy_data { + struct qht *ht; + struct qht_map *new; +}; + +static void qht_map_copy(void *p, uint32_t hash, void *userp) { - struct qht_map *new = userp; + struct qht_map_copy_data *data = userp; + struct qht *ht = data->ht; + struct qht_map *new = data->new; struct qht_bucket *b = qht_map_to_bucket(new, hash); /* no need to acquire b->lock because no thread has seen this map yet */ @@ -788,6 +851,11 @@ static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp) static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset) { struct qht_map *old; + const struct qht_iter iter = { + .f.retvoid = qht_map_copy, + .type = QHT_ITER_VOID, + }; + struct qht_map_copy_data data; old = ht->map; qht_map_lock_buckets(old); @@ -802,7 +870,9 @@ static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset) } g_assert(new->n_buckets 
!= old->n_buckets); - qht_map_iter__all_locked(ht, old, qht_map_copy, new); + data.ht = ht; + data.new = new; + qht_map_iter__all_locked(old, &iter, &data); qht_map_debug__all_locked(new); atomic_rcu_set(&ht->map, new); @@ -829,9 +899,9 @@ bool qht_resize(struct qht *ht, size_t n_elems) } /* pass @stats to qht_statistics_destroy() when done */ -void qht_statistics_init(struct qht *ht, struct qht_stats *stats) +void qht_statistics_init(const struct qht *ht, struct qht_stats *stats) { - struct qht_map *map; + const struct qht_map *map; int i; map = atomic_rcu_read(&ht->map); @@ -848,8 +918,8 @@ void qht_statistics_init(struct qht *ht, struct qht_stats *stats) stats->head_buckets = map->n_buckets; for (i = 0; i < map->n_buckets; i++) { - struct qht_bucket *head = &map->buckets[i]; - struct qht_bucket *b; + const struct qht_bucket *head = &map->buckets[i]; + const struct qht_bucket *b; unsigned int version; size_t buckets; size_t entries; diff --git a/util/qsp.c b/util/qsp.c index b0c2575d10..2de3a97594 100644 --- a/util/qsp.c +++ b/util/qsp.c @@ -533,7 +533,7 @@ static gint qsp_tree_cmp(gconstpointer ap, gconstpointer bp, gpointer up) } } -static void qsp_sort(struct qht *ht, void *p, uint32_t h, void *userp) +static void qsp_sort(void *p, uint32_t h, void *userp) { QSPEntry *e = p; GTree *tree = userp; @@ -541,7 +541,7 @@ static void qsp_sort(struct qht *ht, void *p, uint32_t h, void *userp) g_tree_insert(tree, e, NULL); } -static void qsp_aggregate(struct qht *global_ht, void *p, uint32_t h, void *up) +static void qsp_aggregate(void *p, uint32_t h, void *up) { struct qht *ht = up; const QSPEntry *e = p; @@ -553,7 +553,7 @@ static void qsp_aggregate(struct qht *global_ht, void *p, uint32_t h, void *up) qsp_entry_aggregate(agg, e); } -static void qsp_iter_diff(struct qht *orig, void *p, uint32_t hash, void *htp) +static void qsp_iter_diff(void *p, uint32_t hash, void *htp) { struct qht *ht = htp; QSPEntry *old = p; @@ -583,8 +583,7 @@ static void qsp_diff(struct qht 
*orig, struct qht *new) qht_iter(orig, qsp_iter_diff, new); } -static void -qsp_iter_callsite_coalesce(struct qht *orig, void *p, uint32_t h, void *htp) +static void qsp_iter_callsite_coalesce(void *p, uint32_t h, void *htp) { struct qht *ht = htp; QSPEntry *old = p; @@ -603,7 +602,7 @@ qsp_iter_callsite_coalesce(struct qht *orig, void *p, uint32_t h, void *htp) e->n_acqs += old->n_acqs; } -static void qsp_ht_delete(struct qht *ht, void *p, uint32_t h, void *htp) +static void qsp_ht_delete(void *p, uint32_t h, void *htp) { g_free(p); } diff --git a/util/trace-events b/util/trace-events index 4822434c89..79569b7fdf 100644 --- a/util/trace-events +++ b/util/trace-events @@ -1,8 +1,8 @@ # See docs/devel/tracing.txt for syntax documentation. # util/aio-posix.c -run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64 -run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d" +run_poll_handlers_begin(void *ctx, int64_t max_ns, int64_t timeout) "ctx %p max_ns %"PRId64 " timeout %"PRId64 +run_poll_handlers_end(void *ctx, bool progress, int64_t timeout) "ctx %p progress %d new timeout %"PRId64 poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 @@ -3917,8 +3917,8 @@ int main(int argc, char **argv, char **envp) } #ifdef CONFIG_SECCOMP - if (qemu_opts_foreach(qemu_find_opts("sandbox"), - parse_sandbox, NULL, NULL)) { + olist = qemu_find_opts_err("sandbox", NULL); + if (olist && qemu_opts_foreach(olist, parse_sandbox, NULL, NULL)) { exit(1); } #endif @@ -4530,6 +4530,7 @@ int main(int argc, char **argv, char **envp) if (load_snapshot(loadvm, &local_err) < 0) { error_report_err(local_err); autostart = 0; + exit(1); } } |