diff options
224 files changed, 3624 insertions, 1628 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index ef2ec58a94..7df088259b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1005,6 +1005,14 @@ S: Supported F: hw/vfio/* F: include/hw/vfio/ +vfio-ccw +M: Cornelia Huck <cornelia.huck@de.ibm.com> +S: Supported +F: hw/vfio/ccw.c +F: hw/s390x/s390-ccw.c +F: include/hw/s390x/s390-ccw.h +T: git git://github.com/cohuck/qemu.git s390-next + vhost M: Michael S. Tsirkin <mst@redhat.com> S: Supported @@ -163,11 +163,16 @@ void path_combine(char *dest, int dest_size, if (path_is_absolute(filename)) { pstrcpy(dest, dest_size, filename); } else { - p = strchr(base_path, ':'); - if (p) - p++; - else - p = base_path; + const char *protocol_stripped = NULL; + + if (path_has_protocol(base_path)) { + protocol_stripped = strchr(base_path, ':'); + if (protocol_stripped) { + protocol_stripped++; + } + } + p = protocol_stripped ?: base_path; + p1 = strrchr(base_path, '/'); #ifdef _WIN32 { @@ -192,6 +197,41 @@ void path_combine(char *dest, int dest_size, } } +/* + * Helper function for bdrv_parse_filename() implementations to remove optional + * protocol prefixes (especially "file:") from a filename and for putting the + * stripped filename into the options QDict if there is such a prefix. 
+ */ +void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, + QDict *options) +{ + if (strstart(filename, prefix, &filename)) { + /* Stripping the explicit protocol prefix may result in a protocol + * prefix being (wrongly) detected (if the filename contains a colon) */ + if (path_has_protocol(filename)) { + QString *fat_filename; + + /* This means there is some colon before the first slash; therefore, + * this cannot be an absolute path */ + assert(!path_is_absolute(filename)); + + /* And we can thus fix the protocol detection issue by prefixing it + * by "./" */ + fat_filename = qstring_from_str("./"); + qstring_append(fat_filename, filename); + + assert(!path_has_protocol(qstring_get_str(fat_filename))); + + qdict_put(options, "filename", fat_filename); + } else { + /* If no protocol prefix was detected, we can use the shortened + * filename as-is */ + qdict_put_str(options, "filename", filename); + } + } +} + + /* Returns whether the image file is opened as read-only. Note that this can * return false and writing to the image file is still not possible because the * image is inactivated. 
*/ diff --git a/block/backup.c b/block/backup.c index a4fb2884f9..5387fbd84e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -692,7 +692,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } if (job) { backup_clean(&job->common); - block_job_unref(&job->common); + block_job_early_fail(&job->common); } return NULL; diff --git a/block/commit.c b/block/commit.c index 76a0d98c6f..a3028b20f3 100644 --- a/block/commit.c +++ b/block/commit.c @@ -426,7 +426,7 @@ fail: if (commit_top_bs) { bdrv_set_backing_hd(overlay_bs, top, &error_abort); } - block_job_unref(&s->common); + block_job_early_fail(&s->common); } diff --git a/block/file-posix.c b/block/file-posix.c index 4354d49642..de2d3a2e3c 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -381,12 +381,7 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags) static void raw_parse_filename(const char *filename, QDict *options, Error **errp) { - /* The filename does not have to be prefixed by the protocol name, since - * "file" is the default protocol; therefore, the return value of this - * function call can be ignored. */ - strstart(filename, "file:", &filename); - - qdict_put_str(options, "filename", filename); + bdrv_parse_filename_strip_prefix(filename, "file:", options); } static QemuOptsList raw_runtime_opts = { @@ -2395,10 +2390,7 @@ static int check_hdev_writable(BDRVRawState *s) static void hdev_parse_filename(const char *filename, QDict *options, Error **errp) { - /* The prefix is optional, just as for "file". */ - strstart(filename, "host_device:", &filename); - - qdict_put_str(options, "filename", filename); + bdrv_parse_filename_strip_prefix(filename, "host_device:", options); } static bool hdev_is_sg(BlockDriverState *bs) @@ -2697,10 +2689,7 @@ static BlockDriver bdrv_host_device = { static void cdrom_parse_filename(const char *filename, QDict *options, Error **errp) { - /* The prefix is optional, just as for "file". 
*/ - strstart(filename, "host_cdrom:", &filename); - - qdict_put_str(options, "filename", filename); + bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options); } #endif diff --git a/block/file-win32.c b/block/file-win32.c index 8f14f0bdcd..ef2910b03f 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -276,12 +276,7 @@ static void raw_parse_flags(int flags, bool use_aio, int *access_flags, static void raw_parse_filename(const char *filename, QDict *options, Error **errp) { - /* The filename does not have to be prefixed by the protocol name, since - * "file" is the default protocol; therefore, the return value of this - * function call can be ignored. */ - strstart(filename, "file:", &filename); - - qdict_put_str(options, "filename", filename); + bdrv_parse_filename_strip_prefix(filename, "file:", options); } static QemuOptsList raw_runtime_opts = { @@ -671,10 +666,7 @@ static int hdev_probe_device(const char *filename) static void hdev_parse_filename(const char *filename, QDict *options, Error **errp) { - /* The prefix is optional, just as for "file". */ - strstart(filename, "host_device:", &filename); - - qdict_put_str(options, "filename", filename); + bdrv_parse_filename_strip_prefix(filename, "host_device:", options); } static int hdev_open(BlockDriverState *bs, QDict *options, int flags, diff --git a/block/gluster.c b/block/gluster.c index 7c76cd0988..8ba3bcca0b 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1275,7 +1275,14 @@ static int find_allocation(BlockDriverState *bs, off_t start, if (offs < 0) { return -errno; /* D3 or D4 */ } - assert(offs >= start); + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like D4. Unfortunately some + * versions of gluster server will return offs < start, so an assert + * here will unnecessarily abort QEMU. 
*/ + return -EIO; + } if (offs > start) { /* D2: in hole, next data at offs */ @@ -1307,7 +1314,14 @@ static int find_allocation(BlockDriverState *bs, off_t start, if (offs < 0) { return -errno; /* D1 and (H3 or H4) */ } - assert(offs >= start); + + if (offs < start) { + /* This is not a valid return by lseek(). We are safe to just return + * -EIO in this case, and we'll treat it like H4. Unfortunately some + * versions of gluster server will return offs < start, so an assert + * here will unnecessarily abort QEMU. */ + return -EIO; + } if (offs > start) { /* diff --git a/block/io.c b/block/io.c index fdd7485c22..ed31810c0a 100644 --- a/block/io.c +++ b/block/io.c @@ -26,6 +26,7 @@ #include "trace.h" #include "sysemu/block-backend.h" #include "block/blockjob.h" +#include "block/blockjob_int.h" #include "block/block_int.h" #include "qemu/cutils.h" #include "qapi/error.h" @@ -301,16 +302,9 @@ void bdrv_drain_all_begin(void) bool waited = true; BlockDriverState *bs; BdrvNextIterator it; - BlockJob *job = NULL; GSList *aio_ctxs = NULL, *ctx; - while ((job = block_job_next(job))) { - AioContext *aio_context = blk_get_aio_context(job->blk); - - aio_context_acquire(aio_context); - block_job_pause(job); - aio_context_release(aio_context); - } + block_job_pause_all(); for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -354,7 +348,6 @@ void bdrv_drain_all_end(void) { BlockDriverState *bs; BdrvNextIterator it; - BlockJob *job = NULL; for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -365,13 +358,7 @@ void bdrv_drain_all_end(void) aio_context_release(aio_context); } - while ((job = block_job_next(job))) { - AioContext *aio_context = blk_get_aio_context(job->blk); - - aio_context_acquire(aio_context); - block_job_resume(job); - aio_context_release(aio_context); - } + block_job_resume_all(); } void bdrv_drain_all(void) diff --git a/block/mirror.c b/block/mirror.c 
index e86f8f8ad7..a2a970301c 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -514,7 +514,12 @@ static void mirror_exit(BlockJob *job, void *opaque) /* Remove target parent that still uses BLK_PERM_WRITE/RESIZE before * inserting target_bs at s->to_replace, where we might not be able to get - * these permissions. */ + * these permissions. + * + * Note that blk_unref() alone doesn't necessarily drop permissions because + * we might be running nested inside mirror_drain(), which takes an extra + * reference, so use an explicit blk_set_perm() first. */ + blk_set_perm(s->target, 0, BLK_PERM_ALL, &error_abort); blk_unref(s->target); s->target = NULL; @@ -1252,7 +1257,7 @@ fail: g_free(s->replaces); blk_unref(s->target); - block_job_unref(&s->common); + block_job_early_fail(&s->common); } bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 347d94b0d2..d779ea19cf 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1797,7 +1797,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, } if (offset_into_cluster(s, offset)) { - qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset " + qcow2_signal_corruption(bs, true, -1, -1, + "Cluster allocation offset " "%#" PRIx64 " unaligned (L2 offset: %#" PRIx64 ", L2 index: %#x)", offset, l2_offset, j); diff --git a/block/qcow2.c b/block/qcow2.c index a8d61f0981..b3ba5daa93 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3222,7 +3222,6 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, if (s->refcount_bits != refcount_bits) { int refcount_order = ctz32(refcount_bits); - Error *local_error = NULL; if (new_version < 3 && refcount_bits != 16) { error_report("Different refcount widths than 16 bits require " @@ -3234,9 +3233,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; ret = 
qcow2_change_refcount_order(bs, refcount_order, &qcow2_amend_helper_cb, - &helper_cb_info, &local_error); + &helper_cb_info, &local_err); if (ret < 0) { - error_report_err(local_error); + error_report_err(local_err); return ret; } } diff --git a/block/stream.c b/block/stream.c index 0113710845..52d329f5c6 100644 --- a/block/stream.c +++ b/block/stream.c @@ -280,6 +280,6 @@ void stream_start(const char *job_id, BlockDriverState *bs, fail: if (orig_bs_flags != bdrv_get_flags(bs)) { - bdrv_reopen(bs, s->bs_flags, NULL); + bdrv_reopen(bs, orig_bs_flags, NULL); } } diff --git a/blockdev.c b/blockdev.c index c63f4e82c7..892d768574 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3715,7 +3715,6 @@ void qmp_block_job_resume(const char *device, Error **errp) } trace_qmp_block_job_resume(job); - block_job_iostatus_reset(job); block_job_user_resume(job); aio_context_release(aio_context); } diff --git a/blockjob.c b/blockjob.c index 6e489327ff..a0d7e29b83 100644 --- a/blockjob.c +++ b/blockjob.c @@ -55,35 +55,20 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); -static char *child_job_get_parent_desc(BdrvChild *c) -{ - BlockJob *job = c->opaque; - return g_strdup_printf("%s job '%s'", - BlockJobType_lookup[job->driver->job_type], - job->id); -} - -static const BdrvChildRole child_job = { - .get_parent_desc = child_job_get_parent_desc, - .stay_at_node = true, -}; - -static void block_job_drained_begin(void *opaque) -{ - BlockJob *job = opaque; - block_job_pause(job); -} - -static void block_job_drained_end(void *opaque) -{ - BlockJob *job = opaque; - block_job_resume(job); -} - -static const BlockDevOps block_job_dev_ops = { - .drained_begin = block_job_drained_begin, - .drained_end = block_job_drained_end, -}; +/* + * The block job API is composed of two categories of functions. + * + * The first includes functions used by the monitor. 
The monitor is + * peculiar in that it accesses the block job list with block_job_get, and + * therefore needs consistency across block_job_get and the actual operation + * (e.g. block_job_set_speed). The consistency is achieved with + * aio_context_acquire/release. These functions are declared in blockjob.h. + * + * The second includes functions used by the block job drivers and sometimes + * by the core block layer. These do not care about locking, because the + * whole coroutine runs under the AioContext lock, and are declared in + * blockjob_int.h. + */ BlockJob *block_job_next(BlockJob *job) { @@ -106,6 +91,80 @@ BlockJob *block_job_get(const char *id) return NULL; } +BlockJobTxn *block_job_txn_new(void) +{ + BlockJobTxn *txn = g_new0(BlockJobTxn, 1); + QLIST_INIT(&txn->jobs); + txn->refcnt = 1; + return txn; +} + +static void block_job_txn_ref(BlockJobTxn *txn) +{ + txn->refcnt++; +} + +void block_job_txn_unref(BlockJobTxn *txn) +{ + if (txn && --txn->refcnt == 0) { + g_free(txn); + } +} + +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) +{ + if (!txn) { + return; + } + + assert(!job->txn); + job->txn = txn; + + QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); + block_job_txn_ref(txn); +} + +static void block_job_pause(BlockJob *job) +{ + job->pause_count++; +} + +static void block_job_resume(BlockJob *job) +{ + assert(job->pause_count > 0); + job->pause_count--; + if (job->pause_count) { + return; + } + block_job_enter(job); +} + +static void block_job_ref(BlockJob *job) +{ + ++job->refcnt; +} + +static void block_job_attached_aio_context(AioContext *new_context, + void *opaque); +static void block_job_detach_aio_context(void *opaque); + +static void block_job_unref(BlockJob *job) +{ + if (--job->refcnt == 0) { + BlockDriverState *bs = blk_bs(job->blk); + bs->job = NULL; + block_job_remove_all_bdrv(job); + blk_remove_aio_context_notifier(job->blk, + block_job_attached_aio_context, + block_job_detach_aio_context, job); + blk_unref(job->blk); + 
error_free(job->blocker); + g_free(job->id); + QLIST_REMOVE(job, job_list); + g_free(job); + } +} + static void block_job_attached_aio_context(AioContext *new_context, void *opaque) { @@ -145,6 +204,36 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", + BlockJobType_lookup[job->driver->job_type], + job->id); +} + +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .stay_at_node = true, +}; + +static void block_job_drained_begin(void *opaque) +{ + BlockJob *job = opaque; + block_job_pause(job); +} + +static void block_job_drained_end(void *opaque) +{ + BlockJob *job = opaque; + block_job_resume(job); +} + +static const BlockDevOps block_job_dev_ops = { + .drained_begin = block_job_drained_begin, + .drained_end = block_job_drained_end, +}; + void block_job_remove_all_bdrv(BlockJob *job) { GSList *l; @@ -175,90 +264,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, return 0; } -void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, uint64_t perm, - uint64_t shared_perm, int64_t speed, int flags, - BlockCompletionFunc *cb, void *opaque, Error **errp) -{ - BlockBackend *blk; - BlockJob *job; - int ret; - - if (bs->job) { - error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); - return NULL; - } - - if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) { - job_id = bdrv_get_device_name(bs); - if (!*job_id) { - error_setg(errp, "An explicit job ID is required for this node"); - return NULL; - } - } - - if (job_id) { - if (flags & BLOCK_JOB_INTERNAL) { - error_setg(errp, "Cannot specify job ID for internal block job"); - return NULL; - } - - if (!id_wellformed(job_id)) { - error_setg(errp, "Invalid job ID '%s'", job_id); - return NULL; - } - - if (block_job_get(job_id)) { - error_setg(errp, "Job 
ID '%s' already in use", job_id); - return NULL; - } - } - - blk = blk_new(perm, shared_perm); - ret = blk_insert_bs(blk, bs, errp); - if (ret < 0) { - blk_unref(blk); - return NULL; - } - - job = g_malloc0(driver->instance_size); - job->driver = driver; - job->id = g_strdup(job_id); - job->blk = blk; - job->cb = cb; - job->opaque = opaque; - job->busy = false; - job->paused = true; - job->pause_count = 1; - job->refcnt = 1; - - error_setg(&job->blocker, "block device is in use by block job: %s", - BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); - bs->job = job; - - blk_set_dev_ops(blk, &block_job_dev_ops, job); - bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); - - QLIST_INSERT_HEAD(&block_jobs, job, job_list); - - blk_add_aio_context_notifier(blk, block_job_attached_aio_context, - block_job_detach_aio_context, job); - - /* Only set speed when necessary to avoid NotSupported error */ - if (speed != 0) { - Error *local_err = NULL; - - block_job_set_speed(job, speed, &local_err); - if (local_err) { - block_job_unref(job); - error_propagate(errp, local_err); - return NULL; - } - } - return job; -} - bool block_job_is_internal(BlockJob *job) { return (job->id == NULL); @@ -293,30 +298,10 @@ void block_job_start(BlockJob *job) bdrv_coroutine_enter(blk_bs(job->blk), job->co); } -void block_job_ref(BlockJob *job) -{ - ++job->refcnt; -} - -void block_job_unref(BlockJob *job) -{ - if (--job->refcnt == 0) { - BlockDriverState *bs = blk_bs(job->blk); - bs->job = NULL; - block_job_remove_all_bdrv(job); - blk_remove_aio_context_notifier(job->blk, - block_job_attached_aio_context, - block_job_detach_aio_context, job); - blk_unref(job->blk); - error_free(job->blocker); - g_free(job->id); - QLIST_REMOVE(job, job_list); - g_free(job); - } -} - static void block_job_completed_single(BlockJob *job) { + assert(job->completed); + if (!job->ret) { if (job->driver->commit) { job->driver->commit(job); @@ 
-354,11 +339,57 @@ static void block_job_completed_single(BlockJob *job) block_job_unref(job); } +static void block_job_cancel_async(BlockJob *job) +{ + if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) { + block_job_iostatus_reset(job); + } + if (job->user_paused) { + /* Do not call block_job_enter here, the caller will handle it. */ + job->user_paused = false; + job->pause_count--; + } + job->cancelled = true; +} + +static int block_job_finish_sync(BlockJob *job, + void (*finish)(BlockJob *, Error **errp), + Error **errp) +{ + Error *local_err = NULL; + int ret; + + assert(blk_bs(job->blk)->job == job); + + block_job_ref(job); + + if (finish) { + finish(job, &local_err); + } + if (local_err) { + error_propagate(errp, local_err); + block_job_unref(job); + return -EBUSY; + } + /* block_job_drain calls block_job_enter, and it should be enough to + * induce progress until the job completes or moves to the main thread. + */ + while (!job->deferred_to_main_loop && !job->completed) { + block_job_drain(job); + } + while (!job->completed) { + aio_poll(qemu_get_aio_context(), true); + } + ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; + block_job_unref(job); + return ret; +} + static void block_job_completed_txn_abort(BlockJob *job) { AioContext *ctx; BlockJobTxn *txn = job->txn; - BlockJob *other_job, *next; + BlockJob *other_job; if (txn->aborting) { /* @@ -367,29 +398,34 @@ static void block_job_completed_txn_abort(BlockJob *job) return; } txn->aborting = true; + block_job_txn_ref(txn); + /* We are the first failed job. Cancel other jobs. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { ctx = blk_get_aio_context(other_job->blk); aio_context_acquire(ctx); } + + /* Other jobs are effectively cancelled by us, set the status for + * them; this job, however, may or may not be cancelled, depending + * on the caller, so leave it. 
*/ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { - if (other_job == job || other_job->completed) { - /* Other jobs are "effectively" cancelled by us, set the status for - * them; this job, however, may or may not be cancelled, depending - * on the caller, so leave it. */ - if (other_job != job) { - other_job->cancelled = true; - } - continue; + if (other_job != job) { + block_job_cancel_async(other_job); } - block_job_cancel_sync(other_job); - assert(other_job->completed); } - QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { + while (!QLIST_EMPTY(&txn->jobs)) { + other_job = QLIST_FIRST(&txn->jobs); ctx = blk_get_aio_context(other_job->blk); + if (!other_job->completed) { + assert(other_job->cancelled); + block_job_finish_sync(other_job, NULL, NULL); + } block_job_completed_single(other_job); aio_context_release(ctx); } + + block_job_txn_unref(txn); } static void block_job_completed_txn_success(BlockJob *job) @@ -416,21 +452,6 @@ static void block_job_completed_txn_success(BlockJob *job) } } -void block_job_completed(BlockJob *job, int ret) -{ - assert(blk_bs(job->blk)->job == job); - assert(!job->completed); - job->completed = true; - job->ret = ret; - if (!job->txn) { - block_job_completed_single(job); - } else if (ret < 0 || block_job_is_cancelled(job)) { - block_job_completed_txn_abort(job); - } else { - block_job_completed_txn_success(job); - } -} - void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) { Error *local_err = NULL; @@ -462,135 +483,36 @@ void block_job_complete(BlockJob *job, Error **errp) job->driver->complete(job, errp); } -void block_job_pause(BlockJob *job) -{ - job->pause_count++; -} - void block_job_user_pause(BlockJob *job) { job->user_paused = true; block_job_pause(job); } -static bool block_job_should_pause(BlockJob *job) -{ - return job->pause_count > 0; -} - bool block_job_user_paused(BlockJob *job) { - return job ? 
job->user_paused : 0; -} - -void coroutine_fn block_job_pause_point(BlockJob *job) -{ - assert(job && block_job_started(job)); - - if (!block_job_should_pause(job)) { - return; - } - if (block_job_is_cancelled(job)) { - return; - } - - if (job->driver->pause) { - job->driver->pause(job); - } - - if (block_job_should_pause(job) && !block_job_is_cancelled(job)) { - job->paused = true; - job->busy = false; - qemu_coroutine_yield(); /* wait for block_job_resume() */ - job->busy = true; - job->paused = false; - } - - if (job->driver->resume) { - job->driver->resume(job); - } -} - -void block_job_resume(BlockJob *job) -{ - assert(job->pause_count > 0); - job->pause_count--; - if (job->pause_count) { - return; - } - block_job_enter(job); + return job->user_paused; } void block_job_user_resume(BlockJob *job) { if (job && job->user_paused && job->pause_count > 0) { + block_job_iostatus_reset(job); job->user_paused = false; block_job_resume(job); } } -void block_job_enter(BlockJob *job) -{ - if (job->co && !job->busy) { - bdrv_coroutine_enter(blk_bs(job->blk), job->co); - } -} - void block_job_cancel(BlockJob *job) { if (block_job_started(job)) { - job->cancelled = true; - block_job_iostatus_reset(job); + block_job_cancel_async(job); block_job_enter(job); } else { block_job_completed(job, -ECANCELED); } } -bool block_job_is_cancelled(BlockJob *job) -{ - return job->cancelled; -} - -void block_job_iostatus_reset(BlockJob *job) -{ - job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; - if (job->driver->iostatus_reset) { - job->driver->iostatus_reset(job); - } -} - -static int block_job_finish_sync(BlockJob *job, - void (*finish)(BlockJob *, Error **errp), - Error **errp) -{ - Error *local_err = NULL; - int ret; - - assert(blk_bs(job->blk)->job == job); - - block_job_ref(job); - - finish(job, &local_err); - if (local_err) { - error_propagate(errp, local_err); - block_job_unref(job); - return -EBUSY; - } - /* block_job_drain calls block_job_enter, and it should be enough to - * induce 
progress until the job completes or moves to the main thread. - */ - while (!job->deferred_to_main_loop && !job->completed) { - block_job_drain(job); - } - while (!job->completed) { - aio_poll(qemu_get_aio_context(), true); - } - ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; - block_job_unref(job); - return ret; -} - /* A wrapper around block_job_cancel() taking an Error ** parameter so it may be * used with block_job_finish_sync() without the need for (rather nasty) * function pointer casts there. */ @@ -622,42 +544,6 @@ int block_job_complete_sync(BlockJob *job, Error **errp) return block_job_finish_sync(job, &block_job_complete, errp); } -void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) -{ - assert(job->busy); - - /* Check cancellation *before* setting busy = false, too! */ - if (block_job_is_cancelled(job)) { - return; - } - - job->busy = false; - if (!block_job_should_pause(job)) { - co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); - } - job->busy = true; - - block_job_pause_point(job); -} - -void block_job_yield(BlockJob *job) -{ - assert(job->busy); - - /* Check cancellation *before* setting busy = false, too! */ - if (block_job_is_cancelled(job)) { - return; - } - - job->busy = false; - if (!block_job_should_pause(job)) { - qemu_coroutine_yield(); - } - job->busy = true; - - block_job_pause_point(job); -} - BlockJobInfo *block_job_query(BlockJob *job, Error **errp) { BlockJobInfo *info; @@ -717,6 +603,236 @@ static void block_job_event_completed(BlockJob *job, const char *msg) &error_abort); } +/* + * API for block job drivers and the block layer. These functions are + * declared in blockjob_int.h. 
+ */ + +void *block_job_create(const char *job_id, const BlockJobDriver *driver, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, + BlockCompletionFunc *cb, void *opaque, Error **errp) +{ + BlockBackend *blk; + BlockJob *job; + int ret; + + if (bs->job) { + error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); + return NULL; + } + + if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) { + job_id = bdrv_get_device_name(bs); + if (!*job_id) { + error_setg(errp, "An explicit job ID is required for this node"); + return NULL; + } + } + + if (job_id) { + if (flags & BLOCK_JOB_INTERNAL) { + error_setg(errp, "Cannot specify job ID for internal block job"); + return NULL; + } + + if (!id_wellformed(job_id)) { + error_setg(errp, "Invalid job ID '%s'", job_id); + return NULL; + } + + if (block_job_get(job_id)) { + error_setg(errp, "Job ID '%s' already in use", job_id); + return NULL; + } + } + + blk = blk_new(perm, shared_perm); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } + + job = g_malloc0(driver->instance_size); + job->driver = driver; + job->id = g_strdup(job_id); + job->blk = blk; + job->cb = cb; + job->opaque = opaque; + job->busy = false; + job->paused = true; + job->pause_count = 1; + job->refcnt = 1; + + error_setg(&job->blocker, "block device is in use by block job: %s", + BlockJobType_lookup[driver->job_type]); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); + bs->job = job; + + blk_set_dev_ops(blk, &block_job_dev_ops, job); + bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); + + QLIST_INSERT_HEAD(&block_jobs, job, job_list); + + blk_add_aio_context_notifier(blk, block_job_attached_aio_context, + block_job_detach_aio_context, job); + + /* Only set speed when necessary to avoid NotSupported error */ + if (speed != 0) { + Error *local_err = NULL; + + block_job_set_speed(job, speed, &local_err); + if (local_err) { + 
block_job_unref(job); + error_propagate(errp, local_err); + return NULL; + } + } + return job; +} + +void block_job_pause_all(void) +{ + BlockJob *job = NULL; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_pause(job); + aio_context_release(aio_context); + } +} + +void block_job_early_fail(BlockJob *job) +{ + block_job_unref(job); +} + +void block_job_completed(BlockJob *job, int ret) +{ + assert(blk_bs(job->blk)->job == job); + assert(!job->completed); + job->completed = true; + job->ret = ret; + if (!job->txn) { + block_job_completed_single(job); + } else if (ret < 0 || block_job_is_cancelled(job)) { + block_job_completed_txn_abort(job); + } else { + block_job_completed_txn_success(job); + } +} + +static bool block_job_should_pause(BlockJob *job) +{ + return job->pause_count > 0; +} + +void coroutine_fn block_job_pause_point(BlockJob *job) +{ + assert(job && block_job_started(job)); + + if (!block_job_should_pause(job)) { + return; + } + if (block_job_is_cancelled(job)) { + return; + } + + if (job->driver->pause) { + job->driver->pause(job); + } + + if (block_job_should_pause(job) && !block_job_is_cancelled(job)) { + job->paused = true; + job->busy = false; + qemu_coroutine_yield(); /* wait for block_job_resume() */ + job->busy = true; + job->paused = false; + } + + if (job->driver->resume) { + job->driver->resume(job); + } +} + +void block_job_resume_all(void) +{ + BlockJob *job = NULL; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_resume(job); + aio_context_release(aio_context); + } +} + +void block_job_enter(BlockJob *job) +{ + if (!block_job_started(job)) { + return; + } + if (job->deferred_to_main_loop) { + return; + } + + if (!job->busy) { + bdrv_coroutine_enter(blk_bs(job->blk), job->co); + } +} + +bool block_job_is_cancelled(BlockJob *job) +{ + return 
job->cancelled; +} + +void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (!block_job_should_pause(job)) { + co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); + } + job->busy = true; + + block_job_pause_point(job); +} + +void block_job_yield(BlockJob *job) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (!block_job_should_pause(job)) { + qemu_coroutine_yield(); + } + job->busy = true; + + block_job_pause_point(job); +} + +void block_job_iostatus_reset(BlockJob *job) +{ + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + return; + } + assert(job->user_paused && job->pause_count > 0); + job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + void block_job_event_ready(BlockJob *job) { job->ready = true; @@ -790,7 +906,6 @@ static void block_job_defer_to_main_loop_bh(void *opaque) aio_context_acquire(aio_context); } - data->job->deferred_to_main_loop = false; data->fn(data->job, data->opaque); if (aio_context != data->aio_context) { @@ -816,36 +931,3 @@ void block_job_defer_to_main_loop(BlockJob *job, aio_bh_schedule_oneshot(qemu_get_aio_context(), block_job_defer_to_main_loop_bh, data); } - -BlockJobTxn *block_job_txn_new(void) -{ - BlockJobTxn *txn = g_new0(BlockJobTxn, 1); - QLIST_INIT(&txn->jobs); - txn->refcnt = 1; - return txn; -} - -static void block_job_txn_ref(BlockJobTxn *txn) -{ - txn->refcnt++; -} - -void block_job_txn_unref(BlockJobTxn *txn) -{ - if (txn && --txn->refcnt == 0) { - g_free(txn); - } -} - -void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job) -{ - if (!txn) { - return; - } - - assert(!job->txn); - job->txn = txn; - - QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); - block_job_txn_ref(txn); -} @@ -3629,25 +3629,6 @@ if compile_prog "" 
"" ; then inotify1=yes fi -# check if utimensat and futimens are supported -utimens=no -cat > $TMPC << EOF -#define _ATFILE_SOURCE -#include <stddef.h> -#include <fcntl.h> -#include <sys/stat.h> - -int main(void) -{ - utimensat(AT_FDCWD, "foo", NULL, 0); - futimens(0, NULL); - return 0; -} -EOF -if compile_prog "" "" ; then - utimens=yes -fi - # check if pipe2 is there pipe2=no cat > $TMPC << EOF @@ -5434,9 +5415,6 @@ fi if test "$curses" = "yes" ; then echo "CONFIG_CURSES=y" >> $config_host_mak fi -if test "$utimens" = "yes" ; then - echo "CONFIG_UTIMENSAT=y" >> $config_host_mak -fi if test "$pipe2" = "yes" ; then echo "CONFIG_PIPE2=y" >> $config_host_mak fi diff --git a/default-configs/pci.mak b/default-configs/pci.mak index 60dc6510ec..3bbeb62d9a 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -7,6 +7,7 @@ CONFIG_USB_UHCI=y CONFIG_USB_OHCI=y CONFIG_USB_EHCI=y CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y CONFIG_NE2000_PCI=y CONFIG_EEPRO100_PCI=y CONFIG_PCNET_PCI=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index 9615a48f80..18aed56fc0 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -5,4 +5,5 @@ CONFIG_SCLPCONSOLE=y CONFIG_TERMINAL3270=y CONFIG_S390_FLIC=y CONFIG_S390_FLIC_KVM=$(CONFIG_KVM) +CONFIG_VFIO_CCW=$(CONFIG_LINUX) CONFIG_WDT_DIAG288=y @@ -486,7 +486,8 @@ static MemoryRegionSection address_space_do_translate(AddressSpace *as, break; } - iotlb = mr->iommu_ops->translate(mr, addr, is_write); + iotlb = mr->iommu_ops->translate(mr, addr, is_write ? 
+ IOMMU_WO : IOMMU_RO); addr = ((iotlb.translated_addr & ~iotlb.addr_mask) | (addr & iotlb.addr_mask)); *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1); diff --git a/fsdev/9p-iov-marshal.c b/fsdev/9p-iov-marshal.c index 1d16f8df4b..a1c9beddd2 100644 --- a/fsdev/9p-iov-marshal.c +++ b/fsdev/9p-iov-marshal.c @@ -168,7 +168,7 @@ ssize_t v9fs_iov_vunmarshal(struct iovec *out_sg, int out_num, size_t offset, break; } default: - break; + g_assert_not_reached(); } if (copied < 0) { return copied; @@ -281,7 +281,7 @@ ssize_t v9fs_iov_vmarshal(struct iovec *in_sg, int in_num, size_t offset, break; } default: - break; + g_assert_not_reached(); } if (copied < 0) { return copied; diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index 54f7ad1c48..6c066ec9a0 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -945,7 +945,8 @@ static int process_requests(int sock) &spec[0].tv_sec, &spec[0].tv_nsec, &spec[1].tv_sec, &spec[1].tv_nsec); if (retval > 0) { - retval = qemu_utimens(path.data, spec); + retval = utimensat(AT_FDCWD, path.data, spec, + AT_SYMLINK_NOFOLLOW); if (retval < 0) { retval = -errno; } @@ -1129,14 +1130,14 @@ int main(int argc, char **argv) } } - if (chdir("/") < 0) { - do_perror("chdir"); - goto error; - } if (chroot(rpath) < 0) { do_perror("chroot"); goto error; } + if (chdir("/") < 0) { + do_perror("chdir"); + goto error; + } get_version = false; #ifdef FS_IOC_GETVERSION diff --git a/hmp-commands.hx b/hmp-commands.hx index baeac47a72..e763606fe5 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -676,7 +676,8 @@ ETEXI STEXI @item usb_add @var{devname} @findex usb_add -Add the USB device @var{devname}. For details of available devices see +Add the USB device @var{devname}. This command is deprecated, please +use @code{device_add} instead. For details of available devices see @ref{usb_devices} ETEXI @@ -693,7 +694,8 @@ STEXI @findex usb_del Remove the USB device @var{devname} from the QEMU virtual USB hub. 
@var{devname} has the syntax @code{bus.addr}. Use the monitor -command @code{info usb} to see the devices you can remove. +command @code{info usb} to see the devices you can remove. This +command is deprecated, please use @code{device_del} instead. ETEXI { diff --git a/hw/9pfs/9p-handle.c b/hw/9pfs/9p-handle.c index 1687661bc9..9875f1894c 100644 --- a/hw/9pfs/9p-handle.c +++ b/hw/9pfs/9p-handle.c @@ -378,7 +378,6 @@ static int handle_utimensat(FsContext *ctx, V9fsPath *fs_path, const struct timespec *buf) { int ret; -#ifdef CONFIG_UTIMENSAT int fd; struct handle_data *data = (struct handle_data *)ctx->private; @@ -388,10 +387,6 @@ static int handle_utimensat(FsContext *ctx, V9fsPath *fs_path, } ret = futimens(fd, buf); close(fd); -#else - ret = -1; - errno = ENOSYS; -#endif return ret; } diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index a2486566af..1e78b7c9e9 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -53,13 +53,37 @@ int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, mode_t mode) { LocalData *data = fs_ctx->private; - - /* All paths are relative to the path data->mountfd points to */ - while (*path == '/') { - path++; + int fd = data->mountfd; + + while (*path && fd != -1) { + const char *c; + int next_fd; + char *head; + + /* Only relative paths without consecutive slashes */ + assert(*path != '/'); + + head = g_strdup(path); + c = strchrnul(path, '/'); + if (*c) { + /* Intermediate path element */ + head[c - path] = 0; + path = c + 1; + next_fd = openat_dir(fd, head); + } else { + /* Rightmost path element */ + next_fd = openat_file(fd, head, flags, mode); + path = c; + } + g_free(head); + if (fd != data->mountfd) { + close_preserve_errno(fd); + } + fd = next_fd; } - return relative_openat_nofollow(data->mountfd, path, flags, mode); + assert(fd != data->mountfd); + return fd; } int local_opendir_nofollow(FsContext *fs_ctx, const char *path) @@ -83,6 +107,7 @@ static void unlinkat_preserve_errno(int dirfd, const char 
*path, int flags) } #define VIRTFS_META_DIR ".virtfs_metadata" +#define VIRTFS_META_ROOT_FILE VIRTFS_META_DIR "_root" static FILE *local_fopenat(int dirfd, const char *name, const char *mode) { @@ -119,13 +144,17 @@ static void local_mapped_file_attr(int dirfd, const char *name, char buf[ATTR_MAX]; int map_dirfd; - map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); - if (map_dirfd == -1) { - return; - } + if (strcmp(name, ".")) { + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return; + } - fp = local_fopenat(map_dirfd, name, "r"); - close_preserve_errno(map_dirfd); + fp = local_fopenat(map_dirfd, name, "r"); + close_preserve_errno(map_dirfd); + } else { + fp = local_fopenat(dirfd, VIRTFS_META_ROOT_FILE, "r"); + } if (!fp) { return; } @@ -203,26 +232,38 @@ static int local_set_mapped_file_attrat(int dirfd, const char *name, int ret; char buf[ATTR_MAX]; int uid = -1, gid = -1, mode = -1, rdev = -1; - int map_dirfd; - - ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); - if (ret < 0 && errno != EEXIST) { - return -1; - } - - map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); - if (map_dirfd == -1) { - return -1; - } + int map_dirfd = -1, map_fd; + bool is_root = !strcmp(name, "."); + + if (is_root) { + fp = local_fopenat(dirfd, VIRTFS_META_ROOT_FILE, "r"); + if (!fp) { + if (errno == ENOENT) { + goto update_map_file; + } else { + return -1; + } + } + } else { + ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + return -1; + } - fp = local_fopenat(map_dirfd, name, "r"); - if (!fp) { - if (errno == ENOENT) { - goto update_map_file; - } else { - close_preserve_errno(map_dirfd); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { return -1; } + + fp = local_fopenat(map_dirfd, name, "r"); + if (!fp) { + if (errno == ENOENT) { + goto update_map_file; + } else { + close_preserve_errno(map_dirfd); + return -1; + } + } } memset(buf, 0, ATTR_MAX); while (fgets(buf, ATTR_MAX, fp)) { @@ -240,12 +281,26 @@ 
static int local_set_mapped_file_attrat(int dirfd, const char *name, fclose(fp); update_map_file: - fp = local_fopenat(map_dirfd, name, "w"); - close_preserve_errno(map_dirfd); + if (is_root) { + fp = local_fopenat(dirfd, VIRTFS_META_ROOT_FILE, "w"); + } else { + fp = local_fopenat(map_dirfd, name, "w"); + /* We can't go this far with map_dirfd not being a valid file descriptor + * but some versions of gcc aren't smart enough to see it. + */ + if (map_dirfd != -1) { + close_preserve_errno(map_dirfd); + } + } if (!fp) { return -1; } + map_fd = fileno(fp); + assert(map_fd != -1); + ret = fchmod(map_fd, 0600); + assert(ret == 0); + if (credp->fc_uid != -1) { uid = credp->fc_uid; } @@ -454,7 +509,8 @@ static off_t local_telldir(FsContext *ctx, V9fsFidOpenState *fs) static bool local_is_mapped_file_metadata(FsContext *fs_ctx, const char *name) { - return !strcmp(name, VIRTFS_META_DIR); + return + !strcmp(name, VIRTFS_META_DIR) || !strcmp(name, VIRTFS_META_ROOT_FILE); } static struct dirent *local_readdir(FsContext *ctx, V9fsFidOpenState *fs) @@ -471,7 +527,7 @@ again: entry->d_type = DT_UNKNOWN; } else if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { if (local_is_mapped_file_metadata(ctx, entry->d_name)) { - /* skip the meta data directory */ + /* skip the meta data */ goto again; } entry->d_type = DT_UNKNOWN; @@ -992,6 +1048,14 @@ static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { int map_dirfd; + /* We need to remove the metadata as well: + * - the metadata directory if we're removing a directory + * - the metadata file in the parent's metadata directory + * + * If any of these are missing (ie, ENOENT) then we're probably + * trying to remove something that wasn't created in mapped-file + * mode. We just ignore the error. 
+ */ if (flags == AT_REMOVEDIR) { int fd; @@ -999,32 +1063,20 @@ static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, if (fd == -1) { goto err_out; } - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR); close_preserve_errno(fd); if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ goto err_out; } } - /* - * Now remove the name from parent directory - * .virtfs_metadata directory. - */ map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); - ret = unlinkat(map_dirfd, name, 0); - close_preserve_errno(map_dirfd); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ + if (map_dirfd != -1) { + ret = unlinkat(map_dirfd, name, 0); + close_preserve_errno(map_dirfd); + if (ret < 0 && errno != ENOENT) { + goto err_out; + } + } else if (errno != ENOENT) { goto err_out; } } @@ -1138,14 +1190,32 @@ static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path, } if (dir_path) { - v9fs_path_sprintf(target, "%s/%s", dir_path->data, name); - } else if (strcmp(name, "/")) { - v9fs_path_sprintf(target, "%s", name); + if (!strcmp(name, ".")) { + /* "." relative to "foo/bar" is "foo/bar" */ + v9fs_path_copy(target, dir_path); + } else if (!strcmp(name, "..")) { + if (!strcmp(dir_path->data, ".")) { + /* ".." relative to the root is "." */ + v9fs_path_sprintf(target, "."); + } else { + char *tmp = g_path_get_dirname(dir_path->data); + /* Symbolic links are resolved by the client. We can assume + * that ".." 
relative to "foo/bar" is equivalent to "foo" + */ + v9fs_path_sprintf(target, "%s", tmp); + g_free(tmp); + } + } else { + assert(!strchr(name, '/')); + v9fs_path_sprintf(target, "%s/%s", dir_path->data, name); + } + } else if (!strcmp(name, "/") || !strcmp(name, ".") || + !strcmp(name, "..")) { + /* This is the root fid */ + v9fs_path_sprintf(target, "."); } else { - /* We want the path of the export root to be relative, otherwise - * "*at()" syscalls would treat it as "/" in the host. - */ - v9fs_path_sprintf(target, "%s", "."); + assert(!strchr(name, '/')); + v9fs_path_sprintf(target, "./%s", name); } return 0; } diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c index fdb4d57376..f709c27a1f 100644 --- a/hw/9pfs/9p-util.c +++ b/hw/9pfs/9p-util.c @@ -14,49 +14,6 @@ #include "qemu/xattr.h" #include "9p-util.h" -int relative_openat_nofollow(int dirfd, const char *path, int flags, - mode_t mode) -{ - int fd; - - fd = dup(dirfd); - if (fd == -1) { - return -1; - } - - while (*path) { - const char *c; - int next_fd; - char *head; - - /* Only relative paths without consecutive slashes */ - assert(path[0] != '/'); - - head = g_strdup(path); - c = strchr(path, '/'); - if (c) { - head[c - path] = 0; - next_fd = openat_dir(fd, head); - } else { - next_fd = openat_file(fd, head, flags, mode); - } - g_free(head); - if (next_fd == -1) { - close_preserve_errno(fd); - return -1; - } - close(fd); - fd = next_fd; - - if (!c) { - break; - } - path = c + 1; - } - - return fd; -} - ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, void *value, size_t size) { diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index 517027c520..91299a24b8 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -50,8 +50,6 @@ static inline int openat_file(int dirfd, const char *name, int flags, return fd; } -int relative_openat_nofollow(int dirfd, const char *path, int flags, - mode_t mode); ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, void 
*value, size_t size); int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index ab3e22f231..96d2683348 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -65,11 +65,6 @@ ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) return ret; } -static void pdu_push_and_notify(V9fsPDU *pdu) -{ - pdu->s->transport->push_and_notify(pdu); -} - static int omode_to_uflags(int8_t mode) { int ret = 0; @@ -668,7 +663,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) pdu->size = len; pdu->id = id; - pdu_push_and_notify(pdu); + pdu->s->transport->push_and_notify(pdu); /* Now wakeup anybody waiting in flush for this request */ if (!qemu_co_queue_next(&pdu->complete)) { @@ -2576,7 +2571,10 @@ static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, err = -EINVAL; goto out; } - v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path); + err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path); + if (err < 0) { + goto out; + } } else { old_name = fidp->path.data; end = strrchr(old_name, '/'); @@ -2588,8 +2586,11 @@ static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, new_name = g_malloc0(end - old_name + name->size + 1); strncat(new_name, old_name, end - old_name); strncat(new_name + (end - old_name), name->data, name->size); - v9fs_co_name_to_path(pdu, NULL, new_name, &new_path); + err = v9fs_co_name_to_path(pdu, NULL, new_name, &new_path); g_free(new_name); + if (err < 0) { + goto out; + } } err = v9fs_co_rename(pdu, &fidp->path, &new_path); if (err < 0) { @@ -2669,20 +2670,26 @@ out_nofid: v9fs_string_free(&name); } -static void coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, - V9fsString *old_name, - V9fsPath *newdir, - V9fsString *new_name) +static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, + V9fsString *old_name, + V9fsPath *newdir, + V9fsString *new_name) { V9fsFidState 
*tfidp; V9fsPath oldpath, newpath; V9fsState *s = pdu->s; - + int err; v9fs_path_init(&oldpath); v9fs_path_init(&newpath); - v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath); - v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath); + err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath); + if (err < 0) { + goto out; + } + err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath); + if (err < 0) { + goto out; + } /* * Fixup fid's pointing to the old name to @@ -2694,8 +2701,10 @@ static void coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data)); } } +out: v9fs_path_free(&oldpath); v9fs_path_free(&newpath); + return err; } static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, @@ -2729,8 +2738,8 @@ static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, } if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { /* Only for path based fid we need to do the below fixup */ - v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name, - &newdirfidp->path, new_name); + err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name, + &newdirfidp->path, new_name); } out: if (olddirfidp) { @@ -3446,12 +3455,16 @@ static inline bool is_read_only_op(V9fsPDU *pdu) } } -void pdu_submit(V9fsPDU *pdu) +void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) { Coroutine *co; CoroutineEntry *handler; V9fsState *s = pdu->s; + pdu->size = le32_to_cpu(hdr->size_le); + pdu->id = hdr->id; + pdu->tag = le16_to_cpu(hdr->tag_le); + if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) || (pdu_co_handlers[pdu->id] == NULL)) { handler = v9fs_op_not_supp; @@ -3462,6 +3475,8 @@ void pdu_submit(V9fsPDU *pdu) if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) { handler = v9fs_fs_ro; } + + qemu_co_queue_init(&pdu->complete); co = qemu_coroutine_create(handler, pdu); qemu_coroutine_enter(co); } diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 5312d8a424..c886ba78d2 100644 --- 
a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -347,7 +347,7 @@ ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...); ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...); V9fsPDU *pdu_alloc(V9fsState *s); void pdu_free(V9fsPDU *pdu); -void pdu_submit(V9fsPDU *pdu); +void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr); void v9fs_reset(V9fsState *s); struct V9fsTransport { diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 3782f43702..245abd8aae 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -70,13 +70,7 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq) goto out_free_req; } - pdu->size = le32_to_cpu(out.size_le); - - pdu->id = out.id; - pdu->tag = le16_to_cpu(out.tag_le); - - qemu_co_queue_init(&pdu->complete); - pdu_submit(pdu); + pdu_submit(pdu, &out); } return; diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 5df97c90fa..922cc967be 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -243,14 +243,10 @@ static int xen_9pfs_receive(Xen9pfsRing *ring) /* cannot fail, because we only handle one request per ring at a time */ pdu = pdu_alloc(&ring->priv->state); - pdu->size = le32_to_cpu(h.size_le); - pdu->id = h.id; - pdu->tag = le32_to_cpu(h.tag_le); ring->out_size = le32_to_cpu(h.size_le); ring->out_cons = cons + le32_to_cpu(h.size_le); - qemu_co_queue_init(&pdu->complete); - pdu_submit(pdu); + pdu_submit(pdu, &h); return 0; } diff --git a/hw/acpi/core.c b/hw/acpi/core.c index e890a5d675..95fcac95a2 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -561,7 +561,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) uint16_t sus_typ = (val >> 10) & 7; switch(sus_typ) { case 0: /* soft power off */ - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; case 1: qemu_system_suspend_request(); @@ -569,7 +569,7 @@ static void acpi_pm1_cnt_write(ACPIREGS *ar, uint16_t val) default: 
if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */ qapi_event_send_suspend_disk(&error_abort); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } break; } diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index f50f5cf186..c1cf7802a4 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -664,7 +664,7 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr, /* TODO: A translation failure here ought to set PCI error codes on the Pchip and generate a machine check interrupt. */ static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu); IOMMUTLBEntry ret; diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c index 0a4508cef3..d209b97dee 100644 --- a/hw/arm/highbank.c +++ b/hw/arm/highbank.c @@ -108,9 +108,9 @@ static void hb_regs_write(void *opaque, hwaddr offset, if (offset == 0xf00) { if (value == 1 || value == 2) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else if (value == 3) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c index 5610ffc9ce..ca3eca1d16 100644 --- a/hw/arm/integratorcp.c +++ b/hw/arm/integratorcp.c @@ -158,7 +158,7 @@ static void integratorcm_do_remap(IntegratorCMState *s) static void integratorcm_set_ctrl(IntegratorCMState *s, uint32_t value) { if (value & 8) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } if ((s->cm_ctrl ^ value) & 1) { /* (value & 1) != 0 means the green "MISC LED" is lit. 
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index cbbca4e17a..9c710f74b4 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -898,7 +898,7 @@ static void mv88w8618_pit_write(void *opaque, hwaddr offset, case MP_BOARD_RESET: if (value == MP_BOARD_RESET_MAGIC) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c index b3cf0ec690..54582bd148 100644 --- a/hw/arm/omap1.c +++ b/hw/arm/omap1.c @@ -355,7 +355,7 @@ static void omap_wd_timer_write(void *opaque, hwaddr addr, /* XXX: on T|E hardware somehow this has no effect, * on Zire 71 it works as specified. */ s->reset = 1; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } s->last_wr = value & 0xff; @@ -1545,8 +1545,10 @@ static inline void omap_clkm_idlect1_update(struct omap_mpu_state_s *s, if (value & (1 << 11)) { /* SETARM_IDLE */ cpu_interrupt(CPU(s->cpu), CPU_INTERRUPT_HALT); } - if (!(value & (1 << 10))) /* WKUP_MODE */ - qemu_system_shutdown_request(); /* XXX: disable wakeup from IRQ */ + if (!(value & (1 << 10))) { /* WKUP_MODE */ + /* XXX: disable wakeup from IRQ */ + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } #define SET_CANIDLE(clock, bit) \ if (diff & (1 << bit)) { \ @@ -1693,7 +1695,7 @@ static void omap_clkm_write(void *opaque, hwaddr addr, diff = s->clkm.arm_rstct1 ^ value; s->clkm.arm_rstct1 = value & 0x0007; if (value & 9) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); s->clkm.cold_start = 0xa; } if (diff & ~value & 4) { /* DSP_RST */ diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c index cf1b4ba58f..8afb854c74 100644 --- a/hw/arm/omap2.c +++ b/hw/arm/omap2.c @@ -1610,7 +1610,7 @@ static void omap_prcm_write(void *opaque, hwaddr addr, case 0x450: /* RM_RSTCTRL_WKUP */ /* TODO: reset */ if (value & 2) - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case 
0x454: /* RM_RSTTIME_WKUP */ s->rsttime_wkup = value & 0x1fff; diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 324626847c..93bde14743 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -848,7 +848,7 @@ static void spitz_lcd_hsync_handler(void *opaque, int line, int level) static void spitz_reset(void *opaque, int line, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index ea7a8094e1..cf6e7be083 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -1197,7 +1197,7 @@ static void do_sys_reset(void *opaque, int n, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c index 9f58a23fb5..2421b8150d 100644 --- a/hw/arm/tosa.c +++ b/hw/arm/tosa.c @@ -90,7 +90,7 @@ static void tosa_out_switch(void *opaque, int line, int level) static void tosa_reset(void *opaque, int line, int level) { if (level) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 7428db9f0c..381dc7c5fb 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -9,7 +9,7 @@ */ /** - * Reference Specs: http://www.nvmexpress.org, 1.1, 1.0e + * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e * * http://www.nvmexpress.org/resources/ */ @@ -17,7 +17,11 @@ /** * Usage: add options: * -drive file=<file>,if=none,id=<drive_id> - * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]> + * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \ + * cmb_size_mb=<cmb_size_mb[optional]> + * + * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at + * offset 0 in BAR2 and supports SQS only for now. 
*/ #include "qemu/osdep.h" @@ -34,6 +38,16 @@ static void nvme_process_sq(void *opaque); +static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) +{ + if (n->cmbsz && addr >= n->ctrl_mem.addr && + addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) { + memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size); + } else { + pci_dma_read(&n->parent_obj, addr, buf, size); + } +} + static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) { return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1; @@ -637,7 +651,7 @@ static void nvme_process_sq(void *opaque) while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { addr = sq->dma_addr + sq->head * n->sqe_size; - pci_dma_read(&n->parent_obj, addr, (void *)&cmd, sizeof(cmd)); + nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd)); nvme_inc_sq_head(sq); req = QTAILQ_FIRST(&sq->req_list); @@ -852,6 +866,32 @@ static const MemoryRegionOps nvme_mmio_ops = { }, }; +static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + memcpy(&n->cmbuf[addr], &data, size); +} + +static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size) +{ + uint64_t val; + NvmeCtrl *n = (NvmeCtrl *)opaque; + + memcpy(&val, &n->cmbuf[addr], size); + return val; +} + +static const MemoryRegionOps nvme_cmb_ops = { + .read = nvme_cmb_read, + .write = nvme_cmb_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 2, + .max_access_size = 8, + }, +}; + static int nvme_init(PCIDevice *pci_dev) { NvmeCtrl *n = NVME(pci_dev); @@ -936,9 +976,31 @@ static int nvme_init(PCIDevice *pci_dev) NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - n->bar.vs = 0x00010100; + n->bar.vs = 0x00010200; n->bar.intmc = n->bar.intms = 0; + if (n->cmb_size_mb) { + + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); + + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); + 
NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb); + + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + pci_register_bar(&n->parent_obj, NVME_CMBLOC_BIR(n->bar.cmbloc), + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); + + } + for (i = 0; i < n->num_namespaces; i++) { NvmeNamespace *ns = &n->namespaces[i]; NvmeIdNs *id_ns = &ns->id_ns; @@ -964,12 +1026,17 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->namespaces); g_free(n->cq); g_free(n->sq); + if (n->cmbsz) { + memory_region_unref(&n->ctrl_mem); + } + msix_uninit_exclusive_bar(pci_dev); } static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), DEFINE_PROP_STRING("serial", NvmeCtrl, serial), + DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index a0d15649f9..b4961d2547 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -14,6 +14,8 @@ typedef struct NvmeBar { uint32_t aqa; uint64_t asq; uint64_t acq; + uint32_t cmbloc; + uint32_t cmbsz; } NvmeBar; enum NvmeCapShift { @@ -138,6 +140,72 @@ enum NvmeAqaMask { #define NVME_AQA_ASQS(aqa) ((aqa >> AQA_ASQS_SHIFT) & AQA_ASQS_MASK) #define NVME_AQA_ACQS(aqa) ((aqa >> AQA_ACQS_SHIFT) & AQA_ACQS_MASK) +enum NvmeCmblocShift { + CMBLOC_BIR_SHIFT = 0, + CMBLOC_OFST_SHIFT = 12, +}; + +enum NvmeCmblocMask { + CMBLOC_BIR_MASK = 0x7, + CMBLOC_OFST_MASK = 0xfffff, +}; + +#define NVME_CMBLOC_BIR(cmbloc) ((cmbloc >> CMBLOC_BIR_SHIFT) & \ + CMBLOC_BIR_MASK) +#define NVME_CMBLOC_OFST(cmbloc)((cmbloc >> CMBLOC_OFST_SHIFT) & \ + CMBLOC_OFST_MASK) + +#define NVME_CMBLOC_SET_BIR(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & 
CMBLOC_BIR_MASK) << CMBLOC_BIR_SHIFT) +#define NVME_CMBLOC_SET_OFST(cmbloc, val) \ + (cmbloc |= (uint64_t)(val & CMBLOC_OFST_MASK) << CMBLOC_OFST_SHIFT) + +enum NvmeCmbszShift { + CMBSZ_SQS_SHIFT = 0, + CMBSZ_CQS_SHIFT = 1, + CMBSZ_LISTS_SHIFT = 2, + CMBSZ_RDS_SHIFT = 3, + CMBSZ_WDS_SHIFT = 4, + CMBSZ_SZU_SHIFT = 8, + CMBSZ_SZ_SHIFT = 12, +}; + +enum NvmeCmbszMask { + CMBSZ_SQS_MASK = 0x1, + CMBSZ_CQS_MASK = 0x1, + CMBSZ_LISTS_MASK = 0x1, + CMBSZ_RDS_MASK = 0x1, + CMBSZ_WDS_MASK = 0x1, + CMBSZ_SZU_MASK = 0xf, + CMBSZ_SZ_MASK = 0xfffff, +}; + +#define NVME_CMBSZ_SQS(cmbsz) ((cmbsz >> CMBSZ_SQS_SHIFT) & CMBSZ_SQS_MASK) +#define NVME_CMBSZ_CQS(cmbsz) ((cmbsz >> CMBSZ_CQS_SHIFT) & CMBSZ_CQS_MASK) +#define NVME_CMBSZ_LISTS(cmbsz)((cmbsz >> CMBSZ_LISTS_SHIFT) & CMBSZ_LISTS_MASK) +#define NVME_CMBSZ_RDS(cmbsz) ((cmbsz >> CMBSZ_RDS_SHIFT) & CMBSZ_RDS_MASK) +#define NVME_CMBSZ_WDS(cmbsz) ((cmbsz >> CMBSZ_WDS_SHIFT) & CMBSZ_WDS_MASK) +#define NVME_CMBSZ_SZU(cmbsz) ((cmbsz >> CMBSZ_SZU_SHIFT) & CMBSZ_SZU_MASK) +#define NVME_CMBSZ_SZ(cmbsz) ((cmbsz >> CMBSZ_SZ_SHIFT) & CMBSZ_SZ_MASK) + +#define NVME_CMBSZ_SET_SQS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SQS_MASK) << CMBSZ_SQS_SHIFT) +#define NVME_CMBSZ_SET_CQS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_CQS_MASK) << CMBSZ_CQS_SHIFT) +#define NVME_CMBSZ_SET_LISTS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_LISTS_MASK) << CMBSZ_LISTS_SHIFT) +#define NVME_CMBSZ_SET_RDS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_RDS_MASK) << CMBSZ_RDS_SHIFT) +#define NVME_CMBSZ_SET_WDS(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_WDS_MASK) << CMBSZ_WDS_SHIFT) +#define NVME_CMBSZ_SET_SZU(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SZU_MASK) << CMBSZ_SZU_SHIFT) +#define NVME_CMBSZ_SET_SZ(cmbsz, val) \ + (cmbsz |= (uint64_t)(val & CMBSZ_SZ_MASK) << CMBSZ_SZ_SHIFT) + +#define NVME_CMBSZ_GETSIZE(cmbsz) \ + (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz)))) + typedef struct NvmeCmd { uint8_t opcode; uint8_t 
fuse; @@ -688,6 +756,7 @@ typedef struct NvmeNamespace { typedef struct NvmeCtrl { PCIDevice parent_obj; MemoryRegion iomem; + MemoryRegion ctrl_mem; NvmeBar bar; BlockConf conf; @@ -701,6 +770,10 @@ typedef struct NvmeCtrl { uint32_t num_queues; uint32_t max_q_ents; uint64_t ns_size; + uint32_t cmb_size_mb; + uint32_t cmbsz; + uint32_t cmbloc; + uint8_t *cmbuf; char *serial; NvmeNamespace *namespaces; diff --git a/hw/core/machine.c b/hw/core/machine.c index fd6a436064..3adebf14c4 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -21,6 +21,7 @@ #include "qemu/error-report.h" #include "qemu/cutils.h" #include "sysemu/numa.h" +#include "sysemu/qtest.h" static char *machine_get_accel(Object *obj, Error **errp) { @@ -722,7 +723,7 @@ static void machine_numa_validate(MachineState *machine) g_free(cpu_str); } } - if (s->len) { + if (s->len && !qtest_enabled()) { error_report("warning: CPU(s) not present in any NUMA nodes: %s", s->str); error_report("warning: All CPU(s) up to maxcpus should be described " diff --git a/hw/dma/rc4030.c b/hw/dma/rc4030.c index 0080141905..edf9432051 100644 --- a/hw/dma/rc4030.c +++ b/hw/dma/rc4030.c @@ -489,7 +489,7 @@ static const MemoryRegionOps jazzio_ops = { }; static IOMMUTLBEntry rc4030_dma_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { rc4030State *s = container_of(iommu, rc4030State, dma_mr); IOMMUTLBEntry ret = { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index afcadacd2e..82bd44f38e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2404,14 +2404,17 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } /* - * Entry is required for Windows to enable memory hotplug in OS. + * Entry is required for Windows to enable memory hotplug in OS + * and for Linux to enable SWIOTLB when booted with less than + * 4G of RAM. Windows works better if the entry sets proximity + * to the highest NUMA node in the machine. 
* Memory devices may override proximity set by this entry, * providing _PXM method if necessary. */ if (hotplugabble_address_space_size) { numamem = acpi_data_push(table_data, sizeof *numamem); build_srat_memory(numamem, pcms->hotplug_memory.base, - hotplugabble_address_space_size, 0, + hotplugabble_address_space_size, pcms->numa_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 329058dac8..7b6d4ea3f3 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -988,7 +988,7 @@ static inline bool amdvi_is_interrupt_addr(hwaddr addr) } static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); AMDVIState *s = as->iommu_state; @@ -1017,7 +1017,7 @@ static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr, return ret; } - amdvi_do_translate(as, addr, is_write, &ret); + amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret); trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), PCI_FUNC(as->devfn), addr, ret.translated_addr); return ret; diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 9ba2162cd9..15610b9de8 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -512,7 +512,7 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index, return 0; } -static inline bool vtd_context_entry_present(VTDContextEntry *context) +static inline bool vtd_ce_present(VTDContextEntry *context) { return context->lo & VTD_CONTEXT_ENTRY_P; } @@ -533,7 +533,7 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index, return 0; } -static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce) +static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) { return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } @@ -585,19 +585,49 @@ static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t 
level) /* Get the page-table level that hardware should use for the second-level * page-table walk from the Address Width field of context-entry. */ -static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce) +static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce) { return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW); } -static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) +static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce) { return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; } +static inline uint32_t vtd_ce_get_type(VTDContextEntry *ce) +{ + return ce->lo & VTD_CONTEXT_ENTRY_TT; +} + +/* Return true if check passed, otherwise false */ +static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, + VTDContextEntry *ce) +{ + switch (vtd_ce_get_type(ce)) { + case VTD_CONTEXT_TT_MULTI_LEVEL: + /* Always supported */ + break; + case VTD_CONTEXT_TT_DEV_IOTLB: + if (!x86_iommu->dt_supported) { + return false; + } + break; + case VTD_CONTEXT_TT_PASS_THROUGH: + if (!x86_iommu->pt_supported) { + return false; + } + break; + default: + /* Unknown type */ + return false; + } + return true; +} + static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) { - uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); + uint32_t ce_agaw = vtd_ce_get_agaw(ce); return 1ULL << MIN(ce_agaw, VTD_MGAW); } @@ -635,6 +665,29 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) } } +/* Find the VTD address space associated with a given bus number */ +static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) +{ + VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; + if (!vtd_bus) { + /* + * Iterate over the registered buses to find the one which + * currently holds this bus number, and update the bus_num + * lookup table: + */ + GHashTableIter iter; + + g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { + if (pci_bus_num(vtd_bus->bus) == bus_num) {
+ s->vtd_as_by_bus_num[bus_num] = vtd_bus; + return vtd_bus; + } + } + } + return vtd_bus; +} + /* Given the @iova, get relevant @slptep. @slpte_level will be the last level * of the translation, can be used for deciding the size of large page. */ @@ -642,8 +695,8 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, bool *reads, bool *writes) { - dma_addr_t addr = vtd_get_slpt_base_from_context(ce); - uint32_t level = vtd_get_level_from_context_entry(ce); + dma_addr_t addr = vtd_ce_get_slpt_base(ce); + uint32_t level = vtd_ce_get_level(ce); uint32_t offset; uint64_t slpte; uint64_t access_right_check; @@ -664,7 +717,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " "entry at level %"PRIu32 " for iova 0x%"PRIx64, level, iova); - if (level == vtd_get_level_from_context_entry(ce)) { + if (level == vtd_ce_get_level(ce)) { /* Invalid programming of context-entry */ return -VTD_FR_CONTEXT_ENTRY_INV; } else { @@ -809,8 +862,8 @@ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, void *private, bool notify_unmap) { - dma_addr_t addr = vtd_get_slpt_base_from_context(ce); - uint32_t level = vtd_get_level_from_context_entry(ce); + dma_addr_t addr = vtd_ce_get_slpt_base(ce); + uint32_t level = vtd_ce_get_level(ce); if (!vtd_iova_range_check(start, ce)) { return -VTD_FR_ADDR_BEYOND_MGAW; @@ -831,6 +884,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, { VTDRootEntry re; int ret_fr; + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); ret_fr = vtd_get_root_entry(s, bus_num, &re); if (ret_fr) { @@ -841,7 +895,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, /* Not error - it's okay we don't have root entry. 
*/ trace_vtd_re_not_present(bus_num); return -VTD_FR_ROOT_ENTRY_P; - } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { + } + + if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { trace_vtd_re_invalid(re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -851,31 +907,116 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, return ret_fr; } - if (!vtd_context_entry_present(ce)) { + if (!vtd_ce_present(ce)) { /* Not error - it's okay we don't have context entry. */ trace_vtd_ce_not_present(bus_num, devfn); return -VTD_FR_CONTEXT_ENTRY_P; - } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || - (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { + } + + if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } + /* Check if the programming of context-entry is valid */ - if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) { + if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) { + trace_vtd_ce_invalid(ce->hi, ce->lo); + return -VTD_FR_CONTEXT_ENTRY_INV; + } + + /* Do translation type check */ + if (!vtd_ce_type_check(x86_iommu, ce)) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_INV; + } + + return 0; +} + +/* + * Fetch translation type for specific device. Returns <0 if error + * happens, otherwise return the shifted type to check against + * VTD_CONTEXT_TT_*. 
+ */ +static int vtd_dev_get_trans_type(VTDAddressSpace *as) +{ + IntelIOMMUState *s; + VTDContextEntry ce; + int ret; + + s = as->iommu_state; + + ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus), + as->devfn, &ce); + if (ret) { + return ret; + } + + return vtd_ce_get_type(&ce); +} + +static bool vtd_dev_pt_enabled(VTDAddressSpace *as) +{ + int ret; + + assert(as); + + ret = vtd_dev_get_trans_type(as); + if (ret < 0) { + /* + * Possibly failed to parse the context entry for some reason + * (e.g., during init, or any guest configuration errors on + * context entries). We should assume PT not enabled for + * safety. + */ + return false; + } + + return ret == VTD_CONTEXT_TT_PASS_THROUGH; +} + +/* Return whether the device is using IOMMU translation. */ +static bool vtd_switch_address_space(VTDAddressSpace *as) +{ + bool use_iommu; + + assert(as); + + use_iommu = as->iommu_state->dmar_enabled & !vtd_dev_pt_enabled(as); + + trace_vtd_switch_address_space(pci_bus_num(as->bus), + VTD_PCI_SLOT(as->devfn), + VTD_PCI_FUNC(as->devfn), + use_iommu); + + /* Turn off first then on the other */ + if (use_iommu) { + memory_region_set_enabled(&as->sys_alias, false); + memory_region_set_enabled(&as->iommu, true); } else { - switch (ce->lo & VTD_CONTEXT_ENTRY_TT) { - case VTD_CONTEXT_TT_MULTI_LEVEL: - /* fall through */ - case VTD_CONTEXT_TT_DEV_IOTLB: - break; - default: - trace_vtd_ce_invalid(ce->hi, ce->lo); - return -VTD_FR_CONTEXT_ENTRY_INV; + memory_region_set_enabled(&as->iommu, false); + memory_region_set_enabled(&as->sys_alias, true); + } + + return use_iommu; +} + +static void vtd_switch_address_space_all(IntelIOMMUState *s) +{ + GHashTableIter iter; + VTDBus *vtd_bus; + int i; + + g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { + for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { + if (!vtd_bus->dev_as[i]) { + continue; + } + vtd_switch_address_space(vtd_bus->dev_as[i]); } } - return 0; } static 
inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) @@ -915,6 +1056,31 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } +static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) +{ + VTDBus *vtd_bus; + VTDAddressSpace *vtd_as; + bool success = false; + + vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); + if (!vtd_bus) { + goto out; + } + + vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)]; + if (!vtd_as) { + goto out; + } + + if (vtd_switch_address_space(vtd_as) == false) { + /* We switched off IOMMU region successfully. */ + success = true; + } + +out: + trace_vtd_pt_enable_fast_path(source_id, success); +} + /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. * @@ -986,6 +1152,30 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry->context_cache_gen = s->context_cache_gen; } + /* + * We don't need to translate for pass-through context entries. + * Also, let's ignore IOTLB caching as well for PT devices. + */ + if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) { + entry->translated_addr = entry->iova; + entry->addr_mask = VTD_PAGE_SIZE - 1; + entry->perm = IOMMU_RW; + trace_vtd_translate_pt(source_id, entry->iova); + + /* + * When this happens, it means firstly caching-mode is not + * enabled, and this is the first passthrough translation for + * the device. Let's enable the fast path for passthrough. + * + * When passthrough is disabled again for the device, we can + * capture it via the context entry invalidation, then the + * IOMMU region can be swapped back. 
+ */ + vtd_pt_enable_fast_path(s, source_id); + + return; + } + ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level, &reads, &writes); if (ret_fr) { @@ -1005,7 +1195,7 @@ out: entry->iova = addr & page_mask; entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; entry->addr_mask = ~page_mask; - entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0); + entry->perm = IOMMU_ACCESS_FLAG(reads, writes); } static void vtd_root_table_setup(IntelIOMMUState *s) @@ -1055,6 +1245,7 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { vtd_reset_context_cache(s); } + vtd_switch_address_space_all(s); /* * From VT-d spec 6.5.2.1, a global context entry invalidation * should be followed by a IOTLB global invalidation, so we should @@ -1065,29 +1256,6 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) vtd_iommu_replay_all(s); } - -/* Find the VTD address space currently associated with a given bus number, - */ -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) -{ - VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; - if (!vtd_bus) { - /* Iterate over the registered buses to find the one - * which currently hold this bus number, and update the bus_num lookup table: - */ - GHashTableIter iter; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) { - if (pci_bus_num(vtd_bus->bus) == bus_num) { - s->vtd_as_by_bus_num[bus_num] = vtd_bus; - return vtd_bus; - } - } - } - return vtd_bus; -} - /* Do a context-cache device-selective invalidation. * @func_mask: FM field after shifting */ @@ -1130,6 +1298,11 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, VTD_PCI_FUNC(devfn_it)); vtd_as->context_cache_entry.context_cache_gen = 0; /* + * Do switch address space when needed, in case if the + * device passthrough bit is switched. 
+ */ + vtd_switch_address_space(vtd_as); + /* * So a device is moving out of (or moving into) a * domain, a replay() suites here to notify all the * IOMMU_NOTIFIER_MAP registers about this change. @@ -1361,42 +1534,6 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); } -static void vtd_switch_address_space(VTDAddressSpace *as) -{ - assert(as); - - trace_vtd_switch_address_space(pci_bus_num(as->bus), - VTD_PCI_SLOT(as->devfn), - VTD_PCI_FUNC(as->devfn), - as->iommu_state->dmar_enabled); - - /* Turn off first then on the other */ - if (as->iommu_state->dmar_enabled) { - memory_region_set_enabled(&as->sys_alias, false); - memory_region_set_enabled(&as->iommu, true); - } else { - memory_region_set_enabled(&as->iommu, false); - memory_region_set_enabled(&as->sys_alias, true); - } -} - -static void vtd_switch_address_space_all(IntelIOMMUState *s) -{ - GHashTableIter iter; - VTDBus *vtd_bus; - int i; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { - if (!vtd_bus->dev_as[i]) { - continue; - } - vtd_switch_address_space(vtd_bus->dev_as[i]); - } - } -} - /* Handle Translation Enable/Disable */ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) { @@ -2221,7 +2358,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr, } static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); IntelIOMMUState *s = vtd_as->iommu_state; @@ -2243,7 +2380,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, } vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr, - is_write, &ret); + flag & IOMMU_WO, &ret); VTD_DPRINTF(MMU, "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8 " iova 0x%"PRIx64 " hpa 0x%"PRIx64, 
pci_bus_num(vtd_as->bus), @@ -2844,6 +2981,10 @@ static void vtd_init(IntelIOMMUState *s) s->ecap |= VTD_ECAP_DT; } + if (x86_iommu->pt_supported) { + s->ecap |= VTD_ECAP_PT; + } + if (s->caching_mode) { s->cap |= VTD_CAP_CM; } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 29d67075f4..0e73a65bf2 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -187,6 +187,7 @@ /* Interrupt Remapping support */ #define VTD_ECAP_IR (1ULL << 3) #define VTD_ECAP_EIM (1ULL << 4) +#define VTD_ECAP_PT (1ULL << 6) #define VTD_ECAP_MHMV (15ULL << 20) /* CAP_REG */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 816bfa872c..107a34125b 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -519,7 +519,7 @@ static void port92_write(void *opaque, hwaddr addr, uint64_t val, s->outport = val; qemu_set_irq(s->a20_out, (val >> 1) & 1); if ((val & 1) && !(oldval & 1)) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/i386/trace-events b/hw/i386/trace-events index 04a6980800..72556dad48 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -38,6 +38,8 @@ vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set" vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64 +vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova 0x%"PRIx64 +vtd_pt_enable_fast_path(uint16_t sid, bool success) "sid 0x%"PRIu16" %d" # hw/i386/amd_iommu.c amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 diff --git a/hw/i386/x86-iommu.c 
b/hw/i386/x86-iommu.c index 23dcd3f039..293caf83ef 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -88,55 +88,23 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); } +static Property x86_iommu_properties[] = { + DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false), + DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false), + DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void x86_iommu_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = x86_iommu_realize; -} - -static bool x86_iommu_intremap_prop_get(Object *o, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - return s->intr_supported; -} - -static void x86_iommu_intremap_prop_set(Object *o, bool value, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - s->intr_supported = value; -} - -static bool x86_iommu_device_iotlb_prop_get(Object *o, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - return s->dt_supported; -} - -static void x86_iommu_device_iotlb_prop_set(Object *o, bool value, Error **errp) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - s->dt_supported = value; -} - -static void x86_iommu_instance_init(Object *o) -{ - X86IOMMUState *s = X86_IOMMU_DEVICE(o); - - /* By default, do not support IR */ - s->intr_supported = false; - object_property_add_bool(o, "intremap", x86_iommu_intremap_prop_get, - x86_iommu_intremap_prop_set, NULL); - s->dt_supported = false; - object_property_add_bool(o, "device-iotlb", - x86_iommu_device_iotlb_prop_get, - x86_iommu_device_iotlb_prop_set, - NULL); + dc->props = x86_iommu_properties; } static const TypeInfo x86_iommu_info = { .name = TYPE_X86_IOMMU_DEVICE, .parent = TYPE_SYS_BUS_DEVICE, - .instance_init = x86_iommu_instance_init, .instance_size = sizeof(X86IOMMUState), .class_init = x86_iommu_class_init, .class_size = sizeof(X86IOMMUClass), diff 
--git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index b1c05ffb86..919f09b694 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -1089,11 +1089,14 @@ static void cpu_handle_ioreq(void *opaque) * causes Xen to powerdown the domain. */ if (runstate_is_running()) { + ShutdownCause request; + if (qemu_shutdown_requested_get()) { destroy_hvm_domain(false); } - if (qemu_reset_requested_get()) { - qemu_system_reset(VMRESET_REPORT); + request = qemu_reset_requested_get(); + if (request) { + qemu_system_reset(request); destroy_hvm_domain(true); } } @@ -1395,7 +1398,7 @@ void xen_shutdown_fatal_error(const char *fmt, ...) va_end(ap); fprintf(stderr, "Will destroy the domain.\n"); /* destroy the domain */ - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR); } void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index d414288839..c479f827b6 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -226,7 +226,7 @@ static void outport_write(KBDState *s, uint32_t val) s->outport = val; qemu_set_irq(s->a20_out, (val >> 1) & 1); if (!(val & 1)) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } @@ -301,7 +301,7 @@ static void kbd_write_command(void *opaque, hwaddr addr, s->outport &= ~KBD_OUT_A20; break; case KBD_CCMD_RESET: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case KBD_CCMD_NO_OP: /* ignore that */ diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 292fffecd3..ea3516794a 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -357,6 +357,10 @@ static void icp_realize(DeviceState *dev, Error **errp) qemu_register_reset(icp_reset, dev); } +static void icp_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(icp_reset, dev); +} static void icp_class_init(ObjectClass *klass, void *data) { @@ -364,6 +368,7 @@ static void icp_class_init(ObjectClass *klass, 
void *data) dc->vmsd = &vmstate_icp_server; dc->realize = icp_realize; + dc->unrealize = icp_unrealize; } static const TypeInfo icp_info = { diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index dd93531ae3..14b8f6f6e4 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -42,6 +42,14 @@ static int kernel_xics_fd = -1; +typedef struct KVMEnabledICP { + unsigned long vcpu_id; + QLIST_ENTRY(KVMEnabledICP) node; +} KVMEnabledICP; + +static QLIST_HEAD(, KVMEnabledICP) + kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps); + /* * ICP-KVM */ @@ -121,6 +129,8 @@ static void icp_kvm_reset(void *dev) static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); + KVMEnabledICP *enabled_icp; + unsigned long vcpu_id = kvm_arch_vcpu_id(cs); int ret; if (kernel_xics_fd == -1) { @@ -132,18 +142,21 @@ static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) * which was hot-removed earlier we don't have to renable * KVM_CAP_IRQ_XICS capability again. */ - if (icp->cap_irq_xics_enabled) { - return; + QLIST_FOREACH(enabled_icp, &kvm_enabled_icps, node) { + if (enabled_icp->vcpu_id == vcpu_id) { + return; + } } - ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, - kvm_arch_vcpu_id(cs)); + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, vcpu_id); if (ret < 0) { - error_report("Unable to connect CPU%ld to kernel XICS: %s", - kvm_arch_vcpu_id(cs), strerror(errno)); + error_report("Unable to connect CPU%ld to kernel XICS: %s", vcpu_id, + strerror(errno)); exit(1); } - icp->cap_irq_xics_enabled = true; + enabled_icp = g_malloc(sizeof(*enabled_icp)); + enabled_icp->vcpu_id = vcpu_id; + QLIST_INSERT_HEAD(&kvm_enabled_icps, enabled_icp, node); } static void icp_kvm_realize(DeviceState *dev, Error **errp) @@ -151,12 +164,18 @@ static void icp_kvm_realize(DeviceState *dev, Error **errp) qemu_register_reset(icp_kvm_reset, dev); } +static void icp_kvm_unrealize(DeviceState *dev, Error **errp) +{ + 
qemu_unregister_reset(icp_kvm_reset, dev); +} + static void icp_kvm_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); ICPStateClass *icpc = ICP_CLASS(klass); dc->realize = icp_kvm_realize; + dc->unrealize = icp_kvm_unrealize; icpc->pre_save = icp_get_kvm_state; icpc->post_load = icp_set_kvm_state; icpc->cpu_setup = icp_kvm_cpu_setup; diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index f05308b897..d98ea8b130 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -229,7 +229,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) +void xics_spapr_init(sPAPRMachineState *spapr) { /* Registration of global state belongs into realize */ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); @@ -243,7 +243,6 @@ int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) spapr_register_hypercall(H_XIRR_X, h_xirr_x); spapr_register_hypercall(H_EOI, h_eoi); spapr_register_hypercall(H_IPOLL, h_ipoll); - return 0; } #define ICS_IRQ_FREE(ics, srcno) \ diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c index 5cf1caa88a..afafe1400f 100644 --- a/hw/ipmi/ipmi.c +++ b/hw/ipmi/ipmi.c @@ -44,14 +44,14 @@ static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly) if (checkonly) { return 0; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return 0; case IPMI_POWEROFF_CHASSIS: if (checkonly) { return 0; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return 0; case IPMI_SEND_NMI: diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index e2215dcf4d..ac8416d42b 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -606,7 +606,7 @@ static void ich9_rst_cnt_write(void *opaque, hwaddr addr, uint64_t val, ICH9LPCState *lpc = opaque; if (val & 4) { - qemu_system_reset_request(); + 
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } lpc->rst_cnt = val & 0xA; /* keep FULL_RST (bit 3) and SYS_RST (bit 1) */ diff --git a/hw/mips/boston.c b/hw/mips/boston.c index 83f7b82386..53d1e0ce45 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -232,7 +232,7 @@ static void boston_platreg_write(void *opaque, hwaddr addr, break; case PLAT_SOFTRST_CTL: if (val & PLAT_SOFTRST_CTL_SYSRESET) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index 5dd177e961..7814c39654 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -470,7 +470,7 @@ static void malta_fpga_write(void *opaque, hwaddr addr, /* SOFTRES Register */ case 0x00500: if (val == 0x42) - qemu_system_reset_request (); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; /* BRKRES Register */ diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c index 748586ed77..f4de9fc343 100644 --- a/hw/mips/mips_r4k.c +++ b/hw/mips/mips_r4k.c @@ -53,9 +53,9 @@ static void mips_qemu_write (void *opaque, hwaddr addr, uint64_t val, unsigned size) { if ((addr & 0xffff) == 0 && val == 42) - qemu_system_reset_request (); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); else if ((addr & 0xffff) == 4 && val == 42) - qemu_system_shutdown_request (); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } static uint64_t mips_qemu_read (void *opaque, hwaddr addr, diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c index 8524008708..b20b44ea20 100644 --- a/hw/misc/arm_sysctl.c +++ b/hw/misc/arm_sysctl.c @@ -351,13 +351,13 @@ static bool vexpress_cfgctrl_write(arm_sysctl_state *s, unsigned int dcc, break; case SYS_CFG_SHUTDOWN: if (site == SYS_CFG_SITE_MB && device == 0) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return true; } break; case SYS_CFG_REBOOT: if (site == SYS_CFG_SITE_MB && device == 0) { 
- qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return true; } break; @@ -429,7 +429,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, if (s->lockval == LOCK_VALUE) { s->resetlevel = val; if (val & 0x100) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; @@ -438,7 +438,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, if (s->lockval == LOCK_VALUE) { s->resetlevel = val; if (val & 0x04) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; diff --git a/hw/misc/cbus.c b/hw/misc/cbus.c index 0c207e3104..677274ce3e 100644 --- a/hw/misc/cbus.c +++ b/hw/misc/cbus.c @@ -356,7 +356,7 @@ static inline void retu_write(CBusRetu *s, int reg, uint16_t val) case RETU_REG_WATCHDOG: if (val == 0 && (s->cc[0] & 2)) - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; case RETU_REG_TXCR: diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c index 05c02fb3a4..008d8bd4d5 100644 --- a/hw/misc/macio/cuda.c +++ b/hw/misc/macio/cuda.c @@ -612,7 +612,7 @@ static bool cuda_cmd_powerdown(CUDAState *s, return false; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); return true; } @@ -624,7 +624,7 @@ static bool cuda_cmd_reset_system(CUDAState *s, return false; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return true; } diff --git a/hw/misc/slavio_misc.c b/hw/misc/slavio_misc.c index edd5de0702..18ff677512 100644 --- a/hw/misc/slavio_misc.c +++ b/hw/misc/slavio_misc.c @@ -258,7 +258,7 @@ static void slavio_aux2_mem_writeb(void *opaque, hwaddr addr, val &= AUX2_PWROFF; s->aux2 = val; if (val & AUX2_PWROFF) - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); slavio_misc_update_irq(s); } @@ -338,7 +338,7 @@ static void 
slavio_sysctrl_mem_writel(void *opaque, hwaddr addr, case 0: if (val & SYS_RESET) { s->sysctrl = SYS_RESETSTAT; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c index 7891219001..44304d48be 100644 --- a/hw/misc/zynq_slcr.c +++ b/hw/misc/zynq_slcr.c @@ -405,7 +405,7 @@ static void zynq_slcr_write(void *opaque, hwaddr offset, switch (offset) { case PSS_RST_CTRL: if (val & R_PSS_RST_CTRL_SOFT_RST) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 98bd683f31..9a3d769aa2 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -589,7 +589,15 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, if (!get_vhost_net(nc->peer)) { return features; } - return vhost_net_get_features(get_vhost_net(nc->peer), features); + features = vhost_net_get_features(get_vhost_net(nc->peer), features); + vdev->backend_features = features; + + if (n->mtu_bypass_backend && + (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { + features |= (1ULL << VIRTIO_NET_F_MTU); + } + + return features; } static uint64_t virtio_net_bad_features(VirtIODevice *vdev) @@ -640,6 +648,11 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) VirtIONet *n = VIRTIO_NET(vdev); int i; + if (n->mtu_bypass_backend && + !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { + features &= ~(1ULL << VIRTIO_NET_F_MTU); + } + virtio_net_set_multiqueue(n, virtio_has_feature(features, VIRTIO_NET_F_MQ)); @@ -2093,6 +2106,8 @@ static Property virtio_net_properties[] = { DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), + DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, + true), DEFINE_PROP_END_OF_LIST(), }; 
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index edc88f4c65..326f5ef024 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -209,7 +209,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) /* Called from RCU critical section */ static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { IOMMUState *is = container_of(iommu, IOMMUState, iommu); hwaddr baseaddr, offset; @@ -482,9 +482,9 @@ static void apb_config_writel (void *opaque, hwaddr addr, s->reset_control |= val & RESET_WMASK; if (val & SOFT_POR) { s->nr_resets = 0; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else if (val & SOFT_XIR) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } break; diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c index 85a3bb0dd2..89133a9dd3 100644 --- a/hw/pci-host/bonito.c +++ b/hw/pci-host/bonito.c @@ -269,7 +269,7 @@ static void bonito_writel(void *opaque, hwaddr addr, } s->regs[saddr] = val; if (reset) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case BONITO_INTENSET: diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 2d02de12d9..4ce201ea65 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -632,7 +632,7 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) PIIX3State *d = opaque; if (val & 4) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } d->rcr = val & 2; /* keep System Reset type only */ diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index f7df2388c1..62f1857206 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -774,7 +774,7 @@ static qemu_irq *ppce500_init_mpic(MachineState *machine, PPCE500Params *params, static void ppce500_power_off(void *opaque, int line, int on) { if (on) { - qemu_system_shutdown_request(); + 
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/hw/ppc/mpc8544_guts.c b/hw/ppc/mpc8544_guts.c index ba69178d69..ce1254b5d4 100644 --- a/hw/ppc/mpc8544_guts.c +++ b/hw/ppc/mpc8544_guts.c @@ -98,7 +98,7 @@ static void mpc8544_guts_write(void *opaque, hwaddr addr, switch (addr) { case MPC8544_GUTS_ADDR_RSTCR: if (value & MPC8544_GUTS_RSTCR_RESET) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; default: diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 5f93083d4a..224184d66d 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -412,7 +412,7 @@ static void ppce500_set_irq(void *opaque, int pin, int level) if (level) { LOG_IRQ("%s: reset the PowerPC system\n", __func__); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case PPCE500_INPUT_RESET_CORE: diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c index d5df94aa6e..fc32e96bf4 100644 --- a/hw/ppc/ppc405_uc.c +++ b/hw/ppc/ppc405_uc.c @@ -1807,7 +1807,7 @@ void ppc40x_chip_reset(PowerPCCPU *cpu) void ppc40x_system_reset(PowerPCCPU *cpu) { printf("Reset PowerPC system\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } void store_40x_dbcr0 (CPUPPCState *env, uint32_t val) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0980d733cd..ab3aab1279 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -101,21 +101,26 @@ static ICSState *spapr_ics_create(sPAPRMachineState *spapr, const char *type_ics, int nr_irqs, Error **errp) { - Error *err = NULL, *local_err = NULL; + Error *local_err = NULL; Object *obj; obj = object_new(type_ics); - object_property_add_child(OBJECT(spapr), "ics", obj, NULL); + object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort); object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); - object_property_set_int(obj, nr_irqs, "nr-irqs", &err); + object_property_set_int(obj, nr_irqs, "nr-irqs", &local_err); + if 
(local_err) { + goto error; + } object_property_set_bool(obj, true, "realized", &local_err); - error_propagate(&err, local_err); - if (err) { - error_propagate(errp, err); - return NULL; + if (local_err) { + goto error; } return ICS_SIMPLE(obj); + +error: + error_propagate(errp, local_err); + return NULL; } static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) @@ -123,25 +128,24 @@ static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp) sPAPRMachineState *spapr = SPAPR_MACHINE(machine); if (kvm_enabled()) { - Error *err = NULL; - if (machine_kernel_irqchip_allowed(machine) && !xics_kvm_init(spapr, errp)) { spapr->icp_type = TYPE_KVM_ICP; - spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, &err); + spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, errp); } if (machine_kernel_irqchip_required(machine) && !spapr->ics) { - error_reportf_err(err, - "kernel_irqchip requested but unavailable: "); - } else { - error_free(err); + error_prepend(errp, "kernel_irqchip requested but unavailable: "); + return; } } if (!spapr->ics) { - xics_spapr_init(spapr, errp); + xics_spapr_init(spapr); spapr->icp_type = TYPE_ICP; spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp); + if (!spapr->ics) { + return; + } } } @@ -1222,16 +1226,21 @@ static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) return shift; } +void spapr_free_hpt(sPAPRMachineState *spapr) +{ + g_free(spapr->htab); + spapr->htab = NULL; + spapr->htab_shift = 0; + close_htab_fd(spapr); +} + static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, Error **errp) { long rc; /* Clean up any HPT info from a previous boot */ - g_free(spapr->htab); - spapr->htab = NULL; - spapr->htab_shift = 0; - close_htab_fd(spapr); + spapr_free_hpt(spapr); rc = kvmppc_reset_htab(shift); if (rc < 0) { @@ -2050,6 +2059,7 @@ static void ppc_spapr_init(MachineState *machine) msi_nonbroken = true; QLIST_INIT(&spapr->phbs); + 
QTAILQ_INIT(&spapr->pending_dimm_unplugs); /* Allocate RMA if necessary */ rma_alloc_size = kvmppc_alloc_rma(&rma); @@ -2569,20 +2579,6 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, uint64_t align = memory_region_get_alignment(mr); uint64_t size = memory_region_size(mr); uint64_t addr; - char *mem_dev; - - if (size % SPAPR_MEMORY_BLOCK_SIZE) { - error_setg(&local_err, "Hotplugged memory size must be a multiple of " - "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE); - goto out; - } - - mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); - if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { - error_setg(&local_err, "Memory backend has bad page size. " - "Use 'memory-backend-file' with correct mem-path."); - goto out; - } pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); if (local_err) { @@ -2603,56 +2599,121 @@ out: error_propagate(errp, local_err); } -typedef struct sPAPRDIMMState { +static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + char *mem_dev; + + if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Hotplugged memory size must be a multiple of " + "%lld MB", SPAPR_MEMORY_BLOCK_SIZE / M_BYTE); + return; + } + + mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); + if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { + error_setg(errp, "Memory backend has bad page size. 
" + "Use 'memory-backend-file' with correct mem-path."); + return; + } +} + +struct sPAPRDIMMState { + PCDIMMDevice *dimm; uint32_t nr_lmbs; -} sPAPRDIMMState; + QTAILQ_ENTRY(sPAPRDIMMState) next; +}; -static void spapr_lmb_release(DeviceState *dev, void *opaque) +static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s, + PCDIMMDevice *dimm) { - sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; - HotplugHandler *hotplug_ctrl; + sPAPRDIMMState *dimm_state = NULL; - if (--ds->nr_lmbs) { - return; + QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) { + if (dimm_state->dimm == dimm) { + break; + } } + return dimm_state; +} - g_free(ds); +static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr, + sPAPRDIMMState *dimm_state) +{ + g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm)); + QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next); +} - /* - * Now that all the LMBs have been removed by the guest, call the - * pc-dimm unplug handler to cleanup up the pc-dimm device. 
- */ - hotplug_ctrl = qdev_get_hotplug_handler(dev); - hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); +static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr, + sPAPRDIMMState *dimm_state) +{ + QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next); + g_free(dimm_state); } -static void spapr_del_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, - Error **errp) +static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms, + PCDIMMDevice *dimm) { sPAPRDRConnector *drc; - sPAPRDRConnectorClass *drck; + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint32_t avail_lmbs = 0; + uint64_t addr_start, addr; int i; - sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); - uint64_t addr = addr_start; + sPAPRDIMMState *ds; - ds->nr_lmbs = nr_lmbs; + addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + addr = addr_start; for (i = 0; i < nr_lmbs; i++) { drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr / SPAPR_MEMORY_BLOCK_SIZE); + addr / SPAPR_MEMORY_BLOCK_SIZE); g_assert(drc); - - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_lmb_release, ds, errp); + if (drc->indicator_state != SPAPR_DR_INDICATOR_STATE_INACTIVE) { + avail_lmbs++; + } addr += SPAPR_MEMORY_BLOCK_SIZE; } - drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr_start / SPAPR_MEMORY_BLOCK_SIZE); - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, - nr_lmbs, - drck->get_index(drc)); + ds = g_malloc0(sizeof(sPAPRDIMMState)); + ds->nr_lmbs = avail_lmbs; + ds->dimm = dimm; + spapr_pending_dimm_unplugs_add(ms, ds); + return ds; +} + +/* Callback to be called during DRC release. 
*/ +void spapr_lmb_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); + sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); + + /* This information will get lost if a migration occurs + * during the unplug process. In this case recover it. */ + if (ds == NULL) { + ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev)); + if (ds->nr_lmbs) { + return; + } + } else if (--ds->nr_lmbs) { + return; + } + + spapr_pending_dimm_unplugs_remove(spapr, ds); + + /* + * Now that all the LMBs have been removed by the guest, call the + * pc-dimm unplug handler to cleanup up the pc-dimm device. + */ + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); } static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -2670,19 +2731,47 @@ static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev); Error *local_err = NULL; PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); uint64_t size = memory_region_size(mr); - uint64_t addr; + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint64_t addr_start, addr; + int i; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + sPAPRDIMMState *ds; - addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &local_err); if (local_err) { goto out; } - spapr_del_lmbs(dev, addr, size, &error_abort); + ds = g_malloc0(sizeof(sPAPRDIMMState)); + ds->nr_lmbs = nr_lmbs; + ds->dimm = dimm; + spapr_pending_dimm_unplugs_add(spapr, ds); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, 
+ addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, errp); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + drck->get_index(drc)); out: error_propagate(errp, local_err); } @@ -2715,11 +2804,13 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, CPUCore *cc = CPU_CORE(dev); CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + assert(core_slot); core_slot->cpu = NULL; object_unparent(OBJECT(dev)); } -static void spapr_core_release(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. */ +void spapr_core_release(DeviceState *dev) { HotplugHandler *hotplug_ctrl; @@ -2752,7 +2843,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, g_assert(drc); drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_core_release, NULL, &local_err); + drck->detach(drc, dev, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -2853,7 +2944,13 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - if (cc->nr_threads != smp_threads) { + /* + * In general we should have homogeneous threads-per-core, but old + * (pre hotplug support) machine types allow the last core to have + * reduced threads as a compatibility hack for when we allowed + * total vcpus not a multiple of threads-per-core. 
+ */ + if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) { error_setg(errp, "invalid nr-threads %d, must be %d", cc->nr_threads, smp_threads); return; @@ -2990,7 +3087,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { spapr_core_pre_plug(hotplug_dev, dev, errp); } } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index a17ea07ef1..ff7058ecc0 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -143,29 +143,30 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) Object *obj; obj = object_new(spapr->icp_type); - object_property_add_child(OBJECT(cpu), "icp", obj, NULL); + object_property_add_child(OBJECT(cpu), "icp", obj, &error_abort); + object_unref(obj); object_property_add_const_link(obj, "xics", OBJECT(spapr), &error_abort); object_property_set_bool(obj, true, "realized", &local_err); if (local_err) { - error_propagate(errp, local_err); - return; + goto error; } object_property_set_bool(child, true, "realized", &local_err); if (local_err) { - object_unparent(obj); - error_propagate(errp, local_err); - return; + goto error; } spapr_cpu_init(spapr, cpu, &local_err); if (local_err) { - object_unparent(obj); - error_propagate(errp, local_err); - return; + goto error; } xics_cpu_setup(XICS_FABRIC(spapr), cpu, ICP(obj)); + return; + +error: + object_unparent(obj); + error_propagate(errp, local_err); } static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 9fa5545991..cc2400bcd5 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -20,6 +20,7 @@ #include 
"qapi/visitor.h" #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ +#include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */ #include "trace.h" #define DRC_CONTAINER_PATH "/dr-connector" @@ -99,8 +100,7 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, if (drc->awaiting_release) { if (drc->configured) { trace_spapr_drc_set_isolation_state_finalizing(get_index(drc)); - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } else { trace_spapr_drc_set_isolation_state_deferring(get_index(drc)); } @@ -153,8 +153,7 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc, if (drc->awaiting_release && drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { trace_spapr_drc_set_allocation_state_finalizing(get_index(drc)); - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) { drc->awaiting_allocation = false; } @@ -404,15 +403,10 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, NULL, 0, NULL); } -static void detach(sPAPRDRConnector *drc, DeviceState *d, - spapr_drc_detach_cb *detach_cb, - void *detach_cb_opaque, Error **errp) +static void detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) { trace_spapr_drc_detach(get_index(drc)); - drc->detach_cb = detach_cb; - drc->detach_cb_opaque = detach_cb_opaque; - /* if we've signalled device presence to the guest, or if the guest * has gone ahead and configured the device (via manually-executed * device add via drmgr in guest, namely), we need to wait @@ -456,8 +450,21 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE; - if (drc->detach_cb) { - drc->detach_cb(drc->dev, drc->detach_cb_opaque); + /* Calling release callbacks based on 
drc->type. */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_CPU: + spapr_core_release(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_PCI: + spapr_phb_remove_pci_device_cb(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + spapr_lmb_release(drc->dev); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + case SPAPR_DR_CONNECTOR_TYPE_VIO: + default: + g_assert(false); } drc->awaiting_release = false; @@ -467,8 +474,6 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, drc->fdt_start_offset = 0; object_property_del(OBJECT(drc), "device", NULL); drc->dev = NULL; - drc->detach_cb = NULL; - drc->detach_cb_opaque = NULL; } static bool release_pending(sPAPRDRConnector *drc) @@ -498,8 +503,7 @@ static void reset(DeviceState *d) * force removal if we are */ if (drc->awaiting_release) { - drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, - drc->detach_cb_opaque, NULL); + drck->detach(drc, DEVICE(drc->dev), NULL); } /* non-PCI devices may be awaiting a transition to UNUSABLE */ @@ -515,6 +519,60 @@ static void reset(DeviceState *d) } } +static bool spapr_drc_needed(void *opaque) +{ + sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque; + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + bool rc = false; + sPAPRDREntitySense value; + drck->entity_sense(drc, &value); + + /* If no dev is plugged in there is no need to migrate the DRC state */ + if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) { + return false; + } + + /* + * If there is dev plugged in, we need to migrate the DRC state when + * it is different from cold-plugged state + */ + switch (drc->type) { + case SPAPR_DR_CONNECTOR_TYPE_PCI: + rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) && + (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) && + drc->configured && drc->signalled && !drc->awaiting_release); + break; + case SPAPR_DR_CONNECTOR_TYPE_CPU: + case SPAPR_DR_CONNECTOR_TYPE_LMB: + rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && + 
(drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) && + drc->configured && drc->signalled && !drc->awaiting_release); + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + case SPAPR_DR_CONNECTOR_TYPE_VIO: + default: + g_assert(false); + } + return rc; +} + +static const VMStateDescription vmstate_spapr_drc = { + .name = "spapr_drc", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(isolation_state, sPAPRDRConnector), + VMSTATE_UINT32(allocation_state, sPAPRDRConnector), + VMSTATE_UINT32(indicator_state, sPAPRDRConnector), + VMSTATE_BOOL(configured, sPAPRDRConnector), + VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), + VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), + VMSTATE_BOOL(signalled, sPAPRDRConnector), + VMSTATE_END_OF_LIST() + } +}; + static void realize(DeviceState *d, Error **errp) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d); @@ -543,6 +601,8 @@ static void realize(DeviceState *d, Error **errp) object_unref(OBJECT(drc)); } g_free(child_name); + vmstate_register(DEVICE(drc), drck->get_index(drc), &vmstate_spapr_drc, + drc); trace_spapr_drc_realize_complete(drck->get_index(drc)); } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index f0b28d8112..73e2a1884f 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -342,20 +342,18 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type) return source->irq; } -static void rtas_event_log_queue(int log_type, void *data, bool exception) +static void rtas_event_log_queue(int log_type, void *data) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); g_assert(data); entry->log_type = log_type; - entry->exception = exception; entry->data = data; QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); } -static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, - bool exception) +static sPAPREventLogEntry 
*rtas_event_log_dequeue(uint32_t event_mask) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; @@ -364,10 +362,6 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, const sPAPREventSource *source = rtas_event_log_to_source(spapr, entry->log_type); - if (entry->exception != exception) { - continue; - } - if (source->mask & event_mask) { break; } @@ -380,7 +374,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, return entry; } -static bool rtas_event_log_contains(uint32_t event_mask, bool exception) +static bool rtas_event_log_contains(uint32_t event_mask) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; @@ -389,10 +383,6 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) const sPAPREventSource *source = rtas_event_log_to_source(spapr, entry->log_type); - if (entry->exception != exception) { - continue; - } - if (source->mask & event_mask) { return true; } @@ -479,7 +469,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; - rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); + rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -572,7 +562,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, cpu_to_be32(drc_id->count_indexed.index); } - rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); + rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -667,7 +657,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, xinfo |= (uint64_t)rtas_ld(args, 6) << 32; } - event = rtas_event_log_dequeue(mask, true); + event = rtas_event_log_dequeue(mask); if (!event) 
{ goto out_no_events; } @@ -690,7 +680,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, * interrupts. */ for (i = 0; i < EVENT_CLASS_MAX; i++) { - if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { + if (rtas_event_log_contains(EVENT_CLASS_MASK(i))) { const sPAPREventSource *source = spapr_event_sources_get_source(spapr->event_sources, i); @@ -710,38 +700,10 @@ static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong args, uint32_t nret, target_ulong rets) { - uint32_t mask, buf, len, event_len; - sPAPREventLogEntry *event; - struct rtas_error_log *hdr; - if (nargs != 4 || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - - mask = rtas_ld(args, 0); - buf = rtas_ld(args, 2); - len = rtas_ld(args, 3); - - event = rtas_event_log_dequeue(mask, false); - if (!event) { - goto out_no_events; - } - - hdr = event->data; - event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr); - - if (event_len < len) { - len = event_len; - } - - cpu_physical_memory_write(buf, event->data, len); - rtas_st(rets, 0, RTAS_OUT_SUCCESS); - g_free(event->data); - g_free(event); - return; - -out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 0d608d6e28..aae5a62a61 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -913,10 +913,7 @@ static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr, /* We assume RADIX, so this catches all the "Do Nothing" cases */ } else if (!(patbe_old & PATBE1_GR)) { /* HASH->RADIX : Free HPT */ - g_free(spapr->htab); - spapr->htab = NULL; - spapr->htab_shift = 0; - close_htab_fd(spapr); + spapr_free_hpt(spapr); } else if (!(patbe_new & PATBE1_GR)) { /* RADIX->HASH || NOTHING->HASH : Allocate HPT */ spapr_setup_hpt_and_vrma(spapr); @@ -1047,19 +1044,13 @@ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu, } } -static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - sPAPRMachineState 
*spapr, - target_ulong opcode, - target_ulong *args) +static uint32_t cas_check_pvr(PowerPCCPU *cpu, target_ulong *addr, + Error **errp) { - target_ulong list = ppc64_phys_to_real(args[0]); - target_ulong ov_table; bool explicit_match = false; /* Matched the CPU's real PVR */ uint32_t max_compat = cpu->max_compat; uint32_t best_compat = 0; int i; - sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; - bool guest_radix; /* * We scan the supplied table of PVRs looking for two things @@ -1069,9 +1060,9 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, for (i = 0; i < 512; ++i) { uint32_t pvr, pvr_mask; - pvr_mask = ldl_be_phys(&address_space_memory, list); - pvr = ldl_be_phys(&address_space_memory, list + 4); - list += 8; + pvr_mask = ldl_be_phys(&address_space_memory, *addr); + pvr = ldl_be_phys(&address_space_memory, *addr + 4); + *addr += 8; if (~pvr_mask & pvr) { break; /* Terminator record */ @@ -1090,17 +1081,38 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, /* We couldn't find a suitable compatibility mode, and either * the guest doesn't support "raw" mode for this CPU, or raw * mode is disabled because a maximum compat mode is set */ - return H_HARDWARE; + error_setg(errp, "Couldn't negotiate a suitable PVR during CAS"); + return 0; } /* Parsing finished */ trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat); - /* Update CPUs */ - if (cpu->compat_pvr != best_compat) { - Error *local_err = NULL; + return best_compat; +} - ppc_set_compat_all(best_compat, &local_err); +static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* Working address in data buffer */ + target_ulong addr = ppc64_phys_to_real(args[0]); + target_ulong ov_table; + uint32_t cas_pvr; + sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; + bool guest_radix; + Error *local_err = NULL; + + cas_pvr = 
cas_check_pvr(cpu, &addr, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + + /* Update CPUs */ + if (cpu->compat_pvr != cas_pvr) { + ppc_set_compat_all(cas_pvr, &local_err); if (local_err) { error_report_err(local_err); return H_HARDWARE; @@ -1108,7 +1120,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, } /* For the future use: here @ov_table points to the first option vector */ - ov_table = list; + ov_table = addr; ov1_guest = spapr_ovec_parse_vector(ov_table, 1); ov5_guest = spapr_ovec_parse_vector(ov_table, 5); @@ -1162,7 +1174,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, spapr_ovec_cleanup(ov5_updates); if (spapr->cas_reboot) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else { /* If ppc_spapr_reset() did not set up a HPT but one is necessary * (because the guest isn't going to use radix) then set it up here. */ diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 29c80bb3c8..0341bc069d 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -111,7 +111,7 @@ static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table) /* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, - bool is_write) + IOMMUAccessFlags flag) { sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); uint64_t tce; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a7cff32bbf..e4daf8d5f1 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1369,7 +1369,8 @@ out: } } -static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque) +/* Callback to be called during DRC release. 
*/ +void spapr_phb_remove_pci_device_cb(DeviceState *dev) { /* some version guests do not wait for completion of a device * cleanup (generally done asynchronously by the kernel) before @@ -1392,7 +1393,7 @@ static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp); + drck->detach(drc, DEVICE(pdev), errp); } static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb, diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 619f32c054..128d993d04 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -110,7 +110,7 @@ static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); cpu_stop_current(); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -124,7 +124,7 @@ static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } diff --git a/hw/s390x/3270-ccw.c b/hw/s390x/3270-ccw.c index a7a5b412e4..6e6eee4e90 100644 --- a/hw/s390x/3270-ccw.c +++ b/hw/s390x/3270-ccw.c @@ -98,9 +98,13 @@ static void emulated_ccw_3270_realize(DeviceState *ds, Error **errp) EmulatedCcw3270Class *ck = EMULATED_CCW_3270_GET_CLASS(dev); CcwDevice *cdev = CCW_DEVICE(ds); CCWDeviceClass *cdk = CCW_DEVICE_GET_CLASS(cdev); - SubchDev *sch = css_create_virtual_sch(cdev->devno, errp); + DeviceState *parent = DEVICE(cdev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; Error *err = NULL; + sch = css_create_sch(cdev->devno, true, cbus->squash_mcss, errp); if (!sch) { return; } diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs 
index 36bd4b1645..a8e5575a8a 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -14,3 +14,4 @@ obj-y += ccw-device.o obj-y += s390-pci-bus.o s390-pci-inst.o obj-y += s390-skeys.o obj-$(CONFIG_KVM) += s390-skeys-kvm.o +obj-y += s390-ccw.o diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c index b54ac01d37..823747fcd7 100644 --- a/hw/s390x/css-bridge.c +++ b/hw/s390x/css-bridge.c @@ -17,6 +17,7 @@ #include "hw/s390x/css.h" #include "ccw-device.h" #include "hw/s390x/css-bridge.h" +#include "cpu.h" /* * Invoke device-specific unplug handler, disable the subchannel @@ -103,6 +104,7 @@ VirtualCssBus *virtual_css_bus_init(void) /* Create bus on bridge device */ bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); cbus = VIRTUAL_CSS_BUS(bus); + cbus->squash_mcss = s390_get_squash_mcss(); /* Enable hotplugging */ qbus_set_hotplug_handler(bus, dev, &error_abort); diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 15c4f4b249..1e2f26b65a 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -13,6 +13,7 @@ #include "qapi/error.h" #include "qapi/visitor.h" #include "hw/qdev.h" +#include "qemu/error-report.h" #include "qemu/bitops.h" #include "exec/address-spaces.h" #include "cpu.h" @@ -258,7 +259,7 @@ uint16_t css_build_subchannel_id(SubchDev *sch) return css_do_build_subchannel_id(sch->cssid, sch->ssid); } -static void css_inject_io_interrupt(SubchDev *sch) +void css_inject_io_interrupt(SubchDev *sch) { uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11; @@ -523,7 +524,7 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr, return ret; } -static void sch_handle_start_func(SubchDev *sch, ORB *orb) +static void sch_handle_start_func_virtual(SubchDev *sch, ORB *orb) { PMCW *p = &sch->curr_status.pmcw; @@ -625,13 +626,58 @@ static void sch_handle_start_func(SubchDev *sch, ORB *orb) } +static int sch_handle_start_func_passthrough(SubchDev *sch, ORB *orb) +{ + + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = 
&sch->curr_status.scsw; + int ret; + + if (!(s->ctrl & SCSW_ACTL_SUSP)) { + assert(orb != NULL); + p->intparm = orb->intparm; + } + + /* + * Only support prefetch enable mode. + * Only support 64bit addressing idal. + */ + if (!(orb->ctrl0 & ORB_CTRL0_MASK_PFCH) || + !(orb->ctrl0 & ORB_CTRL0_MASK_C64)) { + return -EINVAL; + } + + ret = s390_ccw_cmd_request(orb, s, sch->driver_data); + switch (ret) { + /* Currently we don't update control block and just return the cc code. */ + case 0: + break; + case -EBUSY: + break; + case -ENODEV: + break; + case -EACCES: + /* Let's reflect an inaccessible host device by cc 3. */ + ret = -ENODEV; + break; + default: + /* + * All other return codes will trigger a program check, + * or set cc to 1. + */ + break; + }; + + return ret; +} + /* * On real machines, this would run asynchronously to the main vcpus. * We might want to make some parts of the ssch handling (interpreting * read/writes) asynchronous later on if we start supporting more than * our current very simple devices. */ -static void do_subchannel_work(SubchDev *sch, ORB *orb) +int do_subchannel_work_virtual(SubchDev *sch, ORB *orb) { SCSW *s = &sch->curr_status.scsw; @@ -642,12 +688,45 @@ static void do_subchannel_work(SubchDev *sch, ORB *orb) sch_handle_halt_func(sch); } else if (s->ctrl & SCSW_FCTL_START_FUNC) { /* Triggered by both ssch and rsch. */ - sch_handle_start_func(sch, orb); + sch_handle_start_func_virtual(sch, orb); } else { /* Cannot happen. 
*/ - return; + return 0; } css_inject_io_interrupt(sch); + return 0; +} + +int do_subchannel_work_passthrough(SubchDev *sch, ORB *orb) +{ + int ret; + SCSW *s = &sch->curr_status.scsw; + + if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) { + /* TODO: Clear handling */ + sch_handle_clear_func(sch); + ret = 0; + } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) { + /* TODO: Halt handling */ + sch_handle_halt_func(sch); + ret = 0; + } else if (s->ctrl & SCSW_FCTL_START_FUNC) { + ret = sch_handle_start_func_passthrough(sch, orb); + } else { + /* Cannot happen. */ + return -ENODEV; + } + + return ret; +} + +static int do_subchannel_work(SubchDev *sch, ORB *orb) +{ + if (sch->do_subchannel_work) { + return sch->do_subchannel_work(sch, orb); + } else { + return -EINVAL; + } } static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src) @@ -670,7 +749,7 @@ static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src) dest->chars = cpu_to_be32(src->chars); } -static void copy_scsw_to_guest(SCSW *dest, const SCSW *src) +void copy_scsw_to_guest(SCSW *dest, const SCSW *src) { dest->flags = cpu_to_be16(src->flags); dest->ctrl = cpu_to_be16(src->ctrl); @@ -966,8 +1045,7 @@ int css_do_ssch(SubchDev *sch, ORB *orb) s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND); s->flags &= ~SCSW_FLAGS_MASK_PNO; - do_subchannel_work(sch, orb); - ret = 0; + ret = do_subchannel_work(sch, orb); out: return ret; @@ -1326,7 +1404,8 @@ unsigned int css_find_free_chpid(uint8_t cssid) return MAX_CHPID + 1; } -static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) +static int css_add_chpid(uint8_t cssid, uint8_t chpid, uint8_t type, + bool is_virt) { CssImage *css; @@ -1340,7 +1419,7 @@ static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) } css->chpids[chpid].in_use = 1; css->chpids[chpid].type = type; - css->chpids[chpid].is_virtual = 1; + css->chpids[chpid].is_virtual = is_virt; css_generate_chp_crws(cssid, chpid); @@ -1364,7 +1443,7 @@ void 
css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type) p->pam = 0x80; p->chpid[0] = chpid; if (!css->chpids[chpid].in_use) { - css_add_virtual_chpid(sch->cssid, chpid, type); + css_add_chpid(sch->cssid, chpid, type, true); } memset(s, 0, sizeof(SCSW)); @@ -1946,28 +2025,59 @@ PropertyInfo css_devid_ro_propinfo = { .get = get_css_devid, }; -SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp) +SubchDev *css_create_sch(CssDevId bus_id, bool is_virtual, bool squash_mcss, + Error **errp) { uint16_t schid = 0; SubchDev *sch; if (bus_id.valid) { - /* Enforce use of virtual cssid. */ - if (bus_id.cssid != VIRTUAL_CSSID) { - error_setg(errp, "cssid %hhx not valid for virtual devices", - bus_id.cssid); + if (is_virtual != (bus_id.cssid == VIRTUAL_CSSID)) { + error_setg(errp, "cssid %hhx not valid for %s devices", + bus_id.cssid, + (is_virtual ? "virtual" : "non-virtual")); return NULL; } + } + + if (bus_id.valid) { + if (squash_mcss) { + bus_id.cssid = channel_subsys.default_cssid; + } else if (!channel_subsys.css[bus_id.cssid]) { + css_create_css_image(bus_id.cssid, false); + } + if (!css_find_free_subch_for_devno(bus_id.cssid, bus_id.ssid, bus_id.devid, &schid, errp)) { return NULL; } - } else { - bus_id.cssid = VIRTUAL_CSSID; + } else if (squash_mcss || is_virtual) { + bus_id.cssid = channel_subsys.default_cssid; + if (!css_find_free_subch_and_devno(bus_id.cssid, &bus_id.ssid, &bus_id.devid, &schid, errp)) { return NULL; } + } else { + for (bus_id.cssid = 0; bus_id.cssid < MAX_CSSID; ++bus_id.cssid) { + if (bus_id.cssid == VIRTUAL_CSSID) { + continue; + } + + if (!channel_subsys.css[bus_id.cssid]) { + css_create_css_image(bus_id.cssid, false); + } + + if (css_find_free_subch_and_devno(bus_id.cssid, &bus_id.ssid, + &bus_id.devid, &schid, + NULL)) { + break; + } + if (bus_id.cssid == MAX_CSSID) { + error_setg(errp, "Virtual channel subsystem is full!"); + return NULL; + } + } } sch = g_malloc0(sizeof(*sch)); @@ -1978,3 +2088,147 @@ SubchDev 
*css_create_virtual_sch(CssDevId bus_id, Error **errp) css_subch_assign(sch->cssid, sch->ssid, schid, sch->devno, sch); return sch; } + +static int css_sch_get_chpids(SubchDev *sch, CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + uint32_t chpid[8]; + int i; + PMCW *p = &sch->curr_status.pmcw; + + fid_path = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/chpids", + dev_id->cssid, dev_id->ssid, dev_id->devid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x %x %x %x %x %x %x %x", + &chpid[0], &chpid[1], &chpid[2], &chpid[3], + &chpid[4], &chpid[5], &chpid[6], &chpid[7]) != 8) { + fclose(fd); + g_free(fid_path); + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(p->chpid); i++) { + p->chpid[i] = chpid[i]; + } + + fclose(fd); + g_free(fid_path); + + return 0; +} + +static int css_sch_get_path_masks(SubchDev *sch, CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + uint32_t pim, pam, pom; + PMCW *p = &sch->curr_status.pmcw; + + fid_path = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/pimpampom", + dev_id->cssid, dev_id->ssid, dev_id->devid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x %x %x", &pim, &pam, &pom) != 3) { + fclose(fd); + g_free(fid_path); + return -EINVAL; + } + + p->pim = pim; + p->pam = pam; + p->pom = pom; + fclose(fd); + g_free(fid_path); + + return 0; +} + +static int css_sch_get_chpid_type(uint8_t chpid, uint32_t *type, + CssDevId *dev_id) +{ + char *fid_path; + FILE *fd; + + fid_path = g_strdup_printf("/sys/devices/css%x/chp0.%02x/type", + dev_id->cssid, chpid); + fd = fopen(fid_path, "r"); + if (fd == NULL) { + error_report("%s: open %s failed", __func__, fid_path); + g_free(fid_path); + return -EINVAL; + } + + if (fscanf(fd, "%x", type) != 1) { + fclose(fd); + g_free(fid_path); + return 
-EINVAL; + } + + fclose(fd); + g_free(fid_path); + + return 0; +} + +/* + * We currently retrieve the real device information from sysfs to build the + * guest subchannel information block without considering the migration feature. + * We need to revisit this problem when we want to add migration support. + */ +int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id) +{ + CssImage *css = channel_subsys.css[sch->cssid]; + PMCW *p = &sch->curr_status.pmcw; + SCSW *s = &sch->curr_status.scsw; + uint32_t type; + int i, ret; + + assert(css != NULL); + memset(p, 0, sizeof(PMCW)); + p->flags |= PMCW_FLAGS_MASK_DNV; + /* We are dealing with I/O subchannels only. */ + p->devno = sch->devno; + + /* Grab path mask from sysfs. */ + ret = css_sch_get_path_masks(sch, dev_id); + if (ret) { + return ret; + } + + /* Grab chpids from sysfs. */ + ret = css_sch_get_chpids(sch, dev_id); + if (ret) { + return ret; + } + + /* Build chpid type. */ + for (i = 0; i < ARRAY_SIZE(p->chpid); i++) { + if (p->chpid[i] && !css->chpids[p->chpid[i]].in_use) { + ret = css_sch_get_chpid_type(p->chpid[i], &type, dev_id); + if (ret) { + return ret; + } + css_add_chpid(sch->cssid, p->chpid[i], type, false); + } + } + + memset(s, 0, sizeof(SCSW)); + sch->curr_status.mba = 0; + for (i = 0; i < ARRAY_SIZE(sch->curr_status.mda); i++) { + sch->curr_status.mda[i] = 0; + } + + return 0; +} diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 75d3c681a4..4e6469db0f 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -396,7 +396,7 @@ void s390_reipl_request(void) S390IPLState *ipl = get_ipl_device(); ipl->reipl_requested = true; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } void s390_ipl_prepare_cpu(S390CPU *cpu) diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c new file mode 100644 index 0000000000..8614dda6f8 --- /dev/null +++ b/hw/s390x/s390-ccw.c @@ -0,0 +1,153 @@ +/* + * s390 CCW Assignment Support + * + * Copyright 2017 IBM Corp + * Author(s): Dong Jia Shi 
<bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + * Pierre Morel <pmorel@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 + * or (at your option) any later version. See the COPYING file in the + * top-level directory. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "libgen.h" +#include "hw/s390x/css.h" +#include "hw/s390x/css-bridge.h" +#include "hw/s390x/s390-ccw.h" + +int s390_ccw_cmd_request(ORB *orb, SCSW *scsw, void *data) +{ + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(data); + + if (cdc->handle_request) { + return cdc->handle_request(orb, scsw, data); + } else { + return -ENOSYS; + } +} + +static void s390_ccw_get_dev_info(S390CCWDevice *cdev, + char *sysfsdev, + Error **errp) +{ + unsigned int cssid, ssid, devid; + char dev_path[PATH_MAX] = {0}, *tmp; + + if (!sysfsdev) { + error_setg(errp, "No host device provided"); + error_append_hint(errp, + "Use -device vfio-ccw,sysfsdev=PATH_TO_DEVICE\n"); + return; + } + + if (!realpath(sysfsdev, dev_path)) { + error_setg_errno(errp, errno, "Host device '%s' not found", sysfsdev); + return; + } + /* Owned by cdev; released again below if the bus id cannot be parsed. */ + cdev->mdevid = g_strdup(basename(dev_path)); + tmp = basename(dirname(dev_path)); + if (sscanf(tmp, "%2x.%1x.%4x", &cssid, &ssid, &devid) != 3) { + error_setg_errno(errp, errno, "Failed to read %s", tmp); + g_free(cdev->mdevid); + cdev->mdevid = NULL; + return; + } + cdev->hostid.cssid = cssid; + cdev->hostid.ssid = ssid; + cdev->hostid.devid = devid; + cdev->hostid.valid = true; +} + +static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) +{ + CcwDevice *ccw_dev = CCW_DEVICE(cdev); + CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); + DeviceState *parent = DEVICE(ccw_dev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; + int ret; + Error *err = NULL; + + s390_ccw_get_dev_info(cdev, sysfsdev, &err); + if (err) { + goto out_err_propagate; + } + + sch = 
css_create_sch(ccw_dev->devno, false, cbus->squash_mcss, &err); + if (!sch) { + goto out_mdevid_free; + } + sch->driver_data = cdev; + sch->do_subchannel_work = do_subchannel_work_passthrough; + + ccw_dev->sch = sch; + ret = css_sch_build_schib(sch, &cdev->hostid); + if (ret) { + error_setg_errno(&err, -ret, "%s: Failed to build initial schib", + __func__); + goto out_err; + } + + ck->realize(ccw_dev, &err); + if (err) { + goto out_err; + } + + css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, + parent->hotplugged, 1); + return; + +out_err: + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + ccw_dev->sch = NULL; + g_free(sch); +out_mdevid_free: + g_free(cdev->mdevid); +out_err_propagate: + error_propagate(errp, err); +} + +static void s390_ccw_unrealize(S390CCWDevice *cdev, Error **errp) +{ + CcwDevice *ccw_dev = CCW_DEVICE(cdev); + SubchDev *sch = ccw_dev->sch; + + if (sch) { + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + g_free(sch); + ccw_dev->sch = NULL; + } + + g_free(cdev->mdevid); +} + +static void s390_ccw_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + dc->bus_type = TYPE_VIRTUAL_CSS_BUS; + cdc->realize = s390_ccw_realize; + cdc->unrealize = s390_ccw_unrealize; +} + +static const TypeInfo s390_ccw_info = { + .name = TYPE_S390_CCW, + .parent = TYPE_CCW_DEVICE, + .instance_size = sizeof(S390CCWDevice), + .class_size = sizeof(S390CCWDeviceClass), + .class_init = s390_ccw_class_init, + .abstract = true, +}; + +static void register_s390_ccw_type(void) +{ + type_register_static(&s390_ccw_info); +} + +type_init(register_s390_ccw_type) diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 66a6fbeb8c..5651483781 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -357,7 +357,7 @@ out: } static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *mr, hwaddr addr, - bool is_write) + 
IOMMUAccessFlags flag) { uint64_t pte; uint32_t flags; diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index 314a9cbad4..8bc7c98682 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -624,7 +624,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) mr = &iommu->iommu_mr; while (start < end) { - entry = mr->iommu_ops->translate(mr, start, 0); + entry = mr->iommu_ops->translate(mr, start, IOMMU_NONE); if (!entry.translated_addr) { pbdev->state = ZPCI_FS_ERROR; diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index fdd4384ff0..c9021f2fa9 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -136,10 +136,15 @@ static void ccw_init(MachineState *machine) kvm_s390_enable_css_support(s390_cpu_addr2state(0)); } /* - * Create virtual css and set it as default so that non mcss-e - * enabled guests only see virtio devices. + * Non mcss-e enabled guests only see the devices from the default + * css, which is determined by the value of the squash_mcss property. + * Note: we must not squash non virtual devices to css 0xFE. 
*/ - ret = css_create_css_image(VIRTUAL_CSSID, true); + if (css_bus->squash_mcss) { + ret = css_create_css_image(0, true); + } else { + ret = css_create_css_image(VIRTUAL_CSSID, true); + } assert(ret == 0); /* Create VirtIO network adapters */ @@ -303,6 +308,20 @@ static void machine_set_loadparm(Object *obj, const char *val, Error **errp) ms->loadparm[i] = ' '; /* pad right with spaces */ } } +static inline bool machine_get_squash_mcss(Object *obj, Error **errp) +{ + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); + + return ms->s390_squash_mcss; +} + +static inline void machine_set_squash_mcss(Object *obj, bool value, + Error **errp) +{ + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); + + ms->s390_squash_mcss = value; +} static inline void s390_machine_initfn(Object *obj) { @@ -328,6 +347,13 @@ static inline void s390_machine_initfn(Object *obj) " to upper case) to pass to machine loader, boot manager," " and guest kernel", NULL); + object_property_add_bool(obj, "s390-squash-mcss", + machine_get_squash_mcss, + machine_set_squash_mcss, NULL); + object_property_set_description(obj, "s390-squash-mcss", + "enable/disable squashing subchannels into the default css", + NULL); + object_property_set_bool(obj, false, "s390-squash-mcss", NULL); } static const TypeInfo ccw_machine_info = { diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index e7167e3d05..e6a6f74be3 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -680,9 +680,13 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_GET_CLASS(dev); CcwDevice *ccw_dev = CCW_DEVICE(dev); CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); - SubchDev *sch = css_create_virtual_sch(ccw_dev->devno, errp); + DeviceState *parent = DEVICE(ccw_dev); + BusState *qbus = qdev_get_parent_bus(parent); + VirtualCssBus *cbus = VIRTUAL_CSS_BUS(qbus); + SubchDev *sch; Error *err = NULL; + sch = css_create_sch(ccw_dev->devno, true, 
cbus->squash_mcss, errp); if (!sch) { return; } @@ -697,6 +701,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) sch->disable_cb = virtio_sch_disable_cb; sch->id.reserved = 0xff; sch->id.cu_type = VIRTIO_CCW_CU_TYPE; + sch->do_subchannel_work = do_subchannel_work_virtual; ccw_dev->sch = sch; dev->indicators = NULL; dev->revision = -1; diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c index 8f520cec1c..e6fc74ed87 100644 --- a/hw/sh4/r2d.c +++ b/hw/sh4/r2d.c @@ -164,7 +164,7 @@ r2d_fpga_write(void *opaque, hwaddr addr, uint64_t value, unsigned int size) break; case PA_POWOFF: if (value & 1) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } break; case PA_VERREG: diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c index 8e18236c5a..d13bc30b2d 100644 --- a/hw/timer/etraxfs_timer.c +++ b/hw/timer/etraxfs_timer.c @@ -207,7 +207,7 @@ static void watchdog_hit(void *opaque) qemu_irq_raise(t->nmi); } else - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); t->wd_hits++; } diff --git a/hw/timer/m48t59.c b/hw/timer/m48t59.c index 474981a6ac..4a064fbfd2 100644 --- a/hw/timer/m48t59.c +++ b/hw/timer/m48t59.c @@ -1,7 +1,7 @@ /* * QEMU M48T59 and M48T08 NVRAM emulation for PPC PREP and Sparc platforms * - * Copyright (c) 2003-2005, 2007 Jocelyn Mayer + * Copyright (c) 2003-2005, 2007, 2017 Jocelyn Mayer * Copyright (c) 2013 Hervé Poussineau * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -159,7 +159,7 @@ static void watchdog_cb (void *opaque) NVRAM->buffer[0x1FF7] = 0x00; NVRAM->buffer[0x1FFC] &= ~0x40; /* May it be a hw CPU Reset instead ? 
*/ - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else { qemu_set_irq(NVRAM->IRQ, 1); qemu_set_irq(NVRAM->IRQ, 0); diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c index 44885907c9..93bc6e1790 100644 --- a/hw/timer/milkymist-sysctl.c +++ b/hw/timer/milkymist-sysctl.c @@ -90,7 +90,7 @@ static void sysctl_icap_write(MilkymistSysctlState *s, uint32_t value) trace_milkymist_sysctl_icap_write(value); switch (value & 0xffff) { case 0x000e: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); break; } } @@ -195,7 +195,7 @@ static void sysctl_write(void *opaque, hwaddr addr, uint64_t value, s->regs[addr] = 1; break; case R_SYSTEM_ID: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case R_GPIO_IN: diff --git a/hw/timer/pxa2xx_timer.c b/hw/timer/pxa2xx_timer.c index 59002b407e..68ba5a70b3 100644 --- a/hw/timer/pxa2xx_timer.c +++ b/hw/timer/pxa2xx_timer.c @@ -401,7 +401,7 @@ static void pxa2xx_timer_tick(void *opaque) if (t->num == 3) if (i->reset3 & 1) { i->reset3 = 0; - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } } diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs index 5958be8ce3..97f1c4561a 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -8,6 +8,7 @@ common-obj-$(CONFIG_USB_OHCI) += hcd-ohci.o common-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o hcd-ehci-pci.o common-obj-$(CONFIG_USB_EHCI_SYSBUS) += hcd-ehci-sysbus.o common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o +common-obj-$(CONFIG_USB_XHCI_NEC) += hcd-xhci-nec.o common-obj-$(CONFIG_USB_MUSB) += hcd-musb.o obj-$(CONFIG_TUSB6010) += tusb6010.o diff --git a/hw/usb/core.c b/hw/usb/core.c index 45fa00c517..241ae66b15 100644 --- a/hw/usb/core.c +++ b/hw/usb/core.c @@ -98,6 +98,14 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) USBDevice *dev = ep->dev; USBBus *bus = usb_bus_from_device(dev); + if 
(!qdev_hotplug) { + /* + * This is machine init cold plug. No need to wakeup anyone, + * all devices will be reset anyway. And trying to wakeup can + * cause problems due to hitting uninitialized devices. + */ + return; + } if (dev->remote_wakeup && dev->port && dev->port->ops->wakeup) { dev->port->ops->wakeup(dev->port); } diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c index 47b7519910..e82a6a6c44 100644 --- a/hw/usb/dev-hub.c +++ b/hw/usb/dev-hub.c @@ -402,7 +402,20 @@ static void usb_hub_handle_control(USBDevice *dev, USBPacket *p, port->wPortChange &= ~PORT_STAT_C_ENABLE; break; case PORT_SUSPEND: - port->wPortStatus &= ~PORT_STAT_SUSPEND; + if (port->wPortStatus & PORT_STAT_SUSPEND) { + port->wPortStatus &= ~PORT_STAT_SUSPEND; + + /* + * USB Spec rev2.0 11.24.2.7.2.3 C_PORT_SUSPEND + * "This bit is set on the following transitions: + * - On transition from the Resuming state to the + * SendEOP [sic] state" + * + * Note that this includes both remote wake-up and + * explicit ClearPortFeature(PORT_SUSPEND). 
+ */ + port->wPortChange |= PORT_STAT_C_SUSPEND; + } break; case PORT_C_SUSPEND: port->wPortChange &= ~PORT_STAT_C_SUSPEND; diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c index 83a4f0e6fb..76ceca1f5c 100644 --- a/hw/usb/dev-serial.c +++ b/hw/usb/dev-serial.c @@ -516,27 +516,16 @@ static USBDevice *usb_serial_init(USBBus *bus, const char *filename) char label[32]; static int index; - while (*filename && *filename != ':') { - const char *p; - - if (strstart(filename, "vendorid=", &p)) { - error_report("vendorid is not supported anymore"); - return NULL; - } else if (strstart(filename, "productid=", &p)) { - error_report("productid is not supported anymore"); - return NULL; - } else { - error_report("unrecognized serial USB option %s", filename); - return NULL; - } - while(*filename == ',') - filename++; + if (*filename == ':') { + filename++; + } else if (*filename) { + error_report("unrecognized serial USB option %s", filename); + return NULL; } if (!*filename) { error_report("character device specification needed"); return NULL; } - filename++; snprintf(label, sizeof(label), "usbserial%d", index++); cdrv = qemu_chr_new(label, filename); diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 50ef817f93..17c572c55f 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -2232,13 +2232,13 @@ static void ehci_update_frindex(EHCIState *ehci, int uframes) ehci->frindex = (ehci->frindex + uframes) % 0x4000; } -static void ehci_frame_timer(void *opaque) +static void ehci_work_bh(void *opaque) { EHCIState *ehci = opaque; int need_timer = 0; int64_t expire_time, t_now; uint64_t ns_elapsed; - int uframes, skipped_uframes; + uint64_t uframes, skipped_uframes; int i; t_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); @@ -2324,6 +2324,13 @@ static void ehci_frame_timer(void *opaque) } } +static void ehci_work_timer(void *opaque) +{ + EHCIState *ehci = opaque; + + qemu_bh_schedule(ehci->async_bh); +} + static const MemoryRegionOps ehci_mmio_caps_ops = { .read = 
ehci_caps_read, .write = ehci_caps_write, @@ -2478,8 +2485,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) s->ports[i].dev = 0; } - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_frame_timer, s); - s->async_bh = qemu_bh_new(ehci_frame_timer, s); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); + s->async_bh = qemu_bh_new(ehci_work_bh, s); s->device = dev; s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); diff --git a/hw/usb/hcd-xhci-nec.c b/hw/usb/hcd-xhci-nec.c new file mode 100644 index 0000000000..75715a048a --- /dev/null +++ b/hw/usb/hcd-xhci-nec.c @@ -0,0 +1,63 @@ +/* + * USB xHCI controller emulation + * + * Copyright (c) 2011 Securiforest + * Date: 2011-05-11 ; Author: Hector Martin <hector@marcansoft.com> + * Based on usb-ohci.c, emulates Renesas NEC USB 3.0 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "hw/usb.h" +#include "hw/pci/pci.h" + +#include "hcd-xhci.h" + +static Property nec_xhci_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO), + DEFINE_PROP_BIT("superspeed-ports-first", + XHCIState, flags, XHCI_FLAG_SS_FIRST, true), + DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags, + XHCI_FLAG_FORCE_PCIE_ENDCAP, false), + DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS), + DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nec_xhci_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = nec_xhci_properties; + k->vendor_id = PCI_VENDOR_ID_NEC; + k->device_id = PCI_DEVICE_ID_NEC_UPD720200; + k->revision = 0x03; +} + +static const TypeInfo nec_xhci_info = { + .name = TYPE_NEC_XHCI, + .parent = TYPE_XHCI, + .class_init = nec_xhci_class_init, +}; + +static void nec_xhci_register_types(void) +{ + type_register_static(&nec_xhci_info); +} + +type_init(nec_xhci_register_types) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 77d8e1137a..a0c7960a7b 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -29,6 +29,8 @@ #include "trace.h" #include "qapi/error.h" +#include "hcd-xhci.h" + //#define DEBUG_XHCI //#define DEBUG_DATA @@ -40,16 +42,6 @@ #define FIXME(_msg) do { fprintf(stderr, "FIXME %s:%d %s\n", \ __func__, __LINE__, _msg); abort(); } while (0) -#define MAXPORTS_2 15 -#define MAXPORTS_3 15 - -#define MAXPORTS (MAXPORTS_2+MAXPORTS_3) -#define MAXSLOTS 64 -#define MAXINTRS 16 - -/* Very pessimistic, let's hope it's enough for all cases */ -#define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS) - #define TRB_LINK_LIMIT 32 #define COMMAND_LIMIT 256 #define TRANSFER_LIMIT 256 @@ -164,84 +156,8 @@ enum { PLS_RESUME = 15, }; -typedef enum TRBType 
{ - TRB_RESERVED = 0, - TR_NORMAL, - TR_SETUP, - TR_DATA, - TR_STATUS, - TR_ISOCH, - TR_LINK, - TR_EVDATA, - TR_NOOP, - CR_ENABLE_SLOT, - CR_DISABLE_SLOT, - CR_ADDRESS_DEVICE, - CR_CONFIGURE_ENDPOINT, - CR_EVALUATE_CONTEXT, - CR_RESET_ENDPOINT, - CR_STOP_ENDPOINT, - CR_SET_TR_DEQUEUE, - CR_RESET_DEVICE, - CR_FORCE_EVENT, - CR_NEGOTIATE_BW, - CR_SET_LATENCY_TOLERANCE, - CR_GET_PORT_BANDWIDTH, - CR_FORCE_HEADER, - CR_NOOP, - ER_TRANSFER = 32, - ER_COMMAND_COMPLETE, - ER_PORT_STATUS_CHANGE, - ER_BANDWIDTH_REQUEST, - ER_DOORBELL, - ER_HOST_CONTROLLER, - ER_DEVICE_NOTIFICATION, - ER_MFINDEX_WRAP, - /* vendor specific bits */ - CR_VENDOR_NEC_FIRMWARE_REVISION = 49, - CR_VENDOR_NEC_CHALLENGE_RESPONSE = 50, -} TRBType; - #define CR_LINK TR_LINK -typedef enum TRBCCode { - CC_INVALID = 0, - CC_SUCCESS, - CC_DATA_BUFFER_ERROR, - CC_BABBLE_DETECTED, - CC_USB_TRANSACTION_ERROR, - CC_TRB_ERROR, - CC_STALL_ERROR, - CC_RESOURCE_ERROR, - CC_BANDWIDTH_ERROR, - CC_NO_SLOTS_ERROR, - CC_INVALID_STREAM_TYPE_ERROR, - CC_SLOT_NOT_ENABLED_ERROR, - CC_EP_NOT_ENABLED_ERROR, - CC_SHORT_PACKET, - CC_RING_UNDERRUN, - CC_RING_OVERRUN, - CC_VF_ER_FULL, - CC_PARAMETER_ERROR, - CC_BANDWIDTH_OVERRUN, - CC_CONTEXT_STATE_ERROR, - CC_NO_PING_RESPONSE_ERROR, - CC_EVENT_RING_FULL_ERROR, - CC_INCOMPATIBLE_DEVICE_ERROR, - CC_MISSED_SERVICE_ERROR, - CC_COMMAND_RING_STOPPED, - CC_COMMAND_ABORTED, - CC_STOPPED, - CC_STOPPED_LENGTH_INVALID, - CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR = 29, - CC_ISOCH_BUFFER_OVERRUN = 31, - CC_EVENT_LOST_ERROR, - CC_UNDEFINED_ERROR, - CC_INVALID_STREAM_ID_ERROR, - CC_SECONDARY_BANDWIDTH_ERROR, - CC_SPLIT_TRANSACTION_ERROR -} TRBCCode; - #define TRB_C (1<<0) #define TRB_TYPE_SHIFT 10 #define TRB_TYPE_MASK 0x3f @@ -301,10 +217,6 @@ typedef enum TRBCCode { #define SLOT_CONTEXT_ENTRIES_MASK 0x1f #define SLOT_CONTEXT_ENTRIES_SHIFT 27 -typedef struct XHCIState XHCIState; -typedef struct XHCIStreamContext XHCIStreamContext; -typedef struct XHCIEPContext XHCIEPContext; - #define 
get_field(data, field) \ (((data) >> field##_SHIFT) & field##_MASK) @@ -326,21 +238,6 @@ typedef enum EPType { ET_INTR_IN, } EPType; -typedef struct XHCIRing { - dma_addr_t dequeue; - bool ccs; -} XHCIRing; - -typedef struct XHCIPort { - XHCIState *xhci; - uint32_t portsc; - uint32_t portnr; - USBPort *uport; - uint32_t speedmask; - char name[16]; - MemoryRegion mem; -} XHCIPort; - typedef struct XHCITransfer { XHCIEPContext *epctx; USBPacket packet; @@ -402,101 +299,6 @@ struct XHCIEPContext { QEMUTimer *kick_timer; }; -typedef struct XHCISlot { - bool enabled; - bool addressed; - dma_addr_t ctx; - USBPort *uport; - XHCIEPContext * eps[31]; -} XHCISlot; - -typedef struct XHCIEvent { - TRBType type; - TRBCCode ccode; - uint64_t ptr; - uint32_t length; - uint32_t flags; - uint8_t slotid; - uint8_t epid; -} XHCIEvent; - -typedef struct XHCIInterrupter { - uint32_t iman; - uint32_t imod; - uint32_t erstsz; - uint32_t erstba_low; - uint32_t erstba_high; - uint32_t erdp_low; - uint32_t erdp_high; - - bool msix_used, er_pcs; - - dma_addr_t er_start; - uint32_t er_size; - unsigned int er_ep_idx; - - /* kept for live migration compat only */ - bool er_full_unused; - XHCIEvent ev_buffer[EV_QUEUE]; - unsigned int ev_buffer_put; - unsigned int ev_buffer_get; - -} XHCIInterrupter; - -struct XHCIState { - /*< private >*/ - PCIDevice parent_obj; - /*< public >*/ - - USBBus bus; - MemoryRegion mem; - MemoryRegion mem_cap; - MemoryRegion mem_oper; - MemoryRegion mem_runtime; - MemoryRegion mem_doorbell; - - /* properties */ - uint32_t numports_2; - uint32_t numports_3; - uint32_t numintrs; - uint32_t numslots; - uint32_t flags; - uint32_t max_pstreams_mask; - OnOffAuto msi; - OnOffAuto msix; - - /* Operational Registers */ - uint32_t usbcmd; - uint32_t usbsts; - uint32_t dnctrl; - uint32_t crcr_low; - uint32_t crcr_high; - uint32_t dcbaap_low; - uint32_t dcbaap_high; - uint32_t config; - - USBPort uports[MAX(MAXPORTS_2, MAXPORTS_3)]; - XHCIPort ports[MAXPORTS]; - XHCISlot 
slots[MAXSLOTS]; - uint32_t numports; - - /* Runtime Registers */ - int64_t mfindex_start; - QEMUTimer *mfwrap_timer; - XHCIInterrupter intr[MAXINTRS]; - - XHCIRing cmd_ring; - - bool nec_quirks; -}; - -#define TYPE_XHCI "base-xhci" -#define TYPE_NEC_XHCI "nec-usb-xhci" -#define TYPE_QEMU_XHCI "qemu-xhci" - -#define XHCI(obj) \ - OBJECT_CHECK(XHCIState, (obj), TYPE_XHCI) - typedef struct XHCIEvRingSeg { uint32_t addr_low; uint32_t addr_high; @@ -504,12 +306,6 @@ typedef struct XHCIEvRingSeg { uint32_t rsvd; } XHCIEvRingSeg; -enum xhci_flags { - XHCI_FLAG_SS_FIRST = 1, - XHCI_FLAG_FORCE_PCIE_ENDCAP, - XHCI_FLAG_ENABLE_STREAMS, -}; - static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid, unsigned int streamid); static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid); @@ -3843,18 +3639,6 @@ static const VMStateDescription vmstate_xhci = { } }; -static Property nec_xhci_properties[] = { - DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO), - DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO), - DEFINE_PROP_BIT("superspeed-ports-first", - XHCIState, flags, XHCI_FLAG_SS_FIRST, true), - DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags, - XHCI_FLAG_FORCE_PCIE_ENDCAP, false), - DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS), - DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS), - DEFINE_PROP_END_OF_LIST(), -}; - static Property xhci_properties[] = { DEFINE_PROP_BIT("streams", XHCIState, flags, XHCI_FLAG_ENABLE_STREAMS, true), @@ -3886,23 +3670,6 @@ static const TypeInfo xhci_info = { .abstract = true, }; -static void nec_xhci_class_init(ObjectClass *klass, void *data) -{ - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->props = nec_xhci_properties; - k->vendor_id = PCI_VENDOR_ID_NEC; - k->device_id = PCI_DEVICE_ID_NEC_UPD720200; - k->revision = 0x03; -} - -static const TypeInfo nec_xhci_info = { - .name = TYPE_NEC_XHCI, - .parent = 
TYPE_XHCI, - .class_init = nec_xhci_class_init, -}; - static void qemu_xhci_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -3933,7 +3700,6 @@ static const TypeInfo qemu_xhci_info = { static void xhci_register_types(void) { type_register_static(&xhci_info); - type_register_static(&nec_xhci_info); type_register_static(&qemu_xhci_info); } diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h new file mode 100644 index 0000000000..fc36a4c787 --- /dev/null +++ b/hw/usb/hcd-xhci.h @@ -0,0 +1,226 @@ +/* + * USB xHCI controller emulation + * + * Copyright (c) 2011 Securiforest + * Date: 2011-05-11 ; Author: Hector Martin <hector@marcansoft.com> + * Based on usb-ohci.c, emulates Renesas NEC USB 3.0 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#define TYPE_XHCI "base-xhci" +#define TYPE_NEC_XHCI "nec-usb-xhci" +#define TYPE_QEMU_XHCI "qemu-xhci" + +#define XHCI(obj) \ + OBJECT_CHECK(XHCIState, (obj), TYPE_XHCI) + +#define MAXPORTS_2 15 +#define MAXPORTS_3 15 + +#define MAXPORTS (MAXPORTS_2 + MAXPORTS_3) +#define MAXSLOTS 64 +#define MAXINTRS 16 + +/* Very pessimistic, let's hope it's enough for all cases */ +#define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS) + +typedef struct XHCIState XHCIState; +typedef struct XHCIStreamContext XHCIStreamContext; +typedef struct XHCIEPContext XHCIEPContext; + +enum xhci_flags { + XHCI_FLAG_SS_FIRST = 1, + XHCI_FLAG_FORCE_PCIE_ENDCAP, + XHCI_FLAG_ENABLE_STREAMS, +}; + +typedef enum TRBType { + TRB_RESERVED = 0, + TR_NORMAL, + TR_SETUP, + TR_DATA, + TR_STATUS, + TR_ISOCH, + TR_LINK, + TR_EVDATA, + TR_NOOP, + CR_ENABLE_SLOT, + CR_DISABLE_SLOT, + CR_ADDRESS_DEVICE, + CR_CONFIGURE_ENDPOINT, + CR_EVALUATE_CONTEXT, + CR_RESET_ENDPOINT, + CR_STOP_ENDPOINT, + CR_SET_TR_DEQUEUE, + CR_RESET_DEVICE, + CR_FORCE_EVENT, + CR_NEGOTIATE_BW, + CR_SET_LATENCY_TOLERANCE, + CR_GET_PORT_BANDWIDTH, + CR_FORCE_HEADER, + CR_NOOP, + ER_TRANSFER = 32, + ER_COMMAND_COMPLETE, + ER_PORT_STATUS_CHANGE, + ER_BANDWIDTH_REQUEST, + ER_DOORBELL, + ER_HOST_CONTROLLER, + ER_DEVICE_NOTIFICATION, + ER_MFINDEX_WRAP, + /* vendor specific bits */ + CR_VENDOR_NEC_FIRMWARE_REVISION = 49, + CR_VENDOR_NEC_CHALLENGE_RESPONSE = 50, +} TRBType; + +typedef enum TRBCCode { + CC_INVALID = 0, + CC_SUCCESS, + CC_DATA_BUFFER_ERROR, + CC_BABBLE_DETECTED, + CC_USB_TRANSACTION_ERROR, + CC_TRB_ERROR, + CC_STALL_ERROR, + CC_RESOURCE_ERROR, + CC_BANDWIDTH_ERROR, + CC_NO_SLOTS_ERROR, + CC_INVALID_STREAM_TYPE_ERROR, + CC_SLOT_NOT_ENABLED_ERROR, + CC_EP_NOT_ENABLED_ERROR, + CC_SHORT_PACKET, + CC_RING_UNDERRUN, + CC_RING_OVERRUN, + CC_VF_ER_FULL, + CC_PARAMETER_ERROR, + CC_BANDWIDTH_OVERRUN, + CC_CONTEXT_STATE_ERROR, + CC_NO_PING_RESPONSE_ERROR, + CC_EVENT_RING_FULL_ERROR, + CC_INCOMPATIBLE_DEVICE_ERROR, + CC_MISSED_SERVICE_ERROR, 
+ CC_COMMAND_RING_STOPPED, + CC_COMMAND_ABORTED, + CC_STOPPED, + CC_STOPPED_LENGTH_INVALID, + CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR = 29, + CC_ISOCH_BUFFER_OVERRUN = 31, + CC_EVENT_LOST_ERROR, + CC_UNDEFINED_ERROR, + CC_INVALID_STREAM_ID_ERROR, + CC_SECONDARY_BANDWIDTH_ERROR, + CC_SPLIT_TRANSACTION_ERROR +} TRBCCode; + +typedef struct XHCIRing { + dma_addr_t dequeue; + bool ccs; +} XHCIRing; + +typedef struct XHCIPort { + XHCIState *xhci; + uint32_t portsc; + uint32_t portnr; + USBPort *uport; + uint32_t speedmask; + char name[16]; + MemoryRegion mem; +} XHCIPort; + +typedef struct XHCISlot { + bool enabled; + bool addressed; + dma_addr_t ctx; + USBPort *uport; + XHCIEPContext *eps[31]; +} XHCISlot; + +typedef struct XHCIEvent { + TRBType type; + TRBCCode ccode; + uint64_t ptr; + uint32_t length; + uint32_t flags; + uint8_t slotid; + uint8_t epid; +} XHCIEvent; + +typedef struct XHCIInterrupter { + uint32_t iman; + uint32_t imod; + uint32_t erstsz; + uint32_t erstba_low; + uint32_t erstba_high; + uint32_t erdp_low; + uint32_t erdp_high; + + bool msix_used, er_pcs; + + dma_addr_t er_start; + uint32_t er_size; + unsigned int er_ep_idx; + + /* kept for live migration compat only */ + bool er_full_unused; + XHCIEvent ev_buffer[EV_QUEUE]; + unsigned int ev_buffer_put; + unsigned int ev_buffer_get; + +} XHCIInterrupter; + +struct XHCIState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + USBBus bus; + MemoryRegion mem; + MemoryRegion mem_cap; + MemoryRegion mem_oper; + MemoryRegion mem_runtime; + MemoryRegion mem_doorbell; + + /* properties */ + uint32_t numports_2; + uint32_t numports_3; + uint32_t numintrs; + uint32_t numslots; + uint32_t flags; + uint32_t max_pstreams_mask; + OnOffAuto msi; + OnOffAuto msix; + + /* Operational Registers */ + uint32_t usbcmd; + uint32_t usbsts; + uint32_t dnctrl; + uint32_t crcr_low; + uint32_t crcr_high; + uint32_t dcbaap_low; + uint32_t dcbaap_high; + uint32_t config; + + USBPort uports[MAX(MAXPORTS_2, MAXPORTS_3)]; + 
XHCIPort ports[MAXPORTS]; + XHCISlot slots[MAXSLOTS]; + uint32_t numports; + + /* Runtime Registers */ + int64_t mfindex_start; + QEMUTimer *mfwrap_timer; + XHCIInterrupter intr[MAXINTRS]; + + XHCIRing cmd_ring; + + bool nec_quirks; +}; diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs index 05e7fbb93f..c3ab9097f1 100644 --- a/hw/vfio/Makefile.objs +++ b/hw/vfio/Makefile.objs @@ -1,6 +1,7 @@ ifeq ($(CONFIG_LINUX), y) obj-$(CONFIG_SOFTMMU) += common.o obj-$(CONFIG_PCI) += pci.o pci-quirks.o +obj-$(CONFIG_VFIO_CCW) += ccw.o obj-$(CONFIG_SOFTMMU) += platform.o obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c new file mode 100644 index 0000000000..12d0262336 --- /dev/null +++ b/hw/vfio/ccw.c @@ -0,0 +1,434 @@ +/* + * vfio based subchannel assignment support + * + * Copyright 2017 IBM Corp. + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + * Pierre Morel <pmorel@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or(at + * your option) any version. See the COPYING file in the top-level + * directory. + */ + +#include <linux/vfio.h> +#include <linux/vfio_ccw.h> +#include <sys/ioctl.h> + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "hw/vfio/vfio.h" +#include "hw/vfio/vfio-common.h" +#include "hw/s390x/s390-ccw.h" +#include "hw/s390x/ccw-device.h" +#include "qemu/error-report.h" + +#define TYPE_VFIO_CCW "vfio-ccw" +typedef struct VFIOCCWDevice { + S390CCWDevice cdev; + VFIODevice vdev; + uint64_t io_region_size; + uint64_t io_region_offset; + struct ccw_io_region *io_region; + EventNotifier io_notifier; +} VFIOCCWDevice; + +static void vfio_ccw_compute_needs_reset(VFIODevice *vdev) +{ + vdev->needs_reset = false; +} + +/* + * We don't need vfio_hot_reset_multi and vfio_eoi operations for + * vfio_ccw device now. 
+ */
+struct VFIODeviceOps vfio_ccw_ops = {
+    .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset,
+};
+
+/*
+ * Submit an I/O request through the vfio-ccw I/O region.
+ *
+ * @orb and @scsw are copied into the zeroed I/O region buffer, which is
+ * then written to the device fd at the region offset. @data is the
+ * S390CCWDevice embedded in the VFIOCCWDevice.
+ *
+ * Returns the region's ret_code field after a complete write, or a
+ * negative errno value if the write failed or was short.
+ */
+static int vfio_ccw_handle_request(ORB *orb, SCSW *scsw, void *data)
+{
+    S390CCWDevice *cdev = data;
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+    struct ccw_io_region *region = vcdev->io_region;
+    int ret;
+
+    /* The region areas are filled from/copied to these types verbatim. */
+    QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
+    QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW));
+    QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB));
+
+    memset(region, 0, sizeof(*region));
+
+    memcpy(region->orb_area, orb, sizeof(ORB));
+    memcpy(region->scsw_area, scsw, sizeof(SCSW));
+
+again:
+    ret = pwrite(vcdev->vdev.fd, region,
+                 vcdev->io_region_size, vcdev->io_region_offset);
+    if (ret != vcdev->io_region_size) {
+        /* errno is only meaningful when pwrite() itself reported failure. */
+        if (ret < 0 && errno == EAGAIN) {
+            goto again;
+        }
+        /*
+         * Capture the error code before error_report() can clobber errno.
+         * A short write leaves errno untouched, so map it to -EIO instead
+         * of returning a stale (possibly zero) value.
+         */
+        ret = ret < 0 ? -errno : -EIO;
+        error_report("vfio-ccw: write I/O region failed with errno=%d",
+                     -ret);
+        return ret;
+    }
+
+    return region->ret_code;
+}
+
+static void vfio_ccw_reset(DeviceState *dev)
+{
+    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
+    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+
+    ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
+}
+
+static void vfio_ccw_io_notifier_handler(void *opaque)
+{
+    VFIOCCWDevice *vcdev = opaque;
+    struct ccw_io_region *region = vcdev->io_region;
+    S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev);
+    CcwDevice *ccw_dev = CCW_DEVICE(cdev);
+    SubchDev *sch = ccw_dev->sch;
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    IRB irb;
+    int size;
+
+    if (!event_notifier_test_and_clear(&vcdev->io_notifier)) {
+        return;
+    }
+
+    size = pread(vcdev->vdev.fd, region, vcdev->io_region_size,
+                 vcdev->io_region_offset);
+    if (size == -1) {
+        switch (errno) {
+        case ENODEV:
+            /* Generate a deferred cc 3 condition.
*/ + s->flags |= SCSW_FLAGS_MASK_CC; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND); + goto read_err; + case EFAULT: + /* Memory problem, generate channel data check. */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_DATA_CHECK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + goto read_err; + default: + /* Error, generate channel program check. */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_PROG_CHECK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + goto read_err; + } + } else if (size != vcdev->io_region_size) { + /* Information transfer error, generate channel-control check. */ + s->ctrl &= ~SCSW_ACTL_START_PEND; + s->cstat = SCSW_CSTAT_CHN_CTRL_CHK; + s->ctrl &= ~SCSW_CTRL_MASK_STCTL; + s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | + SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND; + goto read_err; + } + + memcpy(&irb, region->irb_area, sizeof(IRB)); + + /* Update control block via irb. */ + copy_scsw_to_guest(s, &irb.scsw); + + /* If a unit check is pending, copy sense data.
*/
+    if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) &&
+        (p->chars & PMCW_CHARS_MASK_CSENSE)) {
+        memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw));
+    }
+
+read_err:
+    css_inject_io_interrupt(sch);
+}
+
+/*
+ * Set up eventfd-based I/O interrupt notification for @vcdev.
+ *
+ * Queries the irq info for VFIO_CCW_IO_IRQ_INDEX, initialises
+ * vcdev->io_notifier, installs vfio_ccw_io_notifier_handler() as its fd
+ * handler and passes the eventfd to the device with VFIO_DEVICE_SET_IRQS.
+ *
+ * On failure @errp is set; if VFIO_DEVICE_SET_IRQS fails, the fd handler is
+ * removed and the notifier is cleaned up again.
+ */
+static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
+{
+    VFIODevice *vdev = &vcdev->vdev;
+    struct vfio_irq_info *irq_info;
+    struct vfio_irq_set *irq_set;
+    size_t argsz;
+    int32_t *pfd;
+
+    if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
+        error_setg(errp, "vfio: unexpected number of io irqs %u",
+                   vdev->num_irqs);
+        return;
+    }
+
+    /* Size the query buffer by the struct actually passed to the ioctl. */
+    argsz = sizeof(*irq_info);
+    irq_info = g_malloc0(argsz);
+    irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
+    irq_info->argsz = argsz;
+    if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
+              irq_info) < 0 || irq_info->count < 1) {
+        error_setg_errno(errp, errno, "vfio: Error getting irq info");
+        goto out_free_info;
+    }
+
+    if (event_notifier_init(&vcdev->io_notifier, 0)) {
+        error_setg_errno(errp, errno,
+                         "vfio: Unable to init event notifier for IO");
+        goto out_free_info;
+    }
+
+    /* The irq_set header is followed by one eventfd in its data area. */
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *) &irq_set->data;
+
+    *pfd = event_notifier_get_fd(&vcdev->io_notifier);
+    qemu_set_fd_handler(*pfd, vfio_ccw_io_notifier_handler, NULL, vcdev);
+    if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
+        error_setg(errp, "vfio: Failed to set up io notification");
+        qemu_set_fd_handler(*pfd, NULL, NULL, vcdev);
+        event_notifier_cleanup(&vcdev->io_notifier);
+    }
+
+    g_free(irq_set);
+
+out_free_info:
+    g_free(irq_info);
+}
+
+static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
+{
+    struct vfio_irq_set *irq_set;
+    size_t argsz;
+    int32_t *pfd;
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags =
VFIO_IRQ_SET_DATA_EVENTFD | + VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_CCW_IO_IRQ_INDEX; + irq_set->start = 0; + irq_set->count = 1; + pfd = (int32_t *) &irq_set->data; + *pfd = -1; + + if (ioctl(vcdev->vdev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + error_report("vfio: Failed to de-assign device io fd: %m"); + } + + qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), + NULL, NULL, vcdev); + event_notifier_cleanup(&vcdev->io_notifier); + + g_free(irq_set); +} + +static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) +{ + VFIODevice *vdev = &vcdev->vdev; + struct vfio_region_info *info; + int ret; + + /* Sanity check device */ + if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) { + error_setg(errp, "vfio: Um, this isn't a vfio-ccw device"); + return; + } + + if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) { + error_setg(errp, "vfio: Unexpected number of the I/O region %u", + vdev->num_regions); + return; + } + + ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info); + if (ret) { + error_setg_errno(errp, -ret, "vfio: Error getting config info"); + return; + } + + vcdev->io_region_size = info->size; + if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { + error_setg(errp, "vfio: Unexpected size of the I/O region"); + g_free(info); + return; + } + + vcdev->io_region_offset = info->offset; + vcdev->io_region = g_malloc0(info->size); + + g_free(info); +} + +static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) +{ + g_free(vcdev->io_region); +} + +static void vfio_put_device(VFIOCCWDevice *vcdev) +{ + g_free(vcdev->vdev.name); + vfio_put_base_device(&vcdev->vdev); +} + +static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) +{ + char *tmp, group_path[PATH_MAX]; + ssize_t len; + int groupid; + + tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", + cdev->hostid.cssid, cdev->hostid.ssid, + cdev->hostid.devid, cdev->mdevid); + len = readlink(tmp, group_path, sizeof(group_path)); + 
g_free(tmp);
+
+    if (len <= 0 || len >= sizeof(group_path)) {
+        error_setg(errp, "vfio: no iommu_group found");
+        return NULL;
+    }
+
+    group_path[len] = 0;
+
+    if (sscanf(basename(group_path), "%d", &groupid) != 1) {
+        error_setg(errp, "vfio: failed to read %s", group_path);
+        return NULL;
+    }
+
+    return vfio_get_group(groupid, &address_space_memory, errp);
+}
+
+/*
+ * Realize a vfio-ccw device.
+ *
+ * Runs the S390CCWDeviceClass realize hook (if any), looks up the VFIO
+ * group, rejects a subchannel whose name is already present in the group's
+ * device list, then acquires the VFIO device, its I/O region and the I/O
+ * notifier. Any failure unwinds what was set up so far, in reverse order,
+ * and is propagated through @errp.
+ */
+static void vfio_ccw_realize(DeviceState *dev, Error **errp)
+{
+    VFIODevice *vbasedev;
+    VFIOGroup *group;
+    CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
+    S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
+    VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
+    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
+    Error *err = NULL;
+
+    /* Call the class init function for subchannel. */
+    if (cdc->realize) {
+        cdc->realize(cdev, vcdev->vdev.sysfsdev, &err);
+        if (err) {
+            goto out_err_propagate;
+        }
+    }
+
+    group = vfio_ccw_get_group(cdev, &err);
+    if (!group) {
+        goto out_group_err;
+    }
+
+    vcdev->vdev.ops = &vfio_ccw_ops;
+    vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
+    vcdev->vdev.name = g_strdup_printf("%x.%x.%04x", cdev->hostid.cssid,
+                                       cdev->hostid.ssid, cdev->hostid.devid);
+    QLIST_FOREACH(vbasedev, &group->device_list, next) {
+        if (strcmp(vbasedev->name, vcdev->vdev.name) == 0) {
+            error_setg(&err, "vfio: subchannel %s has already been attached",
+                       vcdev->vdev.name);
+            goto out_device_err;
+        }
+    }
+
+    if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) {
+        goto out_device_err;
+    }
+
+    vfio_ccw_get_region(vcdev, &err);
+    if (err) {
+        goto out_region_err;
+    }
+
+    vfio_ccw_register_io_notifier(vcdev, &err);
+    if (err) {
+        goto out_notifier_err;
+    }
+
+    return;
+
+out_notifier_err:
+    vfio_ccw_put_region(vcdev);
+out_region_err:
+    vfio_put_device(vcdev);
+    /*
+     * vfio_put_device() has already freed the name; clear the pointer so
+     * the fall-through below does not free it a second time.
+     */
+    vcdev->vdev.name = NULL;
+out_device_err:
+    /* The name is allocated before vfio_get_device(); don't leak it. */
+    g_free(vcdev->vdev.name);
+    vcdev->vdev.name = NULL;
+    vfio_put_group(group);
+out_group_err:
+    if (cdc->unrealize) {
+        cdc->unrealize(cdev, NULL);
+    }
+out_err_propagate:
+    error_propagate(errp, err);
+}
+
+static void
vfio_ccw_unrealize(DeviceState *dev, Error **errp) +{ + CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev); + S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev); + VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); + VFIOGroup *group = vcdev->vdev.group; + + vfio_ccw_unregister_io_notifier(vcdev); + vfio_ccw_put_region(vcdev); + vfio_put_device(vcdev); + vfio_put_group(group); + + if (cdc->unrealize) { + cdc->unrealize(cdev, errp); + } +} + +static Property vfio_ccw_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vfio_ccw_vmstate = { + .name = TYPE_VFIO_CCW, + .unmigratable = 1, +}; + +static void vfio_ccw_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + dc->props = vfio_ccw_properties; + dc->vmsd = &vfio_ccw_vmstate; + dc->desc = "VFIO-based subchannel assignment"; + dc->realize = vfio_ccw_realize; + dc->unrealize = vfio_ccw_unrealize; + dc->reset = vfio_ccw_reset; + + cdc->handle_request = vfio_ccw_handle_request; +} + +static const TypeInfo vfio_ccw_info = { + .name = TYPE_VFIO_CCW, + .parent = TYPE_S390_CCW, + .instance_size = sizeof(VFIOCCWDevice), + .class_init = vfio_ccw_class_init, +}; + +static void register_vfio_ccw_type(void) +{ + type_register_static(&vfio_ccw_info); +} + +type_init(register_vfio_ccw_type) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index a8f12eeb35..b9abe77f5a 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -502,7 +502,7 @@ static void vfio_listener_region_add(MemoryListener *listener, QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); - memory_region_iommu_replay(giommu->iommu, &giommu->n, false); + memory_region_iommu_replay(giommu->iommu, 
&giommu->n); return; } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b87a176770..dde094abb4 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -162,11 +162,11 @@ fail: } static int process_message_reply(struct vhost_dev *dev, - VhostUserMsg msg) + const VhostUserMsg *msg) { VhostUserMsg msg_reply; - if ((msg.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { + if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { return 0; } @@ -174,10 +174,10 @@ static int process_message_reply(struct vhost_dev *dev, return -1; } - if (msg_reply.request != msg.request) { + if (msg_reply.request != msg->request) { error_report("Received unexpected msg type." "Expected %d received %d", - msg.request, msg_reply.request); + msg->request, msg_reply.request); return -1; } @@ -324,7 +324,7 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, } if (reply_supported) { - return process_message_reply(dev, msg); + return process_message_reply(dev, &msg); } return 0; @@ -716,7 +716,7 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) /* If reply_ack supported, slave has to ack specified MTU is valid */ if (reply_supported) { - return process_message_reply(dev, msg); + return process_message_reply(dev, &msg); } return 0; diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c index 2aeaf1fbc9..0c5c9cde1c 100644 --- a/hw/watchdog/watchdog.c +++ b/hw/watchdog/watchdog.c @@ -110,7 +110,7 @@ void watchdog_perform_action(void) switch (watchdog_action) { case WDT_RESET: /* same as 'system_reset' in monitor */ qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_RESET, &error_abort); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); break; case WDT_SHUTDOWN: /* same as 'system_powerdown' in monitor */ diff --git a/hw/xenpv/xen_domainbuild.c b/hw/xenpv/xen_domainbuild.c index 457a8976c3..c89ced2e88 100644 --- a/hw/xenpv/xen_domainbuild.c +++ b/hw/xenpv/xen_domainbuild.c @@ -148,7 +148,7 @@ static 
void xen_domain_poll(void *opaque) return; quit: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } static int xen_domain_watcher(void) diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c index 11176e26bd..4636f8e934 100644 --- a/hw/xtensa/xtfpga.c +++ b/hw/xtensa/xtfpga.c @@ -100,7 +100,7 @@ static void lx60_fpga_write(void *opaque, hwaddr addr, case 0x10: /*board reset*/ if (val == 0xdead) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; } diff --git a/include/block/block_int.h b/include/block/block_int.h index 8d3724cce6..e5eb473e53 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -682,6 +682,9 @@ int get_tmp_filename(char *filename, int size); BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, const char *filename); +void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, + QDict *options); + /** * bdrv_add_before_write_notifier: diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 9e906f7d7e..09c7c694b5 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -235,14 +235,6 @@ void block_job_complete(BlockJob *job, Error **errp); BlockJobInfo *block_job_query(BlockJob *job, Error **errp); /** - * block_job_pause: - * @job: The job to be paused. - * - * Asynchronously pause the specified job. - */ -void block_job_pause(BlockJob *job); - -/** * block_job_user_pause: * @job: The job to be paused. * @@ -260,14 +252,6 @@ void block_job_user_pause(BlockJob *job); bool block_job_user_paused(BlockJob *job); /** - * block_job_resume: - * @job: The job to be resumed. - * - * Resume the specified job. Must be paired with a preceding block_job_pause. - */ -void block_job_resume(BlockJob *job); - -/** * block_job_user_resume: * @job: The job to be resumed. 
* diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 3f86cc5acc..f13ad05c0d 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -44,9 +44,6 @@ struct BlockJobDriver { /** Optional callback for job types that support setting a speed limit */ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); - /** Optional callback for job types that need to forward I/O status reset */ - void (*iostatus_reset)(BlockJob *job); - /** Mandatory: Entrypoint for the Coroutine. */ CoroutineEntry *start; @@ -159,21 +156,26 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); void block_job_yield(BlockJob *job); /** - * block_job_ref: - * @bs: The block device. + * block_job_pause_all: + * + * Asynchronously pause all jobs. + */ +void block_job_pause_all(void); + +/** + * block_job_resume_all: * - * Grab a reference to the block job. Should be paired with block_job_unref. + * Resume all block jobs. Must be paired with a preceding block_job_pause_all. */ -void block_job_ref(BlockJob *job); +void block_job_resume_all(void); /** - * block_job_unref: + * block_job_early_fail: * @bs: The block device. * - * Release reference to the block job and release resources if it is the last - * reference. + * The block job could not be started, free it. */ -void block_job_unref(BlockJob *job); +void block_job_early_fail(BlockJob *job); /** * block_job_completed: @@ -239,7 +241,8 @@ typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque); * @fn: The function to run in the main loop * @opaque: The opaque value that is passed to @fn * - * Execute a given function in the main loop with the BlockDriverState + * This function must be called by the main job coroutine just before it + * returns. @fn is executed in the main loop with the BlockDriverState * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and * anything that uses bdrv_drain_all() in the main loop. 
* diff --git a/include/exec/memory.h b/include/exec/memory.h index 99e0f54d86..bfdc685f24 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -185,8 +185,14 @@ struct MemoryRegionOps { typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps; struct MemoryRegionIOMMUOps { - /* Return a TLB entry that contains a given address. */ - IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write); + /* + * Return a TLB entry that contains a given address. Flag should + * be the access permission of this translation operation. We can + * set flag to IOMMU_NONE to mean that we don't need any + * read/write permission checks, like, when for region replay. + */ + IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, + IOMMUAccessFlags flag); /* Returns minimum supported page size */ uint64_t (*get_min_page_size)(MemoryRegion *iommu); /* Called when IOMMU Notifier flag changed */ @@ -725,11 +731,8 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, * * @mr: the memory region to observe * @n: the notifier to which to replay iommu mappings - * @is_write: Whether to treat the replay as a translate "write" - * through the iommu */ -void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, - bool is_write); +void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n); /** * memory_region_iommu_replay_all: replay existing IOMMU translations diff --git a/include/hw/compat.h b/include/hw/compat.h index 55b176507a..400c64b318 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -6,6 +6,14 @@ .driver = "pci-bridge",\ .property = "shpc",\ .value = "off",\ + },{\ + .driver = "intel-iommu",\ + .property = "pt",\ + .value = "off",\ + },{\ + .driver = "virtio-net-device",\ + .property = "x-mtu-bypass-backend",\ + .value = "off",\ }, #define HW_COMPAT_2_8 \ diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 361c07cdc6..ef89c0c646 100644 --- a/include/hw/i386/x86-iommu.h +++ 
b/include/hw/i386/x86-iommu.h @@ -74,6 +74,7 @@ struct X86IOMMUState { SysBusDevice busdev; bool intr_supported; /* Whether vIOMMU supports IR */ bool dt_supported; /* Whether vIOMMU supports DT */ + bool pt_supported; /* Whether vIOMMU supports pass-through */ IommuType type; /* IOMMU type - AMD/Intel */ QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 1c2e970da2..38470b2f0e 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -123,6 +123,9 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid); PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr); +/* PCI release callback. */ +void spapr_phb_remove_pci_device_cb(DeviceState *dev); + /* VFIO EEH hooks */ #ifdef CONFIG_LINUX bool spapr_phb_eeh_available(sPAPRPHBState *sphb); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 5802f888c3..98fb78b012 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -32,6 +32,7 @@ struct sPAPRRTCState { int64_t ns_offset; }; +typedef struct sPAPRDIMMState sPAPRDIMMState; typedef struct sPAPRMachineClass sPAPRMachineClass; #define TYPE_SPAPR_MACHINE "spapr-machine" @@ -104,6 +105,11 @@ struct sPAPRMachineState { /* RTAS state */ QTAILQ_HEAD(, sPAPRConfigureConnectorState) ccs_list; + /* Pending DIMM unplug cache. It is populated when a LMB + * unplug starts. It can be regenerated if a migration + * occurs during the unplug process. 
*/ + QTAILQ_HEAD(, sPAPRDIMMState) pending_dimm_unplugs; + /*< public >*/ char *kvm_type; MemoryHotplugState hotplug_memory; @@ -598,7 +604,6 @@ sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn); struct sPAPREventLogEntry { int log_type; - bool exception; void *data; QTAILQ_ENTRY(sPAPREventLogEntry) next; }; @@ -610,6 +615,7 @@ int spapr_h_cas_compose_response(sPAPRMachineState *sm, sPAPROptionVector *ov5_updates); void close_htab_fd(sPAPRMachineState *spapr); void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr); +void spapr_free_hpt(sPAPRMachineState *spapr); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn); void spapr_tce_table_enable(sPAPRTCETable *tcet, uint32_t page_shift, uint64_t bus_offset, @@ -636,6 +642,10 @@ void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, sPAPRMachineState *spapr); +/* CPU and LMB DRC release callbacks. */ +void spapr_core_release(DeviceState *dev); +void spapr_lmb_release(DeviceState *dev); + /* rtas-configure-connector state */ struct sPAPRConfigureConnectorState { uint32_t drc_index; diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index 5524247cdc..813b9ffd60 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -130,8 +130,6 @@ typedef enum { SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003, } sPAPRDRCCResponse; -typedef void (spapr_drc_detach_cb)(DeviceState *d, void *opaque); - typedef struct sPAPRDRConnector { /*< private >*/ DeviceState parent; @@ -158,8 +156,6 @@ typedef struct sPAPRDRConnector { /* device pointer, via link property */ DeviceState *dev; - spapr_drc_detach_cb *detach_cb; - void *detach_cb_opaque; } sPAPRDRConnector; typedef struct sPAPRDRConnectorClass { @@ -188,9 +184,7 @@ typedef struct sPAPRDRConnectorClass { /* QEMU interfaces for managing hotplug operations */ void (*attach)(sPAPRDRConnector *drc, DeviceState *d, void *fdt, int 
fdt_start_offset, bool coldplug, Error **errp); - void (*detach)(sPAPRDRConnector *drc, DeviceState *d, - spapr_drc_detach_cb *detach_cb, - void *detach_cb_opaque, Error **errp); + void (*detach)(sPAPRDRConnector *drc, DeviceState *d, Error **errp); bool (*release_pending)(sPAPRDRConnector *drc); void (*set_signalled)(sPAPRDRConnector *drc); } sPAPRDRConnectorClass; diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 05e6acbb35..a3073f9053 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -81,7 +81,6 @@ struct ICPState { uint8_t pending_priority; uint8_t mfrr; qemu_irq output; - bool cap_irq_xics_enabled; XICSFabric *xics; }; @@ -206,6 +205,6 @@ void icp_resend(ICPState *ss); typedef struct sPAPRMachineState sPAPRMachineState; int xics_kvm_init(sPAPRMachineState *spapr, Error **errp); -int xics_spapr_init(sPAPRMachineState *spapr, Error **errp); +void xics_spapr_init(sPAPRMachineState *spapr); #endif /* XICS_H */ diff --git a/include/hw/s390x/css-bridge.h b/include/hw/s390x/css-bridge.h index 5a0203be5f..cf0860432a 100644 --- a/include/hw/s390x/css-bridge.h +++ b/include/hw/s390x/css-bridge.h @@ -28,6 +28,7 @@ typedef struct VirtualCssBridge { /* virtual css bus type */ typedef struct VirtualCssBus { BusState parent_obj; + bool squash_mcss; } VirtualCssBus; #define TYPE_VIRTUAL_CSS_BUS "virtual-css-bus" diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h index e61fa74d9b..596a2f2ef3 100644 --- a/include/hw/s390x/css.h +++ b/include/hw/s390x/css.h @@ -91,10 +91,29 @@ struct SubchDev { /* transport-provided data: */ int (*ccw_cb) (SubchDev *, CCW1); void (*disable_cb)(SubchDev *); + int (*do_subchannel_work) (SubchDev *, ORB *); SenseId id; void *driver_data; }; +/* + * Identify a device within the channel subsystem. + * Note that this can be used to identify either the subchannel or + * the attached I/O device, as there's always one I/O device per + * subchannel. 
+ */ +typedef struct CssDevId { + uint8_t cssid; + uint8_t ssid; + uint16_t devid; + bool valid; +} CssDevId; + +extern PropertyInfo css_devid_propinfo; + +#define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId) + typedef struct IndAddr { hwaddr addr; uint64_t map; @@ -116,8 +135,11 @@ bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno); void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno, SubchDev *sch); void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type); +int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id); unsigned int css_find_free_chpid(uint8_t cssid); uint16_t css_build_subchannel_id(SubchDev *sch); +void copy_scsw_to_guest(SCSW *dest, const SCSW *src); +void css_inject_io_interrupt(SubchDev *sch); void css_reset(void); void css_reset_sch(SubchDev *sch); void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid); @@ -127,6 +149,9 @@ void css_generate_chp_crws(uint8_t cssid, uint8_t chpid); void css_generate_css_crws(uint8_t cssid); void css_clear_sei_pending(void); void css_adapter_interrupt(uint8_t isc); +int s390_ccw_cmd_request(ORB *orb, SCSW *scsw, void *data); +int do_subchannel_work_virtual(SubchDev *sub, ORB *orb); +int do_subchannel_work_passthrough(SubchDev *sub, ORB *orb); typedef enum { CSS_IO_ADAPTER_VIRTIO = 0, @@ -164,23 +189,6 @@ int css_do_rsch(SubchDev *sch); int css_do_rchp(uint8_t cssid, uint8_t chpid); bool css_present(uint8_t cssid); #endif -/* - * Identify a device within the channel subsystem. - * Note that this can be used to identify either the subchannel or - * the attached I/O device, as there's always one I/O device per - * subchannel. 
- */ -typedef struct CssDevId { - uint8_t cssid; - uint8_t ssid; - uint16_t devid; - bool valid; -} CssDevId; - -extern PropertyInfo css_devid_propinfo; - -#define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \ - DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId) extern PropertyInfo css_devid_ro_propinfo; @@ -190,16 +198,25 @@ extern PropertyInfo css_devid_ro_propinfo; /** * Create a subchannel for the given bus id. * - * If @p bus_id is valid, verify that it uses the virtual channel - * subsystem id and is not already in use, and find a free subchannel - * id for it. If @p bus_id is not valid, find a free subchannel id and - * device number across all subchannel sets. If either of the former - * actions succeed, allocate a subchannel structure, initialise it - * with the bus id, subchannel id and device number, register it with - * the CSS and return it. Otherwise return NULL. + * If @p bus_id is valid, and @p squash_mcss is true, verify that it is + * not already in use in the default css, and find a free devno from the + * default css image for it. + * If @p bus_id is valid, and @p squash_mcss is false, verify that it is + * not already in use, and find a free devno for it. + * If @p bus_id is not valid, and if either @p squash_mcss or @p is_virtual + * is true, find a free subchannel id and device number across all + * subchannel sets from the default css image. + * If @p bus_id is not valid, and if both @p squash_mcss and @p is_virtual + * are false, find a non-full css image and find a free subchannel id and + * device number across all subchannel sets from it. + * + * If either of the former actions succeed, allocate a subchannel structure, + * initialise it with the bus id, subchannel id and device number, register + * it with the CSS and return it. Otherwise return NULL. * * The caller becomes owner of the returned subchannel structure and * is responsible for unregistering and freeing it. 
*/ -SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp); +SubchDev *css_create_sch(CssDevId bus_id, bool is_virtual, bool squash_mcss, + Error **errp); #endif diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h new file mode 100644 index 0000000000..9f45cf1347 --- /dev/null +++ b/include/hw/s390x/s390-ccw.h @@ -0,0 +1,39 @@ +/* + * s390 CCW Assignment Support + * + * Copyright 2017 IBM Corp. + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_CCW_H +#define HW_S390_CCW_H + +#include "hw/s390x/ccw-device.h" + +#define TYPE_S390_CCW "s390-ccw" +#define S390_CCW_DEVICE(obj) \ + OBJECT_CHECK(S390CCWDevice, (obj), TYPE_S390_CCW) +#define S390_CCW_DEVICE_CLASS(klass) \ + OBJECT_CLASS_CHECK(S390CCWDeviceClass, (klass), TYPE_S390_CCW) +#define S390_CCW_DEVICE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(S390CCWDeviceClass, (obj), TYPE_S390_CCW) + +typedef struct S390CCWDevice { + CcwDevice parent_obj; + CssDevId hostid; + char *mdevid; +} S390CCWDevice; + +typedef struct S390CCWDeviceClass { + CCWDeviceClass parent_class; + void (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp); + void (*unrealize)(S390CCWDevice *dev, Error **errp); + int (*handle_request) (ORB *, SCSW *, void *); +} S390CCWDeviceClass; + +#endif diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index 7b8a3e4d74..3027555f6d 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -29,6 +29,7 @@ typedef struct S390CcwMachineState { bool aes_key_wrap; bool dea_key_wrap; uint8_t loadparm[8]; + bool s390_squash_mcss; } S390CcwMachineState; typedef struct S390CcwMachineClass { diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 
c582de18c9..9521013d52 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -45,6 +45,7 @@ enum { VFIO_DEVICE_TYPE_PCI = 0, VFIO_DEVICE_TYPE_PLATFORM = 1, + VFIO_DEVICE_TYPE_CCW = 2, }; typedef struct VFIOMmap { diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 1eec9a2da3..602b4868d4 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -97,6 +97,7 @@ typedef struct VirtIONet { QEMUTimer *announce_timer; int announce_counter; bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; } VirtIONet; void virtio_net_set_netclient_name(VirtIONet *n, const char *name, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 7b6edbafd7..80c45c321e 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -79,6 +79,7 @@ struct VirtIODevice uint16_t queue_sel; uint64_t guest_features; uint64_t host_features; + uint64_t backend_features; size_t config_len; void *config; uint16_t config_vector; diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h index eca9a2ca22..d0c6e0a079 100644 --- a/include/standard-headers/asm-x86/hyperv.h +++ b/include/standard-headers/asm-x86/hyperv.h @@ -124,7 +124,7 @@ * Recommend using hypercall for address space switches rather * than MOV to CR3 instruction */ -#define HV_X64_MWAIT_RECOMMENDED (1 << 0) +#define HV_X64_AS_SWITCH_RECOMMENDED (1 << 0) /* Recommend using hypercall for local TLB flushes rather * than INVLPG or MOV to CR3 instructions */ #define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1) @@ -148,6 +148,11 @@ #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) /* + * Virtual APIC support + */ +#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9) + +/* * Crash notification flag. 
*/ #define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index c8b3338375..29d463af37 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -641,6 +641,7 @@ * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ #define KEY_DATA 0x277 +#define KEY_ONSCREEN_KEYBOARD 0x278 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h index b472b8530c..666e201ddb 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -58,9 +58,14 @@ struct input_id { * Note that input core does not clamp reported values to the * [minimum, maximum] limits, such task is left to userspace. * - * Resolution for main axes (ABS_X, ABS_Y, ABS_Z) is reported in - * units per millimeter (units/mm), resolution for rotational axes - * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian. + * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z) + * is reported in units per millimeter (units/mm), resolution + * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported + * in units per radian. + * When INPUT_PROP_ACCELEROMETER is set the resolution changes. + * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in + * in units per g (units/g) and in units per degree per second + * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ). 
*/ struct input_absinfo { int32_t value; diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 634c9c44ed..d56bb00510 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -114,7 +114,7 @@ #define PCI_SUBSYSTEM_ID 0x2e #define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ #define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) +#define PCI_ROM_ADDRESS_MASK (~0x7ffU) #define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ @@ -630,6 +630,7 @@ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ +#define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 900bdcb45a..629c8c648b 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -51,17 +51,6 @@ int os_mlock(void); typedef struct timeval qemu_timeval; #define qemu_gettimeofday(tp) gettimeofday(tp, NULL) -#ifndef CONFIG_UTIMENSAT -#ifndef UTIME_NOW -# define UTIME_NOW ((1l << 30) - 1l) -#endif -#ifndef UTIME_OMIT -# define UTIME_OMIT ((1l << 30) - 2l) -#endif -#endif -typedef struct timespec qemu_timespec; -int qemu_utimens(const char *path, const qemu_timespec *times); - bool is_daemonized(void); /** diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index f1c0712795..fa14d0ec0b 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -13,6 +13,7 @@ */ #include "qapi-types.h" +#include "sysemu.h" /* replay clock kinds */ enum ReplayClockKind { @@ -98,7 +99,7 @@ 
int64_t replay_read_clock(ReplayClockKind kind); /* Events */ /*! Called when qemu shutdown is requested. */ -void replay_shutdown_request(void); +void replay_shutdown_request(ShutdownCause cause); /*! Should be called at check points in the execution. These check points are skipped, if they were not met. Saves checkpoint in the SAVE mode and validates in the PLAY mode. diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index ed8fe3bf34..723c8dcb1a 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -33,8 +33,26 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); void vm_state_notify(int running, RunState state); -#define VMRESET_SILENT false -#define VMRESET_REPORT true +/* Enumeration of various causes for shutdown. */ +typedef enum ShutdownCause { + SHUTDOWN_CAUSE_NONE, /* No shutdown request pending */ + SHUTDOWN_CAUSE_HOST_ERROR, /* An error prevents further use of guest */ + SHUTDOWN_CAUSE_HOST_QMP, /* Reaction to a QMP command, like 'quit' */ + SHUTDOWN_CAUSE_HOST_SIGNAL, /* Reaction to a signal, such as SIGINT */ + SHUTDOWN_CAUSE_HOST_UI, /* Reaction to UI event, like window close */ + SHUTDOWN_CAUSE_GUEST_SHUTDOWN,/* Guest shutdown/suspend request, via + ACPI or other hardware-specific means */ + SHUTDOWN_CAUSE_GUEST_RESET, /* Guest reset request, and command line + turns that into a shutdown */ + SHUTDOWN_CAUSE_GUEST_PANIC, /* Guest panicked, and command line turns + that into a shutdown */ + SHUTDOWN_CAUSE__MAX, +} ShutdownCause; + +static inline bool shutdown_caused_by_guest(ShutdownCause cause) +{ + return cause >= SHUTDOWN_CAUSE_GUEST_SHUTDOWN; +} void vm_start(void); int vm_prepare_start(void); @@ -49,23 +67,23 @@ typedef enum WakeupReason { QEMU_WAKEUP_REASON_OTHER, } WakeupReason; -void qemu_system_reset_request(void); +void qemu_system_reset_request(ShutdownCause reason); void qemu_system_suspend_request(void); void 
qemu_register_suspend_notifier(Notifier *notifier); void qemu_system_wakeup_request(WakeupReason reason); void qemu_system_wakeup_enable(WakeupReason reason, bool enabled); void qemu_register_wakeup_notifier(Notifier *notifier); -void qemu_system_shutdown_request(void); +void qemu_system_shutdown_request(ShutdownCause reason); void qemu_system_powerdown_request(void); void qemu_register_powerdown_notifier(Notifier *notifier); void qemu_system_debug_request(void); void qemu_system_vmstop_request(RunState reason); void qemu_system_vmstop_request_prepare(void); bool qemu_vmstop_requested(RunState *r); -int qemu_shutdown_requested_get(void); -int qemu_reset_requested_get(void); +ShutdownCause qemu_shutdown_requested_get(void); +ShutdownCause qemu_reset_requested_get(void); void qemu_system_killed(int signal, pid_t pid); -void qemu_system_reset(bool report); +void qemu_system_reset(ShutdownCause reason); void qemu_system_guest_panicked(GuestPanicInformation *info); void qemu_add_exit_notifier(Notifier *notify); @@ -79,53 +97,6 @@ int load_vmstate(const char *name, Error **errp); void qemu_announce_self(void); -/* Subcommands for QEMU_VM_COMMAND */ -enum qemu_vm_cmd { - MIG_CMD_INVALID = 0, /* Must be 0 */ - MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */ - MIG_CMD_PING, /* Request a PONG on the RP */ - - MIG_CMD_POSTCOPY_ADVISE, /* Prior to any page transfers, just - warn we might want to do PC */ - MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming - pages as it's running. */ - MIG_CMD_POSTCOPY_RUN, /* Start execution */ - - MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that - were previously sent during - precopy but are dirty. 
*/ - MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */ - MIG_CMD_MAX -}; - -#define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24) - -bool qemu_savevm_state_blocked(Error **errp); -void qemu_savevm_state_begin(QEMUFile *f); -void qemu_savevm_state_header(QEMUFile *f); -int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy); -void qemu_savevm_state_cleanup(void); -void qemu_savevm_state_complete_postcopy(QEMUFile *f); -void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only); -void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, - uint64_t *res_non_postcopiable, - uint64_t *res_postcopiable); -void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command, - uint16_t len, uint8_t *data); -void qemu_savevm_send_ping(QEMUFile *f, uint32_t value); -void qemu_savevm_send_open_return_path(QEMUFile *f); -int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len); -void qemu_savevm_send_postcopy_advise(QEMUFile *f); -void qemu_savevm_send_postcopy_listen(QEMUFile *f); -void qemu_savevm_send_postcopy_run(QEMUFile *f); - -void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, - uint16_t len, - uint64_t *start_list, - uint64_t *length_list); - -int qemu_loadvm_state(QEMUFile *f); - extern int autostart; typedef enum { @@ -2052,7 +2052,7 @@ int kvm_cpu_exec(CPUState *cpu) break; case KVM_EXIT_SHUTDOWN: DPRINTF("shutdown\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); ret = EXCP_INTERRUPT; break; case KVM_EXIT_UNKNOWN: @@ -2066,11 +2066,11 @@ int kvm_cpu_exec(CPUState *cpu) case KVM_EXIT_SYSTEM_EVENT: switch (run->system_event.type) { case KVM_SYSTEM_EVENT_SHUTDOWN: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); ret = EXCP_INTERRUPT; break; case KVM_SYSTEM_EVENT_RESET: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); ret = EXCP_INTERRUPT; break; case KVM_SYSTEM_EVENT_CRASH: 
diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 1101d55d2f..7258a00225 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -27,6 +27,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -114,6 +116,8 @@ struct kvm_debug_exit_arch { }; struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -192,13 +196,17 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h index 13a74afd02..8d5ceaee1a 100644 --- a/linux-headers/asm-arm/unistd-common.h +++ b/linux-headers/asm-arm/unistd-common.h @@ -353,5 +353,6 @@ #define __NR_pkey_mprotect (__NR_SYSCALL_BASE + 394) #define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395) #define __NR_pkey_free (__NR_SYSCALL_BASE + 396) +#define __NR_statx (__NR_SYSCALL_BASE + 397) #endif /* _ASM_ARM_UNISTD_COMMON_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 651ec30040..31bb1dd924 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -39,6 +39,8 @@ #define 
__KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -143,6 +145,8 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW (1 << 17) struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -212,13 +216,17 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 4edbe4bb0e..07fbeb9278 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -29,6 +29,9 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_GUEST_DEBUG +/* Not always available, but if it is, this is the correct offset. 
*/ +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + struct kvm_regs { __u64 pc; __u64 cr; diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h index 598043c7b6..a1786340e9 100644 --- a/linux-headers/asm-powerpc/unistd.h +++ b/linux-headers/asm-powerpc/unistd.h @@ -393,5 +393,6 @@ #define __NR_preadv2 380 #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 +#define __NR_statx 383 #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index ac63ca630b..243f195776 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -26,6 +26,8 @@ #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 +#define KVM_DEV_FLIC_AISM 9 +#define KVM_DEV_FLIC_AIRQ_INJECT 10 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -41,7 +43,14 @@ struct kvm_s390_io_adapter { __u8 isc; __u8 maskable; __u8 swap; - __u8 pad; + __u8 flags; +}; + +#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01 + +struct kvm_s390_ais_req { + __u8 isc; + __u16 mode; }; #define KVM_S390_IO_ADAPTER_MASK 1 @@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_FEAT_CMMA 10 #define KVM_S390_VM_CPU_FEAT_PFMFI 11 #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +#define KVM_S390_VM_CPU_FEAT_KSS 13 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; @@ -131,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 kmo[16]; /* with MSA4 */ __u8 pcc[16]; /* with MSA4 */ __u8 ppno[16]; /* with MSA5 */ - __u8 reserved[1824]; + __u8 kma[16]; /* with MSA8 */ + __u8 reserved[1808]; }; /* kvm attributes for crypto */ @@ -197,6 +208,10 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_GSCB (1UL << 9) +/* length and alignment of the sdnx as a power of two */ +#define SDNXC 8 
+#define SDNXL (1UL << SDNXC) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -217,8 +232,16 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 padding1[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ + __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { + __u64 reserved1[2]; + __u64 gscb[4]; + }; + }; }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/linux-headers/asm-s390/unistd.h b/linux-headers/asm-s390/unistd.h index 8a404fd3a1..65e7e59dbb 100644 --- a/linux-headers/asm-s390/unistd.h +++ b/linux-headers/asm-s390/unistd.h @@ -313,7 +313,9 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -#define NR_syscalls 378 +#define __NR_s390_guarded_storage 378 +#define __NR_statx 379 +#define NR_syscalls 380 /* * There are some system calls that are not present on 64 bit, some diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 739c0c5940..c2824d02ba 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -9,6 +9,9 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 + #define DE_VECTOR 0 #define DB_VECTOR 1 #define BP_VECTOR 3 diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index d45ea28e15..8a206df454 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -380,5 +380,7 @@ #define __NR_pkey_mprotect 380 #define __NR_pkey_alloc 381 #define __NR_pkey_free 382 +#define __NR_statx 383 +#define __NR_arch_prctl 384 #endif /* _ASM_X86_UNISTD_32_H */ diff --git 
a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index e22db9171e..336c2e4aaa 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -333,5 +333,6 @@ #define __NR_pkey_mprotect 329 #define __NR_pkey_alloc 330 #define __NR_pkey_free 331 +#define __NR_statx 332 #endif /* _ASM_X86_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 84e58b202d..cb98a52998 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -286,6 +286,7 @@ #define __NR_pkey_mprotect (__X32_SYSCALL_BIT + 329) #define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330) #define __NR_pkey_free (__X32_SYSCALL_BIT + 331) +#define __NR_statx (__X32_SYSCALL_BIT + 332) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 4e082a81b4..d2892da172 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -702,6 +702,10 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_PPC_HV 1 #define KVM_VM_PPC_PR 2 +/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ +#define KVM_VM_MIPS_TE 0 +#define KVM_VM_MIPS_VZ 1 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* @@ -883,6 +887,14 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_MMU_RADIX 134 #define KVM_CAP_PPC_MMU_HASH_V3 135 #define KVM_CAP_IMMEDIATE_EXIT 136 +#define KVM_CAP_MIPS_VZ 137 +#define KVM_CAP_MIPS_TE 138 +#define KVM_CAP_MIPS_64BIT 139 +#define KVM_CAP_S390_GS 140 +#define KVM_CAP_S390_AIS 141 +#define KVM_CAP_SPAPR_TCE_VFIO 142 +#define KVM_CAP_X86_GUEST_MWAIT 143 +#define KVM_CAP_ARM_USER_IRQ 144 #ifdef KVM_CAP_IRQ_ROUTING @@ -1087,6 +1099,7 @@ struct kvm_device_attr { #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 enum kvm_device_type { 
KVM_DEV_TYPE_FSL_MPIC_20 = 1, @@ -1108,6 +1121,11 @@ enum kvm_device_type { KVM_DEV_TYPE_MAX, }; +struct kvm_vfio_spapr_tce { + __s32 groupfd; + __s32 tablefd; +}; + /* * ioctls for VM fds */ @@ -1354,4 +1372,11 @@ struct kvm_assigned_msix_entry { #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +/* Available with KVM_CAP_ARM_USER_IRQ */ + +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + #endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 2ed5dc3775..9701772497 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -20,7 +20,8 @@ #define UFFD_API ((__u64)0xAA) #define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ - UFFD_FEATURE_EVENT_MADVDONTNEED | \ + UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM) #define UFFD_API_IOCTLS \ @@ -92,7 +93,7 @@ struct uffd_msg { struct { __u64 start; __u64 end; - } madv_dn; + } remove; struct { /* unused reserved fields */ @@ -109,7 +110,8 @@ struct uffd_msg { #define UFFD_EVENT_PAGEFAULT 0x12 #define UFFD_EVENT_FORK 0x13 #define UFFD_EVENT_REMAP 0x14 -#define UFFD_EVENT_MADVDONTNEED 0x15 +#define UFFD_EVENT_REMOVE 0x15 +#define UFFD_EVENT_UNMAP 0x16 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -155,9 +157,10 @@ struct uffdio_api { #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) #define UFFD_FEATURE_EVENT_REMAP (1<<2) -#define UFFD_FEATURE_EVENT_MADVDONTNEED (1<<3) +#define UFFD_FEATURE_EVENT_REMOVE (1<<3) #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) +#define UFFD_FEATURE_EVENT_UNMAP (1<<6) __u64 features; __u64 
ioctls; diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 531cb2eda9..4e7ab4c52a 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -198,6 +198,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ +#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; @@ -212,6 +213,7 @@ struct vfio_device_info { #define VFIO_DEVICE_API_PCI_STRING "vfio-pci" #define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" +#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, @@ -446,6 +448,22 @@ enum { VFIO_PCI_NUM_IRQS }; +/* + * The vfio-ccw bus driver makes use of the following fixed region and + * IRQ index mapping. Unimplemented regions return a size of zero. + * Unimplemented IRQ types return a count of zero. + */ + +enum { + VFIO_CCW_CONFIG_REGION_INDEX, + VFIO_CCW_NUM_REGIONS +}; + +enum { + VFIO_CCW_IO_IRQ_INDEX, + VFIO_CCW_NUM_IRQS +}; + /** * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, * struct vfio_pci_hot_reset_info) diff --git a/linux-headers/linux/vfio_ccw.h b/linux-headers/linux/vfio_ccw.h new file mode 100644 index 0000000000..3a565511ab --- /dev/null +++ b/linux-headers/linux/vfio_ccw.h @@ -0,0 +1,24 @@ +/* + * Interfaces for vfio-ccw + * + * Copyright IBM Corp. 
2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + */ + +#ifndef _VFIO_CCW_H_ +#define _VFIO_CCW_H_ + +#include <linux/types.h> + +struct ccw_io_region { +#define ORB_AREA_SIZE 12 + __u8 orb_area[ORB_AREA_SIZE]; +#define SCSW_AREA_SIZE 12 + __u8 scsw_area[SCSW_AREA_SIZE]; +#define IRB_AREA_SIZE 96 + __u8 irb_area[IRB_AREA_SIZE]; + __u32 ret_code; +} __attribute__((packed)); + +#endif @@ -1620,8 +1620,7 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr) return TARGET_PAGE_SIZE; } -void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, - bool is_write) +void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n) { hwaddr addr, granularity; IOMMUTLBEntry iotlb; @@ -1635,7 +1634,7 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, granularity = memory_region_iommu_get_min_page_size(mr); for (addr = 0; addr < memory_region_size(mr); addr += granularity) { - iotlb = mr->iommu_ops->translate(mr, addr, is_write); + iotlb = mr->iommu_ops->translate(mr, addr, IOMMU_NONE); if (iotlb.perm != IOMMU_NONE) { n->notify(n, &iotlb); } @@ -1653,7 +1652,7 @@ void memory_region_iommu_replay_all(MemoryRegion *mr) IOMMUNotifier *notifier; IOMMU_NOTIFIER_FOREACH(notifier, mr) { - memory_region_iommu_replay(mr, notifier, false); + memory_region_iommu_replay(mr, notifier); } } diff --git a/migration/colo.c b/migration/colo.c index 929b31c50c..3dd1390573 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -16,6 +16,7 @@ #include "qemu-file-channel.h" #include "migration/migration.h" #include "migration/qemu-file.h" +#include "savevm.h" #include "migration/colo.h" #include "migration/block.h" #include "io/channel-buffer.h" @@ -626,7 +627,7 @@ void *colo_process_incoming_thread(void *opaque) } qemu_mutex_lock_iothread(); - qemu_system_reset(VMRESET_SILENT); + qemu_system_reset(SHUTDOWN_CAUSE_NONE); vmstate_loading = true; if (qemu_loadvm_state(fb) < 0) { error_report("COLO: loadvm failed"); diff --git 
a/migration/migration.c b/migration/migration.c index ad29e53400..7087d1abbb 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -19,6 +19,7 @@ #include "qemu/main-loop.h" #include "migration/blocker.h" #include "migration/migration.h" +#include "savevm.h" #include "qemu-file-channel.h" #include "migration/qemu-file.h" #include "migration/vmstate.h" diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a37620dac6..3f9ae1bff2 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -22,6 +22,7 @@ #include "exec/target_page.h" #include "migration/migration.h" #include "migration/qemu-file.h" +#include "savevm.h" #include "postcopy-ram.h" #include "sysemu/sysemu.h" #include "sysemu/balloon.h" diff --git a/migration/ram.c b/migration/ram.c index c07a9c08d9..26e03a5dfa 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -673,10 +673,6 @@ static void migration_bitmap_sync(RAMState *rs) rs->bitmap_sync_count++; - if (!rs->bytes_xfer_prev) { - rs->bytes_xfer_prev = ram_bytes_transferred(); - } - if (!rs->time_last_bitmap_sync) { rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); } @@ -698,23 +694,25 @@ static void migration_bitmap_sync(RAMState *rs) /* more than 1 second = 1000 millisecons */ if (end_time > rs->time_last_bitmap_sync + 1000) { + /* calculate period counters */ + rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000 + / (end_time - rs->time_last_bitmap_sync); + bytes_xfer_now = ram_bytes_transferred(); + if (migrate_auto_converge()) { /* The following detection logic can be refined later. For now: Check to see if the dirtied bytes is 50% more than the approx. amount of bytes that just got transferred since the last time we were in this routine. 
If that happens twice, start or increase throttling */ - bytes_xfer_now = ram_bytes_transferred(); - if (rs->dirty_pages_rate && - (rs->num_dirty_pages_period * TARGET_PAGE_SIZE > + if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE > (bytes_xfer_now - rs->bytes_xfer_prev) / 2) && - (rs->dirty_rate_high_cnt++ >= 2)) { + (++rs->dirty_rate_high_cnt >= 2)) { trace_migration_throttle(); rs->dirty_rate_high_cnt = 0; mig_throttle_guest_down(); - } - rs->bytes_xfer_prev = bytes_xfer_now; + } } if (migrate_use_xbzrle()) { @@ -727,10 +725,11 @@ static void migration_bitmap_sync(RAMState *rs) rs->iterations_prev = rs->iterations; rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss; } - rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000 - / (end_time - rs->time_last_bitmap_sync); + + /* reset period counters */ rs->time_last_bitmap_sync = end_time; rs->num_dirty_pages_period = 0; + rs->bytes_xfer_prev = bytes_xfer_now; } if (migrate_use_events()) { qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL); diff --git a/migration/savevm.c b/migration/savevm.c index d971e5ee47..a2d4f9c53c 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -36,6 +36,7 @@ #include "qemu/timer.h" #include "migration/migration.h" #include "qemu-file-channel.h" +#include "savevm.h" #include "postcopy-ram.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" @@ -63,6 +64,26 @@ const unsigned int postcopy_ram_discard_version = 0; static bool skip_section_footers; +/* Subcommands for QEMU_VM_COMMAND */ +enum qemu_vm_cmd { + MIG_CMD_INVALID = 0, /* Must be 0 */ + MIG_CMD_OPEN_RETURN_PATH, /* Tell the dest to open the Return path */ + MIG_CMD_PING, /* Request a PONG on the RP */ + + MIG_CMD_POSTCOPY_ADVISE, /* Prior to any page transfers, just + warn we might want to do PC */ + MIG_CMD_POSTCOPY_LISTEN, /* Start listening for incoming + pages as it's running. 
*/ + MIG_CMD_POSTCOPY_RUN, /* Start execution */ + + MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that + were previously sent during + precopy but are dirty. */ + MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */ + MIG_CMD_MAX +}; + +#define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24) static struct mig_cmd_args { ssize_t len; /* -1 = variable */ const char *name; @@ -807,10 +828,10 @@ static void save_section_footer(QEMUFile *f, SaveStateEntry *se) * @len: Length of associated data * @data: Data associated with command. */ -void qemu_savevm_command_send(QEMUFile *f, - enum qemu_vm_cmd command, - uint16_t len, - uint8_t *data) +static void qemu_savevm_command_send(QEMUFile *f, + enum qemu_vm_cmd command, + uint16_t len, + uint8_t *data) { trace_savevm_command_send(command, len); qemu_put_byte(f, QEMU_VM_COMMAND); @@ -2281,7 +2302,7 @@ int load_vmstate(const char *name, Error **errp) return -EINVAL; } - qemu_system_reset(VMRESET_SILENT); + qemu_system_reset(SHUTDOWN_CAUSE_NONE); mis->from_src_file = f; aio_context_acquire(aio_context); diff --git a/migration/savevm.h b/migration/savevm.h new file mode 100644 index 0000000000..eb4487771a --- /dev/null +++ b/migration/savevm.h @@ -0,0 +1,41 @@ +/* + * QEMU save vm functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009-2017 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef MIGRATION_SAVEVM_H +#define MIGRATION_SAVEVM_H + +bool qemu_savevm_state_blocked(Error **errp); +void qemu_savevm_state_begin(QEMUFile *f); +void qemu_savevm_state_header(QEMUFile *f); +int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy); +void qemu_savevm_state_cleanup(void); +void qemu_savevm_state_complete_postcopy(QEMUFile *f); +void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only); +void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, + uint64_t *res_non_postcopiable, + uint64_t *res_postcopiable); +void qemu_savevm_send_ping(QEMUFile *f, uint32_t value); +void qemu_savevm_send_open_return_path(QEMUFile *f); +int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len); +void qemu_savevm_send_postcopy_advise(QEMUFile *f); +void qemu_savevm_send_postcopy_listen(QEMUFile *f); +void qemu_savevm_send_postcopy_run(QEMUFile *f); + +void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, + uint16_t len, + uint64_t *start_list, + uint64_t *length_list); + +int qemu_loadvm_state(QEMUFile *f); + +#endif @@ -231,8 +231,7 @@ static void parse_numa_distance(NumaDistOptions *dist, Error **errp) if (src >= MAX_NODES || dst >= MAX_NODES) { error_setg(errp, - "Invalid node %" PRIu16 - ", max possible could be %" PRIu16, + "Invalid node %d, max possible could be %d", MAX(src, dst), MAX_NODES); return; } diff --git a/os-win32.c b/os-win32.c index ae9857448f..586a7c7d49 100644 --- a/os-win32.c +++ b/os-win32.c @@ -52,7 +52,7 @@ int setenv(const char *name, const char *value, int overwrite) static BOOL WINAPI qemu_ctrl_handler(DWORD type) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_SIGNAL); /* Windows 7 kills application when the function returns. Sleep here to give QEMU a try for closing. 
Sleep period is 10000ms because Windows kills the program diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex 0b01d49495..5ad0564000 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h index 07d8cbcb20..2089274842 100644 --- a/pc-bios/s390-ccw/s390-ccw.h +++ b/pc-bios/s390-ccw/s390-ccw.h @@ -42,6 +42,13 @@ typedef unsigned long long __u64; #ifndef NULL #define NULL 0 #endif +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif +#ifndef MIN_NON_ZERO +#define MIN_NON_ZERO(a, b) ((a) == 0 ? (b) : \ + ((b) == 0 ? (a) : (MIN(a, b)))) +#endif #include "cio.h" #include "iplb.h" diff --git a/pc-bios/s390-ccw/scsi.h b/pc-bios/s390-ccw/scsi.h index fc830f7e52..fe3fd5ac05 100644 --- a/pc-bios/s390-ccw/scsi.h +++ b/pc-bios/s390-ccw/scsi.h @@ -26,6 +26,15 @@ #define SCSI_SENSE_KEY_NO_SENSE 0 #define SCSI_SENSE_KEY_UNIT_ATTENTION 6 +/* SCSI Inquiry Types */ +#define SCSI_INQUIRY_STANDARD 0x00U +#define SCSI_INQUIRY_EVPD 0x01U + +/* SCSI Inquiry Pages */ +#define SCSI_INQUIRY_STANDARD_NONE 0x00U +#define SCSI_INQUIRY_EVPD_SUPPORTED_PAGES 0x00U +#define SCSI_INQUIRY_EVPD_BLOCK_LIMITS 0xb0U + union ScsiLun { uint64_t v64; /* numeric shortcut */ uint8_t v8[8]; /* generic 8 bytes representation */ @@ -71,6 +80,27 @@ struct ScsiInquiryStd { } __attribute__((packed)); typedef struct ScsiInquiryStd ScsiInquiryStd; +struct ScsiInquiryEvpdPages { + uint8_t peripheral_qdt; /* b0, use (b0 & 0x1f) to get SCSI_INQ_RDT */ + uint8_t page_code; /* b1 */ + uint16_t page_length; /* b2..b3 length = N-3 */ + uint8_t byte[28]; /* b4..bN Supported EVPD pages (N=31 here) */ +} __attribute__((packed)); +typedef struct ScsiInquiryEvpdPages ScsiInquiryEvpdPages; + +struct ScsiInquiryEvpdBl { + uint8_t peripheral_qdt; /* b0, use (b0 & 0x1f) to get SCSI_INQ_RDT */ + uint8_t page_code; + uint16_t page_length; + uint8_t b4; + uint8_t b5; + uint16_t b6; + uint32_t max_transfer; /* b8 */ + 
uint32_t b12[7]; /* b12..b43 (defined fields) */ + uint32_t b44[5]; /* b44..b63 (reserved fields) */ +} __attribute__((packed)); +typedef struct ScsiInquiryEvpdBl ScsiInquiryEvpdBl; + struct ScsiCdbInquiry { uint8_t command; /* b0, == 0x12 */ uint8_t b1; /* b1, |= 0x01 (evpd) */ diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c index d850a8deed..f61ecf0205 100644 --- a/pc-bios/s390-ccw/virtio-scsi.c +++ b/pc-bios/s390-ccw/virtio-scsi.c @@ -19,6 +19,8 @@ static VirtioScsiCmdReq req; static VirtioScsiCmdResp resp; static uint8_t scsi_inquiry_std_response[256]; +static ScsiInquiryEvpdPages scsi_inquiry_evpd_pages_response; +static ScsiInquiryEvpdBl scsi_inquiry_evpd_bl_response; static inline void vs_assert(bool term, const char **msgs) { @@ -89,10 +91,13 @@ static void vs_run(const char *title, VirtioCmd *cmd, VDev *vdev, /* SCSI protocol implementation routines */ -static bool scsi_inquiry(VDev *vdev, void *data, uint32_t data_size) +static bool scsi_inquiry(VDev *vdev, uint8_t evpd, uint8_t page, + void *data, uint32_t data_size) { ScsiCdbInquiry cdb = { .command = 0x12, + .b1 = evpd, + .b2 = page, .alloc_len = data_size < 65535 ? 
data_size : 65535, }; VirtioCmd inquiry[] = { @@ -142,19 +147,18 @@ static bool scsi_report_luns(VDev *vdev, void *data, uint32_t data_size) } static bool scsi_read_10(VDev *vdev, - ulong sector, int sectors, void *data) + ulong sector, int sectors, void *data, + unsigned int data_size) { - int f = vdev->blk_factor; - unsigned int data_size = sectors * virtio_get_block_size() * f; ScsiCdbRead10 cdb = { .command = 0x28, - .lba = sector * f, - .xfer_length = sectors * f, + .lba = sector, + .xfer_length = sectors, }; VirtioCmd read_10[] = { { &req, sizeof(req), VRING_DESC_F_NEXT }, { &resp, sizeof(resp), VRING_DESC_F_WRITE | VRING_DESC_F_NEXT }, - { data, data_size * f, VRING_DESC_F_WRITE }, + { data, data_size, VRING_DESC_F_WRITE }, }; debug_print_int("read_10 sector", sector); @@ -203,6 +207,7 @@ static void virtio_scsi_locate_device(VDev *vdev) debug_print_int("config.scsi.max_channel", vdev->config.scsi.max_channel); debug_print_int("config.scsi.max_target ", vdev->config.scsi.max_target); debug_print_int("config.scsi.max_lun ", vdev->config.scsi.max_lun); + debug_print_int("config.scsi.max_sectors", vdev->config.scsi.max_sectors); if (vdev->scsi_device_selected) { sdev->channel = vdev->selected_scsi_device.channel; @@ -255,9 +260,23 @@ static void virtio_scsi_locate_device(VDev *vdev) int virtio_scsi_read_many(VDev *vdev, ulong sector, void *load_addr, int sec_num) { - if (!scsi_read_10(vdev, sector, sec_num, load_addr)) { - virtio_scsi_verify_response(&resp, "virtio-scsi:read_many"); - } + int sector_count; + int f = vdev->blk_factor; + unsigned int data_size; + unsigned int max_transfer = MIN_NON_ZERO(vdev->config.scsi.max_sectors, + vdev->max_transfer); + + do { + sector_count = MIN_NON_ZERO(sec_num, max_transfer); + data_size = sector_count * virtio_get_block_size() * f; + if (!scsi_read_10(vdev, sector * f, sector_count * f, load_addr, + data_size)) { + virtio_scsi_verify_response(&resp, "virtio-scsi:read_many"); + } + load_addr += data_size; + sector += 
sector_count; + sec_num -= sector_count; + } while (sec_num > 0); return 0; } @@ -304,6 +323,9 @@ void virtio_scsi_setup(VDev *vdev) int retry_test_unit_ready = 3; uint8_t data[256]; uint32_t data_size = sizeof(data); + ScsiInquiryEvpdPages *evpd = &scsi_inquiry_evpd_pages_response; + ScsiInquiryEvpdBl *evpd_bl = &scsi_inquiry_evpd_bl_response; + int i; vdev->scsi_device = &default_scsi_device; virtio_scsi_locate_device(vdev); @@ -334,7 +356,10 @@ void virtio_scsi_setup(VDev *vdev) } /* read and cache SCSI INQUIRY response */ - if (!scsi_inquiry(vdev, scsi_inquiry_std_response, + if (!scsi_inquiry(vdev, + SCSI_INQUIRY_STANDARD, + SCSI_INQUIRY_STANDARD_NONE, + scsi_inquiry_std_response, sizeof(scsi_inquiry_std_response))) { virtio_scsi_verify_response(&resp, "virtio-scsi:setup:inquiry"); } @@ -345,6 +370,44 @@ void virtio_scsi_setup(VDev *vdev) vdev->scsi_block_size = VIRTIO_ISO_BLOCK_SIZE; } + if (!scsi_inquiry(vdev, + SCSI_INQUIRY_EVPD, + SCSI_INQUIRY_EVPD_SUPPORTED_PAGES, + evpd, + sizeof(*evpd))) { + virtio_scsi_verify_response(&resp, "virtio-scsi:setup:supported_pages"); + } + + debug_print_int("EVPD length", evpd->page_length); + + for (i = 0; i <= evpd->page_length; i++) { + debug_print_int("supported EVPD page", evpd->byte[i]); + + if (evpd->byte[i] != SCSI_INQUIRY_EVPD_BLOCK_LIMITS) { + continue; + } + + if (!scsi_inquiry(vdev, + SCSI_INQUIRY_EVPD, + SCSI_INQUIRY_EVPD_BLOCK_LIMITS, + evpd_bl, + sizeof(*evpd_bl))) { + virtio_scsi_verify_response(&resp, "virtio-scsi:setup:blocklimits"); + } + + debug_print_int("max transfer", evpd_bl->max_transfer); + vdev->max_transfer = evpd_bl->max_transfer; + } + + /* + * The host sg driver will often be unhappy with particularly large + * I/Os that exceed the block iovec limits. Let's enforce something + * reasonable, despite what the device configuration tells us. 
+ */ + + vdev->max_transfer = MIN_NON_ZERO(VIRTIO_SCSI_MAX_SECTORS, + vdev->max_transfer); + if (!scsi_read_capacity(vdev, data, data_size)) { virtio_scsi_verify_response(&resp, "virtio-scsi:setup:read_capacity"); } diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h index f50b38b18b..4c4f4bbc31 100644 --- a/pc-bios/s390-ccw/virtio-scsi.h +++ b/pc-bios/s390-ccw/virtio-scsi.h @@ -19,6 +19,8 @@ #define VIRTIO_SCSI_CDB_SIZE SCSI_DEFAULT_CDB_SIZE #define VIRTIO_SCSI_SENSE_SIZE SCSI_DEFAULT_SENSE_SIZE +#define VIRTIO_SCSI_MAX_SECTORS 2048 + /* command-specific response values */ #define VIRTIO_SCSI_S_OK 0x00 #define VIRTIO_SCSI_S_BAD_TARGET 0x03 diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h index 3388a423e5..1eaf865b1f 100644 --- a/pc-bios/s390-ccw/virtio.h +++ b/pc-bios/s390-ccw/virtio.h @@ -277,6 +277,7 @@ struct VDev { bool scsi_device_selected; ScsiDevice selected_scsi_device; uint64_t netboot_start_addr; + uint32_t max_transfer; }; typedef struct VDev VDev; diff --git a/qapi-schema.json b/qapi-schema.json index e38c5f0423..4b50b652d3 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3057,8 +3057,6 @@ # @name: the type name found in the search # # Since: 1.1 -# -# Notes: This command is experimental and may change syntax in future releases. ## { 'struct': 'ObjectTypeInfo', 'data': { 'name': 'str' } } diff --git a/qapi/event.json b/qapi/event.json index e80f3f4446..6d22b025cc 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -10,6 +10,10 @@ # Emitted when the virtual machine has shut down, indicating that qemu is # about to exit. # +# @guest: If true, the shutdown was triggered by a guest request (such as +# a guest-initiated ACPI shutdown request or other hardware-specific action) +# rather than a host request (such as sending qemu a SIGINT). 
(since 2.10) +# # Note: If the command-line option "-no-shutdown" has been specified, qemu will # not exit, and a STOP event will eventually follow the SHUTDOWN event # @@ -17,11 +21,11 @@ # # Example: # -# <- { "event": "SHUTDOWN", +# <- { "event": "SHUTDOWN", "data": { "guest": true }, # "timestamp": { "seconds": 1267040730, "microseconds": 682951 } } # ## -{ 'event': 'SHUTDOWN' } +{ 'event': 'SHUTDOWN', 'data': { 'guest': 'bool' } } ## # @POWERDOWN: @@ -44,15 +48,20 @@ # # Emitted when the virtual machine is reset # +# @guest: If true, the reset was triggered by a guest request (such as +# a guest-initiated ACPI reboot request or other hardware-specific action) +# rather than a host request (such as the QMP command system_reset). +# (since 2.10) +# # Since: 0.12.0 # # Example: # -# <- { "event": "RESET", +# <- { "event": "RESET", "data": { "guest": false }, # "timestamp": { "seconds": 1267041653, "microseconds": 9518 } } # ## -{ 'event': 'RESET' } +{ 'event': 'RESET', 'data': { 'guest': 'bool' } } ## # @STOP: diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index e5bc28fc3c..a39fcdba71 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -40,9 +40,9 @@ STEXI ETEXI DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [--target-image-opts] [-U] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") STEXI -@item convert [--object @var{objectdef}] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s 
@var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [-U] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("dd", img_dd, diff --git a/qemu-img.c b/qemu-img.c index b506839ef0..0ad698d7f1 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -60,6 +60,7 @@ enum { OPTION_PATTERN = 260, OPTION_FLUSH_INTERVAL = 261, OPTION_NO_DRAIN = 262, + OPTION_TARGET_IMAGE_OPTS = 263, }; typedef enum OutputFormat { @@ -294,9 +295,10 @@ static BlockBackend *img_open_opts(const char *optstr, if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE) && !qdict_get_bool(options, BDRV_OPT_FORCE_SHARE)) { error_report("--force-share/-U conflicts with image options"); + QDECREF(options); return NULL; } - qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } blk = blk_new_open(NULL, NULL, options, flags, &local_err); if (!blk) { @@ -313,20 +315,23 @@ static BlockBackend *img_open_opts(const char *optstr, } static BlockBackend *img_open_file(const char *filename, + QDict *options, const char *fmt, int flags, bool writethrough, bool quiet, bool force_share) { BlockBackend *blk; Error *local_err = NULL; - QDict *options = qdict_new(); + if (!options) { + options = qdict_new(); + } if (fmt) { qdict_put_str(options, "driver", fmt); } if (force_share) { - qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } blk = blk_new_open(filename, NULL, options, flags, &local_err); if (!blk) { @@ -343,6 +348,35 @@ static BlockBackend 
*img_open_file(const char *filename, } +static int img_add_key_secrets(void *opaque, + const char *name, const char *value, + Error **errp) +{ + QDict *options = opaque; + + if (g_str_has_suffix(name, "key-secret")) { + qdict_put(options, name, qstring_from_str(value)); + } + + return 0; +} + +static BlockBackend *img_open_new_file(const char *filename, + QemuOpts *create_opts, + const char *fmt, int flags, + bool writethrough, bool quiet, + bool force_share) +{ + QDict *options = NULL; + + options = qdict_new(); + qemu_opt_foreach(create_opts, img_add_key_secrets, options, &error_abort); + + return img_open_file(filename, options, fmt, flags, writethrough, quiet, + force_share); +} + + static BlockBackend *img_open(bool image_opts, const char *filename, const char *fmt, int flags, bool writethrough, @@ -363,7 +397,7 @@ static BlockBackend *img_open(bool image_opts, blk = img_open_opts(filename, opts, flags, writethrough, quiet, force_share); } else { - blk = img_open_file(filename, fmt, flags, writethrough, quiet, + blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, force_share); } return blk; @@ -1913,10 +1947,10 @@ static int convert_do_copy(ImgConvertState *s) static int img_convert(int argc, char **argv) { int c, bs_i, flags, src_flags = 0; - const char *fmt = NULL, *out_fmt = "raw", *cache = "unsafe", + const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe", *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL, *out_filename, *out_baseimg_param, *snapshot_name = NULL; - BlockDriver *drv, *proto_drv; + BlockDriver *drv = NULL, *proto_drv = NULL; BlockDriverInfo bdi; BlockDriverState *out_bs; QemuOpts *opts = NULL, *sn_opts = NULL; @@ -1924,7 +1958,7 @@ static int img_convert(int argc, char **argv) char *options = NULL; Error *local_err = NULL; bool writethrough, src_writethrough, quiet = false, image_opts = false, - skip_create = false, progress = false; + skip_create = false, progress = false, tgt_image_opts = false; int64_t ret = 
-EINVAL; bool force_share = false; @@ -1942,6 +1976,7 @@ static int img_convert(int argc, char **argv) {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"force-share", no_argument, 0, 'U'}, + {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:WU", @@ -2062,9 +2097,16 @@ static int img_convert(int argc, char **argv) case OPTION_IMAGE_OPTS: image_opts = true; break; + case OPTION_TARGET_IMAGE_OPTS: + tgt_image_opts = true; + break; } } + if (!out_fmt && !tgt_image_opts) { + out_fmt = "raw"; + } + if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, NULL)) { @@ -2076,12 +2118,22 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } + if (tgt_image_opts && !skip_create) { + error_report("--target-image-opts requires use of -n flag"); + goto fail_getopt; + } + s.src_num = argc - optind - 1; out_filename = s.src_num >= 1 ? 
argv[argc - 1] : NULL; if (options && has_help_option(options)) { - ret = print_block_option_help(out_filename, out_fmt); - goto fail_getopt; + if (out_fmt) { + ret = print_block_option_help(out_filename, out_fmt); + goto fail_getopt; + } else { + error_report("Option help requires a format be specified"); + goto fail_getopt; + } } if (s.src_num < 1) { @@ -2146,22 +2198,22 @@ static int img_convert(int argc, char **argv) goto out; } - /* Find driver and parse its options */ - drv = bdrv_find_format(out_fmt); - if (!drv) { - error_report("Unknown file format '%s'", out_fmt); - ret = -1; - goto out; - } + if (!skip_create) { + /* Find driver and parse its options */ + drv = bdrv_find_format(out_fmt); + if (!drv) { + error_report("Unknown file format '%s'", out_fmt); + ret = -1; + goto out; + } - proto_drv = bdrv_find_protocol(out_filename, true, &local_err); - if (!proto_drv) { - error_report_err(local_err); - ret = -1; - goto out; - } + proto_drv = bdrv_find_protocol(out_filename, true, &local_err); + if (!proto_drv) { + error_report_err(local_err); + ret = -1; + goto out; + } - if (!skip_create) { if (!drv->create_opts) { error_report("Format driver '%s' does not support image creation", drv->format_name); @@ -2218,7 +2270,7 @@ static int img_convert(int argc, char **argv) const char *preallocation = qemu_opt_get(opts, BLOCK_OPT_PREALLOC); - if (!drv->bdrv_co_pwritev_compressed) { + if (drv && !drv->bdrv_co_pwritev_compressed) { error_report("Compression not supported for this file format"); ret = -1; goto out; @@ -2258,19 +2310,30 @@ static int img_convert(int argc, char **argv) goto out; } - /* XXX we should allow --image-opts to trigger use of - * img_open() here, but then we have trouble with - * the bdrv_create() call which takes different params. - * Not critical right now, so fix can wait... 
- */ - s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet, - false); + if (skip_create) { + s.target = img_open(tgt_image_opts, out_filename, out_fmt, + flags, writethrough, quiet, false); + } else { + /* TODO ultimately we should allow --target-image-opts + * to be used even when -n is not given. + * That has to wait for bdrv_create to be improved + * to allow filenames in option syntax + */ + s.target = img_open_new_file(out_filename, opts, out_fmt, + flags, writethrough, quiet, false); + } if (!s.target) { ret = -1; goto out; } out_bs = blk_bs(s.target); + if (s.compressed && !out_bs->drv->bdrv_co_pwritev_compressed) { + error_report("Compression not supported for this file format"); + ret = -1; + goto out; + } + /* increase bufsectors from the default 4096 (2M) if opt_transfer * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB) * as maximum. */ @@ -3156,8 +3219,7 @@ static int img_rebase(int argc, char **argv) if (!options) { options = qdict_new(); } - qdict_put(options, BDRV_OPT_FORCE_SHARE, - qbool_from_bool(true)); + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); blk_old_backing = blk_new_open(backing_name, NULL, @@ -4158,6 +4220,7 @@ static int img_dd(int argc, char **argv) }; const struct option long_options[] = { { "help", no_argument, 0, 'h'}, + { "object", required_argument, 0, OPTION_OBJECT}, { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, { "force-share", no_argument, 0, 'U'}, { 0, 0, 0, 0 } @@ -4186,6 +4249,15 @@ static int img_dd(int argc, char **argv) case 'U': force_share = true; break; + case OPTION_OBJECT: { + QemuOpts *opts; + opts = qemu_opts_parse_noisily(&qemu_object_opts, + optarg, true); + if (!opts) { + ret = -1; + goto out; + } + } break; case OPTION_IMAGE_OPTS: image_opts = true; break; @@ -4230,6 +4302,14 @@ static int img_dd(int argc, char **argv) ret = -1; goto out; } + + if (qemu_opts_foreach(&qemu_object_opts, + 
user_creatable_add_opts_foreach, + NULL, NULL)) { + ret = -1; + goto out; + } + blk1 = img_open(image_opts, in.filename, fmt, 0, false, false, force_share); @@ -4298,8 +4378,13 @@ static int img_dd(int argc, char **argv) goto out; } - blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR, - false, false, false); + /* TODO, we can't honour --image-opts for the target, + * since it needs to be given in a format compatible + * with the bdrv_create() call above which does not + * support image-opts style. + */ + blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR, + false, false, false); if (!blk2) { ret = -1; diff --git a/qemu-img.texi b/qemu-img.texi index 50a2364e80..5b925ecf41 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -45,9 +45,17 @@ keys. @item --image-opts -Indicates that the @var{filename} parameter is to be interpreted as a +Indicates that the source @var{filename} parameter is to be interpreted as a full option string, not a plain filename. This parameter is mutually -exclusive with the @var{-f} and @var{-F} parameters. +exclusive with the @var{-f} parameter. + +@item --target-image-opts + +Indicates that the @var{output_filename} parameter(s) are to be interpreted as +a full option string, not a plain filename. This parameter is mutually +exclusive with the @var{-O} parameters. It is currently required to also use +the @var{-n} parameter to skip image creation. This restriction may be relaxed +in a future release. @item fmt is the disk image format. It is guessed automatically in most cases. 
See below @@ -76,7 +76,7 @@ static int openfile(char *name, int flags, bool writethrough, bool force_share, QDECREF(opts); return 1; } - qdict_put(opts, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + qdict_put_bool(opts, BDRV_OPT_FORCE_SHARE, true); } qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err); if (!qemuio_blk) { diff --git a/qemu-options.hx b/qemu-options.hx index f63f7dc946..a6c9b9e763 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -42,7 +42,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ " dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n" " suppress-vmdesc=on|off disables self-describing migration (default=off)\n" " nvdimm=on|off controls NVDIMM support (default=off)\n" - " enforce-config-section=on|off enforce configuration section migration (default=off)\n", + " enforce-config-section=on|off enforce configuration section migration (default=off)\n" + " s390-squash-mcss=on|off controls support for squashing into default css (default=off)\n", QEMU_ARCH_ALL) STEXI @item -machine [type=]@var{name}[,prop=@var{value}[,...]] @@ -81,6 +82,9 @@ controls whether DEA wrapping keys will be created to allow execution of DEA cryptographic functions. The default is on. @item nvdimm=on|off Enables or disables NVDIMM support. The default is off. +@item s390-squash-mcss=on|off +Enables or disables squashing subchannels into the default css. +The default is off. @end table ETEXI @@ -980,12 +984,12 @@ STEXI ETEXI DEF("usb", 0, QEMU_OPTION_usb, - "-usb enable the USB driver (will be the default soon)\n", + "-usb enable the USB driver (if it is not used by default yet)\n", QEMU_ARCH_ALL) STEXI @item -usb @findex -usb -Enable the USB driver (will be the default soon) +Enable the USB driver (if it is not used by default yet). ETEXI DEF("usbdevice", HAS_ARG, QEMU_OPTION_usbdevice, @@ -995,7 +999,8 @@ STEXI @item -usbdevice @var{devname} @findex -usbdevice -Add the USB device @var{devname}. @xref{usb_devices}. 
+Add the USB device @var{devname}. Note that this option is deprecated, +please use @code{-device usb-...} instead. @xref{usb_devices}. @table @option @@ -1373,7 +1378,7 @@ output such as guest graphics, guest console, and the QEMU monitor in a window. With this option, you can have QEMU listen on VNC display @var{display} and redirect the VGA display over the VNC session. It is very useful to enable the usb tablet device when using this option -(option @option{-usbdevice tablet}). When using the VNC display, you +(option @option{-device usb-tablet}). When using the VNC display, you must use the @option{-k} parameter to set the keyboard layout if you are not using en-us. Valid syntax for the @var{display} is @@ -84,7 +84,7 @@ UuidInfo *qmp_query_uuid(Error **errp) void qmp_quit(Error **errp) { no_shutdown = 0; - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_QMP); } void qmp_stop(Error **errp) @@ -105,7 +105,7 @@ void qmp_stop(Error **errp) void qmp_system_reset(Error **errp) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_HOST_QMP); } void qmp_system_powerdown(Error **erp) diff --git a/replay/replay-internal.h b/replay/replay-internal.h index ed66ed803c..3ebb19912a 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -22,8 +22,9 @@ enum ReplayEvents { EVENT_EXCEPTION, /* for async events */ EVENT_ASYNC, - /* for shutdown request */ + /* for shutdown requests, range allows recovery of ShutdownCause */ EVENT_SHUTDOWN, + EVENT_SHUTDOWN_LAST = EVENT_SHUTDOWN + SHUTDOWN_CAUSE__MAX, /* for character device write event */ EVENT_CHAR_WRITE, /* for character device read all event */ diff --git a/replay/replay.c b/replay/replay.c index f810628cac..ff58a5adf9 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -49,9 +49,10 @@ bool replay_next_event_is(int event) res = true; } switch (replay_state.data_kind) { - case EVENT_SHUTDOWN: + case EVENT_SHUTDOWN ... 
EVENT_SHUTDOWN_LAST: replay_finish_event(); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(replay_state.data_kind - + EVENT_SHUTDOWN); break; default: /* clock, time_t, checkpoint and other events */ @@ -170,11 +171,11 @@ bool replay_has_interrupt(void) return res; } -void replay_shutdown_request(void) +void replay_shutdown_request(ShutdownCause cause) { if (replay_mode == REPLAY_MODE_RECORD) { replay_mutex_lock(); - replay_put_event(EVENT_SHUTDOWN); + replay_put_event(EVENT_SHUTDOWN + cause); replay_mutex_unlock(); } } diff --git a/scripts/qmp/qom-set b/scripts/qmp/qom-set index 54ecfecc53..94e2778922 100755 --- a/scripts/qmp/qom-set +++ b/scripts/qmp/qom-set @@ -61,4 +61,4 @@ else: srv = QEMUMonitorProtocol(socket_path) srv.connect() -print srv.command('qom-set', path=path, property=prop, value=sys.argv[2]) +print srv.command('qom-set', path=path, property=prop, value=value) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 6a370a8669..2f906c4d16 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -113,7 +113,7 @@ done rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" -for header in kvm.h kvm_para.h vfio.h vhost.h \ +for header in kvm.h kvm_para.h vfio.h vfio_ccw.h vhost.h \ psci.h userfaultfd.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done diff --git a/slirp/socket.c b/slirp/socket.c index 86927722e1..3b49a69a93 100644 --- a/slirp/socket.c +++ b/slirp/socket.c @@ -100,6 +100,9 @@ sofree(struct socket *so) if(so->so_next && so->so_prev) remque(so); /* crashes if so is not in a queue */ + if (so->so_tcpcb) { + free(so->so_tcpcb); + } free(so); } diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c index edb98f06f3..07bcbdb2dd 100644 --- a/slirp/tcp_input.c +++ b/slirp/tcp_input.c @@ -1587,11 +1587,11 @@ tcp_mss(struct tcpcb *tp, u_int offer) switch (so->so_ffamily) { case AF_INET: mss = MIN(IF_MTU, IF_MRU) - sizeof(struct 
tcphdr) - + sizeof(struct ip); + - sizeof(struct ip); break; case AF_INET6: mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) - + sizeof(struct ip6); + - sizeof(struct ip6); break; default: g_assert_not_reached(); diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c index ed16e1807f..dc8b4bbb50 100644 --- a/slirp/tcp_subr.c +++ b/slirp/tcp_subr.c @@ -204,7 +204,7 @@ tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ip = mtod(m, struct ip *); - ip->ip_len = tlen; + ip->ip_len = m->m_len; ip->ip_dst = tcpiph_save.ti_dst; ip->ip_src = tcpiph_save.ti_src; ip->ip_p = tcpiph_save.ti_pr; @@ -224,7 +224,7 @@ tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip6); ip6 = mtod(m, struct ip6 *); - ip6->ip_pl = tlen; + ip6->ip_pl = tcpiph_save.ti_len; ip6->ip_dst = tcpiph_save.ti_dst6; ip6->ip_src = tcpiph_save.ti_src6; ip6->ip_nh = tcpiph_save.ti_nh6; diff --git a/target/alpha/sys_helper.c b/target/alpha/sys_helper.c index 652195de6f..ac22323191 100644 --- a/target/alpha/sys_helper.c +++ b/target/alpha/sys_helper.c @@ -60,9 +60,9 @@ void helper_tb_flush(CPUAlphaState *env) void helper_halt(uint64_t restart) { if (restart) { - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } else { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/target/arm/psci.c b/target/arm/psci.c index ade9fe2ede..fc34b263d3 100644 --- a/target/arm/psci.c +++ b/target/arm/psci.c @@ -137,7 +137,7 @@ void arm_handle_psci_call(ARMCPU *cpu) } break; case QEMU_PSCI_0_2_FN_SYSTEM_RESET: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); /* QEMU reset and shutdown are async requests, but PSCI * mandates that we never return from the reset/shutdown * call, so power the CPU off now so it 
doesn't execute @@ -145,7 +145,7 @@ void arm_handle_psci_call(ARMCPU *cpu) */ goto cpu_off; case QEMU_PSCI_0_2_FN_SYSTEM_OFF: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); goto cpu_off; case QEMU_PSCI_0_1_FN_CPU_ON: case QEMU_PSCI_0_2_FN_CPU_ON: diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c index ee596c6082..b76977243f 100644 --- a/target/i386/excp_helper.c +++ b/target/i386/excp_helper.c @@ -59,7 +59,7 @@ static int check_exception(CPUX86State *env, int intno, int *error_code, qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return EXCP_HLT; } #endif diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c index ef13015215..73469311d6 100644 --- a/target/i386/hax-all.c +++ b/target/i386/hax-all.c @@ -540,14 +540,14 @@ static int hax_vcpu_hax_exec(CPUArchState *env) /* Guest state changed, currently only for shutdown */ case HAX_EXIT_STATECHANGE: fprintf(stdout, "VCPU shutdown request\n"); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); hax_vcpu_sync_state(env, 0); ret = 1; break; case HAX_EXIT_UNKNOWN_VMEXIT: fprintf(stderr, "Unknown VMX exit %x from guest\n", ht->_exit_reason); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); hax_vcpu_sync_state(env, 0); cpu_dump_state(cpu, stderr, fprintf, 0); ret = -1; @@ -578,7 +578,7 @@ static int hax_vcpu_hax_exec(CPUArchState *env) break; default: fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); hax_vcpu_sync_state(env, 0); cpu_dump_state(cpu, stderr, fprintf, 0); ret = 1; diff --git a/target/i386/helper.c b/target/i386/helper.c index f11cac63a1..ee7eff2f6f 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -1212,7 +1212,7 @@ static void do_inject_x86_mce(CPUState 
*cs, run_on_cpu_data data) " triple fault\n", cs->cpu_index); qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } if (banks[1] & MCI_STATUS_VAL) { diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 011d4a55b1..49b6115eae 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2930,7 +2930,7 @@ int kvm_arch_process_async_events(CPUState *cs) if (env->exception_injected == EXCP08_DBLE) { /* this means triple fault */ - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); cs->exit_request = 1; return 0; } diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index a6bcb47aa2..9cb2123187 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -995,6 +995,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr) */ cs->interrupt_request |= CPU_INTERRUPT_EXITTB; + /* Reset the reservation */ + env->reserve_addr = -1; + /* Context synchronizing: check if TCG TLB needs flush */ check_tlb_flush(env, false); } diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index a1bf2ba5a7..a69005d9b5 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -92,9 +92,10 @@ static void s390_cpu_initial_reset(CPUState *s) int i; s390_cpu_reset(s); - /* initial reset does not touch regs,fregs and aregs */ - memset(&env->fpc, 0, offsetof(CPUS390XState, end_reset_fields) - - offsetof(CPUS390XState, fpc)); + /* initial reset does not clear everything! 
*/ + memset(&env->start_initial_reset_fields, 0, + offsetof(CPUS390XState, end_reset_fields) - + offsetof(CPUS390XState, start_initial_reset_fields)); /* architectured initial values for CR 0 and 14 */ env->cregs[0] = CR0_RESET; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 240b8a5c22..c74b4193ee 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -88,6 +88,10 @@ typedef struct CPUS390XState { */ CPU_DoubleU vregs[32][2]; /* vector registers */ uint32_t aregs[16]; /* access registers */ + uint8_t riccb[64]; /* runtime instrumentation control */ + + /* Fields up to this point are not cleared by initial CPU reset */ + struct {} start_initial_reset_fields; uint32_t fpc; /* floating-point control register */ uint32_t cc_op; @@ -137,8 +141,6 @@ typedef struct CPUS390XState { uint64_t gbea; uint64_t pp; - uint8_t riccb[64]; - /* Fields up to this point are cleared by a CPU reset */ struct {} end_reset_fields; @@ -1256,6 +1258,16 @@ static inline void s390_crypto_reset(void) } } +static inline bool s390_get_squash_mcss(void) +{ + if (object_property_get_bool(OBJECT(qdev_get_machine()), "s390-squash-mcss", + NULL)) { + return true; + } + + return false; +} + /* machine check interruption code */ /* subclasses */ diff --git a/target/s390x/helper.c b/target/s390x/helper.c index 997849008f..4f8aadf305 100644 --- a/target/s390x/helper.c +++ b/target/s390x/helper.c @@ -266,7 +266,7 @@ void load_psw(CPUS390XState *env, uint64_t mask, uint64_t addr) S390CPU *cpu = s390_env_get_cpu(env); if (s390_cpu_halt(cpu) == 0) { #ifndef CONFIG_USER_ONLY - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); #endif } } diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c index 590bfa4f12..62a777100c 100644 --- a/target/s390x/ioinst.c +++ b/target/s390x/ioinst.c @@ -244,6 +244,15 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb) case -EBUSY: cc = 2; break; + case -EFAULT: + /* + * TODO: + * I'm wondering 
whether there is something better + * to do for us here (like setting some device or + * subchannel status). + */ + program_interrupt(env, PGM_ADDRESSING, 4); + return; case 0: cc = 0; break; diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index fb105429be..ba1e60f8a6 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -1927,7 +1927,7 @@ static int handle_intercept(S390CPU *cpu) cpu_synchronize_state(cs); if (s390_cpu_halt(cpu) == 0) { if (is_special_wait_psw(cs)) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } else { qemu_system_guest_panicked(NULL); } @@ -1936,7 +1936,7 @@ static int handle_intercept(S390CPU *cpu) break; case ICPT_CPU_STOP: if (s390_cpu_set_state(CPU_STATE_STOPPED, cpu) == 0) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } if (cpu->env.sigp_order == SIGP_STOP_STORE_STATUS) { kvm_s390_store_status(cpu, KVM_S390_STORE_STATUS_DEF_ADDR, diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c index 23ec52cf35..1b9f448875 100644 --- a/target/s390x/misc_helper.c +++ b/target/s390x/misc_helper.c @@ -532,11 +532,11 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, break; #if !defined(CONFIG_USER_ONLY) case SIGP_RESTART: - qemu_system_reset_request(); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); cpu_loop_exit(CPU(s390_env_get_cpu(env))); break; case SIGP_STOP: - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); cpu_loop_exit(CPU(s390_env_get_cpu(env))); break; #endif diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index 6c07c6b24b..ffb91687b8 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -91,8 +91,10 @@ #define FPSCR_RM_NEAREST (0 << 0) #define FPSCR_RM_ZERO (1 << 0) +#define DELAY_SLOT_MASK 0x7 #define DELAY_SLOT (1 << 0) #define DELAY_SLOT_CONDITIONAL (1 << 1) +#define DELAY_SLOT_RTE (1 << 2) typedef struct tlb_t { uint32_t vpn; 
/* virtual page number */ @@ -263,7 +265,13 @@ void cpu_load_tlb(CPUSH4State * env); #define MMU_USER_IDX 1 static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch) { - return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0; + /* The instruction in a RTE delay slot is fetched in privileged + mode, but executed in user mode. */ + if (ifetch && (env->flags & DELAY_SLOT_RTE)) { + return 0; + } else { + return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0; + } } #include "exec/cpu-all.h" @@ -380,7 +388,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, { *pc = env->pc; *cs_base = 0; - *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) /* Bits 0-1 */ + *flags = (env->flags & DELAY_SLOT_MASK) /* Bits 0- 2 */ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ | (env->sr & (1u << SR_FD)) /* Bit 15 */ diff --git a/target/sh4/helper.c b/target/sh4/helper.c index 8f8ce81401..28d93c2543 100644 --- a/target/sh4/helper.c +++ b/target/sh4/helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "exec/exec-all.h" #include "exec/log.h" +#include "sysemu/sysemu.h" #if !defined(CONFIG_USER_ONLY) #include "hw/sh4/sh_intc.h" @@ -92,7 +93,14 @@ void superh_cpu_do_interrupt(CPUState *cs) if (env->sr & (1u << SR_BL)) { if (do_exp && cs->exception_index != 0x1e0) { - cs->exception_index = 0x000; /* masked exception -> reset */ + /* In theory a masked exception generates a reset exception, + which in turn jumps to the reset vector. However this only + works when using a bootloader. When using a kernel and an + initrd, they need to be reloaded and the program counter + should be loaded with the kernel entry point. + qemu_system_reset_request takes care of that. 
*/ + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + return; } if (do_irq && !env->in_sleep) { return; /* masked */ @@ -164,11 +172,11 @@ void superh_cpu_do_interrupt(CPUState *cs) env->sgr = env->gregs[15]; env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB); - if (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { + if (env->flags & DELAY_SLOT_MASK) { /* Branch instruction should be executed again before delay slot. */ env->spc -= 2; /* Clear flags for exception/interrupt routine. */ - env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + env->flags &= ~DELAY_SLOT_MASK; } if (do_exp) { @@ -420,7 +428,7 @@ static int get_physical_address(CPUSH4State * env, target_ulong * physical, if (!(env->sr & (1u << SR_MD)) && (address < 0xe0000000 || address >= 0xe4000000)) { /* Unauthorized access in user mode (only store queues are available) */ - fprintf(stderr, "Unauthorized access\n"); + qemu_log_mask(LOG_GUEST_ERROR, "Unauthorized access\n"); if (rw == 0) return MMU_DADDR_ERROR_READ; else if (rw == 1) @@ -863,8 +871,16 @@ int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr) bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { if (interrupt_request & CPU_INTERRUPT_HARD) { - superh_cpu_do_interrupt(cs); - return true; + SuperHCPU *cpu = SUPERH_CPU(cs); + CPUSH4State *env = &cpu->env; + + /* Delay slots are indivisible, ignore interrupts */ + if (env->flags & DELAY_SLOT_MASK) { + return false; + } else { + superh_cpu_do_interrupt(cs); + return true; + } } return false; } diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 0bc2f9ff19..8bc132b27b 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -185,6 +185,9 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, } else if (env->flags & DELAY_SLOT_CONDITIONAL) { cpu_fprintf(f, "in conditional delay slot (delayed_pc=0x%08x)\n", env->delayed_pc); + } else if (env->flags & DELAY_SLOT_RTE) { + cpu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n", + 
env->delayed_pc); } } @@ -217,8 +220,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc) if (ctx->delayed_pc != (uint32_t) -1) { tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc); } - if ((ctx->tbflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) - != ctx->envflags) { + if ((ctx->tbflags & DELAY_SLOT_MASK) != ctx->envflags) { tcg_gen_movi_i32(cpu_flags, ctx->envflags); } } @@ -329,7 +331,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */ #define CHECK_NOT_DELAY_SLOT \ - if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + if (ctx->envflags & DELAY_SLOT_MASK) { \ gen_save_cpu_state(ctx, true); \ gen_helper_raise_slot_illegal_instruction(cpu_env); \ ctx->bstate = BS_EXCP; \ @@ -339,7 +341,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define CHECK_PRIVILEGED \ if (IS_USER(ctx)) { \ gen_save_cpu_state(ctx, true); \ - if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + if (ctx->envflags & DELAY_SLOT_MASK) { \ gen_helper_raise_slot_illegal_instruction(cpu_env); \ } else { \ gen_helper_raise_illegal_instruction(cpu_env); \ @@ -351,7 +353,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define CHECK_FPU_ENABLED \ if (ctx->tbflags & (1u << SR_FD)) { \ gen_save_cpu_state(ctx, true); \ - if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ + if (ctx->envflags & DELAY_SLOT_MASK) { \ gen_helper_raise_slot_fpu_disable(cpu_env); \ } else { \ gen_helper_raise_fpu_disable(cpu_env); \ @@ -428,8 +430,9 @@ static void _decode_opc(DisasContext * ctx) CHECK_NOT_DELAY_SLOT gen_write_sr(cpu_ssr); tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc); - ctx->envflags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT_RTE; ctx->delayed_pc = (uint32_t) - 1; + ctx->bstate = BS_STOP; return; case 0x0058: /* sets */ tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S)); @@ -1784,7 +1787,7 @@ static void _decode_opc(DisasContext * ctx) fflush(stderr); #endif 
gen_save_cpu_state(ctx, true); - if (ctx->envflags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { + if (ctx->envflags & DELAY_SLOT_MASK) { gen_helper_raise_slot_illegal_instruction(cpu_env); } else { gen_helper_raise_illegal_instruction(cpu_env); @@ -1798,14 +1801,14 @@ static void decode_opc(DisasContext * ctx) _decode_opc(ctx); - if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { + if (old_flags & DELAY_SLOT_MASK) { /* go out of the delay slot */ - ctx->envflags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + ctx->envflags &= ~DELAY_SLOT_MASK; tcg_gen_movi_i32(cpu_flags, ctx->envflags); ctx->bstate = BS_BRANCH; if (old_flags & DELAY_SLOT_CONDITIONAL) { gen_delayed_conditional_jump(ctx); - } else if (old_flags & DELAY_SLOT) { + } else { gen_jump(ctx); } @@ -1824,7 +1827,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) pc_start = tb->pc; ctx.pc = pc_start; ctx.tbflags = (uint32_t)tb->flags; - ctx.envflags = tb->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + ctx.envflags = tb->flags & DELAY_SLOT_MASK; ctx.bstate = BS_NONE; ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 
1 : 0; /* We don't know if the delayed pc came from a dynamic or static branch, diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c index 09afe136e5..eec9a4d49f 100644 --- a/target/sparc/int32_helper.c +++ b/target/sparc/int32_helper.c @@ -109,7 +109,7 @@ void sparc_cpu_do_interrupt(CPUState *cs) if (env->psret == 0) { if (cs->exception_index == 0x80 && env->def->features & CPU_FEATURE_TA0_SHUTDOWN) { - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } else { cpu_abort(cs, "Trap 0x%02x while interrupts disabled, Error state", cs->exception_index); diff --git a/tests/acpi-test-data/pc/SRAT.memhp b/tests/acpi-test-data/pc/SRAT.memhp Binary files differindex a7dddf7760..e508b4ae3c 100644 --- a/tests/acpi-test-data/pc/SRAT.memhp +++ b/tests/acpi-test-data/pc/SRAT.memhp diff --git a/tests/acpi-test-data/q35/SRAT.memhp b/tests/acpi-test-data/q35/SRAT.memhp Binary files differindex a7dddf7760..e508b4ae3c 100644 --- a/tests/acpi-test-data/q35/SRAT.memhp +++ b/tests/acpi-test-data/q35/SRAT.memhp diff --git a/tests/numa-test.c b/tests/numa-test.c index c3475d6d5e..3f636840b1 100644 --- a/tests/numa-test.c +++ b/tests/numa-test.c @@ -92,7 +92,7 @@ static QList *get_cpus(QDict **resp) *resp = qmp("{ 'execute': 'query-cpus' }"); g_assert(*resp); g_assert(qdict_haskey(*resp, "return")); - return qdict_get_qlist(*resp, "return"); + return qdict_get_qlist(*resp, "return"); } static void test_query_cpus(const void *data) @@ -100,7 +100,7 @@ static void test_query_cpus(const void *data) char *cli; QDict *resp; QList *cpus; - const QObject *e; + QObject *e; cli = make_cli(data, "-smp 8 -numa node,cpus=0-3 -numa node,cpus=4-7"); qtest_start(cli); @@ -124,6 +124,7 @@ static void test_query_cpus(const void *data) } else { g_assert_cmpint(node, ==, 1); } + qobject_decref(e); } QDECREF(resp); @@ -136,7 +137,7 @@ static void pc_numa_cpu(const void *data) char *cli; QDict *resp; QList *cpus; - const QObject *e; + QObject *e; 
cli = make_cli(data, "-cpu pentium -smp 8,sockets=2,cores=2,threads=2 " "-numa node,nodeid=0 -numa node,nodeid=1 " @@ -176,6 +177,7 @@ static void pc_numa_cpu(const void *data) } else { g_assert(false); } + qobject_decref(e); } QDECREF(resp); @@ -188,7 +190,7 @@ static void spapr_numa_cpu(const void *data) char *cli; QDict *resp; QList *cpus; - const QObject *e; + QObject *e; cli = make_cli(data, "-smp 4,cores=4 " "-numa node,nodeid=0 -numa node,nodeid=1 " @@ -220,6 +222,7 @@ static void spapr_numa_cpu(const void *data) } else { g_assert(false); } + qobject_decref(e); } QDECREF(resp); @@ -232,7 +235,7 @@ static void aarch64_numa_cpu(const void *data) char *cli; QDict *resp; QList *cpus; - const QObject *e; + QObject *e; cli = make_cli(data, "-smp 2 " "-numa node,nodeid=0 -numa node,nodeid=1 " @@ -262,6 +265,7 @@ static void aarch64_numa_cpu(const void *data) } else { g_assert(false); } + qobject_decref(e); } QDECREF(resp); diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index e00c11b804..feee86115d 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -147,6 +147,10 @@ class TestSingleDrive(iotests.QMPTestCase): result = self.vm.qmp('block-stream', device='nonexistent') self.assert_qmp(result, 'error/class', 'GenericError') + def test_job_id_missing(self): + result = self.vm.qmp('block-stream', device='mid') + self.assert_qmp(result, 'error/class', 'GenericError') + class TestParallelOps(iotests.QMPTestCase): num_ops = 4 # Number of parallel block-stream operations diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 84bfd63fba..391c8573ca 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -...................... +....................... 
---------------------------------------------------------------------- -Ran 22 tests +Ran 23 tests OK diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 9e8f5b9d79..3bc14616be 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -143,7 +143,7 @@ read failed: Input/output error Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qcow2: Marking image as corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed +qcow2: Marking image as corrupt: Cluster allocation offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed qemu-img: Error while amending options: Input/output error === Testing unaligned reftable entry === diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out index dd879f1212..1d5e28d730 100644 --- a/tests/qemu-iotests/071.out +++ b/tests/qemu-iotests/071.out @@ -46,7 +46,7 @@ QMP_VERSION read failed: Input/output error {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === Testing blkverify on existing block device === @@ -85,7 +85,7 @@ wrote 512/512 bytes at offset 0 read failed: Input/output error {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} QEMU_PROG: Failed to flush the L2 table cache: Input/output error QEMU_PROG: Failed to flush the refcount block cache: Input/output error diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out index 97df69d71c..2533c31c78 100644 --- a/tests/qemu-iotests/081.out +++ 
b/tests/qemu-iotests/081.out @@ -36,7 +36,7 @@ read 10485760/10485760 bytes at offset 0 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} == using quorum rewrite corrupted mode == diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index dc6baf9366..59c5208272 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -8,7 +8,7 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "'node-name' must be specified for the root node"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === Duplicate ID === @@ -19,7 +19,7 @@ QMP_VERSION {"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}} {"error": {"class": "GenericError", "desc": "Duplicate node name"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === aio=native without O_DIRECT === @@ -29,7 +29,7 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === Encrypted image === @@ -40,14 +40,14 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}} 
{"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} Testing: QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} === Missing driver === @@ -58,6 +58,6 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Parameter 'driver' is missing"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/094.out b/tests/qemu-iotests/094.out index b66dc0787d..f52baffe70 100644 --- a/tests/qemu-iotests/094.out +++ b/tests/qemu-iotests/094.out @@ -7,5 +7,5 @@ Formatting 'TEST_DIR/source.IMGFMT', fmt=IMGFMT size=67108864 {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 67108864, "offset": 67108864, "speed": 0, "type": "mirror"}} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/117.out b/tests/qemu-iotests/117.out index f52dc1a357..851e214144 100644 --- a/tests/qemu-iotests/117.out +++ b/tests/qemu-iotests/117.out @@ -7,7 +7,7 @@ wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} No errors were found on the image. read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/119.out b/tests/qemu-iotests/119.out index 58e7114e8b..a8743b810e 100644 --- a/tests/qemu-iotests/119.out +++ b/tests/qemu-iotests/119.out @@ -6,6 +6,6 @@ read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/120.out b/tests/qemu-iotests/120.out index 9131b1bce9..1af1aeb38d 100644 --- a/tests/qemu-iotests/120.out +++ b/tests/qemu-iotests/120.out @@ -6,7 +6,7 @@ wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 0 diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out index 6c0445603a..0689b2b41c 100644 --- a/tests/qemu-iotests/140.out +++ b/tests/qemu-iotests/140.out @@ -10,5 +10,5 @@ read 65536/65536 bytes at offset 0 {"return": {}} can't open device nbd+unix:///drv?socket=TEST_DIR/nbd: No export with name 'drv' available {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff 
--git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out index d24ad20db3..0978b8985a 100644 --- a/tests/qemu-iotests/143.out +++ b/tests/qemu-iotests/143.out @@ -3,5 +3,5 @@ QA output created by 143 {"return": {}} can't open device nbd+unix:///no_such_export?socket=TEST_DIR/nbd: No export with name 'no_such_export' available {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} *** done diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147 index 32afea63d4..db34838cd0 100755 --- a/tests/qemu-iotests/147 +++ b/tests/qemu-iotests/147 @@ -147,6 +147,13 @@ class BuiltinNBD(NBDBlockdevAddBase): self._server_down() def test_inet6(self): + try: + socket.getaddrinfo("::0", "0", socket.AF_INET6, + socket.SOCK_STREAM, socket.IPPROTO_TCP, + socket.AI_ADDRCONFIG | socket.AI_CANONNAME) + except socket.gaierror: + # IPv6 not available, skip + return address = { 'type': 'inet', 'data': { 'host': '::1', diff --git a/tests/qemu-iotests/156.out b/tests/qemu-iotests/156.out index 3af82ae540..f96a564c1d 100644 --- a/tests/qemu-iotests/156.out +++ b/tests/qemu-iotests/156.out @@ -34,7 +34,7 @@ read 65536/65536 bytes at offset 196608 {"return": ""} {"return": {}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index 0f80194e85..c77343fc04 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -167,6 +167,11 @@ static void test_pair_jobs(int expected1, int expected2) block_job_start(job1); block_job_start(job2); + /* Release our reference now to trigger as many nice + * use-after-free bugs as 
possible. + */ + block_job_txn_unref(txn); + if (expected1 == -ECANCELED) { block_job_cancel(job1); } @@ -187,8 +192,6 @@ static void test_pair_jobs(int expected1, int expected2) g_assert_cmpint(result1, ==, expected1); g_assert_cmpint(result2, ==, expected2); - - block_job_txn_unref(txn); } static void test_pair_jobs_success(void) diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 740e740398..23bdf1a932 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -116,11 +116,11 @@ static void test_job_ids(void) job[1] = do_test_id(blk[1], "id0", false); /* But once job[0] finishes we can reuse its ID */ - block_job_unref(job[0]); + block_job_early_fail(job[0]); job[1] = do_test_id(blk[1], "id0", true); /* No job ID specified, defaults to the backend name ('drive1') */ - block_job_unref(job[1]); + block_job_early_fail(job[1]); job[1] = do_test_id(blk[1], NULL, true); /* Duplicate job ID */ @@ -133,9 +133,9 @@ static void test_job_ids(void) /* This one is valid */ job[2] = do_test_id(blk[2], "id_2", true); - block_job_unref(job[0]); - block_job_unref(job[1]); - block_job_unref(job[2]); + block_job_early_fail(job[0]); + block_job_early_fail(job[1]); + block_job_early_fail(job[2]); destroy_blk(blk[0]); destroy_blk(blk[1]); diff --git a/trace-events b/trace-events index e582d6315d..433865fa97 100644 --- a/trace-events +++ b/trace-events @@ -38,7 +38,7 @@ vm_state_notify(int running, int reason) "running %d reason %d" load_file(const char *name, const char *path) "name %s location %s" runstate_set(int new_state) "new state %d" system_wakeup_request(int reason) "reason=%d" -qemu_system_shutdown_request(void) "" +qemu_system_shutdown_request(int reason) "reason=%d" qemu_system_powerdown_request(void) "" # spice-qemu-char.c diff --git a/ui/cocoa.m b/ui/cocoa.m index 3a9bc4da5f..004ec2711c 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -934,7 +934,7 @@ QemuCocoaView *cocoaView; { COCOA_DEBUG("QemuCocoaAppController: applicationWillTerminate\n"); - 
qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); exit(0); } @@ -837,7 +837,7 @@ static void sdl_refresh(DisplayChangeListener *dcl) case SDL_QUIT: if (!no_quit) { no_shutdown = 0; - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); } break; case SDL_MOUSEMOTION: @@ -568,7 +568,7 @@ static void handle_windowevent(SDL_Event *ev) case SDL_WINDOWEVENT_CLOSE: if (!no_quit) { no_shutdown = 0; - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); } break; case SDL_WINDOWEVENT_SHOWN: @@ -611,7 +611,7 @@ void sdl2_poll_events(struct sdl2_console *scon) case SDL_QUIT: if (!no_quit) { no_shutdown = 0; - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); } break; case SDL_MOUSEMOTION: diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 4d9189e9ef..7e28c161b2 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -207,53 +207,6 @@ int qemu_pipe(int pipefd[2]) return ret; } -int qemu_utimens(const char *path, const struct timespec *times) -{ - struct timeval tv[2], tv_now; - struct stat st; - int i; -#ifdef CONFIG_UTIMENSAT - int ret; - - ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW); - if (ret != -1 || errno != ENOSYS) { - return ret; - } -#endif - /* Fallback: use utimes() instead of utimensat() */ - - /* happy if special cases */ - if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) { - return 0; - } - if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) { - return utimes(path, NULL); - } - - /* prepare for hard cases */ - if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) { - gettimeofday(&tv_now, NULL); - } - if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) { - stat(path, &st); - } - - for (i = 0; i < 2; i++) { - if (times[i].tv_nsec == UTIME_NOW) { - tv[i].tv_sec = tv_now.tv_sec; - tv[i].tv_usec = tv_now.tv_usec; - } else if 
(times[i].tv_nsec == UTIME_OMIT) { - tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime; - tv[i].tv_usec = 0; - } else { - tv[i].tv_sec = times[i].tv_sec; - tv[i].tv_usec = times[i].tv_nsec / 1000; - } - } - - return utimes(path, &tv[0]); -} - char * qemu_get_local_state_pathname(const char *relative_pathname) { diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index d8183f79d7..b39ae74fe0 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -1338,12 +1338,14 @@ char *socket_address_to_string(struct SocketAddress *addr, Error **errp) SocketAddress *socket_address_flatten(SocketAddressLegacy *addr_legacy) { - SocketAddress *addr = g_new(SocketAddress, 1); + SocketAddress *addr; if (!addr_legacy) { return NULL; } + addr = g_new(SocketAddress, 1); + switch (addr_legacy->type) { case SOCKET_ADDRESS_LEGACY_KIND_INET: addr->type = SOCKET_ADDRESS_TYPE_INET; @@ -1436,6 +1436,9 @@ static int usb_parse(const char *cmdline) void hmp_usb_add(Monitor *mon, const QDict *qdict) { const char *devname = qdict_get_str(qdict, "devname"); + + error_report("usb_add is deprecated, please use device_add instead"); + if (usb_device_add(devname) < 0) { error_report("could not add USB device '%s'", devname); } @@ -1444,6 +1447,9 @@ void hmp_usb_add(Monitor *mon, const QDict *qdict) void hmp_usb_del(Monitor *mon, const QDict *qdict) { const char *devname = qdict_get_str(qdict, "devname"); + + error_report("usb_del is deprecated, please use device_del instead"); + if (usb_device_del(devname) < 0) { error_report("could not delete USB device '%s'", devname); } @@ -1598,8 +1604,9 @@ void vm_state_notify(int running, RunState state) } } -static int reset_requested; -static int shutdown_requested, shutdown_signal = -1; +static ShutdownCause reset_requested; +static ShutdownCause shutdown_requested; +static int shutdown_signal; static pid_t shutdown_pid; static int powerdown_requested; static int debug_requested; @@ -1613,24 +1620,24 @@ static NotifierList wakeup_notifiers = 
NOTIFIER_LIST_INITIALIZER(wakeup_notifiers); static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE); -int qemu_shutdown_requested_get(void) +ShutdownCause qemu_shutdown_requested_get(void) { return shutdown_requested; } -int qemu_reset_requested_get(void) +ShutdownCause qemu_reset_requested_get(void) { return reset_requested; } static int qemu_shutdown_requested(void) { - return atomic_xchg(&shutdown_requested, 0); + return atomic_xchg(&shutdown_requested, SHUTDOWN_CAUSE_NONE); } static void qemu_kill_report(void) { - if (!qtest_driver() && shutdown_signal != -1) { + if (!qtest_driver() && shutdown_signal) { if (shutdown_pid == 0) { /* This happens for eg ^C at the terminal, so it's worth * avoiding printing an odd message in that case. @@ -1644,18 +1651,19 @@ static void qemu_kill_report(void) shutdown_cmd ? shutdown_cmd : "<unknown process>"); g_free(shutdown_cmd); } - shutdown_signal = -1; + shutdown_signal = 0; } } -static int qemu_reset_requested(void) +static ShutdownCause qemu_reset_requested(void) { - int r = reset_requested; + ShutdownCause r = reset_requested; + if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) { - reset_requested = 0; + reset_requested = SHUTDOWN_CAUSE_NONE; return r; } - return false; + return SHUTDOWN_CAUSE_NONE; } static int qemu_suspend_requested(void) @@ -1687,7 +1695,10 @@ static int qemu_debug_requested(void) return r; } -void qemu_system_reset(bool report) +/* + * Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE. 
+ */ +void qemu_system_reset(ShutdownCause reason) { MachineClass *mc; @@ -1700,8 +1711,9 @@ void qemu_system_reset(bool report) } else { qemu_devices_reset(); } - if (report) { - qapi_event_send_reset(&error_abort); + if (reason) { + qapi_event_send_reset(shutdown_caused_by_guest(reason), + &error_abort); } cpu_synchronize_all_post_reset(); } @@ -1719,7 +1731,7 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) if (!no_shutdown) { qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, !!info, info, &error_abort); - qemu_system_shutdown_request(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC); } if (info) { @@ -1736,12 +1748,12 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) } } -void qemu_system_reset_request(void) +void qemu_system_reset_request(ShutdownCause reason) { if (no_reboot) { - shutdown_requested = 1; + shutdown_requested = reason; } else { - reset_requested = 1; + reset_requested = reason; } cpu_stop_current(); qemu_notify_event(); @@ -1808,15 +1820,15 @@ void qemu_system_killed(int signal, pid_t pid) /* Cannot call qemu_system_shutdown_request directly because * we are in a signal handler. 
*/ - shutdown_requested = 1; + shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL; qemu_notify_event(); } -void qemu_system_shutdown_request(void) +void qemu_system_shutdown_request(ShutdownCause reason) { - trace_qemu_system_shutdown_request(); - replay_shutdown_request(); - shutdown_requested = 1; + trace_qemu_system_shutdown_request(reason); + replay_shutdown_request(reason); + shutdown_requested = reason; qemu_notify_event(); } @@ -1847,24 +1859,29 @@ void qemu_system_debug_request(void) static bool main_loop_should_exit(void) { RunState r; + ShutdownCause request; + if (qemu_debug_requested()) { vm_stop(RUN_STATE_DEBUG); } if (qemu_suspend_requested()) { qemu_system_suspend(); } - if (qemu_shutdown_requested()) { + request = qemu_shutdown_requested(); + if (request) { qemu_kill_report(); - qapi_event_send_shutdown(&error_abort); + qapi_event_send_shutdown(shutdown_caused_by_guest(request), + &error_abort); if (no_shutdown) { vm_stop(RUN_STATE_SHUTDOWN); } else { return true; } } - if (qemu_reset_requested()) { + request = qemu_reset_requested(); + if (request) { pause_all_vcpus(); - qemu_system_reset(VMRESET_REPORT); + qemu_system_reset(request); resume_all_vcpus(); if (!runstate_check(RUN_STATE_RUNNING) && !runstate_check(RUN_STATE_INMIGRATE)) { @@ -1873,7 +1890,7 @@ static bool main_loop_should_exit(void) } if (qemu_wakeup_requested()) { pause_all_vcpus(); - qemu_system_reset(VMRESET_SILENT); + qemu_system_reset(SHUTDOWN_CAUSE_NONE); notifier_list_notify(&wakeup_notifiers, &wakeup_reason); wakeup_reason = QEMU_WAKEUP_REASON_NONE; resume_all_vcpus(); @@ -3759,6 +3776,8 @@ int main(int argc, char **argv, char **envp) qemu_opts_parse_noisily(olist, "usb=on", false); break; case QEMU_OPTION_usbdevice: + error_report("'-usbdevice' is deprecated, please use " + "'-device usb-...' 
instead"); olist = qemu_find_opts("machine"); qemu_opts_parse_noisily(olist, "usb=on", false); add_device_config(DEV_USB, optarg); @@ -4697,7 +4716,7 @@ int main(int argc, char **argv, char **envp) reading from the other reads, because timer polling functions query clock values from the log. */ replay_checkpoint(CHECKPOINT_RESET); - qemu_system_reset(VMRESET_SILENT); + qemu_system_reset(SHUTDOWN_CAUSE_NONE); register_global_state(); if (replay_mode != REPLAY_MODE_NONE) { replay_vmstate_init(); |