diff options
135 files changed, 4436 insertions, 1106 deletions
diff --git a/.gitignore b/.gitignore index e32a58417a..090f974cb9 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,4 @@ cscope.* tags TAGS *~ +/tests/qemu-iotests/common.env diff --git a/MAINTAINERS b/MAINTAINERS index 7fc3cdb1d6..430688dcab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -538,6 +538,7 @@ S390 Virtio M: Alexander Graf <agraf@suse.de> S: Maintained F: hw/s390x/s390-*.c +X: hw/s390x/*pci*.[hc] S390 Virtio-ccw M: Cornelia Huck <cornelia.huck@de.ibm.com> @@ -548,6 +549,7 @@ F: hw/s390x/s390-virtio-ccw.c F: hw/s390x/css.[hc] F: hw/s390x/sclp*.[hc] F: hw/s390x/ipl*.[hc] +F: hw/s390x/*pci*.[hc] F: include/hw/s390x/ F: pc-bios/s390-ccw/ T: git git://github.com/cohuck/qemu virtio-ccw-upstr @@ -755,6 +757,7 @@ F: aio-*.c F: block* F: block/ F: hw/block/ +F: migration/block* F: qemu-img* F: qemu-io* F: tests/image-fuzzer/ @@ -1103,7 +1106,6 @@ S: Supported F: block/ssh.c ARCHIPELAGO -M: Chrysostomos Nanakos <cnanakos@grnet.gr> M: Chrysostomos Nanakos <chris@include.gr> S: Maintained F: block/archipelago.c diff --git a/arch_init.c b/arch_init.c index 7680d28be4..cfedbf08af 100644 --- a/arch_init.c +++ b/arch_init.c @@ -522,7 +522,7 @@ static void migration_bitmap_sync(void) address_space_sync_dirty_bitmap(&address_space_memory); QTAILQ_FOREACH(block, &ram_list.blocks, next) { - migration_bitmap_sync_range(block->mr->ram_addr, block->length); + migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); } trace_migration_bitmap_sync_end(migration_dirty_pages - num_dirty_pages_init); @@ -668,7 +668,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) offset >= last_offset) { break; } - if (offset >= block->length) { + if (offset >= block->used_length) { offset = 0; block = QTAILQ_NEXT(block, next); if (!block) { @@ -727,7 +727,7 @@ uint64_t ram_bytes_total(void) uint64_t total = 0; QTAILQ_FOREACH(block, &ram_list.blocks, next) - total += block->length; + total += block->used_length; return total; } @@ -831,7 +831,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) QTAILQ_FOREACH(block, &ram_list.blocks, next) { uint64_t block_pages; - block_pages = block->length >> TARGET_PAGE_BITS; + block_pages = block->used_length >> TARGET_PAGE_BITS; migration_dirty_pages += block_pages; } @@ -844,7 +844,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) QTAILQ_FOREACH(block, &ram_list.blocks, next) { qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); - qemu_put_be64(f, block->length); + qemu_put_be64(f, block->used_length); } qemu_mutex_unlock_ramlist(); @@ -1015,7 +1015,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, uint8_t len; if (flags & RAM_SAVE_FLAG_CONTINUE) { - if (!block || block->length <= offset) { + if (!block || block->max_length <= offset) { error_report("Ack, bad migration stream!"); return NULL; } @@ -1028,7 +1028,8 @@ static inline void *host_from_stream_offset(QEMUFile *f, id[len] = 0; QTAILQ_FOREACH(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id)) && block->length > offset) { + if (!strncmp(id, block->idstr, sizeof(id)) && + block->max_length > offset) { return memory_region_get_ram_ptr(block->mr) + offset; } } @@ -1085,11 +1086,14 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) QTAILQ_FOREACH(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id))) { - if (block->length != length) { - error_report("Length mismatch: %s: 0x" RAM_ADDR_FMT - " in != 0x" RAM_ADDR_FMT, id, length, - block->length); - ret = -EINVAL; + if (length != block->used_length) { + Error *local_err = NULL; + + ret = qemu_ram_resize(block->offset, length, &local_err); + if (local_err) { + error_report("%s", error_get_pretty(local_err)); + error_free(local_err); + } } break; } @@ -44,10 +44,12 @@ struct QEMUBH { QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) { QEMUBH *bh; - bh = g_new0(QEMUBH, 1); - bh->ctx = ctx; - bh->cb = cb; - bh->opaque = opaque; + bh = g_new(QEMUBH, 1); + *bh = (QEMUBH){ + .ctx = ctx, + .cb = cb, + .opaque = opaque, + }; qemu_mutex_lock(&ctx->bh_lock); bh->next = ctx->first_bh; /* Make sure that the members are ready before putting bh into list */ @@ -300,6 +302,7 @@ AioContext *aio_context_new(Error **errp) error_setg_errno(errp, -ret, "Failed to initialize event notifier"); return NULL; } + g_source_set_can_recurse(&ctx->source, true); aio_set_event_notifier(ctx, &ctx->notifier, (EventNotifierHandler *) event_notifier_test_and_clear); @@ -97,6 +97,10 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); +static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors); +static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -303,15 +307,32 @@ void path_combine(char *dest, int dest_size, } } -void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz) +void bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + char *dest, size_t sz, + Error **errp) { - if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) { - pstrcpy(dest, sz, bs->backing_file); + if (backing[0] == '\0' || path_has_protocol(backing) || + path_is_absolute(backing)) + { + pstrcpy(dest, sz, backing); + } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { + error_setg(errp, "Cannot use relative backing file names for '%s'", + backed); } else { - path_combine(dest, sz, bs->filename, bs->backing_file); + path_combine(dest, sz, backed, backing); } } +void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, + Error **errp) +{ + char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; + + bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, + dest, sz, errp); +} + void bdrv_register(BlockDriver *bdrv) { /* Block drivers without coroutine functions need emulation */ @@ -1179,7 +1200,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); /* Otherwise we won't be able to commit due to check in bdrv_commit */ - bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, + bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, bs->backing_blocker); out: bdrv_refresh_limits(bs, NULL); @@ -1217,7 +1238,14 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) QDECREF(options); goto free_exit; } else { - bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX); + bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + QDECREF(options); + goto free_exit; + } } if (!bs->drv || !bs->drv->supports_backing) { @@ -2188,8 +2216,8 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) || - bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || + bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { return -EBUSY; } @@ -3034,18 +3062,16 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), align >> BDRV_SECTOR_BITS); - if (max_nb_sectors > 0) { + if (nb_sectors < max_nb_sectors) { + ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + } else if (max_nb_sectors > 0) { QEMUIOVector local_qiov; - size_t local_sectors; - - max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS); - local_sectors = MIN(max_nb_sectors, nb_sectors); qemu_iovec_init(&local_qiov, qiov->niov); qemu_iovec_concat(&local_qiov, qiov, 0, - local_sectors * BDRV_SECTOR_SIZE); + max_nb_sectors * BDRV_SECTOR_SIZE); - ret = drv->bdrv_co_readv(bs, sector_num, local_sectors, + ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors, &local_qiov); qemu_iovec_destroy(&local_qiov); @@ -3218,6 +3244,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, if (ret == -ENOTSUP) { /* Fall back to bounce buffer if write zeroes is unsupported */ + int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, + MAX_WRITE_ZEROES_DEFAULT); + num = MIN(num, max_xfer_len); iov.iov_len = num * BDRV_SECTOR_SIZE; if (iov.iov_base == NULL) { iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE); @@ -3234,7 +3263,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, /* Keep bounce buffer around if it is big enough for all * all future requests. */ - if (num < max_write_zeroes) { + if (num < max_xfer_len) { qemu_vfree(iov.iov_base); iov.iov_base = NULL; } @@ -5389,8 +5418,20 @@ void bdrv_dirty_iter_init(BlockDriverState *bs, hbitmap_iter_init(hbi, bitmap->bitmap, 0); } -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) +void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); +} + +static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { @@ -5398,7 +5439,8 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, } } -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) +static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { @@ -5637,16 +5679,26 @@ void bdrv_img_create(const char *filename, const char *fmt, if (size == -1) { if (backing_file) { BlockDriverState *bs; + char *full_backing = g_new0(char, PATH_MAX); int64_t size; int back_flags; + bdrv_get_full_backing_filename_from_filename(filename, backing_file, + full_backing, PATH_MAX, + &local_err); + if (local_err) { + g_free(full_backing); + goto out; + } + /* backing files always opened read-only */ back_flags = flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags, + ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, backing_drv, &local_err); + g_free(full_backing); if (ret < 0) { goto out; } diff --git a/block/backup.c b/block/backup.c index 792e65514b..1c535b1ab9 100644 --- a/block/backup.c +++ b/block/backup.c @@ -360,6 +360,7 @@ static void coroutine_fn backup_run(void *opaque) hbitmap_free(job->bitmap); bdrv_iostatus_disable(target); + bdrv_op_unblock_all(target, job->common.blocker); data = g_malloc(sizeof(*data)); data->ret = ret; @@ -379,6 +380,11 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, assert(target); assert(cb); + if (bs == target) { + error_setg(errp, "Source and target cannot be the same"); + return; + } + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && !bdrv_iostatus_is_enabled(bs)) { @@ -386,6 +392,26 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } + if (!bdrv_is_inserted(bs)) { + error_setg(errp, "Device is not inserted: %s", + bdrv_get_device_name(bs)); + return; + } + + if (!bdrv_is_inserted(target)) { + error_setg(errp, "Device is not inserted: %s", + bdrv_get_device_name(target)); + return; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { + return; + } + + if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { + return; + } + len = bdrv_getlength(bs); if (len < 0) { error_setg_errno(errp, -len, "unable to get length for '%s'", @@ -399,6 +425,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, return; } + bdrv_op_block_all(target, job->common.blocker); + job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->target = target; diff --git a/block/block-backend.c b/block/block-backend.c index ef16d7356a..d00c129f15 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -260,9 +260,6 @@ int blk_attach_dev(BlockBackend *blk, void *dev) blk_ref(blk); blk->dev = dev; bdrv_iostatus_reset(blk->bs); - - /* We're expecting I/O from the device so bump up coroutine pool size */ - qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION); return 0; } @@ -290,7 +287,6 @@ void blk_detach_dev(BlockBackend *blk, void *dev) blk->dev_ops = NULL; blk->dev_opaque = NULL; bdrv_set_guest_block_size(blk->bs, 512); - qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION); blk_unref(blk); } diff --git a/block/mirror.c b/block/mirror.c index 2c6dd2a4c1..9019d1ba56 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -128,7 +128,8 @@ static void mirror_write_complete(void *opaque, int ret) BlockDriverState *source = s->common.bs; BlockErrorAction action; - bdrv_set_dirty(source, op->sector_num, op->nb_sectors); + bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num, + op->nb_sectors); action = mirror_error_action(s, false, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -145,7 +146,8 @@ static void mirror_read_complete(void *opaque, int ret) BlockDriverState *source = s->common.bs; BlockErrorAction action; - bdrv_set_dirty(source, op->sector_num, op->nb_sectors); + bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num, + op->nb_sectors); action = mirror_error_action(s, true, -ret); if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) { s->ret = ret; @@ -286,7 +288,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) next_sector += sectors_per_chunk; } - bdrv_reset_dirty(source, sector_num, nb_sectors); + bdrv_reset_dirty_bitmap(source, s->dirty_bitmap, sector_num, + nb_sectors); /* Copy the dirty cluster. */ s->in_flight++; @@ -442,7 +445,7 @@ static void coroutine_fn mirror_run(void *opaque) assert(n > 0); if (ret == 1) { - bdrv_set_dirty(bs, sector_num, n); + bdrv_set_dirty_bitmap(bs, s->dirty_bitmap, sector_num, n); sector_num = next; } else { sector_num += n; diff --git a/block/qapi.c b/block/qapi.c index fa68ba731f..a6fd6f7ab2 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -214,7 +214,12 @@ void bdrv_query_image_info(BlockDriverState *bs, info->backing_filename = g_strdup(backing_filename); info->has_backing_filename = true; bdrv_get_full_backing_filename(bs, backing_filename2, - sizeof(backing_filename2)); + sizeof(backing_filename2), &err); + if (err) { + error_propagate(errp, err); + qapi_free_ImageInfo(info); + return; + } if (strcmp(backing_filename, backing_filename2) != 0) { info->full_backing_filename = diff --git a/block/vmdk.c b/block/vmdk.c index bfff900ba6..52cb8888e5 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1891,8 +1891,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) } if (backing_file) { BlockDriverState *bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL, + char *full_backing = g_new0(char, PATH_MAX); + bdrv_get_full_backing_filename_from_filename(filename, backing_file, + full_backing, PATH_MAX, + &local_err); + if (local_err) { + g_free(full_backing); + error_propagate(errp, local_err); + ret = -ENOENT; + goto exit; + } + ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL, errp); + g_free(full_backing); if (ret != 0) { goto exit; } diff --git a/blockdev.c b/blockdev.c index 5651a8e140..d59efd3f15 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1559,6 +1559,79 @@ static void drive_backup_clean(BlkTransactionState *common) } } +typedef struct BlockdevBackupState { + BlkTransactionState common; + BlockDriverState *bs; + BlockJob *job; + AioContext *aio_context; +} BlockdevBackupState; + +static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackup *backup; + BlockDriverState *bs, *target; + Error *local_err = NULL; + + assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->blockdev_backup; + + bs = bdrv_find(backup->device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, backup->device); + return; + } + + target = bdrv_find(backup->target); + if (!target) { + error_set(errp, QERR_DEVICE_NOT_FOUND, backup->target); + return; + } + + /* AioContext is released in .clean() */ + state->aio_context = bdrv_get_aio_context(bs); + if (state->aio_context != bdrv_get_aio_context(target)) { + state->aio_context = NULL; + error_setg(errp, "Backup between two IO threads is not implemented"); + return; + } + aio_context_acquire(state->aio_context); + + qmp_blockdev_backup(backup->device, backup->target, + backup->sync, + backup->has_speed, backup->speed, + backup->has_on_source_error, backup->on_source_error, + backup->has_on_target_error, backup->on_target_error, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + state->bs = bs; + state->job = state->bs->job; +} + +static void blockdev_backup_abort(BlkTransactionState *common) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockDriverState *bs = state->bs; + + /* Only cancel if it's the job we started */ + if (bs && bs->job && bs->job == state->job) { + block_job_cancel_sync(bs->job); + } +} + +static void blockdev_backup_clean(BlkTransactionState *common) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + + if (state->aio_context) { + aio_context_release(state->aio_context); + } +} + static void abort_prepare(BlkTransactionState *common, Error **errp) { error_setg(errp, "Transaction aborted using Abort action"); @@ -1582,6 +1655,12 @@ static const BdrvActionOps actions[] = { .abort = drive_backup_abort, .clean = drive_backup_clean, }, + [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { + .instance_size = sizeof(BlockdevBackupState), + .prepare = blockdev_backup_prepare, + .abort = blockdev_backup_abort, + .clean = blockdev_backup_clean, + }, [TRANSACTION_ACTION_KIND_ABORT] = { .instance_size = sizeof(BlkTransactionState), .prepare = abort_prepare, @@ -2139,7 +2218,7 @@ void qmp_block_commit(const char *device, /* drain all i/o before commits */ bdrv_drain_all(); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) { goto out; } @@ -2172,6 +2251,10 @@ void qmp_block_commit(const char *device, assert(bdrv_get_aio_context(base_bs) == aio_context); + if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } + /* Do not allow attempts to commit an image into itself */ if (top_bs == base_bs) { error_setg(errp, "cannot commit an image into itself"); @@ -2240,6 +2323,8 @@ void qmp_drive_backup(const char *device, const char *target, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); + /* Although backup_run has this check too, we need to use bs->drv below, so + * do an early check redundantly. */ if (!bdrv_is_inserted(bs)) { error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); goto out; @@ -2256,6 +2341,7 @@ void qmp_drive_backup(const char *device, const char *target, } } + /* Early check to avoid creating target */ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { goto out; } @@ -2323,6 +2409,57 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) return bdrv_named_nodes_list(); } +void qmp_blockdev_backup(const char *device, const char *target, + enum MirrorSyncMode sync, + bool has_speed, int64_t speed, + bool has_on_source_error, + BlockdevOnError on_source_error, + bool has_on_target_error, + BlockdevOnError on_target_error, + Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *target_bs; + Error *local_err = NULL; + AioContext *aio_context; + + if (!has_speed) { + speed = 0; + } + if (!has_on_source_error) { + on_source_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_on_target_error) { + on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + target_bs = bdrv_find(target); + if (!target_bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, target); + goto out; + } + + bdrv_ref(target_bs); + bdrv_set_aio_context(target_bs, aio_context); + backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + block_job_cb, bs, &local_err); + if (local_err != NULL) { + bdrv_unref(target_bs); + error_propagate(errp, local_err); + } +out: + aio_context_release(aio_context); +} + #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) void qmp_drive_mirror(const char *device, const char *target, @@ -1830,7 +1830,7 @@ if test "$seccomp" != "no" ; then seccomp="yes" else if test "$seccomp" = "yes"; then - feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.0" + feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.1" fi seccomp="no" fi diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c index 4bf2cde279..259fcb48a4 100644 --- a/coroutine-ucontext.c +++ b/coroutine-ucontext.c @@ -25,7 +25,6 @@ #include <stdlib.h> #include <setjmp.h> #include <stdint.h> -#include <pthread.h> #include <ucontext.h> #include "qemu-common.h" #include "block/coroutine_int.h" @@ -48,15 +47,8 @@ typedef struct { /** * Per-thread coroutine bookkeeping */ -typedef struct { - /** Currently executing coroutine */ - Coroutine *current; - - /** The default coroutine */ - CoroutineUContext leader; -} CoroutineThreadState; - -static pthread_key_t thread_state_key; +static __thread CoroutineUContext leader; +static __thread Coroutine *current; /* * va_args to makecontext() must be type 'int', so passing @@ -68,36 +60,6 @@ union cc_arg { int i[2]; }; -static CoroutineThreadState *coroutine_get_thread_state(void) -{ - CoroutineThreadState *s = pthread_getspecific(thread_state_key); - - if (!s) { - s = g_malloc0(sizeof(*s)); - s->current = &s->leader.base; - pthread_setspecific(thread_state_key, s); - } - return s; -} - -static void qemu_coroutine_thread_cleanup(void *opaque) -{ - CoroutineThreadState *s = opaque; - - g_free(s); -} - -static void __attribute__((constructor)) coroutine_init(void) -{ - int ret; - - ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup); - if (ret != 0) { - fprintf(stderr, "unable to create leader key: %s\n", strerror(errno)); - abort(); - } -} - static void coroutine_trampoline(int i0, int i1) { union cc_arg arg; @@ -193,15 +155,23 @@ void qemu_coroutine_delete(Coroutine *co_) g_free(co); } -CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - CoroutineAction action) +/* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). If we allow it to do that then it + * hoists the code to get the address of the TLS variable "current" + * out of the while() loop. This is an invalid transformation because + * the sigsetjmp() call may be called when running thread A but + * return in thread B, and so we might be in a different thread + * context each time round the loop. + */ +CoroutineAction __attribute__((noinline)) +qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) { CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); - CoroutineThreadState *s = coroutine_get_thread_state(); int ret; - s->current = to_; + current = to_; ret = sigsetjmp(from->env, 0); if (ret == 0) { @@ -212,14 +182,13 @@ CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, Coroutine *qemu_coroutine_self(void) { - CoroutineThreadState *s = coroutine_get_thread_state(); - - return s->current; + if (!current) { + current = &leader.base; + } + return current; } bool qemu_in_coroutine(void) { - CoroutineThreadState *s = pthread_getspecific(thread_state_key); - - return s && s->current->caller; + return current && current->caller; } diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index 126d88dc15..6ee2ff89ff 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -1,3 +1,4 @@ +include pci.mak CONFIG_VIRTIO=y CONFIG_SCLPCONSOLE=y CONFIG_S390_FLIC=y diff --git a/device_tree.c b/device_tree.c index df9eed9cbc..4cb1cd50aa 100644 --- a/device_tree.c +++ b/device_tree.c @@ -324,6 +324,7 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt, uint64_t value; int cellnum, vnum, ncells; uint32_t hival; + int ret; propcells = g_new0(uint32_t, numvalues * 2); @@ -331,18 +332,23 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt, for (vnum = 0; vnum < numvalues; vnum++) { ncells = values[vnum * 2]; if (ncells != 1 && ncells != 2) { - return -1; + ret = -1; + goto out; } value = values[vnum * 2 + 1]; hival = cpu_to_be32(value >> 32); if (ncells > 1) { propcells[cellnum++] = hival; } else if (hival != 0) { - return -1; + ret = -1; + goto out; } propcells[cellnum++] = cpu_to_be32(value); } - return qemu_fdt_setprop(fdt, node_path, property, propcells, - cellnum * sizeof(uint32_t)); + ret = qemu_fdt_setprop(fdt, node_path, property, propcells, + cellnum * sizeof(uint32_t)); +out: + g_free(propcells); + return ret; } @@ -75,6 +75,11 @@ static MemoryRegion io_mem_unassigned; /* RAM is mmap-ed with MAP_SHARED */ #define RAM_SHARED (1 << 1) +/* Only a portion of RAM (used_length) is actually used, and migrated. + * This used_length size can change across reboots. + */ +#define RAM_RESIZEABLE (1 << 2) + #endif struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); @@ -812,11 +817,11 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr) /* The list is protected by the iothread lock here. */ block = ram_list.mru_block; - if (block && addr - block->offset < block->length) { + if (block && addr - block->offset < block->max_length) { goto found; } QTAILQ_FOREACH(block, &ram_list.blocks, next) { - if (addr - block->offset < block->length) { + if (addr - block->offset < block->max_length) { goto found; } } @@ -850,7 +855,7 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length, { if (length == 0) return; - cpu_physical_memory_clear_dirty_range(start, length, client); + cpu_physical_memory_clear_dirty_range_type(start, length, client); if (tcg_enabled()) { tlb_reset_dirty_range_all(start, length); @@ -1186,7 +1191,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size) QTAILQ_FOREACH(block, &ram_list.blocks, next) { ram_addr_t end, next = RAM_ADDR_MAX; - end = block->offset + block->length; + end = block->offset + block->max_length; QTAILQ_FOREACH(next_block, &ram_list.blocks, next) { if (next_block->offset >= end) { @@ -1214,7 +1219,7 @@ ram_addr_t last_ram_offset(void) ram_addr_t last = 0; QTAILQ_FOREACH(block, &ram_list.blocks, next) - last = MAX(last, block->offset + block->length); + last = MAX(last, block->offset + block->max_length); return last; } @@ -1296,6 +1301,49 @@ static int memory_try_enable_merging(void *addr, size_t len) return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); } +/* Only legal before guest might have detected the memory size: e.g. on + * incoming migration, or right after reset. + * + * As memory core doesn't know how is memory accessed, it is up to + * resize callback to update device state and/or add assertions to detect + * misuse, if necessary. + */ +int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp) +{ + RAMBlock *block = find_ram_block(base); + + assert(block); + + if (block->used_length == newsize) { + return 0; + } + + if (!(block->flags & RAM_RESIZEABLE)) { + error_setg_errno(errp, EINVAL, + "Length mismatch: %s: 0x" RAM_ADDR_FMT + " in != 0x" RAM_ADDR_FMT, block->idstr, + newsize, block->used_length); + return -EINVAL; + } + + if (block->max_length < newsize) { + error_setg_errno(errp, EINVAL, + "Length too large: %s: 0x" RAM_ADDR_FMT + " > 0x" RAM_ADDR_FMT, block->idstr, + newsize, block->max_length); + return -EINVAL; + } + + cpu_physical_memory_clear_dirty_range(block->offset, block->used_length); + block->used_length = newsize; + cpu_physical_memory_set_dirty_range(block->offset, block->used_length); + memory_region_set_size(block->mr, newsize); + if (block->resized) { + block->resized(block->idstr, newsize, block->host); + } + return 0; +} + static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) { RAMBlock *block; @@ -1305,13 +1353,14 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) /* This assumes the iothread lock is taken here too. */ qemu_mutex_lock_ramlist(); - new_block->offset = find_ram_offset(new_block->length); + new_block->offset = find_ram_offset(new_block->max_length); if (!new_block->host) { if (xen_enabled()) { - xen_ram_alloc(new_block->offset, new_block->length, new_block->mr); + xen_ram_alloc(new_block->offset, new_block->max_length, + new_block->mr); } else { - new_block->host = phys_mem_alloc(new_block->length, + new_block->host = phys_mem_alloc(new_block->max_length, &new_block->mr->align); if (!new_block->host) { error_setg_errno(errp, errno, @@ -1320,13 +1369,13 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) qemu_mutex_unlock_ramlist(); return -1; } - memory_try_enable_merging(new_block->host, new_block->length); + memory_try_enable_merging(new_block->host, new_block->max_length); } } /* Keep the list sorted from biggest to smallest block. */ QTAILQ_FOREACH(block, &ram_list.blocks, next) { - if (block->length < new_block->length) { + if (block->max_length < new_block->max_length) { break; } } @@ -1350,14 +1399,15 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) old_ram_size, new_ram_size); } } - cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length); + cpu_physical_memory_set_dirty_range(new_block->offset, + new_block->used_length); - qemu_ram_setup_dump(new_block->host, new_block->length); - qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE); - qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK); + qemu_ram_setup_dump(new_block->host, new_block->max_length); + qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE); + qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK); if (kvm_enabled()) { - kvm_setup_guest_memory(new_block->host, new_block->length); + kvm_setup_guest_memory(new_block->host, new_block->max_length); } return new_block->offset; @@ -1391,7 +1441,8 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, size = TARGET_PAGE_ALIGN(size); new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; - new_block->length = size; + new_block->used_length = size; + new_block->max_length = size; new_block->flags = share ? RAM_SHARED : 0; new_block->host = file_ram_alloc(new_block, size, mem_path, errp); @@ -1410,7 +1461,12 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, } #endif -ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, +static +ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + void (*resized)(const char*, + uint64_t length, + void *host), + void *host, bool resizeable, MemoryRegion *mr, Error **errp) { RAMBlock *new_block; @@ -1418,14 +1474,21 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, Error *local_err = NULL; size = TARGET_PAGE_ALIGN(size); + max_size = TARGET_PAGE_ALIGN(max_size); new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; - new_block->length = size; + new_block->resized = resized; + new_block->used_length = size; + new_block->max_length = max_size; + assert(max_size >= size); new_block->fd = -1; new_block->host = host; if (host) { new_block->flags |= RAM_PREALLOC; } + if (resizeable) { + new_block->flags |= RAM_RESIZEABLE; + } addr = ram_block_add(new_block, &local_err); if (local_err) { g_free(new_block); @@ -1435,9 +1498,24 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, return addr; } +ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + MemoryRegion *mr, Error **errp) +{ + return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp); +} + ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp) { - return qemu_ram_alloc_from_ptr(size, NULL, mr, errp); + return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp); +} + +ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz, + void (*resized)(const char*, + uint64_t length, + void *host), + MemoryRegion *mr, Error **errp) +{ + return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp); } void qemu_ram_free_from_ptr(ram_addr_t addr) @@ -1475,11 +1553,11 @@ void qemu_ram_free(ram_addr_t addr) xen_invalidate_map_cache_entry(block->host); #ifndef _WIN32 } else if (block->fd >= 0) { - munmap(block->host, block->length); + munmap(block->host, block->max_length); close(block->fd); #endif } else { - qemu_anon_ram_free(block->host, block->length); + qemu_anon_ram_free(block->host, block->max_length); } g_free(block); break; @@ -1499,7 +1577,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) QTAILQ_FOREACH(block, &ram_list.blocks, next) { offset = addr - block->offset; - if (offset < block->length) { + if (offset < block->max_length) { vaddr = ramblock_ptr(block, offset); if (block->flags & RAM_PREALLOC) { ; @@ -1575,7 +1653,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr) return xen_map_cache(addr, 0, 0); } else if (block->host == NULL) { block->host = - xen_map_cache(block->offset, block->length, 1); + xen_map_cache(block->offset, block->max_length, 1); } } return ramblock_ptr(block, addr - block->offset); @@ -1594,9 +1672,9 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size) RAMBlock *block; QTAILQ_FOREACH(block, &ram_list.blocks, next) { - if (addr - block->offset < block->length) { - if (addr - block->offset + *size > block->length) - *size = block->length - addr + block->offset; + if (addr - block->offset < block->max_length) { + if (addr - block->offset + *size > block->max_length) + *size = block->max_length - addr + block->offset; return ramblock_ptr(block, addr - block->offset); } } @@ -1619,7 +1697,7 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) } block = ram_list.mru_block; - if (block && block->host && host - block->host < block->length) { + if (block && block->host && host - block->host < block->max_length) { goto found; } @@ -1628,7 +1706,7 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) if (block->host == NULL) { continue; } - if (host - block->host < block->length) { + if (host - block->host < block->max_length) { goto found; } } @@ -2882,7 +2960,7 @@ void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) RAMBlock *block; QTAILQ_FOREACH(block, &ram_list.blocks, next) { - func(block->host, block->offset, block->length, opaque); + func(block->host, block->offset, block->used_length, opaque); } } #endif diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 2a28978cba..39c5d7103c 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -198,7 +198,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_BACKUP_SOURCE, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_CHANGE, s->blocker); - blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_SOURCE, s->blocker); + blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_TARGET, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EJECT, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, s->blocker); blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, s->blocker); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index aa1ed986d2..ce079aefdd 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -476,7 +476,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (dw10) { case NVME_NUMBER_OF_QUEUES: - req->cqe.result = cpu_to_le32(n->num_queues); + req->cqe.result = + cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); break; default: return NVME_INVALID_FIELD | NVME_DNR; @@ -490,7 +491,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (dw10) { case NVME_NUMBER_OF_QUEUES: - req->cqe.result = cpu_to_le32(n->num_queues); + req->cqe.result = + cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); break; default: return NVME_INVALID_FIELD | NVME_DNR; @@ -813,7 +815,7 @@ static int nvme_init(PCIDevice *pci_dev) NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - n->bar.vs = 0x00010001; + n->bar.vs = 0x00010100; n->bar.intmc = n->bar.intms = 0; for (i = 0; i < n->num_namespaces; i++) { diff --git a/hw/core/loader.c b/hw/core/loader.c index f2b34da240..fcd4705d78 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -724,12 +724,22 @@ static void rom_insert(Rom *rom) QTAILQ_INSERT_TAIL(&roms, rom, next); } +static void fw_cfg_resized(const char *id, uint64_t length, void *host) +{ + if (fw_cfg) { + fw_cfg_modify_file(fw_cfg, id + strlen("/rom@"), host, length); + } +} + static void *rom_set_mr(Rom *rom, Object *owner, const char *name) { void *data; rom->mr = g_malloc(sizeof(*rom->mr)); - memory_region_init_ram(rom->mr, owner, name, rom->datasize, &error_abort); + memory_region_init_resizeable_ram(rom->mr, owner, name, + rom->datasize, rom->romsize, + fw_cfg_resized, + &error_abort); memory_region_set_readonly(rom->mr, true); vmstate_register_ram_global(rom->mr); @@ -824,7 +834,7 @@ err: } ram_addr_t rom_add_blob(const char *name, const void *blob, size_t len, - hwaddr addr, const char *fw_file_name, + size_t max_len, hwaddr addr, const char *fw_file_name, FWCfgReadCallback fw_callback, void *callback_opaque) { Rom *rom; @@ -833,7 +843,7 @@ ram_addr_t rom_add_blob(const char *name, const void *blob, size_t len, rom = g_malloc0(sizeof(*rom)); rom->name = g_strdup(name); rom->addr = addr; - rom->romsize = len; + rom->romsize = max_len ? max_len : len; rom->datasize = len; rom->data = g_malloc0(rom->datasize); memcpy(rom->data, blob, len); @@ -853,7 +863,7 @@ ram_addr_t rom_add_blob(const char *name, const void *blob, size_t len, fw_cfg_add_file_callback(fw_cfg, fw_file_name, fw_callback, callback_opaque, - data, rom->romsize); + data, rom->datasize); } return ret; } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a4d0c0c8bf..6a2e9c52bc 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -68,6 +68,9 @@ #define ACPI_BUILD_TABLE_SIZE 0x20000 +/* Reserve RAM space for tables: add another order of magnitude. */ +#define ACPI_BUILD_TABLE_MAX_SIZE 0x200000 + /* #define DEBUG_ACPI_BUILD */ #ifdef DEBUG_ACPI_BUILD #define ACPI_BUILD_DPRINTF(fmt, ...) \ @@ -1718,6 +1721,11 @@ static void acpi_build_update(void *build_opaque, uint32_t offset) acpi_build(build_state->guest_info, &tables); assert(acpi_data_len(tables.table_data) == build_state->table_size); + + /* Make sure RAM size is correct - in case it got changed by migration */ + qemu_ram_resize(build_state->table_ram, build_state->table_size, + &error_abort); + memcpy(qemu_get_ram_ptr(build_state->table_ram), tables.table_data->data, build_state->table_size); @@ -1734,10 +1742,10 @@ static void acpi_build_reset(void *build_opaque) } static ram_addr_t acpi_add_rom_blob(AcpiBuildState *build_state, GArray *blob, - const char *name) + const char *name, uint64_t max_size) { - return rom_add_blob(name, blob->data, acpi_data_len(blob), -1, name, - acpi_build_update, build_state); + return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, + name, acpi_build_update, build_state); } static const VMStateDescription vmstate_acpi_build = { @@ -1781,11 +1789,12 @@ void acpi_setup(PcGuestInfo *guest_info) /* Now expose it all to Guest */ build_state->table_ram = acpi_add_rom_blob(build_state, tables.table_data, - ACPI_BUILD_TABLE_FILE); + ACPI_BUILD_TABLE_FILE, + ACPI_BUILD_TABLE_MAX_SIZE); assert(build_state->table_ram != RAM_ADDR_MAX); build_state->table_size = acpi_data_len(tables.table_data); - acpi_add_rom_blob(NULL, tables.linker, "etc/table-loader"); + acpi_add_rom_blob(NULL, tables.linker, "etc/table-loader", 0); fw_cfg_add_file(guest_info->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index c63b7e556e..a71e6e014f 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -621,20 +621,107 @@ static void cmd_request_sense(IDEState *s, uint8_t *buf) static void cmd_inquiry(IDEState *s, uint8_t *buf) { + uint8_t page_code = buf[2]; int max_len = buf[4]; - buf[0] = 0x05; /* CD-ROM */ - buf[1] = 0x80; /* removable */ - buf[2] = 0x00; /* ISO */ - buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */ - buf[4] = 31; /* additional length */ - buf[5] = 0; /* reserved */ - buf[6] = 0; /* reserved */ - buf[7] = 0; /* reserved */ - padstr8(buf + 8, 8, "QEMU"); - padstr8(buf + 16, 16, "QEMU DVD-ROM"); - padstr8(buf + 32, 4, s->version); - ide_atapi_cmd_reply(s, 36, max_len); + unsigned idx = 0; + unsigned size_idx; + unsigned preamble_len; + + /* If the EVPD (Enable Vital Product Data) bit is set in byte 1, + * we are being asked for a specific page of info indicated by byte 2. */ + if (buf[1] & 0x01) { + preamble_len = 4; + size_idx = 3; + + buf[idx++] = 0x05; /* CD-ROM */ + buf[idx++] = page_code; /* Page Code */ + buf[idx++] = 0x00; /* reserved */ + idx++; /* length (set later) */ + + switch (page_code) { + case 0x00: + /* Supported Pages: List of supported VPD responses. */ + buf[idx++] = 0x00; /* 0x00: Supported Pages, and: */ + buf[idx++] = 0x83; /* 0x83: Device Identification. */ + break; + + case 0x83: + /* Device Identification. Each entry is optional, but the entries + * included here are modeled after libata's VPD responses. + * If the response is given, at least one entry must be present. */ + + /* Entry 1: Serial */ + if (idx + 24 > max_len) { + /* Not enough room for even the first entry: */ + /* 4 byte header + 20 byte string */ + ide_atapi_cmd_error(s, ILLEGAL_REQUEST, + ASC_DATA_PHASE_ERROR); + return; + } + buf[idx++] = 0x02; /* Ascii */ + buf[idx++] = 0x00; /* Vendor Specific */ + buf[idx++] = 0x00; + buf[idx++] = 20; /* Remaining length */ + padstr8(buf + idx, 20, s->drive_serial_str); + idx += 20; + + /* Entry 2: Drive Model and Serial */ + if (idx + 72 > max_len) { + /* 4 (header) + 8 (vendor) + 60 (model & serial) */ + goto out; + } + buf[idx++] = 0x02; /* Ascii */ + buf[idx++] = 0x01; /* T10 Vendor */ + buf[idx++] = 0x00; + buf[idx++] = 68; + padstr8(buf + idx, 8, "ATA"); /* Generic T10 vendor */ + idx += 8; + padstr8(buf + idx, 40, s->drive_model_str); + idx += 40; + padstr8(buf + idx, 20, s->drive_serial_str); + idx += 20; + + /* Entry 3: WWN */ + if (s->wwn && (idx + 12 <= max_len)) { + /* 4 byte header + 8 byte wwn */ + buf[idx++] = 0x01; /* Binary */ + buf[idx++] = 0x03; /* NAA */ + buf[idx++] = 0x00; + buf[idx++] = 0x08; + stq_be_p(&buf[idx], s->wwn); + idx += 8; + } + break; + + default: + /* SPC-3, revision 23 sec. 6.4 */ + ide_atapi_cmd_error(s, ILLEGAL_REQUEST, + ASC_INV_FIELD_IN_CMD_PACKET); + return; + } + } else { + preamble_len = 5; + size_idx = 4; + + buf[0] = 0x05; /* CD-ROM */ + buf[1] = 0x80; /* removable */ + buf[2] = 0x00; /* ISO */ + buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */ + /* buf[size_idx] set below. */ + buf[5] = 0; /* reserved */ + buf[6] = 0; /* reserved */ + buf[7] = 0; /* reserved */ + padstr8(buf + 8, 8, "QEMU"); + padstr8(buf + 16, 16, "QEMU DVD-ROM"); + padstr8(buf + 32, 4, s->version); + idx = 36; + } + + out: + buf[size_idx] = idx - preamble_len; + ide_atapi_cmd_reply(s, idx, max_len); + return; } static void cmd_get_configuration(IDEState *s, uint8_t *buf) diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 8a3eca40d2..c998003bf3 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -296,6 +296,7 @@ typedef struct IDEDMAOps IDEDMAOps; #define ASC_INCOMPATIBLE_FORMAT 0x30 #define ASC_MEDIUM_NOT_PRESENT 0x3a #define ASC_SAVING_PARAMETERS_NOT_SUPPORTED 0x39 +#define ASC_DATA_PHASE_ERROR 0x4b #define ASC_MEDIA_REMOVAL_PREVENTED 0x53 #define CFA_NO_ERROR 0x00 diff --git a/hw/lm32/lm32_hwsetup.h b/hw/lm32/lm32_hwsetup.h index 9fd5e697a7..838754d5d8 100644 --- a/hw/lm32/lm32_hwsetup.h +++ b/hw/lm32/lm32_hwsetup.h @@ -73,7 +73,8 @@ static inline void hwsetup_free(HWSetup *hw) static inline void hwsetup_create_rom(HWSetup *hw, hwaddr base) { - rom_add_blob("hwsetup", hw->data, TARGET_PAGE_SIZE, base, NULL, NULL, NULL); + rom_add_blob("hwsetup", hw->data, TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE, base, NULL, NULL, NULL); } static inline void hwsetup_add_u8(HWSetup *hw, uint8_t u) diff --git a/hw/net/allwinner_emac.c b/hw/net/allwinner_emac.c index d780ba0fcb..0407dee6da 100644 --- a/hw/net/allwinner_emac.c +++ b/hw/net/allwinner_emac.c @@ -218,13 +218,6 @@ static ssize_t aw_emac_receive(NetClientState *nc, const uint8_t *buf, return size; } -static void aw_emac_cleanup(NetClientState *nc) -{ - AwEmacState *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static void aw_emac_reset(DeviceState *dev) { AwEmacState *s = AW_EMAC(dev); @@ -433,7 +426,6 @@ static NetClientInfo net_aw_emac_info = { .size = sizeof(NICState), .can_receive = aw_emac_can_receive, .receive = aw_emac_receive, - .cleanup = aw_emac_cleanup, .link_status_changed = aw_emac_set_link, }; diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index de26609c9d..55b629387c 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -1209,14 +1209,6 @@ static const MemoryRegionOps gem_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void gem_cleanup(NetClientState *nc) -{ - GemState *s = qemu_get_nic_opaque(nc); - - DB_PRINT("\n"); - s->nic = NULL; -} - static void gem_set_link(NetClientState *nc) { DB_PRINT("\n"); @@ -1228,7 +1220,6 @@ static NetClientInfo net_gem_info = { .size = sizeof(NICState), .can_receive = gem_can_receive, .receive = gem_receive, - .cleanup = gem_cleanup, .link_status_changed = gem_set_link, }; diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c index 7eab7ad0cc..7ce13d2b46 100644 --- a/hw/net/dp8393x.c +++ b/hw/net/dp8393x.c @@ -859,22 +859,11 @@ static void nic_reset(void *opaque) dp8393x_update_irq(s); } -static void nic_cleanup(NetClientState *nc) -{ - dp8393xState *s = qemu_get_nic_opaque(nc); - - timer_del(s->watchdog); - timer_free(s->watchdog); - - g_free(s); -} - static NetClientInfo net_dp83932_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = nic_can_receive, .receive = nic_receive, - .cleanup = nic_cleanup, }; void dp83932_init(NICInfo *nd, hwaddr base, int it_shift, diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 89c5788b1c..a207e21bcf 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1503,14 +1503,6 @@ e1000_mmio_setup(E1000State *d) } static void -e1000_cleanup(NetClientState *nc) -{ - E1000State *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - -static void pci_e1000_uninit(PCIDevice *dev) { E1000State *d = E1000(dev); @@ -1528,7 +1520,6 @@ static NetClientInfo net_e1000_info = { .can_receive = e1000_can_receive, .receive = e1000_receive, .receive_iov = e1000_receive_iov, - .cleanup = e1000_cleanup, .link_status_changed = e1000_set_link_status, }; diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index 4877bfd4d3..7a4f9f8a08 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -1832,13 +1832,6 @@ static const VMStateDescription vmstate_eepro100 = { } }; -static void nic_cleanup(NetClientState *nc) -{ - EEPRO100State *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static void pci_nic_uninit(PCIDevice *pci_dev) { EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, pci_dev); @@ -1853,7 +1846,6 @@ static NetClientInfo net_eepro100_info = { .size = sizeof(NICState), .can_receive = nic_can_receive, .receive = nic_receive, - .cleanup = nic_cleanup, }; static int e100_nic_init(PCIDevice *pci_dev) diff --git a/hw/net/etraxfs_eth.c b/hw/net/etraxfs_eth.c index 6a3c86db48..4773dea927 100644 --- a/hw/net/etraxfs_eth.c +++ b/hw/net/etraxfs_eth.c @@ -581,24 +581,11 @@ static const MemoryRegionOps eth_ops = { } }; -static void eth_cleanup(NetClientState *nc) -{ - ETRAXFSEthState *eth = qemu_get_nic_opaque(nc); - - /* Disconnect the client. */ - eth->dma_out->client.push = NULL; - eth->dma_out->client.opaque = NULL; - eth->dma_in->client.opaque = NULL; - eth->dma_in->client.pull = NULL; - g_free(eth); -} - static NetClientInfo net_etraxfs_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = eth_can_receive, .receive = eth_receive, - .cleanup = eth_cleanup, .link_status_changed = eth_set_link, }; diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c index d4b4429446..2fbbc6ccc0 100644 --- a/hw/net/fsl_etsec/etsec.c +++ b/hw/net/fsl_etsec/etsec.c @@ -338,11 +338,6 @@ static void etsec_reset(DeviceState *d) MII_SR_100X_FD_CAPS | MII_SR_100T4_CAPS; } -static void etsec_cleanup(NetClientState *nc) -{ - /* qemu_log("eTSEC cleanup\n"); */ -} - static int etsec_can_receive(NetClientState *nc) { eTSEC *etsec = qemu_get_nic_opaque(nc); @@ -377,7 +372,6 @@ static NetClientInfo net_etsec_info = { .size = sizeof(NICState), .can_receive = etsec_can_receive, .receive = etsec_receive, - .cleanup = etsec_cleanup, .link_status_changed = etsec_set_link_status, }; diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c index e528290b41..f169c383df 100644 --- a/hw/net/lan9118.c +++ b/hw/net/lan9118.c @@ -1309,19 +1309,11 @@ static const MemoryRegionOps lan9118_16bit_mem_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static void lan9118_cleanup(NetClientState *nc) -{ - lan9118_state *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static NetClientInfo net_lan9118_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = lan9118_can_receive, .receive = lan9118_receive, - .cleanup = lan9118_cleanup, .link_status_changed = lan9118_set_link, }; diff --git a/hw/net/lance.c b/hw/net/lance.c index a1c49f1b97..ff7e789563 100644 --- a/hw/net/lance.c +++ b/hw/net/lance.c @@ -91,20 +91,12 @@ static const MemoryRegionOps lance_mem_ops = { }, }; -static void lance_cleanup(NetClientState *nc) -{ - PCNetState *d = qemu_get_nic_opaque(nc); - - pcnet_common_cleanup(d); -} - static NetClientInfo net_lance_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = pcnet_can_receive, .receive = pcnet_receive, .link_status_changed = pcnet_set_link_status, - .cleanup = lance_cleanup, }; static const VMStateDescription vmstate_lance = { diff --git a/hw/net/mcf_fec.c b/hw/net/mcf_fec.c index 22cd7cf870..0255612f10 100644 --- a/hw/net/mcf_fec.c +++ b/hw/net/mcf_fec.c @@ -439,19 +439,11 @@ static const MemoryRegionOps mcf_fec_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static void mcf_fec_cleanup(NetClientState *nc) -{ - mcf_fec_state *s = qemu_get_nic_opaque(nc); - - g_free(s); -} - static NetClientInfo net_mcf_fec_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = mcf_fec_can_receive, .receive = mcf_fec_receive, - .cleanup = mcf_fec_cleanup, }; void mcf_fec_init(MemoryRegion *sysmem, NICInfo *nd, diff --git a/hw/net/milkymist-minimac2.c b/hw/net/milkymist-minimac2.c index c6326728eb..f06afaa581 100644 --- a/hw/net/milkymist-minimac2.c +++ b/hw/net/milkymist-minimac2.c @@ -425,13 +425,6 @@ static int minimac2_can_rx(NetClientState *nc) return 0; } -static void minimac2_cleanup(NetClientState *nc) -{ - MilkymistMinimac2State *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static void milkymist_minimac2_reset(DeviceState *d) { MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(d); @@ -454,7 +447,6 @@ static NetClientInfo net_milkymist_minimac2_info = { .size = sizeof(NICState), .can_receive = minimac2_can_rx, .receive = minimac2_rx, - .cleanup = minimac2_cleanup, }; static int milkymist_minimac2_init(SysBusDevice *sbd) diff --git a/hw/net/mipsnet.c b/hw/net/mipsnet.c index b26c369178..c813e0caa8 100644 --- a/hw/net/mipsnet.c +++ b/hw/net/mipsnet.c @@ -211,19 +211,11 @@ static const VMStateDescription vmstate_mipsnet = { } }; -static void mipsnet_cleanup(NetClientState *nc) -{ - MIPSnetState *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static NetClientInfo net_mipsnet_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = mipsnet_can_receive, .receive = mipsnet_receive, - .cleanup = mipsnet_cleanup, }; static const MemoryRegionOps mipsnet_ioport_ops = { diff --git a/hw/net/ne2000-isa.c b/hw/net/ne2000-isa.c index 82e2ba17c1..17e7199f70 100644 --- a/hw/net/ne2000-isa.c +++ b/hw/net/ne2000-isa.c @@ -41,19 +41,11 @@ typedef struct ISANE2000State { NE2000State ne2000; } ISANE2000State; -static void isa_ne2000_cleanup(NetClientState *nc) -{ - NE2000State *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static NetClientInfo net_ne2000_isa_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = ne2000_can_receive, .receive = ne2000_receive, - .cleanup = isa_ne2000_cleanup, }; static const VMStateDescription vmstate_isa_ne2000 = { diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c index 3ab2d03696..4dea70178d 100644 --- a/hw/net/ne2000.c +++ b/hw/net/ne2000.c @@ -702,19 +702,11 @@ void ne2000_setup_io(NE2000State *s, DeviceState *dev, unsigned size) memory_region_init_io(&s->io, OBJECT(dev), &ne2000_ops, s, "ne2000", size); } -static void ne2000_cleanup(NetClientState *nc) -{ - NE2000State *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static NetClientInfo net_ne2000_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = ne2000_can_receive, .receive = ne2000_receive, - .cleanup = ne2000_cleanup, }; static int pci_ne2000_init(PCIDevice *pci_dev) diff --git a/hw/net/opencores_eth.c b/hw/net/opencores_eth.c index 4a443049dd..3642046efa 100644 --- a/hw/net/opencores_eth.c +++ b/hw/net/opencores_eth.c @@ -472,16 +472,11 @@ static ssize_t open_eth_receive(NetClientState *nc, return size; } -static void open_eth_cleanup(NetClientState *nc) -{ -} - static NetClientInfo net_open_eth_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = open_eth_can_receive, .receive = open_eth_receive, - .cleanup = open_eth_cleanup, .link_status_changed = open_eth_set_link_status, }; diff --git a/hw/net/pcnet-pci.c b/hw/net/pcnet-pci.c index fb5f5d6237..b86bc0d79b 100644 --- a/hw/net/pcnet-pci.c +++ b/hw/net/pcnet-pci.c @@ -271,13 +271,6 @@ static void pci_physical_memory_read(void *dma_opaque, hwaddr addr, pci_dma_read(dma_opaque, addr, buf, len); } -static void pci_pcnet_cleanup(NetClientState *nc) -{ - PCNetState *d = qemu_get_nic_opaque(nc); - - pcnet_common_cleanup(d); -} - static void pci_pcnet_uninit(PCIDevice *dev) { PCIPCNetState *d = PCI_PCNET(dev); @@ -294,7 +287,6 @@ static NetClientInfo net_pci_pcnet_info = { .can_receive = pcnet_can_receive, .receive = pcnet_receive, .link_status_changed = pcnet_set_link_status, - .cleanup = pci_pcnet_cleanup, }; static int pci_pcnet_init(PCIDevice *pci_dev) diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c index f409b9293d..8a1c8f17b0 100644 --- a/hw/net/pcnet.c +++ b/hw/net/pcnet.c @@ -1724,11 +1724,6 @@ const VMStateDescription vmstate_pcnet = { } }; -void pcnet_common_cleanup(PCNetState *d) -{ - d->nic = NULL; -} - int pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info) { int i; diff --git a/hw/net/pcnet.h b/hw/net/pcnet.h index f8e8a6f6ba..3f12fe3c13 100644 --- a/hw/net/pcnet.h +++ b/hw/net/pcnet.h @@ -63,7 +63,6 @@ uint32_t pcnet_bcr_readw(PCNetState *s, uint32_t rap); int pcnet_can_receive(NetClientState *nc); ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_); void pcnet_set_link_status(NetClientState *nc); -void pcnet_common_cleanup(PCNetState *d); int pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info); extern const VMStateDescription vmstate_pcnet; #endif diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 5f0197c9df..6fa9e0aa15 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3455,13 +3455,6 @@ static void rtl8139_timer(void *opaque) rtl8139_set_next_tctr_time(s, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); } -static void rtl8139_cleanup(NetClientState *nc) -{ - RTL8139State *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static void pci_rtl8139_uninit(PCIDevice *dev) { RTL8139State *s = RTL8139(dev); @@ -3494,7 +3487,6 @@ static NetClientInfo net_rtl8139_info = { .size = sizeof(NICState), .can_receive = rtl8139_can_receive, .receive = rtl8139_receive, - .cleanup = rtl8139_cleanup, .link_status_changed = rtl8139_set_link_status, }; diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c index d1dca8f4e2..74e06e6c77 100644 --- a/hw/net/smc91c111.c +++ b/hw/net/smc91c111.c @@ -736,19 +736,11 @@ static const MemoryRegionOps smc91c111_mem_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static void smc91c111_cleanup(NetClientState *nc) -{ - smc91c111_state *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static NetClientInfo net_smc91c111_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = smc91c111_can_receive, .receive = smc91c111_receive, - .cleanup = smc91c111_cleanup, }; static int smc91c111_init1(SysBusDevice *sbd) diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 2c8b038227..c255d925a7 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -187,19 +187,11 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, return size; } -static void spapr_vlan_cleanup(NetClientState *nc) -{ - VIOsPAPRVLANDevice *dev = qemu_get_nic_opaque(nc); - - dev->nic = NULL; -} - static NetClientInfo net_spapr_vlan_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = spapr_vlan_can_receive, .receive = spapr_vlan_receive, - .cleanup = spapr_vlan_cleanup, }; static void spapr_vlan_reset(VIOsPAPRDevice *sdev) diff --git a/hw/net/stellaris_enet.c b/hw/net/stellaris_enet.c index c07e5137c2..278a6545c3 100644 --- a/hw/net/stellaris_enet.c +++ b/hw/net/stellaris_enet.c @@ -451,19 +451,11 @@ static void stellaris_enet_reset(stellaris_enet_state *s) s->tx_fifo_len = 0; } -static void stellaris_enet_cleanup(NetClientState *nc) -{ - stellaris_enet_state *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static NetClientInfo net_stellaris_enet_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = stellaris_enet_can_receive, .receive = stellaris_enet_receive, - .cleanup = stellaris_enet_cleanup, }; static int stellaris_enet_init(SysBusDevice *sbd) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index e574bd4322..45da34ad61 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1522,19 +1522,11 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } -static void virtio_net_cleanup(NetClientState *nc) -{ - VirtIONet *n = qemu_get_nic_opaque(nc); - - n->nic = NULL; -} - static NetClientInfo net_virtio_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = virtio_net_can_receive, .receive = virtio_net_receive, - .cleanup = virtio_net_cleanup, .link_status_changed = virtio_net_set_link_status, .query_rx_filter = virtio_net_query_rxfilter, }; diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 8eea58989b..a83d2a11ff 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1912,12 +1912,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) return bytes_indicated; } -static void vmxnet3_cleanup(NetClientState *nc) -{ - VMXNET3State *s = qemu_get_nic_opaque(nc); - s->nic = NULL; -} - static void vmxnet3_set_link_status(NetClientState *nc) { VMXNET3State *s = qemu_get_nic_opaque(nc); @@ -1937,7 +1931,6 @@ static NetClientInfo net_vmxnet3_info = { .size = sizeof(NICState), .can_receive = vmxnet3_can_receive, .receive = vmxnet3_receive, - .cleanup = vmxnet3_cleanup, .link_status_changed = vmxnet3_set_link_status, }; diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 63918ae1a0..19ecfc4ccf 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -370,11 +370,16 @@ static int net_connect(struct XenDevice *xendev) netdev->xendev.dom, netdev->tx_ring_ref, PROT_READ | PROT_WRITE); + if (!netdev->txs) { + return -1; + } netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, netdev->xendev.dom, netdev->rx_ring_ref, PROT_READ | PROT_WRITE); - if (!netdev->txs || !netdev->rxs) { + if (!netdev->rxs) { + xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1); + netdev->txs = NULL; return -1; } BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE); @@ -405,10 +410,6 @@ static void net_disconnect(struct XenDevice *xendev) xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1); netdev->rxs = NULL; } - if (netdev->nic) { - qemu_del_nic(netdev->nic); - netdev->nic = NULL; - } } static void net_event(struct XenDevice *xendev) @@ -422,7 +423,12 @@ static int net_free(struct XenDevice *xendev) { struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); + if (netdev->nic) { + qemu_del_nic(netdev->nic); + netdev->nic = NULL; + } g_free(netdev->mac); + netdev->mac = NULL; return 0; } diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c index aeffcb58b7..b068f3a0d6 100644 --- a/hw/net/xgmac.c +++ b/hw/net/xgmac.c @@ -368,19 +368,11 @@ out: return ret; } -static void eth_cleanup(NetClientState *nc) -{ - XgmacState *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static NetClientInfo net_xgmac_enet_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = eth_can_rx, .receive = eth_rx, - .cleanup = eth_cleanup, }; static int xgmac_enet_init(SysBusDevice *sbd) diff --git a/hw/net/xilinx_axienet.c b/hw/net/xilinx_axienet.c index cd952d2514..21efedfc3e 100644 --- a/hw/net/xilinx_axienet.c +++ b/hw/net/xilinx_axienet.c @@ -857,14 +857,6 @@ static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size) return size; } -static void eth_cleanup(NetClientState *nc) -{ - /* FIXME. */ - XilinxAXIEnet *s = qemu_get_nic_opaque(nc); - g_free(s->rxmem); - g_free(s); -} - static size_t xilinx_axienet_control_stream_push(StreamSlave *obj, uint8_t *buf, size_t len) { @@ -936,7 +928,6 @@ static NetClientInfo net_xilinx_enet_info = { .size = sizeof(NICState), .can_receive = eth_can_rx, .receive = eth_rx, - .cleanup = eth_cleanup, }; static void xilinx_enet_realize(DeviceState *dev, Error **errp) diff --git a/hw/net/xilinx_ethlite.c b/hw/net/xilinx_ethlite.c index 1b177b3dae..9536f64584 100644 --- a/hw/net/xilinx_ethlite.c +++ b/hw/net/xilinx_ethlite.c @@ -212,19 +212,11 @@ static void xilinx_ethlite_reset(DeviceState *dev) s->rxbuf = 0; } -static void eth_cleanup(NetClientState *nc) -{ - struct xlx_ethlite *s = qemu_get_nic_opaque(nc); - - s->nic = NULL; -} - static NetClientInfo net_xilinx_ethlite_info = { .type = NET_CLIENT_OPTIONS_KIND_NIC, .size = sizeof(NICState), .can_receive = eth_can_rx, .receive = eth_rx, - .cleanup = eth_cleanup, }; static void xilinx_ethlite_realize(DeviceState *dev, Error **errp) diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index 1b4c0f0023..574f8b2efb 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -62,11 +62,19 @@ #define PPCE500_PCI_NR_POBS 5 #define PPCE500_PCI_NR_PIBS 3 +#define PIWAR_EN 0x80000000 /* Enable */ +#define PIWAR_PF 0x20000000 /* prefetch */ +#define PIWAR_TGI_LOCAL 0x00f00000 /* target - local memory */ +#define PIWAR_READ_SNOOP 0x00050000 +#define PIWAR_WRITE_SNOOP 0x00005000 +#define PIWAR_SZ_MASK 0x0000003f + struct pci_outbound { uint32_t potar; uint32_t potear; uint32_t powbar; uint32_t powar; + MemoryRegion mem; }; struct pci_inbound { @@ -74,6 +82,7 @@ struct pci_inbound { uint32_t piwbar; uint32_t piwbear; uint32_t piwar; + MemoryRegion mem; }; #define TYPE_PPC_E500_PCI_HOST_BRIDGE "e500-pcihost" @@ -91,10 +100,13 @@ struct PPCE500PCIState { uint32_t irq_num[PCI_NUM_PINS]; uint32_t first_slot; uint32_t first_pin_irq; + AddressSpace bm_as; + MemoryRegion bm; /* mmio maps */ MemoryRegion container; MemoryRegion iomem; MemoryRegion pio; + MemoryRegion busmem; }; #define TYPE_PPC_E500_PCI_BRIDGE "e500-host-bridge" @@ -181,6 +193,71 @@ static uint64_t pci_reg_read4(void *opaque, hwaddr addr, return value; } +/* DMA mapping */ +static void e500_update_piw(PPCE500PCIState *pci, int idx) +{ + uint64_t tar = ((uint64_t)pci->pib[idx].pitar) << 12; + uint64_t wbar = ((uint64_t)pci->pib[idx].piwbar) << 12; + uint64_t war = pci->pib[idx].piwar; + uint64_t size = 2ULL << (war & PIWAR_SZ_MASK); + MemoryRegion *address_space_mem = get_system_memory(); + MemoryRegion *mem = &pci->pib[idx].mem; + MemoryRegion *bm = &pci->bm; + char *name; + + if (memory_region_is_mapped(mem)) { + /* Before we modify anything, unmap and destroy the region */ + memory_region_del_subregion(bm, mem); + object_unparent(OBJECT(mem)); + } + + if (!(war & PIWAR_EN)) { + /* Not enabled, nothing to do */ + return; + } + + name = g_strdup_printf("PCI Inbound Window %d", idx); + memory_region_init_alias(mem, OBJECT(pci), name, address_space_mem, tar, + size); + memory_region_add_subregion_overlap(bm, wbar, mem, -1); + g_free(name); + + pci_debug("%s: Added window of size=%#lx from PCI=%#lx to CPU=%#lx\n", + __func__, size, wbar, tar); +} + +/* BAR mapping */ +static void e500_update_pow(PPCE500PCIState *pci, int idx) +{ + uint64_t tar = ((uint64_t)pci->pob[idx].potar) << 12; + uint64_t wbar = ((uint64_t)pci->pob[idx].powbar) << 12; + uint64_t war = pci->pob[idx].powar; + uint64_t size = 2ULL << (war & PIWAR_SZ_MASK); + MemoryRegion *mem = &pci->pob[idx].mem; + MemoryRegion *address_space_mem = get_system_memory(); + char *name; + + if (memory_region_is_mapped(mem)) { + /* Before we modify anything, unmap and destroy the region */ + memory_region_del_subregion(address_space_mem, mem); + object_unparent(OBJECT(mem)); + } + + if (!(war & PIWAR_EN)) { + /* Not enabled, nothing to do */ + return; + } + + name = g_strdup_printf("PCI Outbound Window %d", idx); + memory_region_init_alias(mem, OBJECT(pci), name, &pci->busmem, tar, + size); + memory_region_add_subregion(address_space_mem, wbar, mem); + g_free(name); + + pci_debug("%s: Added window of size=%#lx from CPU=%#lx to PCI=%#lx\n", + __func__, size, wbar, tar); +} + static void pci_reg_write4(void *opaque, hwaddr addr, uint64_t value, unsigned size) { @@ -199,18 +276,22 @@ static void pci_reg_write4(void *opaque, hwaddr addr, case PPCE500_PCI_OW3: case PPCE500_PCI_OW4: idx = (addr >> 5) & 0x7; - switch (addr & 0xC) { + switch (addr & 0x1F) { case PCI_POTAR: pci->pob[idx].potar = value; + e500_update_pow(pci, idx); break; case PCI_POTEAR: pci->pob[idx].potear = value; + e500_update_pow(pci, idx); break; case PCI_POWBAR: pci->pob[idx].powbar = value; + e500_update_pow(pci, idx); break; case PCI_POWAR: pci->pob[idx].powar = value; + e500_update_pow(pci, idx); break; default: break; @@ -221,18 +302,22 @@ static void pci_reg_write4(void *opaque, hwaddr addr, case PPCE500_PCI_IW2: case PPCE500_PCI_IW1: idx = ((addr >> 5) & 0x3) - 1; - switch (addr & 0xC) { + switch (addr & 0x1F) { case PCI_PITAR: pci->pib[idx].pitar = value; + e500_update_piw(pci, idx); break; case PCI_PIWBAR: pci->pib[idx].piwbar = value; + e500_update_piw(pci, idx); break; case PCI_PIWBEAR: pci->pib[idx].piwbear = value; + e500_update_piw(pci, idx); break; case PCI_PIWAR: pci->pib[idx].piwar = value; + e500_update_piw(pci, idx); break; default: break; @@ -349,13 +434,20 @@ static int e500_pcihost_bridge_initfn(PCIDevice *d) return 0; } +static AddressSpace *e500_pcihost_set_iommu(PCIBus *bus, void *opaque, + int devfn) +{ + PPCE500PCIState *s = opaque; + + return &s->bm_as; +} + static int e500_pcihost_initfn(SysBusDevice *dev) { PCIHostState *h; PPCE500PCIState *s; PCIBus *b; int i; - MemoryRegion *address_space_mem = get_system_memory(); h = PCI_HOST_BRIDGE(dev); s = PPC_E500_PCI_HOST_BRIDGE(dev); @@ -369,12 +461,22 @@ static int e500_pcihost_initfn(SysBusDevice *dev) } memory_region_init(&s->pio, OBJECT(s), "pci-pio", PCIE500_PCI_IOLEN); + memory_region_init(&s->busmem, OBJECT(s), "pci bus memory", UINT64_MAX); + + /* PIO lives at the bottom of our bus space */ + memory_region_add_subregion_overlap(&s->busmem, 0, &s->pio, -2); b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq, - mpc85xx_pci_map_irq, s, address_space_mem, - &s->pio, PCI_DEVFN(s->first_slot, 0), 4, TYPE_PCI_BUS); + mpc85xx_pci_map_irq, s, &s->busmem, &s->pio, + PCI_DEVFN(s->first_slot, 0), 4, TYPE_PCI_BUS); h->bus = b; + /* Set up PCI view of memory */ + memory_region_init(&s->bm, OBJECT(s), "bm-e500", UINT64_MAX); + memory_region_add_subregion(&s->bm, 0x0, &s->busmem); + address_space_init(&s->bm_as, &s->bm, "pci-bm"); + pci_setup_iommu(b, e500_pcihost_set_iommu, s); + pci_create_simple(b, 0, "e500-host-bridge"); memory_region_init(&s->container, OBJECT(h), "pci-container", PCIE500_ALL_SIZE); @@ -388,7 +490,6 @@ static int e500_pcihost_initfn(SysBusDevice *dev) memory_region_add_subregion(&s->container, PCIE500_CFGDATA, &h->data_mem); memory_region_add_subregion(&s->container, PCIE500_REG_BASE, &s->iomem); sysbus_init_mmio(dev, &s->container); - sysbus_init_mmio(dev, &s->pio); pci_bus_set_route_irq_fn(b, e500_route_intx_pin_to_irq); return 0; diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 2832fc0da4..7e17d180c6 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -51,21 +51,16 @@ #define RAM_SIZES_ALIGN (64UL << 20) /* TODO: parameterize */ -#define MPC8544_CCSRBAR_BASE 0xE0000000ULL #define MPC8544_CCSRBAR_SIZE 0x00100000ULL #define MPC8544_MPIC_REGS_OFFSET 0x40000ULL #define MPC8544_MSI_REGS_OFFSET 0x41600ULL #define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL #define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL #define MPC8544_PCI_REGS_OFFSET 0x8000ULL -#define MPC8544_PCI_REGS_BASE (MPC8544_CCSRBAR_BASE + \ - MPC8544_PCI_REGS_OFFSET) #define MPC8544_PCI_REGS_SIZE 0x1000ULL -#define MPC8544_PCI_IO 0xE1000000ULL #define MPC8544_UTIL_OFFSET 0xe0000ULL -#define MPC8544_SPIN_BASE 0xEF000000ULL #define MPC8XXX_GPIO_OFFSET 0x000FF000ULL -#define MPC8XXX_GPIO_IRQ 43 +#define MPC8XXX_GPIO_IRQ 47 struct boot_info { @@ -293,12 +288,12 @@ static int ppce500_load_device_tree(MachineState *machine, int len; uint32_t pci_ranges[14] = { - 0x2000000, 0x0, 0xc0000000, - 0x0, 0xc0000000, + 0x2000000, 0x0, params->pci_mmio_bus_base, + params->pci_mmio_base >> 32, params->pci_mmio_base, 0x0, 0x20000000, 0x1000000, 0x0, 0x0, - 0x0, 0xe1000000, + params->pci_pio_base >> 32, params->pci_pio_base, 0x0, 0x10000, }; QemuOpts *machine_opts = qemu_get_machine_opts(); @@ -389,7 +384,7 @@ static int ppce500_load_device_tree(MachineState *machine, CPUState *cpu; PowerPCCPU *pcpu; char cpu_name[128]; - uint64_t cpu_release_addr = MPC8544_SPIN_BASE + (i * 0x20); + uint64_t cpu_release_addr = params->spin_base + (i * 0x20); cpu = qemu_get_cpu(i); if (cpu == NULL) { @@ -426,7 +421,7 @@ static int ppce500_load_device_tree(MachineState *machine, qemu_fdt_add_subnode(fdt, "/aliases"); /* XXX These should go into their respective devices' code */ - snprintf(soc, sizeof(soc), "/soc@%llx", MPC8544_CCSRBAR_BASE); + snprintf(soc, sizeof(soc), "/soc@%"PRIx64, params->ccsrbar_base); qemu_fdt_add_subnode(fdt, soc); qemu_fdt_setprop_string(fdt, soc, "device_type", "soc"); qemu_fdt_setprop(fdt, soc, "compatible", compatible_sb, @@ -434,7 +429,7 @@ static int ppce500_load_device_tree(MachineState *machine, qemu_fdt_setprop_cell(fdt, soc, "#address-cells", 1); qemu_fdt_setprop_cell(fdt, soc, "#size-cells", 1); qemu_fdt_setprop_cells(fdt, soc, "ranges", 0x0, - MPC8544_CCSRBAR_BASE >> 32, MPC8544_CCSRBAR_BASE, + params->ccsrbar_base >> 32, params->ccsrbar_base, MPC8544_CCSRBAR_SIZE); /* XXX should contain a reasonable value */ qemu_fdt_setprop_cell(fdt, soc, "bus-frequency", 0); @@ -493,7 +488,8 @@ static int ppce500_load_device_tree(MachineState *machine, qemu_fdt_setprop_cell(fdt, msi, "phandle", msi_ph); qemu_fdt_setprop_cell(fdt, msi, "linux,phandle", msi_ph); - snprintf(pci, sizeof(pci), "/pci@%llx", MPC8544_PCI_REGS_BASE); + snprintf(pci, sizeof(pci), "/pci@%llx", + params->ccsrbar_base + MPC8544_PCI_REGS_OFFSET); qemu_fdt_add_subnode(fdt, pci); qemu_fdt_setprop_cell(fdt, pci, "cell-index", 0); qemu_fdt_setprop_string(fdt, pci, "compatible", "fsl,mpc8540-pci"); @@ -512,8 +508,10 @@ static int ppce500_load_device_tree(MachineState *machine, } qemu_fdt_setprop_cell(fdt, pci, "fsl,msi", msi_ph); qemu_fdt_setprop(fdt, pci, "ranges", pci_ranges, sizeof(pci_ranges)); - qemu_fdt_setprop_cells(fdt, pci, "reg", MPC8544_PCI_REGS_BASE >> 32, - MPC8544_PCI_REGS_BASE, 0, 0x1000); + qemu_fdt_setprop_cells(fdt, pci, "reg", + (params->ccsrbar_base + MPC8544_PCI_REGS_OFFSET) >> 32, + (params->ccsrbar_base + MPC8544_PCI_REGS_OFFSET), + 0, 0x1000); qemu_fdt_setprop_cell(fdt, pci, "clock-frequency", 66666666); qemu_fdt_setprop_cell(fdt, pci, "#interrupt-cells", 1); qemu_fdt_setprop_cell(fdt, pci, "#size-cells", 2); @@ -841,7 +839,7 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT]; irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT]; env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i; - env->mpic_iack = MPC8544_CCSRBAR_BASE + + env->mpic_iack = params->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0; ppc_booke_timers_init(cpu, 400000000, PPC_TIMER_E500); @@ -875,7 +873,7 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) qdev_init_nofail(dev); ccsr = CCSR(dev); ccsr_addr_space = &ccsr->ccsr_space; - memory_region_add_subregion(address_space_mem, MPC8544_CCSRBAR_BASE, + memory_region_add_subregion(address_space_mem, params->ccsrbar_base, ccsr_addr_space); mpic = ppce500_init_mpic(params, ccsr_addr_space, irqs); @@ -917,8 +915,6 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) if (!pci_bus) printf("couldn't create PCI controller!\n"); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, MPC8544_PCI_IO); - if (pci_bus) { /* Register network interfaces. */ for (i = 0; i < nb_nics; i++) { @@ -927,7 +923,7 @@ void ppce500_init(MachineState *machine, PPCE500Params *params) } /* Register spinning region */ - sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL); + sysbus_create_simple("e500-spin", params->spin_base, NULL); if (cur_base < (32 * 1024 * 1024)) { /* u-boot occupies memory up to 32MB, so load blobs above */ diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h index 9f61ab2b1c..ef224ea5e6 100644 --- a/hw/ppc/e500.h +++ b/hw/ppc/e500.h @@ -17,6 +17,11 @@ typedef struct PPCE500Params { hwaddr platform_bus_size; int platform_bus_first_irq; int platform_bus_num_irqs; + hwaddr ccsrbar_base; + hwaddr pci_pio_base; + hwaddr pci_mmio_base; + hwaddr pci_mmio_bus_base; + hwaddr spin_base; } PPCE500Params; void ppce500_init(MachineState *machine, PPCE500Params *params); diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c index d50ae000ee..14b14eaa7d 100644 --- a/hw/ppc/e500plat.c +++ b/hw/ppc/e500plat.c @@ -41,6 +41,11 @@ static void e500plat_init(MachineState *machine) .platform_bus_size = (128ULL * 1024 * 1024), .platform_bus_first_irq = 5, .platform_bus_num_irqs = 10, + .ccsrbar_base = 0xFE0000000ULL, + .pci_pio_base = 0xFE1000000ULL, + .pci_mmio_base = 0xC00000000ULL, + .pci_mmio_bus_base = 0xE0000000ULL, + .spin_base = 0xFEF000000ULL, }; /* Older KVM versions don't support EPR which breaks guests when we announce diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c index b99f74af75..3a3b141e43 100644 --- a/hw/ppc/mpc8544ds.c +++ b/hw/ppc/mpc8544ds.c @@ -15,6 +15,7 @@ #include "hw/boards.h" #include "sysemu/device_tree.h" #include "hw/ppc/openpic.h" +#include "qemu/error-report.h" static void mpc8544ds_fixup_devtree(PPCE500Params *params, void *fdt) { @@ -33,8 +34,18 @@ static void mpc8544ds_init(MachineState *machine) .pci_nr_slots = 2, .fixup_devtree = mpc8544ds_fixup_devtree, .mpic_version = OPENPIC_MODEL_FSL_MPIC_20, + .ccsrbar_base = 0xE0000000ULL, + .pci_mmio_base = 0xC0000000ULL, + .pci_mmio_bus_base = 0xC0000000ULL, + .pci_pio_base = 0xE1000000ULL, + .spin_base = 0xEF000000ULL, }; + if (machine->ram_size > 0xc0000000) { + error_report("The MPC8544DS board only supports up to 3GB of RAM"); + exit(1); + } + ppce500_init(machine, ¶ms); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index bec82cd7a9..5ce565d5ec 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -844,7 +844,7 @@ static void timebase_pre_save(void *opaque) return; } - tb->time_of_the_day_ns = get_clock_realtime(); + tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); /* * tb_offset is only expected to be changed by migration so * there is no need to update it from KVM here @@ -873,7 +873,7 @@ static int timebase_post_load(void *opaque, int version_id) * We try to adjust timebase by downtime if host clocks are not * too much out of sync (1 second for now). */ - host_ns = get_clock_realtime(); + host_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); ns_diff = MAX(0, host_ns - tb_remote->time_of_the_day_ns); migration_duration_ns = MIN(NSEC_PER_SEC, ns_diff); migration_duration_tb = muldiv64(migration_duration_ns, freq, NSEC_PER_SEC); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 53c4116ed3..b560459e83 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -819,9 +819,16 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu) } } +#define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2)) +#define HPTE_VALID(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID) +#define HPTE_DIRTY(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY) +#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) +#define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY)) + static void spapr_reset_htab(sPAPREnvironment *spapr) { long shift; + int index; /* allocate hash page table. For now we always make this 16mb, * later we should probably make it scale to the size of guest @@ -833,6 +840,11 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) /* Kernel handles htab, we don't need to allocate one */ spapr->htab_shift = shift; kvmppc_kern_htab = true; + + /* Tell readers to update their file descriptor */ + if (spapr->htab_fd >= 0) { + spapr->htab_fd_stale = true; + } } else { if (!spapr->htab) { /* Allocate an htab if we don't yet have one */ @@ -841,6 +853,10 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) /* And clear it */ memset(spapr->htab, 0, HTAB_SIZE(spapr)); + + for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) { + DIRTY_HPTE(HPTE(spapr->htab, index)); + } } /* Update the RMA size if necessary */ @@ -867,6 +883,28 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) return 0; } +/* + * A guest reset will cause spapr->htab_fd to become stale if being used. + * Reopen the file descriptor to make sure the whole HTAB is properly read. + */ +static int spapr_check_htab_fd(sPAPREnvironment *spapr) +{ + int rc = 0; + + if (spapr->htab_fd_stale) { + close(spapr->htab_fd); + spapr->htab_fd = kvmppc_get_htab_fd(false); + if (spapr->htab_fd < 0) { + error_report("Unable to open fd for reading hash table from KVM: " + "%s", strerror(errno)); + rc = -1; + } + spapr->htab_fd_stale = false; + } + + return rc; +} + static void ppc_spapr_reset(void) { PowerPCCPU *first_ppc_cpu; @@ -986,11 +1024,6 @@ static const VMStateDescription vmstate_spapr = { }, }; -#define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2)) -#define HPTE_VALID(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID) -#define HPTE_DIRTY(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY) -#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) - static int htab_save_setup(QEMUFile *f, void *opaque) { sPAPREnvironment *spapr = opaque; @@ -1005,6 +1038,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) assert(kvm_enabled()); spapr->htab_fd = kvmppc_get_htab_fd(false); + spapr->htab_fd_stale = false; if (spapr->htab_fd < 0) { fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n", strerror(errno)); @@ -1037,7 +1071,7 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, /* Consume valid HPTEs */ chunkstart = index; - while ((index < htabslots) + while ((index < htabslots) && (index - chunkstart < USHRT_MAX) && HPTE_VALID(HPTE(spapr->htab, index))) { index++; CLEAN_HPTE(HPTE(spapr->htab, index)); @@ -1089,7 +1123,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr, chunkstart = index; /* Consume valid dirty HPTEs */ - while ((index < htabslots) + while ((index < htabslots) && (index - chunkstart < USHRT_MAX) && HPTE_DIRTY(HPTE(spapr->htab, index)) && HPTE_VALID(HPTE(spapr->htab, index))) { CLEAN_HPTE(HPTE(spapr->htab, index)); @@ -1099,7 +1133,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr, invalidstart = index; /* Consume invalid dirty HPTEs */ - while ((index < htabslots) + while ((index < htabslots) && (index - invalidstart < USHRT_MAX) && HPTE_DIRTY(HPTE(spapr->htab, index)) && !HPTE_VALID(HPTE(spapr->htab, index))) { CLEAN_HPTE(HPTE(spapr->htab, index)); @@ -1157,6 +1191,11 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) if (!spapr->htab) { assert(kvm_enabled()); + rc = spapr_check_htab_fd(spapr); + if (rc < 0) { + return rc; + } + rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS); if (rc < 0) { @@ -1188,6 +1227,11 @@ static int htab_save_complete(QEMUFile *f, void *opaque) assert(kvm_enabled()); + rc = spapr_check_htab_fd(spapr); + if (rc < 0) { + return rc; + } + rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1); if (rc < 0) { return rc; @@ -1438,7 +1482,7 @@ static void ppc_spapr_init(MachineState *machine) } if (spapr->rtas_size > RTAS_MAX_SIZE) { hw_error("RTAS too big ! 0x%zx bytes (max is 0x%x)\n", - spapr->rtas_size, RTAS_MAX_SIZE); + (size_t)spapr->rtas_size, RTAS_MAX_SIZE); exit(1); } g_free(filename); diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 6c91d8edd8..da474740c0 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -173,9 +173,9 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, return tcet; } -static void spapr_tce_table_finalize(Object *obj) +static void spapr_tce_table_unrealize(DeviceState *dev, Error **errp) { - sPAPRTCETable *tcet = SPAPR_TCE_TABLE(obj); + sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev); QLIST_REMOVE(tcet, list); @@ -420,6 +420,7 @@ static void spapr_tce_table_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); dc->init = spapr_tce_table_realize; dc->reset = spapr_tce_reset; + dc->unrealize = spapr_tce_table_unrealize; QLIST_INIT(&spapr_tce_tables); @@ -435,7 +436,6 @@ static TypeInfo spapr_tce_table_info = { .parent = TYPE_DEVICE, .instance_size = sizeof(sPAPRTCETable), .class_init = spapr_tce_table_class_init, - .instance_finalize = spapr_tce_table_finalize, }; static void register_types(void) diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs index 1ba6c3ab70..27cd75a932 100644 --- a/hw/s390x/Makefile.objs +++ b/hw/s390x/Makefile.objs @@ -8,3 +8,4 @@ obj-y += ipl.o obj-y += css.o obj-y += s390-virtio-ccw.o obj-y += virtio-ccw.o +obj-y += s390-pci-bus.o s390-pci-inst.o diff --git a/hw/s390x/css.c b/hw/s390x/css.c index b67c039a70..d0c5ddeece 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -1299,6 +1299,11 @@ void css_generate_chp_crws(uint8_t cssid, uint8_t chpid) /* TODO */ } +void css_generate_css_crws(uint8_t cssid) +{ + css_queue_crw(CRW_RSC_CSS, 0, 0, cssid); +} + int css_enable_mcsse(void) { trace_css_enable_facility("mcsse"); diff --git a/hw/s390x/css.h b/hw/s390x/css.h index 33104ac58e..7e53148700 100644 --- a/hw/s390x/css.h +++ b/hw/s390x/css.h @@ -101,6 +101,7 @@ void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid); void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, int hotplugged, int add); void css_generate_chp_crws(uint8_t cssid, uint8_t chpid); +void css_generate_css_crws(uint8_t cssid); void css_adapter_interrupt(uint8_t isc); #define CSS_IO_ADAPTER_VIRTIO 1 diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c new file mode 100644 index 0000000000..1201b8d57c --- /dev/null +++ b/hw/s390x/s390-pci-bus.c @@ -0,0 +1,591 @@ +/* + * s390 PCI BUS + * + * Copyright 2014 IBM Corp. + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> + * Hong Bo Li <lihbbj@cn.ibm.com> + * Yi Min Zhao <zyimin@cn.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include "s390-pci-bus.h" +#include <hw/pci/pci_bus.h> +#include <hw/pci/msi.h> +#include <qemu/error-report.h> + +/* #define DEBUG_S390PCI_BUS */ +#ifdef DEBUG_S390PCI_BUS +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +int chsc_sei_nt2_get_event(void *res) +{ + ChscSeiNt2Res *nt2_res = (ChscSeiNt2Res *)res; + PciCcdfAvail *accdf; + PciCcdfErr *eccdf; + int rc = 1; + SeiContainer *sei_cont; + S390pciState *s = S390_PCI_HOST_BRIDGE( + object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); + + if (!s) { + return rc; + } + + sei_cont = QTAILQ_FIRST(&s->pending_sei); + if (sei_cont) { + QTAILQ_REMOVE(&s->pending_sei, sei_cont, link); + nt2_res->nt = 2; + nt2_res->cc = sei_cont->cc; + switch (sei_cont->cc) { + case 1: /* error event */ + eccdf = (PciCcdfErr *)nt2_res->ccdf; + eccdf->fid = cpu_to_be32(sei_cont->fid); + eccdf->fh = cpu_to_be32(sei_cont->fh); + eccdf->e = cpu_to_be32(sei_cont->e); + eccdf->faddr = cpu_to_be64(sei_cont->faddr); + eccdf->pec = cpu_to_be16(sei_cont->pec); + break; + case 2: /* availability event */ + accdf = (PciCcdfAvail *)nt2_res->ccdf; + accdf->fid = cpu_to_be32(sei_cont->fid); + accdf->fh = cpu_to_be32(sei_cont->fh); + accdf->pec = cpu_to_be16(sei_cont->pec); + break; + default: + abort(); + } + g_free(sei_cont); + rc = 0; + } + + return rc; +} + +int chsc_sei_nt2_have_event(void) +{ + S390pciState *s = S390_PCI_HOST_BRIDGE( + object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); + + if (!s) { + return 0; + } + + return !QTAILQ_EMPTY(&s->pending_sei); +} + +S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid) +{ + S390PCIBusDevice *pbdev; + int i; + S390pciState *s = S390_PCI_HOST_BRIDGE( + object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); + + if (!s) { + return NULL; + } + + for (i = 0; i < PCI_SLOT_MAX; i++) { + pbdev = &s->pbdev[i]; + if ((pbdev->fh != 0) && (pbdev->fid == fid)) { + return pbdev; + } + } + + return NULL; +} + +void s390_pci_sclp_configure(int configure, SCCB *sccb) +{ + PciCfgSccb *psccb = (PciCfgSccb *)sccb; + S390PCIBusDevice *pbdev = s390_pci_find_dev_by_fid(be32_to_cpu(psccb->aid)); + uint16_t rc; + + if (pbdev) { + if ((configure == 1 && pbdev->configured == true) || + (configure == 0 && pbdev->configured == false)) { + rc = SCLP_RC_NO_ACTION_REQUIRED; + } else { + pbdev->configured = !pbdev->configured; + rc = SCLP_RC_NORMAL_COMPLETION; + } + } else { + DPRINTF("sclp config %d no dev found\n", configure); + rc = SCLP_RC_ADAPTER_ID_NOT_RECOGNIZED; + } + + psccb->header.response_code = cpu_to_be16(rc); + return; +} + +static uint32_t s390_pci_get_pfid(PCIDevice *pdev) +{ + return PCI_SLOT(pdev->devfn); +} + +static uint32_t s390_pci_get_pfh(PCIDevice *pdev) +{ + return PCI_SLOT(pdev->devfn) | FH_VIRT; +} + +S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx) +{ + S390PCIBusDevice *pbdev; + int i; + int j = 0; + S390pciState *s = S390_PCI_HOST_BRIDGE( + object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); + + if (!s) { + return NULL; + } + + for (i = 0; i < PCI_SLOT_MAX; i++) { + pbdev = &s->pbdev[i]; + + if (pbdev->fh == 0) { + continue; + } + + if (j == idx) { + return pbdev; + } + j++; + } + + return NULL; +} + +S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh) +{ + S390PCIBusDevice *pbdev; + int i; + S390pciState *s = S390_PCI_HOST_BRIDGE( + object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); + + if (!s) { + return NULL; + } + + for (i = 0; i < PCI_SLOT_MAX; i++) { + pbdev = &s->pbdev[i]; + if (pbdev->fh == fh) { + return pbdev; + } + } + + return NULL; +} + +static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh, + uint32_t fid, uint64_t faddr, uint32_t e) +{ + SeiContainer *sei_cont = g_malloc0(sizeof(SeiContainer)); + S390pciState *s = S390_PCI_HOST_BRIDGE( + object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL)); + + if (!s) { + return; + } + + sei_cont->fh = fh; + sei_cont->fid = fid; + sei_cont->cc = cc; + sei_cont->pec = pec; + sei_cont->faddr = faddr; + sei_cont->e = e; + + QTAILQ_INSERT_TAIL(&s->pending_sei, sei_cont, link); + css_generate_css_crws(0); +} + +static void s390_pci_generate_plug_event(uint16_t pec, uint32_t fh, + uint32_t fid) +{ + s390_pci_generate_event(2, pec, fh, fid, 0, 0); +} + +static void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, + uint32_t fid, uint64_t faddr, + uint32_t e) +{ + s390_pci_generate_event(1, pec, fh, fid, faddr, e); +} + +static void s390_pci_set_irq(void *opaque, int irq, int level) +{ + /* nothing to do */ +} + +static int s390_pci_map_irq(PCIDevice *pci_dev, int irq_num) +{ + /* nothing to do */ + return 0; +} + +static uint64_t s390_pci_get_table_origin(uint64_t iota) +{ + return iota & ~ZPCI_IOTA_RTTO_FLAG; +} + +static unsigned int calc_rtx(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; +} + +static unsigned int calc_sx(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; +} + +static unsigned int calc_px(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; +} + +static uint64_t get_rt_sto(uint64_t entry) +{ + return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) + ? (entry & ZPCI_RTE_ADDR_MASK) + : 0; +} + +static uint64_t get_st_pto(uint64_t entry) +{ + return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) + ? (entry & ZPCI_STE_ADDR_MASK) + : 0; +} + +static uint64_t s390_guest_io_table_walk(uint64_t guest_iota, + uint64_t guest_dma_address) +{ + uint64_t sto_a, pto_a, px_a; + uint64_t sto, pto, pte; + uint32_t rtx, sx, px; + + rtx = calc_rtx(guest_dma_address); + sx = calc_sx(guest_dma_address); + px = calc_px(guest_dma_address); + + sto_a = guest_iota + rtx * sizeof(uint64_t); + sto = ldq_phys(&address_space_memory, sto_a); + sto = get_rt_sto(sto); + if (!sto) { + pte = 0; + goto out; + } + + pto_a = sto + sx * sizeof(uint64_t); + pto = ldq_phys(&address_space_memory, pto_a); + pto = get_st_pto(pto); + if (!pto) { + pte = 0; + goto out; + } + + px_a = pto + px * sizeof(uint64_t); + pte = ldq_phys(&address_space_memory, px_a); + +out: + return pte; +} + +static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr, + bool is_write) +{ + uint64_t pte; + uint32_t flags; + S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr); + S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev) + ->qbus.parent); + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = 0, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + + DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr); + + /* s390 does not have an APIC mapped to main storage so we use + * a separate AddressSpace only for msix notifications + */ + if (addr == ZPCI_MSI_ADDR) { + ret.target_as = &s->msix_notify_as; + ret.iova = addr; + ret.translated_addr = addr; + ret.addr_mask = 0xfff; + ret.perm = IOMMU_RW; + return ret; + } + + if (!pbdev->g_iota) { + pbdev->error_state = true; + pbdev->lgstg_blocked = true; + s390_pci_generate_error_event(ERR_EVENT_INVALAS, pbdev->fh, pbdev->fid, + addr, 0); + return ret; + } + + if (addr < pbdev->pba || addr > pbdev->pal) { + pbdev->error_state = true; + pbdev->lgstg_blocked = true; + s390_pci_generate_error_event(ERR_EVENT_OORANGE, pbdev->fh, pbdev->fid, + addr, 0); + return ret; + } + + pte = s390_guest_io_table_walk(s390_pci_get_table_origin(pbdev->g_iota), + addr); + + if (!pte) { + pbdev->error_state = true; + pbdev->lgstg_blocked = true; + s390_pci_generate_error_event(ERR_EVENT_SERR, pbdev->fh, pbdev->fid, + addr, ERR_EVENT_Q_BIT); + return ret; + } + + flags = pte & ZPCI_PTE_FLAG_MASK; + ret.iova = addr; + ret.translated_addr = pte & ZPCI_PTE_ADDR_MASK; + ret.addr_mask = 0xfff; + + if (flags & ZPCI_PTE_INVALID) { + ret.perm = IOMMU_NONE; + } else { + ret.perm = IOMMU_RW; + } + + return ret; +} + +static const MemoryRegionIOMMUOps s390_iommu_ops = { + .translate = s390_translate_iommu, +}; + +static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ + S390pciState *s = opaque; + + return &s->pbdev[PCI_SLOT(devfn)].as; +} + +static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set) +{ + uint8_t ind_old, ind_new; + hwaddr len = 1; + uint8_t *ind_addr; + + ind_addr = cpu_physical_memory_map(ind_loc, &len, 1); + if (!ind_addr) { + s390_pci_generate_error_event(ERR_EVENT_AIRERR, 0, 0, 0, 0); + return -1; + } + do { + ind_old = *ind_addr; + ind_new = ind_old | to_be_set; + } while (atomic_cmpxchg(ind_addr, ind_old, ind_new) != ind_old); + cpu_physical_memory_unmap(ind_addr, len, 1, len); + + return ind_old; +} + +static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + S390PCIBusDevice *pbdev; + uint32_t io_int_word; + uint32_t fid = data >> ZPCI_MSI_VEC_BITS; + uint32_t vec = data & ZPCI_MSI_VEC_MASK; + uint64_t ind_bit; + uint32_t sum_bit; + uint32_t e = 0; + + DPRINTF("write_msix data 0x%" PRIx64 " fid %d vec 0x%x\n", data, fid, vec); + + pbdev = s390_pci_find_dev_by_fid(fid); + if (!pbdev) { + e |= (vec << ERR_EVENT_MVN_OFFSET); + s390_pci_generate_error_event(ERR_EVENT_NOMSI, 0, fid, addr, e); + return; + } + + ind_bit = pbdev->routes.adapter.ind_offset; + sum_bit = pbdev->routes.adapter.summary_offset; + + set_ind_atomic(pbdev->routes.adapter.ind_addr + (ind_bit + vec) / 8, + 0x80 >> ((ind_bit + vec) % 8)); + if (!set_ind_atomic(pbdev->routes.adapter.summary_addr + sum_bit / 8, + 0x80 >> (sum_bit % 8))) { + io_int_word = (pbdev->isc << 27) | IO_INT_WORD_AI; + s390_io_interrupt(0, 0, 0, io_int_word); + } + + return; +} + +static uint64_t s390_msi_ctrl_read(void *opaque, hwaddr addr, unsigned size) +{ + return 0xffffffff; +} + +static const MemoryRegionOps s390_msi_ctrl_ops = { + .write = s390_msi_ctrl_write, + .read = s390_msi_ctrl_read, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void s390_pcihost_init_as(S390pciState *s) +{ + int i; + + for (i = 0; i < PCI_SLOT_MAX; i++) { + memory_region_init_iommu(&s->pbdev[i].mr, OBJECT(s), + &s390_iommu_ops, "iommu-s390", UINT64_MAX); + address_space_init(&s->pbdev[i].as, &s->pbdev[i].mr, "iommu-pci"); + } + + memory_region_init_io(&s->msix_notify_mr, OBJECT(s), + &s390_msi_ctrl_ops, s, "msix-s390", UINT64_MAX); + address_space_init(&s->msix_notify_as, &s->msix_notify_mr, "msix-pci"); +} + +static int s390_pcihost_init(SysBusDevice *dev) +{ + PCIBus *b; + BusState *bus; + PCIHostState *phb = PCI_HOST_BRIDGE(dev); + S390pciState *s = S390_PCI_HOST_BRIDGE(dev); + + DPRINTF("host_init\n"); + + b = pci_register_bus(DEVICE(dev), NULL, + s390_pci_set_irq, s390_pci_map_irq, NULL, + get_system_memory(), get_system_io(), 0, 64, + TYPE_PCI_BUS); + s390_pcihost_init_as(s); + pci_setup_iommu(b, s390_pci_dma_iommu, s); + + bus = BUS(b); + qbus_set_hotplug_handler(bus, DEVICE(dev), NULL); + phb->bus = b; + QTAILQ_INIT(&s->pending_sei); + return 0; +} + +static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev) +{ + uint8_t pos; + uint16_t ctrl; + uint32_t table, pba; + + pos = pci_find_capability(pbdev->pdev, PCI_CAP_ID_MSIX); + if (!pos) { + pbdev->msix.available = false; + return 0; + } + + ctrl = pci_host_config_read_common(pbdev->pdev, pos + PCI_CAP_FLAGS, + pci_config_size(pbdev->pdev), sizeof(ctrl)); + table = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_TABLE, + pci_config_size(pbdev->pdev), sizeof(table)); + pba = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_PBA, + pci_config_size(pbdev->pdev), sizeof(pba)); + + pbdev->msix.table_bar = table & PCI_MSIX_FLAGS_BIRMASK; + pbdev->msix.table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; + pbdev->msix.pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; + pbdev->msix.pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; + pbdev->msix.entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; + pbdev->msix.available = true; + return 0; +} + +static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + PCIDevice *pci_dev = PCI_DEVICE(dev); + S390PCIBusDevice *pbdev; + S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev) + ->qbus.parent); + + pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)]; + + pbdev->fid = s390_pci_get_pfid(pci_dev); + pbdev->pdev = pci_dev; + pbdev->configured = true; + pbdev->fh = s390_pci_get_pfh(pci_dev); + + s390_pcihost_setup_msix(pbdev); + + if (dev->hotplugged) { + s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY, + pbdev->fh, pbdev->fid); + s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED, + pbdev->fh, pbdev->fid); + } + return; +} + +static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + PCIDevice *pci_dev = PCI_DEVICE(dev); + S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev) + ->qbus.parent); + S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)]; + + if (pbdev->configured) { + pbdev->configured = false; + s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES, + pbdev->fh, pbdev->fid); + } + + s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED, + pbdev->fh, pbdev->fid); + pbdev->fh = 0; + pbdev->fid = 0; + pbdev->pdev = NULL; + object_unparent(OBJECT(pci_dev)); +} + +static void s390_pcihost_class_init(ObjectClass *klass, void *data) +{ + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + + dc->cannot_instantiate_with_device_add_yet = true; + k->init = s390_pcihost_init; + hc->plug = s390_pcihost_hot_plug; + hc->unplug = s390_pcihost_hot_unplug; + msi_supported = true; +} + +static const TypeInfo s390_pcihost_info = { + .name = TYPE_S390_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(S390pciState), + .class_init = s390_pcihost_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static void s390_pci_register_types(void) +{ + type_register_static(&s390_pcihost_info); +} + +type_init(s390_pci_register_types) diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h new file mode 100644 index 0000000000..464a92eedf --- /dev/null +++ b/hw/s390x/s390-pci-bus.h @@ -0,0 +1,251 @@ +/* + * s390 PCI BUS definitions + * + * Copyright 2014 IBM Corp. + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> + * Hong Bo Li <lihbbj@cn.ibm.com> + * Yi Min Zhao <zyimin@cn.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_PCI_BUS_H +#define HW_S390_PCI_BUS_H + +#include <hw/pci/pci.h> +#include <hw/pci/pci_host.h> +#include "hw/s390x/sclp.h" +#include "hw/s390x/s390_flic.h" +#include "hw/s390x/css.h" + +#define TYPE_S390_PCI_HOST_BRIDGE "s390-pcihost" +#define FH_VIRT 0x00ff0000 +#define ENABLE_BIT_OFFSET 31 +#define S390_PCIPT_ADAPTER 2 + +#define S390_PCI_HOST_BRIDGE(obj) \ + OBJECT_CHECK(S390pciState, (obj), TYPE_S390_PCI_HOST_BRIDGE) + +#define HP_EVENT_TO_CONFIGURED 0x0301 +#define HP_EVENT_RESERVED_TO_STANDBY 0x0302 +#define HP_EVENT_CONFIGURED_TO_STBRES 0x0304 +#define HP_EVENT_STANDBY_TO_RESERVED 0x0308 + +#define ERR_EVENT_INVALAS 0x1 +#define ERR_EVENT_OORANGE 0x2 +#define ERR_EVENT_INVALTF 0x3 +#define ERR_EVENT_TPROTE 0x4 +#define ERR_EVENT_APROTE 0x5 +#define ERR_EVENT_KEYE 0x6 +#define ERR_EVENT_INVALTE 0x7 +#define ERR_EVENT_INVALTL 0x8 +#define ERR_EVENT_TT 0x9 +#define ERR_EVENT_INVALMS 0xa +#define ERR_EVENT_SERR 0xb +#define ERR_EVENT_NOMSI 0x10 +#define ERR_EVENT_INVALBV 0x11 +#define ERR_EVENT_AIBV 0x12 +#define ERR_EVENT_AIRERR 0x13 +#define ERR_EVENT_FMBA 0x2a +#define ERR_EVENT_FMBUP 0x2b +#define ERR_EVENT_FMBPRO 0x2c +#define ERR_EVENT_CCONF 0x30 +#define ERR_EVENT_SERVAC 0x3a +#define ERR_EVENT_PERMERR 0x3b + +#define ERR_EVENT_Q_BIT 0x2 +#define ERR_EVENT_MVN_OFFSET 16 + +#define ZPCI_MSI_VEC_BITS 11 +#define ZPCI_MSI_VEC_MASK 0x7ff + +#define ZPCI_MSI_ADDR 0xfe00000000000000ULL +#define ZPCI_SDMA_ADDR 0x100000000ULL +#define ZPCI_EDMA_ADDR 0x1ffffffffffffffULL + +#define PAGE_SHIFT 12 +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_DEFAULT_ACC 0 +#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) + +/* I/O Translation Anchor (IOTA) */ +enum ZpciIoatDtype { + ZPCI_IOTA_STO = 0, + ZPCI_IOTA_RTTO = 1, + ZPCI_IOTA_RSTO = 2, + ZPCI_IOTA_RFTO = 3, + ZPCI_IOTA_PFAA = 4, + ZPCI_IOTA_IOPFAA = 5, + ZPCI_IOTA_IOPTO = 7 +}; + +#define ZPCI_IOTA_IOT_ENABLED 0x800ULL +#define ZPCI_IOTA_DT_ST (ZPCI_IOTA_STO << 2) +#define ZPCI_IOTA_DT_RT (ZPCI_IOTA_RTTO << 2) +#define ZPCI_IOTA_DT_RS (ZPCI_IOTA_RSTO << 2) +#define ZPCI_IOTA_DT_RF (ZPCI_IOTA_RFTO << 2) +#define ZPCI_IOTA_DT_PF (ZPCI_IOTA_PFAA << 2) +#define ZPCI_IOTA_FS_4K 0 +#define ZPCI_IOTA_FS_1M 1 +#define ZPCI_IOTA_FS_2G 2 +#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) + +#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) +#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) +#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS) +#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF) +#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY |\ + ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G) + +/* I/O Region and segment tables */ +#define ZPCI_INDEX_MASK 0x7ffULL + +#define ZPCI_TABLE_TYPE_MASK 0xc +#define ZPCI_TABLE_TYPE_RFX 0xc +#define ZPCI_TABLE_TYPE_RSX 0x8 +#define ZPCI_TABLE_TYPE_RTX 0x4 +#define ZPCI_TABLE_TYPE_SX 0x0 + +#define ZPCI_TABLE_LEN_RFX 0x3 +#define ZPCI_TABLE_LEN_RSX 0x3 +#define ZPCI_TABLE_LEN_RTX 0x3 + +#define ZPCI_TABLE_OFFSET_MASK 0xc0 +#define ZPCI_TABLE_SIZE 0x4000 +#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE +#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long)) +#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE) + +#define ZPCI_TABLE_BITS 11 +#define ZPCI_PT_BITS 8 +#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) +#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) + +#define ZPCI_RTE_FLAG_MASK 0x3fffULL +#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK) +#define ZPCI_STE_FLAG_MASK 0x7ffULL +#define ZPCI_STE_ADDR_MASK (~ZPCI_STE_FLAG_MASK) + +/* I/O Page tables */ +#define ZPCI_PTE_VALID_MASK 0x400 +#define ZPCI_PTE_INVALID 0x400 +#define ZPCI_PTE_VALID 0x000 +#define ZPCI_PT_SIZE 0x800 +#define ZPCI_PT_ALIGN ZPCI_PT_SIZE +#define ZPCI_PT_ENTRIES (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE) +#define ZPCI_PT_MASK (ZPCI_PT_ENTRIES - 1) + +#define ZPCI_PTE_FLAG_MASK 0xfffULL +#define ZPCI_PTE_ADDR_MASK (~ZPCI_PTE_FLAG_MASK) + +/* Shared bits */ +#define ZPCI_TABLE_VALID 0x00 +#define ZPCI_TABLE_INVALID 0x20 +#define ZPCI_TABLE_PROTECTED 0x200 +#define ZPCI_TABLE_UNPROTECTED 0x000 + +#define ZPCI_TABLE_VALID_MASK 0x20 +#define ZPCI_TABLE_PROT_MASK 0x200 + +typedef struct SeiContainer { + QTAILQ_ENTRY(SeiContainer) link; + uint32_t fid; + uint32_t fh; + uint8_t cc; + uint16_t pec; + uint64_t faddr; + uint32_t e; +} SeiContainer; + +typedef struct PciCcdfErr { + uint32_t reserved1; + uint32_t fh; + uint32_t fid; + uint32_t e; + uint64_t faddr; + uint32_t reserved3; + uint16_t reserved4; + uint16_t pec; +} QEMU_PACKED PciCcdfErr; + +typedef struct PciCcdfAvail { + uint32_t reserved1; + uint32_t fh; + uint32_t fid; + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + uint16_t reserved6; + uint16_t pec; +} QEMU_PACKED PciCcdfAvail; + +typedef struct ChscSeiNt2Res { + uint16_t length; + uint16_t code; + uint16_t reserved1; + uint8_t reserved2; + uint8_t nt; + uint8_t flags; + uint8_t reserved3; + uint8_t reserved4; + uint8_t cc; + uint32_t reserved5[13]; + uint8_t ccdf[4016]; +} QEMU_PACKED ChscSeiNt2Res; + +typedef struct PciCfgSccb { + SCCBHeader header; + uint8_t atype; + uint8_t reserved1; + uint16_t reserved2; + uint32_t aid; +} QEMU_PACKED PciCfgSccb; + +typedef struct S390MsixInfo { + bool available; + uint8_t table_bar; + uint8_t pba_bar; + uint16_t entries; + uint32_t table_offset; + uint32_t pba_offset; +} S390MsixInfo; + +typedef struct S390PCIBusDevice { + PCIDevice *pdev; + bool configured; + bool error_state; + bool lgstg_blocked; + uint32_t fh; + uint32_t fid; + uint64_t g_iota; + uint64_t pba; + uint64_t pal; + uint64_t fmb_addr; + uint8_t isc; + uint16_t noi; + uint8_t sum; + S390MsixInfo msix; + AdapterRoutes routes; + AddressSpace as; + MemoryRegion mr; +} S390PCIBusDevice; + +typedef struct S390pciState { + PCIHostState parent_obj; + S390PCIBusDevice pbdev[PCI_SLOT_MAX]; + AddressSpace msix_notify_as; + MemoryRegion msix_notify_mr; + QTAILQ_HEAD(, SeiContainer) pending_sei; +} S390pciState; + +int chsc_sei_nt2_get_event(void *res); +int chsc_sei_nt2_have_event(void); +void s390_pci_sclp_configure(int configure, SCCB *sccb); +S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx); +S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh); +S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid); + +#endif diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c new file mode 100644 index 0000000000..5ea13e5d79 --- /dev/null +++ b/hw/s390x/s390-pci-inst.c @@ -0,0 +1,811 @@ +/* + * s390 PCI instructions + * + * Copyright 2014 IBM Corp. + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> + * Hong Bo Li <lihbbj@cn.ibm.com> + * Yi Min Zhao <zyimin@cn.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include "s390-pci-inst.h" +#include "s390-pci-bus.h" +#include <exec/memory-internal.h> +#include <qemu/error-report.h> + +/* #define DEBUG_S390PCI_INST */ +#ifdef DEBUG_S390PCI_INST +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "s390pci-inst: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +static void s390_set_status_code(CPUS390XState *env, + uint8_t r, uint64_t status_code) +{ + env->regs[r] &= ~0xff000000ULL; + env->regs[r] |= (status_code & 0xff) << 24; +} + +static int list_pci(ClpReqRspListPci *rrb, uint8_t *cc) +{ + S390PCIBusDevice *pbdev; + uint32_t res_code, initial_l2, g_l2, finish; + int rc, idx; + uint64_t resume_token; + + rc = 0; + if (lduw_p(&rrb->request.hdr.len) != 32) { + res_code = CLP_RC_LEN; + rc = -EINVAL; + goto out; + } + + if ((ldl_p(&rrb->request.fmt) & CLP_MASK_FMT) != 0) { + res_code = CLP_RC_FMT; + rc = -EINVAL; + goto out; + } + + if ((ldl_p(&rrb->request.fmt) & ~CLP_MASK_FMT) != 0 || + ldq_p(&rrb->request.reserved1) != 0 || + ldq_p(&rrb->request.reserved2) != 0) { + res_code = CLP_RC_RESNOT0; + rc = -EINVAL; + goto out; + } + + resume_token = ldq_p(&rrb->request.resume_token); + + if (resume_token) { + pbdev = s390_pci_find_dev_by_idx(resume_token); + if (!pbdev) { + res_code = CLP_RC_LISTPCI_BADRT; + rc = -EINVAL; + goto out; + } + } + + if (lduw_p(&rrb->response.hdr.len) < 48) { + res_code = CLP_RC_8K; + rc = -EINVAL; + goto out; + } + + initial_l2 = lduw_p(&rrb->response.hdr.len); + if ((initial_l2 - LIST_PCI_HDR_LEN) % sizeof(ClpFhListEntry) + != 0) { + res_code = CLP_RC_LEN; + rc = -EINVAL; + *cc = 3; + goto out; + } + + stl_p(&rrb->response.fmt, 0); + stq_p(&rrb->response.reserved1, 0); + stq_p(&rrb->response.reserved2, 0); + stl_p(&rrb->response.mdd, FH_VIRT); + stw_p(&rrb->response.max_fn, PCI_MAX_FUNCTIONS); + rrb->response.entry_size = sizeof(ClpFhListEntry); + finish = 0; + idx = resume_token; + g_l2 = LIST_PCI_HDR_LEN; + do { + pbdev = s390_pci_find_dev_by_idx(idx); + if (!pbdev) { + finish = 1; + break; + } + stw_p(&rrb->response.fh_list[idx - resume_token].device_id, + pci_get_word(pbdev->pdev->config + PCI_DEVICE_ID)); + stw_p(&rrb->response.fh_list[idx - resume_token].vendor_id, + pci_get_word(pbdev->pdev->config + PCI_VENDOR_ID)); + stl_p(&rrb->response.fh_list[idx - resume_token].config, 0x80000000); + stl_p(&rrb->response.fh_list[idx - resume_token].fid, pbdev->fid); + stl_p(&rrb->response.fh_list[idx - resume_token].fh, pbdev->fh); + + g_l2 += sizeof(ClpFhListEntry); + /* Add endian check for DPRINTF? */ + DPRINTF("g_l2 %d vendor id 0x%x device id 0x%x fid 0x%x fh 0x%x\n", + g_l2, + lduw_p(&rrb->response.fh_list[idx - resume_token].vendor_id), + lduw_p(&rrb->response.fh_list[idx - resume_token].device_id), + ldl_p(&rrb->response.fh_list[idx - resume_token].fid), + ldl_p(&rrb->response.fh_list[idx - resume_token].fh)); + idx++; + } while (g_l2 < initial_l2); + + if (finish == 1) { + resume_token = 0; + } else { + resume_token = idx; + } + stq_p(&rrb->response.resume_token, resume_token); + stw_p(&rrb->response.hdr.len, g_l2); + stw_p(&rrb->response.hdr.rsp, CLP_RC_OK); +out: + if (rc) { + DPRINTF("list pci failed rc 0x%x\n", rc); + stw_p(&rrb->response.hdr.rsp, res_code); + } + return rc; +} + +int clp_service_call(S390CPU *cpu, uint8_t r2) +{ + ClpReqHdr *reqh; + ClpRspHdr *resh; + S390PCIBusDevice *pbdev; + uint32_t req_len; + uint32_t res_len; + uint8_t buffer[4096 * 2]; + uint8_t cc = 0; + CPUS390XState *env = &cpu->env; + int i; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 4); + return 0; + } + + cpu_physical_memory_read(env->regs[r2], buffer, sizeof(*reqh)); + reqh = (ClpReqHdr *)buffer; + req_len = lduw_p(&reqh->len); + if (req_len < 16 || req_len > 8184 || (req_len % 8 != 0)) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + + cpu_physical_memory_read(env->regs[r2], buffer, req_len + sizeof(*resh)); + resh = (ClpRspHdr *)(buffer + req_len); + res_len = lduw_p(&resh->len); + if (res_len < 8 || res_len > 8176 || (res_len % 8 != 0)) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + if ((req_len + res_len) > 8192) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + + cpu_physical_memory_read(env->regs[r2], buffer, req_len + res_len); + + if (req_len != 32) { + stw_p(&resh->rsp, CLP_RC_LEN); + goto out; + } + + switch (lduw_p(&reqh->cmd)) { + case CLP_LIST_PCI: { + ClpReqRspListPci *rrb = (ClpReqRspListPci *)buffer; + list_pci(rrb, &cc); + break; + } + case CLP_SET_PCI_FN: { + ClpReqSetPci *reqsetpci = (ClpReqSetPci *)reqh; + ClpRspSetPci *ressetpci = (ClpRspSetPci *)resh; + + pbdev = s390_pci_find_dev_by_fh(ldl_p(&reqsetpci->fh)); + if (!pbdev) { + stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_FH); + goto out; + } + + switch (reqsetpci->oc) { + case CLP_SET_ENABLE_PCI_FN: + pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET; + stl_p(&ressetpci->fh, pbdev->fh); + stw_p(&ressetpci->hdr.rsp, CLP_RC_OK); + break; + case CLP_SET_DISABLE_PCI_FN: + pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET); + pbdev->error_state = false; + pbdev->lgstg_blocked = false; + stl_p(&ressetpci->fh, pbdev->fh); + stw_p(&ressetpci->hdr.rsp, CLP_RC_OK); + break; + default: + DPRINTF("unknown set pci command\n"); + stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_FHOP); + break; + } + break; + } + case CLP_QUERY_PCI_FN: { + ClpReqQueryPci *reqquery = (ClpReqQueryPci *)reqh; + ClpRspQueryPci *resquery = (ClpRspQueryPci *)resh; + + pbdev = s390_pci_find_dev_by_fh(ldl_p(&reqquery->fh)); + if (!pbdev) { + DPRINTF("query pci no pci dev\n"); + stw_p(&resquery->hdr.rsp, CLP_RC_SETPCIFN_FH); + goto out; + } + + for (i = 0; i < PCI_BAR_COUNT; i++) { + uint32_t data = pci_get_long(pbdev->pdev->config + + PCI_BASE_ADDRESS_0 + (i * 4)); + + stl_p(&resquery->bar[i], data); + resquery->bar_size[i] = pbdev->pdev->io_regions[i].size ? + ctz64(pbdev->pdev->io_regions[i].size) : 0; + DPRINTF("bar %d addr 0x%x size 0x%" PRIx64 "barsize 0x%x\n", i, + ldl_p(&resquery->bar[i]), + pbdev->pdev->io_regions[i].size, + resquery->bar_size[i]); + } + + stq_p(&resquery->sdma, ZPCI_SDMA_ADDR); + stq_p(&resquery->edma, ZPCI_EDMA_ADDR); + stw_p(&resquery->pchid, 0); + stw_p(&resquery->ug, 1); + stl_p(&resquery->uid, pbdev->fid); + stw_p(&resquery->hdr.rsp, CLP_RC_OK); + break; + } + case CLP_QUERY_PCI_FNGRP: { + ClpRspQueryPciGrp *resgrp = (ClpRspQueryPciGrp *)resh; + resgrp->fr = 1; + stq_p(&resgrp->dasm, 0); + stq_p(&resgrp->msia, ZPCI_MSI_ADDR); + stw_p(&resgrp->mui, 0); + stw_p(&resgrp->i, 128); + resgrp->version = 0; + + stw_p(&resgrp->hdr.rsp, CLP_RC_OK); + break; + } + default: + DPRINTF("unknown clp command\n"); + stw_p(&resh->rsp, CLP_RC_CMD); + break; + } + +out: + cpu_physical_memory_write(env->regs[r2], buffer, req_len + res_len); + setcc(cpu, cc); + return 0; +} + +int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) +{ + CPUS390XState *env = &cpu->env; + S390PCIBusDevice *pbdev; + uint64_t offset; + uint64_t data; + uint8_t len; + uint32_t fh; + uint8_t pcias; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 4); + return 0; + } + + if (r2 & 0x1) { + program_interrupt(env, PGM_SPECIFICATION, 4); + return 0; + } + + fh = env->regs[r2] >> 32; + pcias = (env->regs[r2] >> 16) & 0xf; + len = env->regs[r2] & 0xf; + offset = env->regs[r2 + 1]; + + pbdev = s390_pci_find_dev_by_fh(fh); + if (!pbdev) { + DPRINTF("pcilg no pci dev\n"); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + if (pbdev->lgstg_blocked) { + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r2, ZPCI_PCI_ST_BLOCKED); + return 0; + } + + if (pcias < 6) { + if ((8 - (offset & 0x7)) < len) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + MemoryRegion *mr = pbdev->pdev->io_regions[pcias].memory; + io_mem_read(mr, offset, &data, len); + } else if (pcias == 15) { + if ((4 - (offset & 0x3)) < len) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + data = pci_host_config_read_common( + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); + + switch (len) { + case 1: + break; + case 2: + data = bswap16(data); + break; + case 4: + data = bswap32(data); + break; + case 8: + data = bswap64(data); + break; + default: + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + } else { + DPRINTF("invalid space\n"); + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS); + return 0; + } + + env->regs[r1] = data; + setcc(cpu, ZPCI_PCI_LS_OK); + return 0; +} + +static void update_msix_table_msg_data(S390PCIBusDevice *pbdev, uint64_t offset, + uint64_t *data, uint8_t len) +{ + uint32_t val; + uint8_t *msg_data; + + if (offset % PCI_MSIX_ENTRY_SIZE != 8) { + return; + } + + if (len != 4) { + DPRINTF("access msix table msg data but len is %d\n", len); + return; + } + + msg_data = (uint8_t *)data - offset % PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + val = pci_get_long(msg_data) | (pbdev->fid << ZPCI_MSI_VEC_BITS); + pci_set_long(msg_data, val); + DPRINTF("update msix msg_data to 0x%" PRIx64 "\n", *data); +} + +static int trap_msix(S390PCIBusDevice *pbdev, uint64_t offset, uint8_t pcias) +{ + if (pbdev->msix.available && pbdev->msix.table_bar == pcias && + offset >= pbdev->msix.table_offset && + offset <= pbdev->msix.table_offset + + (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) { + return 1; + } else { + return 0; + } +} + +int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) +{ + CPUS390XState *env = &cpu->env; + uint64_t offset, data; + S390PCIBusDevice *pbdev; + uint8_t len; + uint32_t fh; + uint8_t pcias; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 4); + return 0; + } + + if (r2 & 0x1) { + program_interrupt(env, PGM_SPECIFICATION, 4); + return 0; + } + + fh = env->regs[r2] >> 32; + pcias = (env->regs[r2] >> 16) & 0xf; + len = env->regs[r2] & 0xf; + offset = env->regs[r2 + 1]; + + pbdev = s390_pci_find_dev_by_fh(fh); + if (!pbdev) { + DPRINTF("pcistg no pci dev\n"); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + if (pbdev->lgstg_blocked) { + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r2, ZPCI_PCI_ST_BLOCKED); + return 0; + } + + data = env->regs[r1]; + if (pcias < 6) { + if ((8 - (offset & 0x7)) < len) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + MemoryRegion *mr; + if (trap_msix(pbdev, offset, pcias)) { + offset = offset - pbdev->msix.table_offset; + mr = &pbdev->pdev->msix_table_mmio; + update_msix_table_msg_data(pbdev, offset, &data, len); + } else { + mr = pbdev->pdev->io_regions[pcias].memory; + } + + io_mem_write(mr, offset, data, len); + } else if (pcias == 15) { + if ((4 - (offset & 0x3)) < len) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + switch (len) { + case 1: + break; + case 2: + data = bswap16(data); + break; + case 4: + data = bswap32(data); + break; + case 8: + data = bswap64(data); + break; + default: + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + + pci_host_config_write_common(pbdev->pdev, offset, + pci_config_size(pbdev->pdev), + data, len); + } else { + DPRINTF("pcistg invalid space\n"); + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS); + return 0; + } + + setcc(cpu, ZPCI_PCI_LS_OK); + return 0; +} + +int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) +{ + CPUS390XState *env = &cpu->env; + uint32_t fh; + S390PCIBusDevice *pbdev; + ram_addr_t size; + IOMMUTLBEntry entry; + MemoryRegion *mr; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 4); + goto out; + } + + if (r2 & 0x1) { + program_interrupt(env, PGM_SPECIFICATION, 4); + goto out; + } + + fh = env->regs[r1] >> 32; + size = env->regs[r2 + 1]; + + pbdev = s390_pci_find_dev_by_fh(fh); + + if (!pbdev) { + DPRINTF("rpcit no pci dev\n"); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + goto out; + } + + mr = pci_device_iommu_address_space(pbdev->pdev)->root; + entry = mr->iommu_ops->translate(mr, env->regs[r2], 0); + + if (!entry.translated_addr) { + setcc(cpu, ZPCI_PCI_LS_ERR); + goto out; + } + + entry.addr_mask = size - 1; + memory_region_notify_iommu(mr, entry); + setcc(cpu, ZPCI_PCI_LS_OK); +out: + return 0; +} + +int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr) +{ + CPUS390XState *env = &cpu->env; + S390PCIBusDevice *pbdev; + MemoryRegion *mr; + int i; + uint64_t val; + uint32_t fh; + uint8_t pcias; + uint8_t len; + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 6); + return 0; + } + + fh = env->regs[r1] >> 32; + pcias = (env->regs[r1] >> 16) & 0xf; + len = env->regs[r1] & 0xff; + + if (pcias > 5) { + DPRINTF("pcistb invalid space\n"); + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r1, ZPCI_PCI_ST_INVAL_AS); + return 0; + } + + switch (len) { + case 16: + case 32: + case 64: + case 128: + break; + default: + program_interrupt(env, PGM_SPECIFICATION, 6); + return 0; + } + + pbdev = s390_pci_find_dev_by_fh(fh); + if (!pbdev) { + DPRINTF("pcistb no pci dev fh 0x%x\n", fh); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + if (pbdev->lgstg_blocked) { + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r1, ZPCI_PCI_ST_BLOCKED); + return 0; + } + + mr = pbdev->pdev->io_regions[pcias].memory; + if (!memory_region_access_valid(mr, env->regs[r3], len, true)) { + program_interrupt(env, PGM_ADDRESSING, 6); + return 0; + } + + for (i = 0; i < len / 8; i++) { + val = ldq_phys(&address_space_memory, gaddr + i * 8); + io_mem_write(mr, env->regs[r3] + i * 8, val, 8); + } + + setcc(cpu, ZPCI_PCI_LS_OK); + return 0; +} + +static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) +{ + int ret; + S390FLICState *fs = s390_get_flic(); + S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); + + ret = css_register_io_adapter(S390_PCIPT_ADAPTER, + FIB_DATA_ISC(ldl_p(&fib.data)), true, false, + &pbdev->routes.adapter.adapter_id); + assert(ret == 0); + + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, + ldq_p(&fib.aisb), true); + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, + ldq_p(&fib.aibv), true); + + pbdev->routes.adapter.summary_addr = ldq_p(&fib.aisb); + pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(ldl_p(&fib.data)); + pbdev->routes.adapter.ind_addr = ldq_p(&fib.aibv); + pbdev->routes.adapter.ind_offset = FIB_DATA_AIBVO(ldl_p(&fib.data)); + pbdev->isc = FIB_DATA_ISC(ldl_p(&fib.data)); + pbdev->noi = FIB_DATA_NOI(ldl_p(&fib.data)); + pbdev->sum = FIB_DATA_SUM(ldl_p(&fib.data)); + + DPRINTF("reg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id); + return 0; +} + +static int dereg_irqs(S390PCIBusDevice *pbdev) +{ + S390FLICState *fs = s390_get_flic(); + S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); + + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, + pbdev->routes.adapter.ind_addr, false); + + pbdev->routes.adapter.summary_addr = 0; + pbdev->routes.adapter.summary_offset = 0; + pbdev->routes.adapter.ind_addr = 0; + pbdev->routes.adapter.ind_offset = 0; + pbdev->isc = 0; + pbdev->noi = 0; + pbdev->sum = 0; + + DPRINTF("dereg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id); + return 0; +} + +static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) +{ + uint64_t pba = ldq_p(&fib.pba); + uint64_t pal = ldq_p(&fib.pal); + uint64_t g_iota = ldq_p(&fib.iota); + uint8_t dt = (g_iota >> 2) & 0x7; + uint8_t t = (g_iota >> 11) & 0x1; + + if (pba > pal || pba < ZPCI_SDMA_ADDR || pal > ZPCI_EDMA_ADDR) { + program_interrupt(env, PGM_OPERAND, 6); + return -EINVAL; + } + + /* currently we only support designation type 1 with translation */ + if (!(dt == ZPCI_IOTA_RTTO && t)) { + error_report("unsupported ioat dt %d t %d", dt, t); + program_interrupt(env, PGM_OPERAND, 6); + return -EINVAL; + } + + pbdev->pba = pba; + pbdev->pal = pal; + pbdev->g_iota = g_iota; + return 0; +} + +static void dereg_ioat(S390PCIBusDevice *pbdev) +{ + pbdev->pba = 0; + pbdev->pal = 0; + pbdev->g_iota = 0; +} + +int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba) +{ + CPUS390XState *env = &cpu->env; + uint8_t oc; + uint32_t fh; + ZpciFib fib; + S390PCIBusDevice *pbdev; + uint64_t cc = ZPCI_PCI_LS_OK; + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 6); + return 0; + } + + oc = env->regs[r1] & 0xff; + fh = env->regs[r1] >> 32; + + if (fiba & 0x7) { + program_interrupt(env, PGM_SPECIFICATION, 6); + return 0; + } + + pbdev = s390_pci_find_dev_by_fh(fh); + if (!pbdev) { + DPRINTF("mpcifc no pci dev fh 0x%x\n", fh); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + cpu_physical_memory_read(fiba, (uint8_t *)&fib, sizeof(fib)); + + switch (oc) { + case ZPCI_MOD_FC_REG_INT: + if (reg_irqs(env, pbdev, fib)) { + cc = ZPCI_PCI_LS_ERR; + } + break; + case ZPCI_MOD_FC_DEREG_INT: + dereg_irqs(pbdev); + break; + case ZPCI_MOD_FC_REG_IOAT: + if (reg_ioat(env, pbdev, fib)) { + cc = ZPCI_PCI_LS_ERR; + } + break; + case ZPCI_MOD_FC_DEREG_IOAT: + dereg_ioat(pbdev); + break; + case ZPCI_MOD_FC_REREG_IOAT: + dereg_ioat(pbdev); + if (reg_ioat(env, pbdev, fib)) { + cc = ZPCI_PCI_LS_ERR; + } + break; + case ZPCI_MOD_FC_RESET_ERROR: + pbdev->error_state = false; + pbdev->lgstg_blocked = false; + break; + case ZPCI_MOD_FC_RESET_BLOCK: + pbdev->lgstg_blocked = false; + break; + case ZPCI_MOD_FC_SET_MEASURE: + pbdev->fmb_addr = ldq_p(&fib.fmb_addr); + break; + default: + program_interrupt(&cpu->env, PGM_OPERAND, 6); + cc = ZPCI_PCI_LS_ERR; + } + + setcc(cpu, cc); + return 0; +} + +int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba) +{ + CPUS390XState *env = &cpu->env; + uint32_t fh; + ZpciFib fib; + S390PCIBusDevice *pbdev; + uint32_t data; + uint64_t cc = ZPCI_PCI_LS_OK; + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 6); + return 0; + } + + fh = env->regs[r1] >> 32; + + if (fiba & 0x7) { + program_interrupt(env, PGM_SPECIFICATION, 6); + return 0; + } + + pbdev = s390_pci_find_dev_by_fh(fh); + if (!pbdev) { + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + memset(&fib, 0, sizeof(fib)); + stq_p(&fib.pba, pbdev->pba); + stq_p(&fib.pal, pbdev->pal); + stq_p(&fib.iota, pbdev->g_iota); + stq_p(&fib.aibv, pbdev->routes.adapter.ind_addr); + stq_p(&fib.aisb, pbdev->routes.adapter.summary_addr); + stq_p(&fib.fmb_addr, pbdev->fmb_addr); + + data = (pbdev->isc << 28) | (pbdev->noi << 16) | + (pbdev->routes.adapter.ind_offset << 8) | (pbdev->sum << 7) | + pbdev->routes.adapter.summary_offset; + stw_p(&fib.data, data); + + if (pbdev->fh >> ENABLE_BIT_OFFSET) { + fib.fc |= 0x80; + } + + if (pbdev->error_state) { + fib.fc |= 0x40; + } + + if (pbdev->lgstg_blocked) { + fib.fc |= 0x20; + } + + if (pbdev->g_iota) { + fib.fc |= 0x10; + } + + cpu_physical_memory_write(fiba, (uint8_t *)&fib, sizeof(fib)); + setcc(cpu, cc); + return 0; +} diff --git a/hw/s390x/s390-pci-inst.h b/hw/s390x/s390-pci-inst.h new file mode 100644 index 0000000000..7e6c804737 --- /dev/null +++ b/hw/s390x/s390-pci-inst.h @@ -0,0 +1,288 @@ +/* + * s390 PCI instruction definitions + * + * Copyright 2014 IBM Corp. + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> + * Hong Bo Li <lihbbj@cn.ibm.com> + * Yi Min Zhao <zyimin@cn.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_S390_PCI_INST_H +#define HW_S390_PCI_INST_H + +#include <sysemu/dma.h> + +/* CLP common request & response block size */ +#define CLP_BLK_SIZE 4096 +#define PCI_BAR_COUNT 6 +#define PCI_MAX_FUNCTIONS 4096 + +typedef struct ClpReqHdr { + uint16_t len; + uint16_t cmd; +} QEMU_PACKED ClpReqHdr; + +typedef struct ClpRspHdr { + uint16_t len; + uint16_t rsp; +} QEMU_PACKED ClpRspHdr; + +/* CLP Response Codes */ +#define CLP_RC_OK 0x0010 /* Command request successfully */ +#define CLP_RC_CMD 0x0020 /* Command code not recognized */ +#define CLP_RC_PERM 0x0030 /* Command not authorized */ +#define CLP_RC_FMT 0x0040 /* Invalid command request format */ +#define CLP_RC_LEN 0x0050 /* Invalid command request length */ +#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */ +#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */ +#define CLP_RC_NODATA 0x0080 /* No data available */ +#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ + +/* + * Call Logical Processor - Command Codes + */ +#define CLP_LIST_PCI 0x0002 +#define CLP_QUERY_PCI_FN 0x0003 +#define CLP_QUERY_PCI_FNGRP 0x0004 +#define CLP_SET_PCI_FN 0x0005 + +/* PCI function handle list entry */ +typedef struct ClpFhListEntry { + uint16_t device_id; + uint16_t vendor_id; +#define CLP_FHLIST_MASK_CONFIG 0x80000000 + uint32_t config; + uint32_t fid; + uint32_t fh; +} QEMU_PACKED ClpFhListEntry; + +#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */ +#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */ +#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */ +#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */ +#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */ +#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */ +#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */ +#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */ +#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */ +#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */ + +/* request or response block header length */ +#define LIST_PCI_HDR_LEN 32 + +/* Number of function handles fitting in response block */ +#define CLP_FH_LIST_NR_ENTRIES \ + ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \ + / sizeof(ClpFhListEntry)) + +#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */ +#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ + +#define CLP_UTIL_STR_LEN 64 + +#define CLP_MASK_FMT 0xf0000000 + +/* List PCI functions request */ +typedef struct ClpReqListPci { + ClpReqHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint64_t resume_token; + uint64_t reserved2; +} QEMU_PACKED ClpReqListPci; + +/* List PCI functions response */ +typedef struct ClpRspListPci { + ClpRspHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint64_t resume_token; + uint32_t mdd; + uint16_t max_fn; + uint8_t reserved2; + uint8_t entry_size; + ClpFhListEntry fh_list[CLP_FH_LIST_NR_ENTRIES]; +} QEMU_PACKED ClpRspListPci; + +/* Query PCI function request */ +typedef struct ClpReqQueryPci { + ClpReqHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint32_t fh; /* function handle */ + uint32_t reserved2; + uint64_t reserved3; +} QEMU_PACKED ClpReqQueryPci; + +/* Query PCI function response */ +typedef struct ClpRspQueryPci { + ClpRspHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint16_t vfn; /* virtual fn number */ +#define CLP_RSP_QPCI_MASK_UTIL 0x100 +#define CLP_RSP_QPCI_MASK_PFGID 0xff + uint16_t ug; + uint32_t fid; /* pci function id */ + uint8_t bar_size[PCI_BAR_COUNT]; + uint16_t pchid; + uint32_t bar[PCI_BAR_COUNT]; + uint64_t reserved2; + uint64_t sdma; /* start dma as */ + uint64_t edma; /* end dma as */ + uint32_t reserved3[11]; + uint32_t uid; + uint8_t util_str[CLP_UTIL_STR_LEN]; /* utility string */ +} QEMU_PACKED ClpRspQueryPci; + +/* Query PCI function group request */ +typedef struct ClpReqQueryPciGrp { + ClpReqHdr hdr; + uint32_t fmt; + uint64_t reserved1; +#define CLP_REQ_QPCIG_MASK_PFGID 0xff + uint32_t g; + uint32_t reserved2; + uint64_t reserved3; +} QEMU_PACKED ClpReqQueryPciGrp; + +/* Query PCI function group response */ +typedef struct ClpRspQueryPciGrp { + ClpRspHdr hdr; + uint32_t fmt; + uint64_t reserved1; +#define CLP_RSP_QPCIG_MASK_NOI 0xfff + uint16_t i; + uint8_t version; +#define CLP_RSP_QPCIG_MASK_FRAME 0x2 +#define CLP_RSP_QPCIG_MASK_REFRESH 0x1 + uint8_t fr; + uint16_t reserved2; + uint16_t mui; + uint64_t reserved3; + uint64_t dasm; /* dma address space mask */ + uint64_t msia; /* MSI address */ + uint64_t reserved4; + uint64_t reserved5; +} QEMU_PACKED ClpRspQueryPciGrp; + +/* Set PCI function request */ +typedef struct ClpReqSetPci { + ClpReqHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint32_t fh; /* function handle */ + uint16_t reserved2; + uint8_t oc; /* operation controls */ + uint8_t ndas; /* number of dma spaces */ + uint64_t reserved3; +} QEMU_PACKED ClpReqSetPci; + +/* Set PCI function response */ +typedef struct ClpRspSetPci { + ClpRspHdr hdr; + uint32_t fmt; + uint64_t reserved1; + uint32_t fh; /* function handle */ + uint32_t reserved3; + uint64_t reserved4; +} QEMU_PACKED ClpRspSetPci; + +typedef struct ClpReqRspListPci { + ClpReqListPci request; + ClpRspListPci response; +} QEMU_PACKED ClpReqRspListPci; + +typedef struct ClpReqRspSetPci { + ClpReqSetPci request; + ClpRspSetPci response; +} QEMU_PACKED ClpReqRspSetPci; + +typedef struct ClpReqRspQueryPci { + ClpReqQueryPci request; + ClpRspQueryPci response; +} QEMU_PACKED ClpReqRspQueryPci; + +typedef struct ClpReqRspQueryPciGrp { + ClpReqQueryPciGrp request; + ClpRspQueryPciGrp response; +} QEMU_PACKED ClpReqRspQueryPciGrp; + +/* Load/Store status codes */ +#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 +#define ZPCI_PCI_ST_FUNC_IN_ERR 8 +#define ZPCI_PCI_ST_BLOCKED 12 +#define ZPCI_PCI_ST_INSUF_RES 16 +#define ZPCI_PCI_ST_INVAL_AS 20 +#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24 +#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28 +#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36 +#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 +#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 + +/* Load/Store return codes */ +#define ZPCI_PCI_LS_OK 0 +#define ZPCI_PCI_LS_ERR 1 +#define ZPCI_PCI_LS_BUSY 2 +#define ZPCI_PCI_LS_INVAL_HANDLE 3 + +/* Modify PCI Function Controls */ +#define ZPCI_MOD_FC_REG_INT 2 +#define ZPCI_MOD_FC_DEREG_INT 3 +#define ZPCI_MOD_FC_REG_IOAT 4 +#define ZPCI_MOD_FC_DEREG_IOAT 5 +#define ZPCI_MOD_FC_REREG_IOAT 6 +#define ZPCI_MOD_FC_RESET_ERROR 7 +#define ZPCI_MOD_FC_RESET_BLOCK 9 +#define ZPCI_MOD_FC_SET_MEASURE 10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* Function Information Block */ +typedef struct ZpciFib { + uint8_t fmt; /* format */ + uint8_t reserved1[7]; + uint8_t fc; /* function controls */ + uint8_t reserved2; + uint16_t reserved3; + uint32_t reserved4; + uint64_t pba; /* PCI base address */ + uint64_t pal; /* PCI address limit */ + uint64_t iota; /* I/O Translation Anchor */ +#define FIB_DATA_ISC(x) (((x) >> 28) & 0x7) +#define FIB_DATA_NOI(x) (((x) >> 16) & 0xfff) +#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f) +#define FIB_DATA_SUM(x) (((x) >> 7) & 0x1) +#define FIB_DATA_AISBO(x) ((x) & 0x3f) + uint32_t data; + uint32_t reserved5; + uint64_t aibv; /* Adapter int bit vector address */ + uint64_t aisb; /* Adapter int summary bit address */ + uint64_t fmb_addr; /* Function measurement address and key */ + uint32_t reserved6; + uint32_t gd; +} QEMU_PACKED ZpciFib; + +int clp_service_call(S390CPU *cpu, uint8_t r2); +int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2); +int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2); +int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2); +int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr); +int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba); +int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba); + +#endif diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index bc4dc2ae8a..71bafe06ee 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -18,6 +18,7 @@ #include "css.h" #include "virtio-ccw.h" #include "qemu/config-file.h" +#include "s390-pci-bus.h" #define TYPE_S390_CCW_MACHINE "s390-ccw-machine" @@ -91,6 +92,7 @@ static void ccw_init(MachineState *machine) uint8_t *storage_keys; int ret; VirtualCssBus *css_bus; + DeviceState *dev; QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory"), NULL); ram_addr_t pad_size = 0; ram_addr_t maxmem = qemu_opt_get_size(opts, "maxmem", my_ram_size); @@ -127,6 +129,11 @@ static void ccw_init(MachineState *machine) machine->initrd_filename, "s390-ccw.img"); s390_flic_init(); + dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE); + object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE, + OBJECT(dev), NULL); + qdev_init_nofail(dev); + /* register hypercalls */ virtio_ccw_register_hcalls(); @@ -181,7 +188,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data) mc->no_serial = 1; mc->no_parallel = 1; mc->no_sdcard = 1; - mc->use_sclp = 1, + mc->use_sclp = 1; mc->max_cpus = 255; nc->nmi_monitor_handler = s390_nmi; } diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index a759da7f34..a969975a78 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -20,6 +20,7 @@ #include "qemu/config-file.h" #include "hw/s390x/sclp.h" #include "hw/s390x/event-facility.h" +#include "hw/s390x/s390-pci-bus.h" static inline SCLPEventFacility *get_event_facility(void) { @@ -62,7 +63,8 @@ static void read_SCP_info(SCCB *sccb) read_info->entries[i].type = 0; } - read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO); + read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO | + SCLP_HAS_PCI_RECONFIG); /* * The storage increment size is a multiple of 1M and is a power of 2. @@ -350,6 +352,12 @@ static void sclp_execute(SCCB *sccb, uint32_t code) case SCLP_UNASSIGN_STORAGE: unassign_storage(sccb); break; + case SCLP_CMDW_CONFIGURE_PCI: + s390_pci_sclp_configure(1, sccb); + break; + case SCLP_CMDW_DECONFIGURE_PCI: + s390_pci_sclp_configure(0, sccb); + break; default: efc->command_handler(ef, sccb, code); break; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index b4e73d1f35..014a92ce5f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2129,16 +2129,19 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, */ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) { - switch (vdev->interrupt) { - case VFIO_INT_INTx: - vfio_disable_intx(vdev); - break; - case VFIO_INT_MSI: - vfio_disable_msi(vdev); - break; - case VFIO_INT_MSIX: + /* + * More complicated than it looks. Disabling MSI/X transitions the + * device to INTx mode (if supported). Therefore we need to first + * disable MSI/X and then cleanup by disabling INTx. + */ + if (vdev->interrupt == VFIO_INT_MSIX) { vfio_disable_msix(vdev); - break; + } else if (vdev->interrupt == VFIO_INT_MSI) { + vfio_disable_msi(vdev); + } + + if (vdev->interrupt == VFIO_INT_INTx) { + vfio_disable_intx(vdev); } } @@ -2301,7 +2304,7 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - unsigned size = bar->region.size; + uint64_t size = bar->region.size; char name[64]; uint32_t pci_bar; uint8_t type; @@ -2351,7 +2354,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) } if (vdev->msix && vdev->msix->table_bar == nr) { - unsigned start; + uint64_t start; start = HOST_PAGE_ALIGN(vdev->msix->table_offset + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index c1bf357154..f2893b28aa 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d) } out: - memory_listener_register(&s->memory_listener, &address_space_memory); + memory_listener_register(&s->memory_listener, &s->dev.bus_master_as); memory_listener_register(&s->io_listener, &address_space_io); XEN_PT_LOG(d, "Real physical device %02x:%02x.%d registered successfully!\n", diff --git a/include/block/block.h b/include/block/block.h index 6e7275d95b..3082d2b80e 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -133,7 +133,8 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_BACKUP_SOURCE, BLOCK_OP_TYPE_BACKUP_TARGET, BLOCK_OP_TYPE_CHANGE, - BLOCK_OP_TYPE_COMMIT, + BLOCK_OP_TYPE_COMMIT_SOURCE, + BLOCK_OP_TYPE_COMMIT_TARGET, BLOCK_OP_TYPE_DATAPLANE, BLOCK_OP_TYPE_DRIVE_DEL, BLOCK_OP_TYPE_EJECT, @@ -396,7 +397,11 @@ const char *bdrv_get_encrypted_filename(BlockDriverState *bs); void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size); void bdrv_get_full_backing_filename(BlockDriverState *bs, - char *dest, size_t sz); + char *dest, size_t sz, Error **errp); +void bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + char *dest, size_t sz, + Error **errp); int bdrv_is_snapshot(BlockDriverState *bs); int path_has_protocol(const char *path); @@ -434,8 +439,10 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity, void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); -void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); void bdrv_dirty_iter_init(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); diff --git a/include/block/coroutine.h b/include/block/coroutine.h index 793df0ef8b..20c027a7fd 100644 --- a/include/block/coroutine.h +++ b/include/block/coroutine.h @@ -216,14 +216,4 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, */ void coroutine_fn yield_until_fd_readable(int fd); -/** - * Add or subtract from the coroutine pool size - * - * The coroutine implementation keeps a pool of coroutines to be reused by - * qemu_coroutine_create(). This makes coroutine creation cheap. Heavy - * coroutine users should call this to reserve pool space. Call it again with - * a negative number to release pool space. - */ -void qemu_coroutine_adjust_pool_size(int n); - #endif /* QEMU_COROUTINE_H */ diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index f0ce18725c..5fdd2fee80 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -299,11 +299,15 @@ CPUArchState *cpu_copy(CPUArchState *env); /* memory API */ -typedef struct RAMBlock { +typedef struct RAMBlock RAMBlock; + +struct RAMBlock { struct MemoryRegion *mr; uint8_t *host; ram_addr_t offset; - ram_addr_t length; + ram_addr_t used_length; + ram_addr_t max_length; + void (*resized)(const char*, uint64_t length, void *host); uint32_t flags; char idstr[256]; /* Reads can take either the iothread or the ramlist lock. @@ -311,11 +315,11 @@ typedef struct RAMBlock { */ QTAILQ_ENTRY(RAMBlock) next; int fd; -} RAMBlock; +}; static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) { - assert(offset < block->length); + assert(offset < block->used_length); assert(block->host); return (char *)block->host + offset; } diff --git a/include/exec/memory.h b/include/exec/memory.h index f64ab5e3e5..0cd96b152e 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -321,6 +321,30 @@ void memory_region_init_ram(MemoryRegion *mr, uint64_t size, Error **errp); +/** + * memory_region_init_resizeable_ram: Initialize memory region with resizeable + * RAM. Accesses into the region will + * modify memory directly. Only an initial + * portion of this RAM is actually used. + * The used size can change across reboots. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: the name of the region. + * @size: used size of the region. + * @max_size: max size of the region. + * @resized: callback to notify owner about used size change. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_resizeable_ram(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + uint64_t max_size, + void (*resized)(const char*, + uint64_t length, + void *host), + Error **errp); #ifdef __linux__ /** * memory_region_init_ram_from_file: Initialize RAM memory region with a @@ -878,6 +902,16 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled); void memory_region_set_address(MemoryRegion *mr, hwaddr addr); /* + * memory_region_set_size: dynamically update the size of a region. + * + * Dynamically updates the size of a region. + * + * @mr: the region to be updated + * @size: used size of the region. + */ +void memory_region_set_size(MemoryRegion *mr, uint64_t size); + +/* * memory_region_set_alias_offset: dynamically update a memory alias's offset * * Dynamically updates the offset into the target region that an alias points diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 8fc75cdd2b..ff558a4734 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -28,12 +28,19 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp); +ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size, + void (*resized)(const char*, + uint64_t length, + void *host), + MemoryRegion *mr, Error **errp); int qemu_get_ram_fd(ram_addr_t addr); void *qemu_get_ram_block_host_ptr(ram_addr_t addr); void *qemu_get_ram_ptr(ram_addr_t addr); void qemu_ram_free(ram_addr_t addr); void qemu_ram_free_from_ptr(ram_addr_t addr); +int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp); + static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, ram_addr_t length, unsigned client) @@ -172,9 +179,9 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, } #endif /* not _WIN32 */ -static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, - ram_addr_t length, - unsigned client) +static inline void cpu_physical_memory_clear_dirty_range_type(ram_addr_t start, + ram_addr_t length, + unsigned client) { unsigned long end, page; @@ -184,6 +191,15 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, bitmap_clear(ram_list.dirty_memory[client], page, end - page); } +static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, + ram_addr_t length) +{ + cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_MIGRATION); + cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_VGA); + cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_CODE); +} + + void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length, unsigned client); diff --git a/include/hw/loader.h b/include/hw/loader.h index 899762019f..8593045d04 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -69,7 +69,7 @@ int rom_add_file(const char *file, const char *fw_dir, hwaddr addr, int32_t bootindex, bool option_rom); ram_addr_t rom_add_blob(const char *name, const void *blob, size_t len, - hwaddr addr, const char *fw_file_name, + size_t max_len, hwaddr addr, const char *fw_file_name, FWCfgReadCallback fw_callback, void *callback_opaque); int rom_add_elf_program(const char *name, void *data, size_t datasize, size_t romsize, hwaddr addr); @@ -83,7 +83,7 @@ void do_info_roms(Monitor *mon, const QDict *qdict); #define rom_add_file_fixed(_f, _a, _i) \ rom_add_file(_f, NULL, _a, _i, false) #define rom_add_blob_fixed(_f, _b, _l, _a) \ - rom_add_blob(_f, _b, _l, _a, NULL, NULL, NULL) + rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL) #define PC_ROM_MIN_VGA 0xc0000 #define PC_ROM_MIN_OPTION 0xc8000 diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 749daf4dd7..716bff43bf 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -37,6 +37,7 @@ typedef struct sPAPREnvironment { int htab_save_index; bool htab_first_pass; int htab_fd; + bool htab_fd_stale; } sPAPREnvironment; #define H_SUCCESS 0 diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h index ec07a118f2..e8a64e25b7 100644 --- a/include/hw/s390x/sclp.h +++ b/include/hw/s390x/sclp.h @@ -43,14 +43,22 @@ #define SCLP_CMDW_CONFIGURE_CPU 0x00110001 #define SCLP_CMDW_DECONFIGURE_CPU 0x00100001 +/* SCLP PCI codes */ +#define SCLP_HAS_PCI_RECONFIG 0x0000000040000000ULL +#define SCLP_CMDW_CONFIGURE_PCI 0x001a0001 +#define SCLP_CMDW_DECONFIGURE_PCI 0x001b0001 +#define SCLP_RECONFIG_PCI_ATPYE 2 + /* SCLP response codes */ #define SCLP_RC_NORMAL_READ_COMPLETION 0x0010 #define SCLP_RC_NORMAL_COMPLETION 0x0020 #define SCLP_RC_SCCB_BOUNDARY_VIOLATION 0x0100 +#define SCLP_RC_NO_ACTION_REQUIRED 0x0120 #define SCLP_RC_INVALID_SCLP_COMMAND 0x01f0 #define SCLP_RC_CONTAINED_EQUIPMENT_CHECK 0x0340 #define SCLP_RC_INSUFFICIENT_SCCB_LENGTH 0x0300 #define SCLP_RC_STANDBY_READ_COMPLETION 0x0410 +#define SCLP_RC_ADAPTER_ID_NOT_RECOGNIZED 0x09f0 #define SCLP_RC_INVALID_FUNCTION 0x40f0 #define SCLP_RC_NO_EVENT_BUFFERS_STORED 0x60f0 #define SCLP_RC_INVALID_SELECTION_MASK 0x70f0 diff --git a/include/qemu/queue.h b/include/qemu/queue.h index 42bcadfbb1..a98eb3ad79 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -191,8 +191,19 @@ struct { \ } while (/*CONSTCOND*/0) #define QSLIST_INSERT_HEAD(head, elm, field) do { \ - (elm)->field.sle_next = (head)->slh_first; \ - (head)->slh_first = (elm); \ + (elm)->field.sle_next = (head)->slh_first; \ + (head)->slh_first = (elm); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do { \ + do { \ + (elm)->field.sle_next = (head)->slh_first; \ + } while (atomic_cmpxchg(&(head)->slh_first, (elm)->field.sle_next, \ + (elm)) != (elm)->field.sle_next); \ +} while (/*CONSTCOND*/0) + +#define QSLIST_MOVE_ATOMIC(dest, src) do { \ + (dest)->slh_first = atomic_xchg(&(src)->slh_first, NULL); \ } while (/*CONSTCOND*/0) #define QSLIST_REMOVE_HEAD(head, field) do { \ diff --git a/include/qemu/thread.h b/include/qemu/thread.h index f7e3b9b290..e89fdc9785 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -61,4 +61,8 @@ bool qemu_thread_is_self(QemuThread *thread); void qemu_thread_exit(void *retval); void qemu_thread_naming(bool enable); +struct Notifier; +void qemu_thread_atexit_add(struct Notifier *notifier); +void qemu_thread_atexit_remove(struct Notifier *notifier); + #endif diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 104cf3535e..30cb84d2b8 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -158,6 +158,7 @@ extern bool kvm_readonly_mem_allowed; struct kvm_run; struct kvm_lapic_state; +struct kvm_irq_routing_entry; typedef struct KVMCapabilityInfo { const char *name; @@ -270,6 +271,9 @@ int kvm_arch_on_sigbus(int code, void *addr); void kvm_arch_init_irq_routing(KVMState *s); +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data); + int kvm_set_irq(KVMState *s, int irq, int level); int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); @@ -1225,6 +1225,10 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) kroute.u.msi.address_lo = (uint32_t)msg.address; kroute.u.msi.address_hi = msg.address >> 32; kroute.u.msi.data = le32_to_cpu(msg.data); + if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data)) { + kvm_irqchip_release_virq(s, virq); + return -EINVAL; + } kvm_add_routing_entry(s, &kroute); kvm_irqchip_commit_routes(s); @@ -1250,6 +1254,9 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg) kroute.u.msi.address_lo = (uint32_t)msg.address; kroute.u.msi.address_hi = msg.address >> 32; kroute.u.msi.data = le32_to_cpu(msg.data); + if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data)) { + return -EINVAL; + } return kvm_update_routing_entry(s, &kroute); } @@ -1152,6 +1152,23 @@ void memory_region_init_ram(MemoryRegion *mr, mr->ram_addr = qemu_ram_alloc(size, mr, errp); } +void memory_region_init_resizeable_ram(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + uint64_t max_size, + void (*resized)(const char*, + uint64_t length, + void *host), + Error **errp) +{ + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_addr = qemu_ram_alloc_resizeable(size, max_size, resized, mr, errp); +} + #ifdef __linux__ void memory_region_init_ram_from_file(MemoryRegion *mr, struct Object *owner, @@ -1707,6 +1724,22 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled) memory_region_transaction_commit(); } +void memory_region_set_size(MemoryRegion *mr, uint64_t size) +{ + Int128 s = int128_make64(size); + + if (size == UINT64_MAX) { + s = int128_2_64(); + } + if (int128_eq(s, mr->size)) { + return; + } + memory_region_transaction_begin(); + mr->size = s; + memory_region_update_pending = true; + memory_region_transaction_commit(); +} + static void memory_region_readd_subregion(MemoryRegion *mr) { MemoryRegion *container = mr->container; diff --git a/migration/block.c b/migration/block.c index 74d9eb125c..0c7610600b 100644 --- a/migration/block.c +++ b/migration/block.c @@ -303,7 +303,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); - bdrv_reset_dirty(bs, cur_sector, nr_sectors); + bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors); qemu_mutex_unlock_iothread(); bmds->cur_sector = cur_sector + nr_sectors; @@ -496,7 +496,8 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, g_free(blk); } - bdrv_reset_dirty(bmds->bs, sector, nr_sectors); + bdrv_reset_dirty_bitmap(bmds->bs, bmds->dirty_bitmap, sector, + nr_sectors); break; } sector += BDRV_SECTORS_PER_DIRTY_CHUNK; @@ -765,8 +766,8 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) block_mig_state.read_done * BLOCK_SIZE; /* Report at least one block pending during bulk phase */ - if (pending == 0 && !block_mig_state.bulk_completed) { - pending = BLOCK_SIZE; + if (pending <= max_size && !block_mig_state.bulk_completed) { + pending = max_size + BLOCK_SIZE; } blk_mig_unlock(); qemu_mutex_unlock_iothread(); diff --git a/pc-bios/README b/pc-bios/README index edfadd7d38..8a85e69d3b 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -17,7 +17,7 @@ - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/aik/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20140630. + built from git tag qemu-slof-20141202. - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin Binary files differindex 69b0a5dbc3..031e3063a2 100644 --- a/pc-bios/slof.bin +++ b/pc-bios/slof.bin diff --git a/qapi-schema.json b/qapi-schema.json index 563b4ad98a..fbfc52f94d 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1254,11 +1254,19 @@ # # A discriminated record of operations that can be performed with # @transaction. +# +# Since 1.1 +# +# drive-backup since 1.6 +# abort since 1.6 +# blockdev-snapshot-internal-sync since 1.7 +# blockdev-backup since 2.3 ## { 'union': 'TransactionAction', 'data': { 'blockdev-snapshot-sync': 'BlockdevSnapshot', 'drive-backup': 'DriveBackup', + 'blockdev-backup': 'BlockdevBackup', 'abort': 'Abort', 'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal' } } diff --git a/qapi/block-core.json b/qapi/block-core.json index 6e8db15861..80984d1660 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -423,7 +423,7 @@ # @device: #optional If the stats are for a virtual block device, the name # corresponding to the virtual block device. # -# @device: #optional The node name of the device. (Since 2.3) +# @node-name: #optional The node name of the device. (Since 2.3) # # @stats: A @BlockDeviceStats for the device. # @@ -703,6 +703,41 @@ '*on-target-error': 'BlockdevOnError' } } ## +# @BlockdevBackup +# +# @device: the name of the device which should be copied. +# +# @target: the name of the backup target device. +# +# @sync: what parts of the disk image should be copied to the destination +# (all the disk, only the sectors allocated in the topmost image, or +# only new I/O). +# +# @speed: #optional the maximum speed, in bytes per second. The default is 0, +# for unlimited. +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# +# Note that @on-source-error and @on-target-error only affect background I/O. +# If an error occurs during a guest write request, the device's rerror/werror +# actions will be used. +# +# Since: 2.3 +## +{ 'type': 'BlockdevBackup', + 'data': { 'device': 'str', 'target': 'str', + 'sync': 'MirrorSyncMode', + '*speed': 'int', + '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } + +## # @blockdev-snapshot-sync # # Generates a synchronous snapshot of a block device. @@ -822,6 +857,25 @@ { 'command': 'drive-backup', 'data': 'DriveBackup' } ## +# @blockdev-backup +# +# Start a point-in-time copy of a block device to a new destination. The +# status of ongoing blockdev-backup operations can be checked with +# query-block-jobs where the BlockJobInfo.type field has the value 'backup'. +# The operation can be stopped before it has completed using the +# block-job-cancel command. +# +# For the arguments, see the documentation of BlockdevBackup. +# +# Returns: Nothing on success. +# If @device or @target is not a valid block device, DeviceNotFound. +# +# Since 2.3 +## +{ 'command': 'blockdev-backup', 'data': 'BlockdevBackup' } + + +## # @query-named-block-nodes # # Get the named block driver list diff --git a/qemu-coroutine.c b/qemu-coroutine.c index bd574aa1b5..525247b050 100644 --- a/qemu-coroutine.c +++ b/qemu-coroutine.c @@ -15,31 +15,59 @@ #include "trace.h" #include "qemu-common.h" #include "qemu/thread.h" +#include "qemu/atomic.h" #include "block/coroutine.h" #include "block/coroutine_int.h" enum { - POOL_DEFAULT_SIZE = 64, + POOL_BATCH_SIZE = 64, }; /** Free list to speed up creation */ -static QemuMutex pool_lock; -static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool); -static unsigned int pool_size; -static unsigned int pool_max_size = POOL_DEFAULT_SIZE; +static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +static unsigned int release_pool_size; +static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +static __thread unsigned int alloc_pool_size; +static __thread Notifier coroutine_pool_cleanup_notifier; + +static void coroutine_pool_cleanup(Notifier *n, void *value) +{ + Coroutine *co; + Coroutine *tmp; + + QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { + QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + qemu_coroutine_delete(co); + } +} Coroutine *qemu_coroutine_create(CoroutineEntry *entry) { Coroutine *co = NULL; if (CONFIG_COROUTINE_POOL) { - qemu_mutex_lock(&pool_lock); - co = QSLIST_FIRST(&pool); + co = QSLIST_FIRST(&alloc_pool); + if (!co) { + if (release_pool_size > POOL_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ + if (!coroutine_pool_cleanup_notifier.notify) { + coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; + qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); + } + + /* This is not exact; there could be a little skew between + * release_pool_size and the actual size of release_pool. But + * it is just a heuristic, it does not need to be perfect. + */ + alloc_pool_size = atomic_xchg(&release_pool_size, 0); + QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); + co = QSLIST_FIRST(&alloc_pool); + } + } if (co) { - QSLIST_REMOVE_HEAD(&pool, pool_next); - pool_size--; + QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + alloc_pool_size--; } - qemu_mutex_unlock(&pool_lock); } if (!co) { @@ -53,39 +81,24 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry) static void coroutine_delete(Coroutine *co) { + co->caller = NULL; + if (CONFIG_COROUTINE_POOL) { - qemu_mutex_lock(&pool_lock); - if (pool_size < pool_max_size) { - QSLIST_INSERT_HEAD(&pool, co, pool_next); - co->caller = NULL; - pool_size++; - qemu_mutex_unlock(&pool_lock); + if (release_pool_size < POOL_BATCH_SIZE * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); + atomic_inc(&release_pool_size); + return; + } + if (alloc_pool_size < POOL_BATCH_SIZE) { + QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); + alloc_pool_size++; return; } - qemu_mutex_unlock(&pool_lock); } qemu_coroutine_delete(co); } -static void __attribute__((constructor)) coroutine_pool_init(void) -{ - qemu_mutex_init(&pool_lock); -} - -static void __attribute__((destructor)) coroutine_pool_cleanup(void) -{ - Coroutine *co; - Coroutine *tmp; - - QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) { - QSLIST_REMOVE_HEAD(&pool, pool_next); - qemu_coroutine_delete(co); - } - - qemu_mutex_destroy(&pool_lock); -} - static void coroutine_swap(Coroutine *from, Coroutine *to) { CoroutineAction ret; @@ -137,23 +150,3 @@ void coroutine_fn qemu_coroutine_yield(void) self->caller = NULL; coroutine_swap(self, to); } - -void qemu_coroutine_adjust_pool_size(int n) -{ - qemu_mutex_lock(&pool_lock); - - pool_max_size += n; - - /* Callers should never take away more than they added */ - assert(pool_max_size >= POOL_DEFAULT_SIZE); - - /* Trim oversized pool down to new max */ - while (pool_size > pool_max_size) { - Coroutine *co = QSLIST_FIRST(&pool); - QSLIST_REMOVE_HEAD(&pool, pool_next); - pool_size--; - qemu_coroutine_delete(co); - } - - qemu_mutex_unlock(&pool_lock); -} diff --git a/qemu-seccomp.c b/qemu-seccomp.c index af6a375127..b0c626984f 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -235,7 +235,8 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { { SCMP_SYS(fallocate), 240 }, { SCMP_SYS(fadvise64), 240 }, { SCMP_SYS(inotify_init1), 240 }, - { SCMP_SYS(inotify_add_watch), 240 } + { SCMP_SYS(inotify_add_watch), 240 }, + { SCMP_SYS(mbind), 240 } }; int seccomp_start(void) diff --git a/qmp-commands.hx b/qmp-commands.hx index 6945d30198..8957201f73 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1094,6 +1094,48 @@ Example: "sync": "full", "target": "backup.img" } } <- { "return": {} } + +EQMP + + { + .name = "blockdev-backup", + .args_type = "sync:s,device:B,target:B,speed:i?," + "on-source-error:s?,on-target-error:s?", + .mhandler.cmd_new = qmp_marshal_input_blockdev_backup, + }, + +SQMP +blockdev-backup +--------------- + +The device version of drive-backup: this command takes an existing named device +as backup target. + +Arguments: + +- "device": the name of the device which should be copied. + (json-string) +- "target": the name of the backup target device. (json-string) +- "sync": what parts of the disk image should be copied to the destination; + possibilities include "full" for all the disk, "top" for only the + sectors allocated in the topmost image, or "none" to only replicate + new I/O (MirrorSyncMode). +- "speed": the maximum speed, in bytes per second (json-int, optional) +- "on-source-error": the action to take on an error on the source, default + 'report'. 'stop' and 'enospc' can only be used + if the block device supports io-status. + (BlockdevOnError, optional) +- "on-target-error": the action to take on an error on the target, default + 'report' (no limitations, since this applies to + a different block device than device). + (BlockdevOnError, optional) + +Example: +-> { "execute": "blockdev-backup", "arguments": { "device": "src-id", + "sync": "full", + "target": "tgt-id" } } +<- { "return": {} } + EQMP { diff --git a/roms/SLOF b/roms/SLOF -Subproject f284ab3f03ae69a20e1ae966f6ddf76da33cbf7 +Subproject a70dbda2e21f6e438b3617c44ff180c3418dc30 diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 053e4320fc..5df61f9aa9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1639,7 +1639,13 @@ sub process { #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n"; #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n"; - if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) { + # The length of the "previous line" is checked against 80 because it + # includes the + at the beginning of the line (if the actual line has + # 79 or 80 characters, it is no longer possible to add a space and an + # opening brace there) + if ($#ctx == 0 && $ctx !~ /{\s*/ && + defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/ && + defined($lines[$ctx_ln - 2]) && length($lines[$ctx_ln - 2]) < 80) { ERROR("that open brace { should be on the previous line\n" . "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n"); } @@ -2542,7 +2548,10 @@ sub process { substr($block, 0, length($cond), ''); - $seen++ if ($block =~ /^\s*{/); + my $spaced_block = $block; + $spaced_block =~ s/\n\+/ /g; + + $seen++ if ($spaced_block =~ /^\s*{/); print "APW: cond<$cond> block<$block> allowed<$allowed>\n" if $dbg_adv_apw; diff --git a/target-arm/kvm.c b/target-arm/kvm.c index 4d81f3d765..23cefe98b4 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -548,3 +548,9 @@ int kvm_arch_irqchip_create(KVMState *s) return 0; } + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data) +{ + return 0; +} diff --git a/target-i386/kvm.c b/target-i386/kvm.c index cf9f3319c2..36b1519f34 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -2751,3 +2751,9 @@ int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id) return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSIX | KVM_DEV_IRQ_HOST_MSIX); } + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data) +{ + return 0; +} diff --git a/target-mips/kvm.c b/target-mips/kvm.c index a761ea5b32..b68191c88e 100644 --- a/target-mips/kvm.c +++ b/target-mips/kvm.c @@ -688,3 +688,9 @@ int kvm_arch_get_registers(CPUState *cs) return ret; } + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data) +{ + return 0; +} diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index f42589c478..c62097bb8a 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -559,6 +559,26 @@ struct ppc_slb_t { #define ESR_VLEMI (1 << (63 - 58)) /* VLE operation */ #define ESR_MIF (1 << (63 - 62)) /* Misaligned instruction (VLE) */ +/* Transaction EXception And Summary Register bits */ +#define TEXASR_FAILURE_PERSISTENT (63 - 7) +#define TEXASR_DISALLOWED (63 - 8) +#define TEXASR_NESTING_OVERFLOW (63 - 9) +#define TEXASR_FOOTPRINT_OVERFLOW (63 - 10) +#define TEXASR_SELF_INDUCED_CONFLICT (63 - 11) +#define TEXASR_NON_TRANSACTIONAL_CONFLICT (63 - 12) +#define TEXASR_TRANSACTION_CONFLICT (63 - 13) +#define TEXASR_TRANSLATION_INVALIDATION_CONFLICT (63 - 14) +#define TEXASR_IMPLEMENTATION_SPECIFIC (63 - 15) +#define TEXASR_INSTRUCTION_FETCH_CONFLICT (63 - 16) +#define TEXASR_ABORT (63 - 31) +#define TEXASR_SUSPENDED (63 - 32) +#define TEXASR_PRIVILEGE_HV (63 - 34) +#define TEXASR_PRIVILEGE_PR (63 - 35) +#define TEXASR_FAILURE_SUMMARY (63 - 36) +#define TEXASR_TFIAR_EXACT (63 - 37) +#define TEXASR_ROT (63 - 38) +#define TEXASR_TRANSACTION_LEVEL (63 - 52) /* 12 bits */ + enum { POWERPC_FLAG_NONE = 0x00000000, /* Flag for MSR bit 25 signification (VRE/SPE) */ @@ -585,6 +605,8 @@ enum { POWERPC_FLAG_CFAR = 0x00040000, /* Has VSX */ POWERPC_FLAG_VSX = 0x00080000, + /* Has Transaction Memory (ISA 2.07) */ + POWERPC_FLAG_TM = 0x00100000, }; /*****************************************************************************/ @@ -2011,6 +2033,8 @@ enum { PPC2_ISA207S = 0x0000000000008000ULL, /* Double precision floating point conversion for signed integer 64 */ PPC2_FP_CVT_S64 = 0x0000000000010000ULL, + /* Transactional Memory (ISA 2.07, Book II) */ + PPC2_TM = 0x0000000000020000ULL, #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \ PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \ @@ -2018,7 +2042,7 @@ enum { PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \ PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \ PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \ - PPC2_FP_CVT_S64) + PPC2_FP_CVT_S64 | PPC2_TM) }; /*****************************************************************************/ diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c index 7f74466f32..6cceffc556 100644 --- a/target-ppc/fpu_helper.c +++ b/target-ppc/fpu_helper.c @@ -19,6 +19,9 @@ #include "cpu.h" #include "exec/helper-proto.h" +#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL) +#define float32_snan_to_qnan(x) ((x) | 0x00400000) + /*****************************************************************************/ /* Floating point operations helpers */ uint64_t helper_float32_to_float64(CPUPPCState *env, uint32_t arg) @@ -60,59 +63,55 @@ static inline int ppc_float64_get_unbiased_exp(float64 f) return ((f >> 52) & 0x7FF) - 1023; } -uint32_t helper_compute_fprf(CPUPPCState *env, uint64_t arg, uint32_t set_fprf) +void helper_compute_fprf(CPUPPCState *env, uint64_t arg) { CPU_DoubleU farg; int isneg; - int ret; + int fprf; farg.ll = arg; isneg = float64_is_neg(farg.d); if (unlikely(float64_is_any_nan(farg.d))) { if (float64_is_signaling_nan(farg.d)) { /* Signaling NaN: flags are undefined */ - ret = 0x00; + fprf = 0x00; } else { /* Quiet NaN */ - ret = 0x11; + fprf = 0x11; } } else if (unlikely(float64_is_infinity(farg.d))) { /* +/- infinity */ if (isneg) { - ret = 0x09; + fprf = 0x09; } else { - ret = 0x05; + fprf = 0x05; } } else { if (float64_is_zero(farg.d)) { /* +/- zero */ if (isneg) { - ret = 0x12; + fprf = 0x12; } else { - ret = 0x02; + fprf = 0x02; } } else { if (isden(farg.d)) { /* Denormalized numbers */ - ret = 0x10; + fprf = 0x10; } else { /* Normalized numbers */ - ret = 0x00; + fprf = 0x00; } if (isneg) { - ret |= 0x08; + fprf |= 0x08; } else { - ret |= 0x04; + fprf |= 0x04; } } } - if (set_fprf) { - /* We update FPSCR_FPRF */ - env->fpscr &= ~(0x1F << FPSCR_FPRF); - env->fpscr |= ret << FPSCR_FPRF; - } - /* We just need fpcc to update Rc1 */ - return ret & 0xF; + /* We update FPSCR_FPRF */ + env->fpscr &= ~(0x1F << FPSCR_FPRF); + env->fpscr |= fprf << FPSCR_FPRF; } /* Floating-point invalid operations exception */ @@ -920,14 +919,16 @@ uint64_t helper_fsqrt(CPUPPCState *env, uint64_t arg) farg.ll = arg; - if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) { - /* Square root of a negative nonzero number */ - farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); - } else { + if (unlikely(float64_is_any_nan(farg.d))) { if (unlikely(float64_is_signaling_nan(farg.d))) { - /* sNaN square root */ + /* sNaN reciprocal square root */ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + farg.ll = float64_snan_to_qnan(farg.ll); } + } else if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) { + /* Square root of a negative nonzero number */ + farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); + } else { farg.d = float64_sqrt(farg.d, &env->fp_status); } return farg.ll; @@ -974,17 +975,20 @@ uint64_t helper_frsqrte(CPUPPCState *env, uint64_t arg) farg.ll = arg; - if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) { - /* Reciprocal square root of a negative nonzero number */ - farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); - } else { + if (unlikely(float64_is_any_nan(farg.d))) { if (unlikely(float64_is_signaling_nan(farg.d))) { /* sNaN reciprocal square root */ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + farg.ll = float64_snan_to_qnan(farg.ll); } + } else if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) { + /* Reciprocal square root of a negative nonzero number */ + farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); + } else { farg.d = float64_sqrt(farg.d, &env->fp_status); farg.d = float64_div(float64_one, farg.d, &env->fp_status); } + return farg.ll; } @@ -1845,7 +1849,7 @@ void helper_##name(CPUPPCState *env, uint32_t opcode) \ } \ \ if (sfprf) { \ - helper_compute_fprf(env, xt.fld, sfprf); \ + helper_compute_fprf(env, xt.fld); \ } \ } \ putVSR(xT(opcode), &xt, env); \ @@ -1900,7 +1904,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ if (sfprf) { \ - helper_compute_fprf(env, xt.fld, sfprf); \ + helper_compute_fprf(env, xt.fld); \ } \ } \ \ @@ -1954,7 +1958,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ if (sfprf) { \ - helper_compute_fprf(env, xt.fld, sfprf); \ + helper_compute_fprf(env, xt.fld); \ } \ } \ \ @@ -1995,7 +1999,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ if (sfprf) { \ - helper_compute_fprf(env, xt.fld, sfprf); \ + helper_compute_fprf(env, xt.fld); \ } \ } \ \ @@ -2044,7 +2048,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ if (sfprf) { \ - helper_compute_fprf(env, xt.fld, sfprf); \ + helper_compute_fprf(env, xt.fld); \ } \ } \ \ @@ -2094,7 +2098,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ if (sfprf) { \ - helper_compute_fprf(env, xt.fld, sfprf); \ + helper_compute_fprf(env, xt.fld); \ } \ } \ \ @@ -2294,7 +2298,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ if (sfprf) { \ - helper_compute_fprf(env, xt_out.fld, sfprf); \ + helper_compute_fprf(env, xt_out.fld); \ } \ } \ putVSR(xT(opcode), &xt_out, env); \ @@ -2382,9 +2386,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ VSX_SCALAR_CMP(xscmpodp, 1) VSX_SCALAR_CMP(xscmpudp, 0) -#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL) -#define float32_snan_to_qnan(x) ((x) | 0x00400000) - /* VSX_MAX_MIN - VSX floating point maximum/minimum * name - instruction mnemonic * op - operation (max or min) @@ -2504,7 +2505,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ if (sfprf) { \ helper_compute_fprf(env, ttp##_to_float64(xt.tfld, \ - &env->fp_status), sfprf); \ + &env->fp_status)); \ } \ } \ \ @@ -2614,7 +2615,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ xt.tfld = helper_frsp(env, xt.tfld); \ } \ if (sfprf) { \ - helper_compute_fprf(env, xt.tfld, sfprf); \ + helper_compute_fprf(env, xt.tfld); \ } \ } \ \ @@ -2669,7 +2670,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ xt.fld = tp##_round_to_int(xb.fld, &env->fp_status); \ } \ if (sfprf) { \ - helper_compute_fprf(env, xt.fld, sfprf); \ + helper_compute_fprf(env, xt.fld); \ } \ } \ \ @@ -2709,7 +2710,7 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb) uint64_t xt = helper_frsp(env, xb); - helper_compute_fprf(env, xt, 1); + helper_compute_fprf(env, xt); helper_float_check_status(env); return xt; } diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 210fd97f6a..869be1509d 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -52,7 +52,7 @@ DEF_HELPER_FLAGS_2(brinc, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_1(float_check_status, void, env) DEF_HELPER_1(reset_fpstatus, void, env) -DEF_HELPER_3(compute_fprf, i32, env, i64, i32) +DEF_HELPER_2(compute_fprf, void, env, i64) DEF_HELPER_3(store_fpscr, void, env, i64, i32) DEF_HELPER_2(fpscr_clrbit, void, env, i32) DEF_HELPER_2(fpscr_setbit, void, env, i32) @@ -665,3 +665,5 @@ DEF_HELPER_4(dscri, void, env, fprp, fprp, i32) DEF_HELPER_4(dscriq, void, env, fprp, fprp, i32) DEF_HELPER_4(dscli, void, env, fprp, fprp, i32) DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) + +DEF_HELPER_1(tbegin, void, env) diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 6843fa0b98..1edf2b5aeb 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -2246,8 +2246,23 @@ int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns) strerror(errno)); return rc; } else if (rc) { - /* Kernel already retuns data in BE format for the file */ - qemu_put_buffer(f, buf, rc); + uint8_t *buffer = buf; + ssize_t n = rc; + while (n) { + struct kvm_get_htab_header *head = + (struct kvm_get_htab_header *) buffer; + size_t chunksize = sizeof(*head) + + HASH_PTE_SIZE_64 * head->n_valid; + + qemu_put_be32(f, head->index); + qemu_put_be16(f, head->n_valid); + qemu_put_be16(f, head->n_invalid); + qemu_put_buffer(f, (void *)(head + 1), + HASH_PTE_SIZE_64 * head->n_valid); + + buffer += chunksize; + n -= chunksize; + } } } while ((rc != 0) && ((max_ns < 0) @@ -2264,7 +2279,6 @@ int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, ssize_t rc; buf = alloca(chunksize); - /* This is KVM on ppc, so this is all big-endian */ buf->index = index; buf->n_valid = n_valid; buf->n_invalid = n_invalid; @@ -2388,3 +2402,9 @@ out_close: error_out: return; } + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data) +{ + return 0; +} diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c index 50344b81cf..6d37dae7b0 100644 --- a/target-ppc/mem_helper.c +++ b/target-ppc/mem_helper.c @@ -269,3 +269,25 @@ STVE(stvewx, cpu_stl_data, bswap32, u32) #undef HI_IDX #undef LO_IDX + +void helper_tbegin(CPUPPCState *env) +{ + /* As a degenerate implementation, always fail tbegin. The reason + * given is "Nesting overflow". The "persistent" bit is set, + * providing a hint to the error handler to not retry. The TFIAR + * captures the address of the failure, which is this tbegin + * instruction. Instruction execution will continue with the + * next instruction in memory, which is precisely what we want. + */ + + env->spr[SPR_TEXASR] = + (1ULL << TEXASR_FAILURE_PERSISTENT) | + (1ULL << TEXASR_NESTING_OVERFLOW) | + (msr_hv << TEXASR_PRIVILEGE_HV) | + (msr_pr << TEXASR_PRIVILEGE_PR) | + (1ULL << TEXASR_FAILURE_SUMMARY) | + (1ULL << TEXASR_TFIAR_EXACT); + env->spr[SPR_TFIAR] = env->nip | (msr_hv << 1) | msr_pr; + env->spr[SPR_TFHAR] = env->nip + 4; + env->crf[0] = 0xB; /* 0b1010 = transaction failure */ +} diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 2e32e8d8b8..7c801f36e3 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -203,6 +203,7 @@ struct DisasContext { int altivec_enabled; int vsx_enabled; int spe_enabled; + int tm_enabled; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; uint64_t insns_flags; @@ -250,26 +251,10 @@ static inline void gen_reset_fpstatus(void) gen_helper_reset_fpstatus(cpu_env); } -static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc) +static inline void gen_compute_fprf(TCGv_i64 arg) { - TCGv_i32 t0 = tcg_temp_new_i32(); - - if (set_fprf != 0) { - /* This case might be optimized later */ - tcg_gen_movi_i32(t0, 1); - gen_helper_compute_fprf(t0, cpu_env, arg, t0); - if (unlikely(set_rc)) { - tcg_gen_mov_i32(cpu_crf[1], t0); - } - gen_helper_float_check_status(cpu_env); - } else if (unlikely(set_rc)) { - /* We always need to compute fpcc */ - tcg_gen_movi_i32(t0, 0); - gen_helper_compute_fprf(t0, cpu_env, arg, t0); - tcg_gen_mov_i32(cpu_crf[1], t0); - } - - tcg_temp_free_i32(t0); + gen_helper_compute_fprf(cpu_env, arg); + gen_helper_float_check_status(cpu_env); } static inline void gen_set_access_type(DisasContext *ctx, int access_type) @@ -346,11 +331,13 @@ static inline void gen_stop_exception(DisasContext *ctx) ctx->exception = POWERPC_EXCP_STOP; } +#ifndef CONFIG_USER_ONLY /* No need to update nip here, as execution flow will change */ static inline void gen_sync_exception(DisasContext *ctx) { ctx->exception = POWERPC_EXCP_SYNC; } +#endif #define GEN_HANDLER(name, opc1, opc2, opc3, inval, type) \ GEN_OPCODE(name, opc1, opc2, opc3, inval, type, PPC_NONE) @@ -452,7 +439,10 @@ EXTRACT_HELPER(ME, 1, 5); EXTRACT_HELPER(TO, 21, 5); EXTRACT_HELPER(CRM, 12, 8); + +#ifndef CONFIG_USER_ONLY EXTRACT_HELPER(SR, 16, 4); +#endif /* mtfsf/mtfsfi */ EXTRACT_HELPER(FPBF, 23, 3); @@ -2077,6 +2067,21 @@ static void gen_srd(DisasContext *ctx) } #endif +#if defined(TARGET_PPC64) +static void gen_set_cr1_from_fpscr(DisasContext *ctx) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(tmp, cpu_fpscr); + tcg_gen_shri_i32(cpu_crf[1], tmp, 28); + tcg_temp_free_i32(tmp); +} +#else +static void gen_set_cr1_from_fpscr(DisasContext *ctx) +{ + tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28); +} +#endif + /*** Floating-Point arithmetic ***/ #define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ static void gen_f##name(DisasContext *ctx) \ @@ -2095,8 +2100,12 @@ static void gen_f##name(DisasContext *ctx) \ gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ cpu_fpr[rD(ctx->opcode)]); \ } \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], set_fprf, \ - Rc(ctx->opcode) != 0); \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ } #define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ @@ -2120,8 +2129,12 @@ static void gen_f##name(DisasContext *ctx) \ gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ cpu_fpr[rD(ctx->opcode)]); \ } \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], \ - set_fprf, Rc(ctx->opcode) != 0); \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ } #define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ _GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ @@ -2144,8 +2157,12 @@ static void gen_f##name(DisasContext *ctx) \ gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ cpu_fpr[rD(ctx->opcode)]); \ } \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], \ - set_fprf, Rc(ctx->opcode) != 0); \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ } #define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ _GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ @@ -2163,8 +2180,12 @@ static void gen_f##name(DisasContext *ctx) \ gen_reset_fpstatus(); \ gen_helper_f##name(cpu_fpr[rD(ctx->opcode)], cpu_env, \ cpu_fpr[rB(ctx->opcode)]); \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], \ - set_fprf, Rc(ctx->opcode) != 0); \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ } #define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ @@ -2179,8 +2200,12 @@ static void gen_f##name(DisasContext *ctx) \ gen_reset_fpstatus(); \ gen_helper_f##name(cpu_fpr[rD(ctx->opcode)], cpu_env, \ cpu_fpr[rB(ctx->opcode)]); \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], \ - set_fprf, Rc(ctx->opcode) != 0); \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ } /* fadd - fadds */ @@ -2213,7 +2238,10 @@ static void gen_frsqrtes(DisasContext *ctx) cpu_fpr[rB(ctx->opcode)]); gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, cpu_fpr[rD(ctx->opcode)]); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 1, Rc(ctx->opcode) != 0); + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_cr1_from_fpscr(ctx); + } } /* fsel */ @@ -2234,7 +2262,10 @@ static void gen_fsqrt(DisasContext *ctx) gen_reset_fpstatus(); gen_helper_fsqrt(cpu_fpr[rD(ctx->opcode)], cpu_env, cpu_fpr[rB(ctx->opcode)]); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 1, Rc(ctx->opcode) != 0); + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_cr1_from_fpscr(ctx); + } } static void gen_fsqrts(DisasContext *ctx) @@ -2250,7 +2281,10 @@ static void gen_fsqrts(DisasContext *ctx) cpu_fpr[rB(ctx->opcode)]); gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, cpu_fpr[rD(ctx->opcode)]); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 1, Rc(ctx->opcode) != 0); + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_cr1_from_fpscr(ctx); + } } /*** Floating-Point multiply-and-add ***/ @@ -2370,7 +2404,9 @@ static void gen_fabs(DisasContext *ctx) } tcg_gen_andi_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)], ~(1ULL << 63)); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0); + if (unlikely(Rc(ctx->opcode))) { + gen_set_cr1_from_fpscr(ctx); + } } /* fmr - fmr. */ @@ -2382,7 +2418,9 @@ static void gen_fmr(DisasContext *ctx) return; } tcg_gen_mov_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0); + if (unlikely(Rc(ctx->opcode))) { + gen_set_cr1_from_fpscr(ctx); + } } /* fnabs */ @@ -2395,7 +2433,9 @@ static void gen_fnabs(DisasContext *ctx) } tcg_gen_ori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)], 1ULL << 63); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0); + if (unlikely(Rc(ctx->opcode))) { + gen_set_cr1_from_fpscr(ctx); + } } /* fneg */ @@ -2408,7 +2448,9 @@ static void gen_fneg(DisasContext *ctx) } tcg_gen_xori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)], 1ULL << 63); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0); + if (unlikely(Rc(ctx->opcode))) { + gen_set_cr1_from_fpscr(ctx); + } } /* fcpsgn: PowerPC 2.05 specification */ @@ -2421,7 +2463,9 @@ static void gen_fcpsgn(DisasContext *ctx) } tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)], cpu_fpr[rB(ctx->opcode)], 0, 63); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0); + if (unlikely(Rc(ctx->opcode))) { + gen_set_cr1_from_fpscr(ctx); + } } static void gen_fmrgew(DisasContext *ctx) @@ -2479,7 +2523,9 @@ static void gen_mffs(DisasContext *ctx) } gen_reset_fpstatus(); tcg_gen_extu_tl_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0); + if (unlikely(Rc(ctx->opcode))) { + gen_set_cr1_from_fpscr(ctx); + } } /* mtfsb0 */ @@ -6743,7 +6789,7 @@ static void gen_st##name(DisasContext *ctx) \ tcg_temp_free(EA); \ } -#define GEN_VR_LVE(name, opc2, opc3) \ +#define GEN_VR_LVE(name, opc2, opc3, size) \ static void gen_lve##name(DisasContext *ctx) \ { \ TCGv EA; \ @@ -6755,13 +6801,16 @@ static void gen_lve##name(DisasContext *ctx) \ gen_set_access_type(ctx, ACCESS_INT); \ EA = tcg_temp_new(); \ gen_addr_reg_index(ctx, EA); \ + if (size > 1) { \ + tcg_gen_andi_tl(EA, EA, ~(size - 1)); \ + } \ rs = gen_avr_ptr(rS(ctx->opcode)); \ gen_helper_lve##name(cpu_env, rs, EA); \ tcg_temp_free(EA); \ tcg_temp_free_ptr(rs); \ } -#define GEN_VR_STVE(name, opc2, opc3) \ +#define GEN_VR_STVE(name, opc2, opc3, size) \ static void gen_stve##name(DisasContext *ctx) \ { \ TCGv EA; \ @@ -6773,6 +6822,9 @@ static void gen_stve##name(DisasContext *ctx) \ gen_set_access_type(ctx, ACCESS_INT); \ EA = tcg_temp_new(); \ gen_addr_reg_index(ctx, EA); \ + if (size > 1) { \ + tcg_gen_andi_tl(EA, EA, ~(size - 1)); \ + } \ rs = gen_avr_ptr(rS(ctx->opcode)); \ gen_helper_stve##name(cpu_env, rs, EA); \ tcg_temp_free(EA); \ @@ -6783,17 +6835,17 @@ GEN_VR_LDX(lvx, 0x07, 0x03); /* As we don't emulate the cache, lvxl is stricly equivalent to lvx */ GEN_VR_LDX(lvxl, 0x07, 0x0B); -GEN_VR_LVE(bx, 0x07, 0x00); -GEN_VR_LVE(hx, 0x07, 0x01); -GEN_VR_LVE(wx, 0x07, 0x02); +GEN_VR_LVE(bx, 0x07, 0x00, 1); +GEN_VR_LVE(hx, 0x07, 0x01, 2); +GEN_VR_LVE(wx, 0x07, 0x02, 4); GEN_VR_STX(svx, 0x07, 0x07); /* As we don't emulate the cache, stvxl is stricly equivalent to stvx */ GEN_VR_STX(svxl, 0x07, 0x0F); -GEN_VR_STVE(bx, 0x07, 0x04); -GEN_VR_STVE(hx, 0x07, 0x05); -GEN_VR_STVE(wx, 0x07, 0x06); +GEN_VR_STVE(bx, 0x07, 0x04, 1); +GEN_VR_STVE(hx, 0x07, 0x05, 2); +GEN_VR_STVE(wx, 0x07, 0x06, 4); static void gen_lvsl(DisasContext *ctx) { @@ -8205,21 +8257,6 @@ static inline TCGv_ptr gen_fprp_ptr(int reg) return r; } -#if defined(TARGET_PPC64) -static void gen_set_cr1_from_fpscr(DisasContext *ctx) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(tmp, cpu_fpscr); - tcg_gen_shri_i32(cpu_crf[1], tmp, 28); - tcg_temp_free_i32(tmp); -} -#else -static void gen_set_cr1_from_fpscr(DisasContext *ctx) -{ - tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28); -} -#endif - #define GEN_DFP_T_A_B_Rc(name) \ static void gen_##name(DisasContext *ctx) \ { \ @@ -9642,6 +9679,88 @@ GEN_SPE(efdctsiz, speundef, 0x1D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE GEN_SPE(efdtstgt, efdtstlt, 0x1E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE); // GEN_SPE(efdtsteq, speundef, 0x1F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_DOUBLE); // +static void gen_tbegin(DisasContext *ctx) +{ + if (unlikely(!ctx->tm_enabled)) { + gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM); + return; + } + gen_helper_tbegin(cpu_env); +} + +#define GEN_TM_NOOP(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->tm_enabled)) { \ + gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM); \ + return; \ + } \ + /* Because tbegin always fails in QEMU, these user \ + * space instructions all have a simple implementation: \ + * \ + * CR[0] = 0b0 || MSR[TS] || 0b0 \ + * = 0b0 || 0b00 || 0b0 \ + */ \ + tcg_gen_movi_i32(cpu_crf[0], 0); \ +} + +GEN_TM_NOOP(tend); +GEN_TM_NOOP(tabort); +GEN_TM_NOOP(tabortwc); +GEN_TM_NOOP(tabortwci); +GEN_TM_NOOP(tabortdc); +GEN_TM_NOOP(tabortdci); +GEN_TM_NOOP(tsr); + +static void gen_tcheck(DisasContext *ctx) +{ + if (unlikely(!ctx->tm_enabled)) { + gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM); + return; + } + /* Because tbegin always fails, the tcheck implementation + * is simple: + * + * CR[CRF] = TDOOMED || MSR[TS] || 0b0 + * = 0b1 || 0b00 || 0b0 + */ + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0x8); +} + +#if defined(CONFIG_USER_ONLY) +#define GEN_TM_PRIV_NOOP(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC); \ +} + +#else + +#define GEN_TM_PRIV_NOOP(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + if (unlikely(ctx->pr)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC); \ + return; \ + } \ + if (unlikely(!ctx->tm_enabled)) { \ + gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM); \ + return; \ + } \ + /* Because tbegin always fails, the implementation is \ + * simple: \ + * \ + * CR[0] = 0b0 || MSR[TS] || 0b0 \ + * = 0b0 || 0b00 | 0b0 \ + */ \ + tcg_gen_movi_i32(cpu_crf[0], 0); \ +} + +#endif + +GEN_TM_PRIV_NOOP(treclaim); +GEN_TM_PRIV_NOOP(trechkpt); + static opcode_t opcodes[] = { GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE), GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER), @@ -11054,6 +11173,29 @@ GEN_SPEOP_LDST(evstwhe, 0x18, 2), GEN_SPEOP_LDST(evstwho, 0x1A, 2), GEN_SPEOP_LDST(evstwwe, 0x1C, 2), GEN_SPEOP_LDST(evstwwo, 0x1E, 2), + +GEN_HANDLER2_E(tbegin, "tbegin", 0x1F, 0x0E, 0x14, 0x01DFF800, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(tend, "tend", 0x1F, 0x0E, 0x15, 0x01FFF800, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(tabort, "tabort", 0x1F, 0x0E, 0x1C, 0x03E0F800, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(tabortwc, "tabortwc", 0x1F, 0x0E, 0x18, 0x00000000, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(tabortwci, "tabortwci", 0x1F, 0x0E, 0x1A, 0x00000000, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(tabortdc, "tabortdc", 0x1F, 0x0E, 0x19, 0x00000000, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(tabortdci, "tabortdci", 0x1F, 0x0E, 0x1B, 0x00000000, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(tsr, "tsr", 0x1F, 0x0E, 0x17, 0x03DFF800, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(tcheck, "tcheck", 0x1F, 0x0E, 0x16, 0x007FF800, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(treclaim, "treclaim", 0x1F, 0x0E, 0x1D, 0x03E0F800, \ + PPC_NONE, PPC2_TM), +GEN_HANDLER2_E(trechkpt, "trechkpt", 0x1F, 0x0E, 0x1F, 0x03FFF800, \ + PPC_NONE, PPC2_TM), }; #include "helper_regs.h" @@ -11311,6 +11453,13 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu, } else { ctx.vsx_enabled = 0; } +#if defined(TARGET_PPC64) + if ((env->flags & POWERPC_FLAG_TM) && msr_tm) { + ctx.tm_enabled = msr_tm; + } else { + ctx.tm_enabled = 0; + } +#endif if ((env->flags & POWERPC_FLAG_SE) && msr_se) ctx.singlestep_enabled = CPU_SINGLE_STEP; else diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index f0a29992df..df1a62c4c6 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8214,7 +8214,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | - PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64; + PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | + PPC2_TM; pcc->msr_mask = (1ull << MSR_SF) | (1ull << MSR_TM) | (1ull << MSR_VR) | @@ -8242,7 +8243,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR | - POWERPC_FLAG_VSX; + POWERPC_FLAG_VSX | POWERPC_FLAG_TM; pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index fe2f95d084..23ad336803 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -457,87 +457,6 @@ int css_enable_mss(void); int css_do_rsch(SubchDev *sch); int css_do_rchp(uint8_t cssid, uint8_t chpid); bool css_present(uint8_t cssid); -#else -static inline SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, - uint16_t schid) -{ - return NULL; -} -static inline bool css_subch_visible(SubchDev *sch) -{ - return false; -} -static inline void css_conditional_io_interrupt(SubchDev *sch) -{ -} -static inline int css_do_stsch(SubchDev *sch, SCHIB *schib) -{ - return -ENODEV; -} -static inline bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid) -{ - return true; -} -static inline int css_do_msch(SubchDev *sch, SCHIB *schib) -{ - return -ENODEV; -} -static inline int css_do_xsch(SubchDev *sch) -{ - return -ENODEV; -} -static inline int css_do_csch(SubchDev *sch) -{ - return -ENODEV; -} -static inline int css_do_hsch(SubchDev *sch) -{ - return -ENODEV; -} -static inline int css_do_ssch(SubchDev *sch, ORB *orb) -{ - return -ENODEV; -} -static inline int css_do_tsch(SubchDev *sch, IRB *irb) -{ - return -ENODEV; -} -static inline int css_do_stcrw(CRW *crw) -{ - return 1; -} -static inline int css_do_tpi(IOIntCode *int_code, int lowcore) -{ - return 0; -} -static inline int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, - int rfmt, uint8_t l_chpid, void *buf) -{ - return 0; -} -static inline void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo) -{ -} -static inline int css_enable_mss(void) -{ - return -EINVAL; -} -static inline int css_enable_mcsse(void) -{ - return -EINVAL; -} -static inline int css_do_rsch(SubchDev *sch) -{ - return -ENODEV; -} -static inline int css_do_rchp(uint8_t cssid, uint8_t chpid) -{ - return -ENODEV; -} -static inline bool css_present(uint8_t cssid) -{ - return false; -} #endif #define cpu_init(model) (&cpu_s390x_init(model)->env) diff --git a/target-s390x/ioinst.c b/target-s390x/ioinst.c index b8a6486f51..1ac5d61c56 100644 --- a/target-s390x/ioinst.c +++ b/target-s390x/ioinst.c @@ -14,6 +14,7 @@ #include "cpu.h" #include "ioinst.h" #include "trace.h" +#include "hw/s390x/s390-pci-bus.h" int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, int *schid) @@ -398,6 +399,7 @@ typedef struct ChscResp { #define CHSC_SCPD 0x0002 #define CHSC_SCSC 0x0010 #define CHSC_SDA 0x0031 +#define CHSC_SEI 0x000e #define CHSC_SCPD_0_M 0x20000000 #define CHSC_SCPD_0_C 0x10000000 @@ -566,6 +568,53 @@ out: res->param = 0; } +static int chsc_sei_nt0_get_event(void *res) +{ + /* no events yet */ + return 1; +} + +static int chsc_sei_nt0_have_event(void) +{ + /* no events yet */ + return 0; +} + +#define CHSC_SEI_NT0 (1ULL << 63) +#define CHSC_SEI_NT2 (1ULL << 61) +static void ioinst_handle_chsc_sei(ChscReq *req, ChscResp *res) +{ + uint64_t selection_mask = ldq_p(&req->param1); + uint8_t *res_flags = (uint8_t *)res->data; + int have_event = 0; + int have_more = 0; + + /* regarding architecture nt0 can not be masked */ + have_event = !chsc_sei_nt0_get_event(res); + have_more = chsc_sei_nt0_have_event(); + + if (selection_mask & CHSC_SEI_NT2) { + if (!have_event) { + have_event = !chsc_sei_nt2_get_event(res); + } + + if (!have_more) { + have_more = chsc_sei_nt2_have_event(); + } + } + + if (have_event) { + res->code = cpu_to_be16(0x0001); + if (have_more) { + (*res_flags) |= 0x80; + } else { + (*res_flags) &= ~0x80; + } + } else { + res->code = cpu_to_be16(0x0004); + } +} + static void ioinst_handle_chsc_unimplemented(ChscResp *res) { res->len = cpu_to_be16(CHSC_MIN_RESP_LEN); @@ -617,6 +666,9 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb) case CHSC_SDA: ioinst_handle_chsc_sda(req, res); break; + case CHSC_SEI: + ioinst_handle_chsc_sei(req, res); + break; default: ioinst_handle_chsc_unimplemented(res); break; diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h index 29f6423df4..1efe16c3ae 100644 --- a/target-s390x/ioinst.h +++ b/target-s390x/ioinst.h @@ -204,6 +204,7 @@ typedef struct CRW { #define CRW_RSC_SUBCH 0x3 #define CRW_RSC_CHP 0x4 +#define CRW_RSC_CSS 0xb /* I/O interruption code */ typedef struct IOIntCode { diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index f3f8f2c2ca..dcd75055c1 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -40,6 +40,8 @@ #include "exec/gdbstub.h" #include "trace.h" #include "qapi-event.h" +#include "hw/s390x/s390-pci-inst.h" +#include "hw/s390x/s390-pci-bus.h" /* #define DEBUG_KVM */ @@ -56,6 +58,7 @@ #define IPA0_B2 0xb200 #define IPA0_B9 0xb900 #define IPA0_EB 0xeb00 +#define IPA0_E3 0xe300 #define PRIV_B2_SCLP_CALL 0x20 #define PRIV_B2_CSCH 0x30 @@ -76,8 +79,17 @@ #define PRIV_B2_XSCH 0x76 #define PRIV_EB_SQBS 0x8a +#define PRIV_EB_PCISTB 0xd0 +#define PRIV_EB_SIC 0xd1 #define PRIV_B9_EQBS 0x9c +#define PRIV_B9_CLP 0xa0 +#define PRIV_B9_PCISTG 0xd0 +#define PRIV_B9_PCILG 0xd2 +#define PRIV_B9_RPCIT 0xd3 + +#define PRIV_E3_MPCIFC 0xd0 +#define PRIV_E3_STPCIFC 0xd4 #define DIAG_IPL 0x308 #define DIAG_KVM_HYPERCALL 0x500 @@ -202,6 +214,11 @@ void kvm_s390_reset_vcpu(S390CPU *cpu) } } +static int can_sync_regs(CPUState *cs, int regs) +{ + return cap_sync_regs && (cs->kvm_run->kvm_valid_regs & regs) == regs; +} + int kvm_arch_put_registers(CPUState *cs, int level) { S390CPU *cpu = S390_CPU(cs); @@ -216,7 +233,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) cs->kvm_run->psw_addr = env->psw.addr; cs->kvm_run->psw_mask = env->psw.mask; - if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) { + if (can_sync_regs(cs, KVM_SYNC_GPRS)) { for (i = 0; i < 16; i++) { cs->kvm_run->s.regs.gprs[i] = env->regs[i]; cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS; @@ -247,18 +264,33 @@ int kvm_arch_put_registers(CPUState *cs, int level) return 0; } - /* - * These ONE_REGS are not protected by a capability. As they are only - * necessary for migration we just trace a possible error, but don't - * return with an error return code. - */ - kvm_set_one_reg(cs, KVM_REG_S390_CPU_TIMER, &env->cputm); - kvm_set_one_reg(cs, KVM_REG_S390_CLOCK_COMP, &env->ckc); - kvm_set_one_reg(cs, KVM_REG_S390_TODPR, &env->todpr); - kvm_set_one_reg(cs, KVM_REG_S390_GBEA, &env->gbea); - kvm_set_one_reg(cs, KVM_REG_S390_PP, &env->pp); + if (can_sync_regs(cs, KVM_SYNC_ARCH0)) { + cs->kvm_run->s.regs.cputm = env->cputm; + cs->kvm_run->s.regs.ckc = env->ckc; + cs->kvm_run->s.regs.todpr = env->todpr; + cs->kvm_run->s.regs.gbea = env->gbea; + cs->kvm_run->s.regs.pp = env->pp; + cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ARCH0; + } else { + /* + * These ONE_REGS are not protected by a capability. As they are only + * necessary for migration we just trace a possible error, but don't + * return with an error return code. + */ + kvm_set_one_reg(cs, KVM_REG_S390_CPU_TIMER, &env->cputm); + kvm_set_one_reg(cs, KVM_REG_S390_CLOCK_COMP, &env->ckc); + kvm_set_one_reg(cs, KVM_REG_S390_TODPR, &env->todpr); + kvm_set_one_reg(cs, KVM_REG_S390_GBEA, &env->gbea); + kvm_set_one_reg(cs, KVM_REG_S390_PP, &env->pp); + } - if (cap_async_pf) { + /* pfault parameters */ + if (can_sync_regs(cs, KVM_SYNC_PFAULT)) { + cs->kvm_run->s.regs.pft = env->pfault_token; + cs->kvm_run->s.regs.pfs = env->pfault_select; + cs->kvm_run->s.regs.pfc = env->pfault_compare; + cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_PFAULT; + } else if (cap_async_pf) { r = kvm_set_one_reg(cs, KVM_REG_S390_PFTOKEN, &env->pfault_token); if (r < 0) { return r; @@ -273,9 +305,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) } } - if (cap_sync_regs && - cs->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS && - cs->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) { + /* access registers and control registers*/ + if (can_sync_regs(cs, KVM_SYNC_ACRS | KVM_SYNC_CRS)) { for (i = 0; i < 16; i++) { cs->kvm_run->s.regs.acrs[i] = env->aregs[i]; cs->kvm_run->s.regs.crs[i] = env->cregs[i]; @@ -294,7 +325,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) } /* Finally the prefix */ - if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) { + if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { cs->kvm_run->s.regs.prefix = env->psa; cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_PREFIX; } else { @@ -317,7 +348,7 @@ int kvm_arch_get_registers(CPUState *cs) env->psw.mask = cs->kvm_run->psw_mask; /* the GPRS */ - if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) { + if (can_sync_regs(cs, KVM_SYNC_GPRS)) { for (i = 0; i < 16; i++) { env->regs[i] = cs->kvm_run->s.regs.gprs[i]; } @@ -332,9 +363,7 @@ int kvm_arch_get_registers(CPUState *cs) } /* The ACRS and CRS */ - if (cap_sync_regs && - cs->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS && - cs->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) { + if (can_sync_regs(cs, KVM_SYNC_ACRS | KVM_SYNC_CRS)) { for (i = 0; i < 16; i++) { env->aregs[i] = cs->kvm_run->s.regs.acrs[i]; env->cregs[i] = cs->kvm_run->s.regs.crs[i]; @@ -361,22 +390,35 @@ int kvm_arch_get_registers(CPUState *cs) env->fpc = fpu.fpc; /* The prefix */ - if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) { + if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { env->psa = cs->kvm_run->s.regs.prefix; } - /* - * These ONE_REGS are not protected by a capability. As they are only - * necessary for migration we just trace a possible error, but don't - * return with an error return code. - */ - kvm_get_one_reg(cs, KVM_REG_S390_CPU_TIMER, &env->cputm); - kvm_get_one_reg(cs, KVM_REG_S390_CLOCK_COMP, &env->ckc); - kvm_get_one_reg(cs, KVM_REG_S390_TODPR, &env->todpr); - kvm_get_one_reg(cs, KVM_REG_S390_GBEA, &env->gbea); - kvm_get_one_reg(cs, KVM_REG_S390_PP, &env->pp); + if (can_sync_regs(cs, KVM_SYNC_ARCH0)) { + env->cputm = cs->kvm_run->s.regs.cputm; + env->ckc = cs->kvm_run->s.regs.ckc; + env->todpr = cs->kvm_run->s.regs.todpr; + env->gbea = cs->kvm_run->s.regs.gbea; + env->pp = cs->kvm_run->s.regs.pp; + } else { + /* + * These ONE_REGS are not protected by a capability. As they are only + * necessary for migration we just trace a possible error, but don't + * return with an error return code. + */ + kvm_get_one_reg(cs, KVM_REG_S390_CPU_TIMER, &env->cputm); + kvm_get_one_reg(cs, KVM_REG_S390_CLOCK_COMP, &env->ckc); + kvm_get_one_reg(cs, KVM_REG_S390_TODPR, &env->todpr); + kvm_get_one_reg(cs, KVM_REG_S390_GBEA, &env->gbea); + kvm_get_one_reg(cs, KVM_REG_S390_PP, &env->pp); + } - if (cap_async_pf) { + /* pfault parameters */ + if (can_sync_regs(cs, KVM_SYNC_PFAULT)) { + env->pfault_token = cs->kvm_run->s.regs.pft; + env->pfault_select = cs->kvm_run->s.regs.pfs; + env->pfault_compare = cs->kvm_run->s.regs.pfc; + } else if (cap_async_pf) { r = kvm_get_one_reg(cs, KVM_REG_S390_PFTOKEN, &env->pfault_token); if (r < 0) { return r; @@ -809,11 +851,124 @@ static int handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) return rc; } +static uint64_t get_base_disp_rxy(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + uint32_t x2 = (run->s390_sieic.ipa & 0x000f); + uint32_t base2 = run->s390_sieic.ipb >> 28; + uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) + + ((run->s390_sieic.ipb & 0xff00) << 4); + + if (disp2 & 0x80000) { + disp2 += 0xfff00000; + } + + return (base2 ? env->regs[base2] : 0) + + (x2 ? env->regs[x2] : 0) + (long)(int)disp2; +} + +static uint64_t get_base_disp_rsy(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + uint32_t base2 = run->s390_sieic.ipb >> 28; + uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) + + ((run->s390_sieic.ipb & 0xff00) << 4); + + if (disp2 & 0x80000) { + disp2 += 0xfff00000; + } + + return (base2 ? env->regs[base2] : 0) + (long)(int)disp2; +} + +static int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; + + return clp_service_call(cpu, r2); +} + +static int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; + + return pcilg_service_call(cpu, r1, r2); +} + +static int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; + + return pcistg_service_call(cpu, r1, r2); +} + +static int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; + uint64_t fiba; + + cpu_synchronize_state(CPU(cpu)); + fiba = get_base_disp_rxy(cpu, run); + + return stpcifc_service_call(cpu, r1, fiba); +} + +static int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run) +{ + /* NOOP */ + return 0; +} + +static int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; + + return rpcit_service_call(cpu, r1, r2); +} + +static int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; + uint8_t r3 = run->s390_sieic.ipa & 0x000f; + uint64_t gaddr; + + cpu_synchronize_state(CPU(cpu)); + gaddr = get_base_disp_rsy(cpu, run); + + return pcistb_service_call(cpu, r1, r3, gaddr); +} + +static int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; + uint64_t fiba; + + cpu_synchronize_state(CPU(cpu)); + fiba = get_base_disp_rxy(cpu, run); + + return mpcifc_service_call(cpu, r1, fiba); +} + static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) { int r = 0; switch (ipa1) { + case PRIV_B9_CLP: + r = kvm_clp_service_call(cpu, run); + break; + case PRIV_B9_PCISTG: + r = kvm_pcistg_service_call(cpu, run); + break; + case PRIV_B9_PCILG: + r = kvm_pcilg_service_call(cpu, run); + break; + case PRIV_B9_RPCIT: + r = kvm_rpcit_service_call(cpu, run); + break; case PRIV_B9_EQBS: /* just inject exception */ r = -1; @@ -832,6 +987,12 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl) int r = 0; switch (ipbl) { + case PRIV_EB_PCISTB: + r = kvm_pcistb_service_call(cpu, run); + break; + case PRIV_EB_SIC: + r = kvm_sic_service_call(cpu, run); + break; case PRIV_EB_SQBS: /* just inject exception */ r = -1; @@ -845,6 +1006,26 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl) return r; } +static int handle_e3(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl) +{ + int r = 0; + + switch (ipbl) { + case PRIV_E3_MPCIFC: + r = kvm_mpcifc_service_call(cpu, run); + break; + case PRIV_E3_STPCIFC: + r = kvm_stpcifc_service_call(cpu, run); + break; + default: + r = -1; + DPRINTF("KVM: unhandled PRIV: 0xe3%x\n", ipbl); + break; + } + + return r; +} + static int handle_hypercall(S390CPU *cpu, struct kvm_run *run) { CPUS390XState *env = &cpu->env; @@ -1041,6 +1222,9 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run) case IPA0_EB: r = handle_eb(cpu, run, run->s390_sieic.ipb & 0xff); break; + case IPA0_E3: + r = handle_e3(cpu, run, run->s390_sieic.ipb & 0xff); + break; case IPA0_DIAG: r = handle_diag(cpu, run, run->s390_sieic.ipb); break; @@ -1361,3 +1545,28 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state) return ret; } + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data) +{ + S390PCIBusDevice *pbdev; + uint32_t fid = data >> ZPCI_MSI_VEC_BITS; + uint32_t vec = data & ZPCI_MSI_VEC_MASK; + + pbdev = s390_pci_find_dev_by_fid(fid); + if (!pbdev) { + DPRINTF("add_msi_route no dev\n"); + return -ENODEV; + } + + pbdev->routes.adapter.ind_offset = vec; + + route->type = KVM_IRQ_ROUTING_S390_ADAPTER; + route->flags = 0; + route->u.adapter.summary_addr = pbdev->routes.adapter.summary_addr; + route->u.adapter.ind_addr = pbdev->routes.adapter.ind_addr; + route->u.adapter.summary_offset = pbdev->routes.adapter.summary_offset; + route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset; + route->u.adapter.adapter_id = pbdev->routes.adapter.adapter_id; + return 0; +} diff --git a/tests/Makefile b/tests/Makefile index e4ddb6a8c1..c2e2e52f22 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -298,7 +298,7 @@ tests/test-opts-visitor$(EXESUF): tests/test-opts-visitor.o $(test-qapi-obj-y) l tests/test-mul64$(EXESUF): tests/test-mul64.o libqemuutil.a tests/test-bitops$(EXESUF): tests/test-bitops.o libqemuutil.a -libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o +libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o tests/libqos/malloc.o libqos-obj-y += tests/libqos/i2c.o libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o libqos-pc-obj-y += tests/libqos/malloc-pc.o diff --git a/tests/libqos/malloc-pc.c b/tests/libqos/malloc-pc.c index f4218c6451..c9c48fddc9 100644 --- a/tests/libqos/malloc-pc.c +++ b/tests/libqos/malloc-pc.c @@ -17,296 +17,28 @@ #include "hw/nvram/fw_cfg.h" #include "qemu-common.h" -#include "qemu/queue.h" #include <glib.h> #define PAGE_SIZE (4096) -#define MLIST_ENTNAME entries -typedef QTAILQ_HEAD(MemList, MemBlock) MemList; -typedef struct MemBlock { - QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME; - uint64_t size; - uint64_t addr; -} MemBlock; - -typedef struct PCAlloc -{ - QGuestAllocator alloc; - PCAllocOpts opts; - uint64_t start; - uint64_t end; - - MemList used; - MemList free; -} PCAlloc; - -static MemBlock *mlist_new(uint64_t addr, uint64_t size) -{ - MemBlock *block; - - if (!size) { - return NULL; - } - block = g_malloc0(sizeof(MemBlock)); - - block->addr = addr; - block->size = size; - - return block; -} - -static void mlist_delete(MemList *list, MemBlock *node) -{ - g_assert(list && node); - QTAILQ_REMOVE(list, node, MLIST_ENTNAME); - g_free(node); -} - -static MemBlock *mlist_find_key(MemList *head, uint64_t addr) -{ - MemBlock *node; - QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { - if (node->addr == addr) { - return node; - } - } - return NULL; -} - -static MemBlock *mlist_find_space(MemList *head, uint64_t size) -{ - MemBlock *node; - - QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { - if (node->size >= size) { - return node; - } - } - return NULL; -} - -static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr) -{ - MemBlock *node; - g_assert(head && insr); - - QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { - if (insr->addr < node->addr) { - QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME); - return insr; - } - } - - QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME); - return insr; -} - -static inline uint64_t mlist_boundary(MemBlock *node) -{ - return node->size + node->addr; -} - -static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right) -{ - g_assert(head && left && right); - - left->size += right->size; - mlist_delete(head, right); - return left; -} - -static void mlist_coalesce(MemList *head, MemBlock *node) -{ - g_assert(node); - MemBlock *left; - MemBlock *right; - char merge; - - do { - merge = 0; - left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME); - right = QTAILQ_NEXT(node, MLIST_ENTNAME); - - /* clowns to the left of me */ - if (left && mlist_boundary(left) == node->addr) { - node = mlist_join(head, left, node); - merge = 1; - } - - /* jokers to the right */ - if (right && mlist_boundary(node) == right->addr) { - node = mlist_join(head, node, right); - merge = 1; - } - - } while (merge); -} - -static uint64_t pc_mlist_fulfill(PCAlloc *s, MemBlock *freenode, uint64_t size) -{ - uint64_t addr; - MemBlock *usednode; - - g_assert(freenode); - g_assert_cmpint(freenode->size, >=, size); - - addr = freenode->addr; - if (freenode->size == size) { - /* re-use this freenode as our used node */ - QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME); - usednode = freenode; - } else { - /* adjust the free node and create a new used node */ - freenode->addr += size; - freenode->size -= size; - usednode = mlist_new(addr, size); - } - - mlist_sort_insert(&s->used, usednode); - return addr; -} - -/* To assert the correctness of the list. - * Used only if PC_ALLOC_PARANOID is set. */ -static void pc_mlist_check(PCAlloc *s) -{ - MemBlock *node; - uint64_t addr = s->start > 0 ? s->start - 1 : 0; - uint64_t next = s->start; - - QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) { - g_assert_cmpint(node->addr, >, addr); - g_assert_cmpint(node->addr, >=, next); - addr = node->addr; - next = node->addr + node->size; - } - - addr = s->start > 0 ? s->start - 1 : 0; - next = s->start; - QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) { - g_assert_cmpint(node->addr, >, addr); - g_assert_cmpint(node->addr, >=, next); - addr = node->addr; - next = node->addr + node->size; - } -} - -static uint64_t pc_mlist_alloc(PCAlloc *s, uint64_t size) -{ - MemBlock *node; - - node = mlist_find_space(&s->free, size); - if (!node) { - fprintf(stderr, "Out of guest memory.\n"); - g_assert_not_reached(); - } - return pc_mlist_fulfill(s, node, size); -} - -static void pc_mlist_free(PCAlloc *s, uint64_t addr) -{ - MemBlock *node; - - if (addr == 0) { - return; - } - - node = mlist_find_key(&s->used, addr); - if (!node) { - fprintf(stderr, "Error: no record found for an allocation at " - "0x%016" PRIx64 ".\n", - addr); - g_assert_not_reached(); - } - - /* Rip it out of the used list and re-insert back into the free list. */ - QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME); - mlist_sort_insert(&s->free, node); - mlist_coalesce(&s->free, node); -} - -static uint64_t pc_alloc(QGuestAllocator *allocator, size_t size) -{ - PCAlloc *s = container_of(allocator, PCAlloc, alloc); - uint64_t rsize = size; - uint64_t naddr; - - rsize += (PAGE_SIZE - 1); - rsize &= -PAGE_SIZE; - g_assert_cmpint((s->start + rsize), <=, s->end); - g_assert_cmpint(rsize, >=, size); - - naddr = pc_mlist_alloc(s, rsize); - if (s->opts & PC_ALLOC_PARANOID) { - pc_mlist_check(s); - } - - return naddr; -} - -static void pc_free(QGuestAllocator *allocator, uint64_t addr) -{ - PCAlloc *s = container_of(allocator, PCAlloc, alloc); - - pc_mlist_free(s, addr); - if (s->opts & PC_ALLOC_PARANOID) { - pc_mlist_check(s); - } -} - /* * Mostly for valgrind happiness, but it does offer * a chokepoint for debugging guest memory leaks, too. */ void pc_alloc_uninit(QGuestAllocator *allocator) { - PCAlloc *s = container_of(allocator, PCAlloc, alloc); - MemBlock *node; - MemBlock *tmp; - PCAllocOpts mask; - - /* Check for guest leaks, and destroy the list. */ - QTAILQ_FOREACH_SAFE(node, &s->used, MLIST_ENTNAME, tmp) { - if (s->opts & (PC_ALLOC_LEAK_WARN | PC_ALLOC_LEAK_ASSERT)) { - fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; " - "size 0x%016" PRIx64 ".\n", - node->addr, node->size); - } - if (s->opts & (PC_ALLOC_LEAK_ASSERT)) { - g_assert_not_reached(); - } - g_free(node); - } - - /* If we have previously asserted that there are no leaks, then there - * should be only one node here with a specific address and size. */ - mask = PC_ALLOC_LEAK_ASSERT | PC_ALLOC_PARANOID; - QTAILQ_FOREACH_SAFE(node, &s->free, MLIST_ENTNAME, tmp) { - if ((s->opts & mask) == mask) { - if ((node->addr != s->start) || - (node->size != s->end - s->start)) { - fprintf(stderr, "Free list is corrupted.\n"); - g_assert_not_reached(); - } - } - - g_free(node); - } - - g_free(s); + alloc_uninit(allocator); } -QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags) +QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags) { - PCAlloc *s = g_malloc0(sizeof(*s)); + QGuestAllocator *s = g_malloc0(sizeof(*s)); uint64_t ram_size; QFWCFG *fw_cfg = pc_fw_cfg_init(); MemBlock *node; s->opts = flags; - s->alloc.alloc = pc_alloc; - s->alloc.free = pc_free; + s->page_size = PAGE_SIZE; ram_size = qfw_cfg_get_u64(fw_cfg, FW_CFG_RAM_SIZE); @@ -325,10 +57,10 @@ QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags) node = mlist_new(s->start, s->end - s->start); QTAILQ_INSERT_HEAD(&s->free, node, MLIST_ENTNAME); - return &s->alloc; + return s; } inline QGuestAllocator *pc_alloc_init(void) { - return pc_alloc_init_flags(PC_ALLOC_NO_FLAGS); + return pc_alloc_init_flags(ALLOC_NO_FLAGS); } diff --git a/tests/libqos/malloc-pc.h b/tests/libqos/malloc-pc.h index 9f525e3b99..86ab9f0429 100644 --- a/tests/libqos/malloc-pc.h +++ b/tests/libqos/malloc-pc.h @@ -15,15 +15,8 @@ #include "libqos/malloc.h" -typedef enum { - PC_ALLOC_NO_FLAGS = 0x00, - PC_ALLOC_LEAK_WARN = 0x01, - PC_ALLOC_LEAK_ASSERT = 0x02, - PC_ALLOC_PARANOID = 0x04 -} PCAllocOpts; - QGuestAllocator *pc_alloc_init(void); -QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags); -void pc_alloc_uninit(QGuestAllocator *allocator); +QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags); +void pc_alloc_uninit(QGuestAllocator *allocator); #endif diff --git a/tests/libqos/malloc.c b/tests/libqos/malloc.c new file mode 100644 index 0000000000..5debf18497 --- /dev/null +++ b/tests/libqos/malloc.c @@ -0,0 +1,270 @@ +/* + * libqos malloc support + * + * Copyright (c) 2014 + * + * Author: + * John Snow <jsnow@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "libqos/malloc.h" +#include "qemu-common.h" +#include <stdio.h> +#include <inttypes.h> +#include <glib.h> + +static void mlist_delete(MemList *list, MemBlock *node) +{ + g_assert(list && node); + QTAILQ_REMOVE(list, node, MLIST_ENTNAME); + g_free(node); +} + +static MemBlock *mlist_find_key(MemList *head, uint64_t addr) +{ + MemBlock *node; + QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { + if (node->addr == addr) { + return node; + } + } + return NULL; +} + +static MemBlock *mlist_find_space(MemList *head, uint64_t size) +{ + MemBlock *node; + + QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { + if (node->size >= size) { + return node; + } + } + return NULL; +} + +static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr) +{ + MemBlock *node; + g_assert(head && insr); + + QTAILQ_FOREACH(node, head, MLIST_ENTNAME) { + if (insr->addr < node->addr) { + QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME); + return insr; + } + } + + QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME); + return insr; +} + +static inline uint64_t mlist_boundary(MemBlock *node) +{ + return node->size + node->addr; +} + +static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right) +{ + g_assert(head && left && right); + + left->size += right->size; + mlist_delete(head, right); + return left; +} + +static void mlist_coalesce(MemList *head, MemBlock *node) +{ + g_assert(node); + MemBlock *left; + MemBlock *right; + char merge; + + do { + merge = 0; + left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME); + right = QTAILQ_NEXT(node, MLIST_ENTNAME); + + /* clowns to the left of me */ + if (left && mlist_boundary(left) == node->addr) { + node = mlist_join(head, left, node); + merge = 1; + } + + /* jokers to the right */ + if (right && mlist_boundary(node) == right->addr) { + node = mlist_join(head, node, right); + merge = 1; + } + + } while (merge); +} + +static uint64_t mlist_fulfill(QGuestAllocator *s, MemBlock *freenode, + uint64_t size) +{ + uint64_t addr; + MemBlock *usednode; + + g_assert(freenode); + g_assert_cmpint(freenode->size, >=, size); + + addr = freenode->addr; + if (freenode->size == size) { + /* re-use this freenode as our used node */ + QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME); + usednode = freenode; + } else { + /* adjust the free node and create a new used node */ + freenode->addr += size; + freenode->size -= size; + usednode = mlist_new(addr, size); + } + + mlist_sort_insert(&s->used, usednode); + return addr; +} + +/* To assert the correctness of the list. + * Used only if ALLOC_PARANOID is set. */ +static void mlist_check(QGuestAllocator *s) +{ + MemBlock *node; + uint64_t addr = s->start > 0 ? s->start - 1 : 0; + uint64_t next = s->start; + + QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) { + g_assert_cmpint(node->addr, >, addr); + g_assert_cmpint(node->addr, >=, next); + addr = node->addr; + next = node->addr + node->size; + } + + addr = s->start > 0 ? s->start - 1 : 0; + next = s->start; + QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) { + g_assert_cmpint(node->addr, >, addr); + g_assert_cmpint(node->addr, >=, next); + addr = node->addr; + next = node->addr + node->size; + } +} + +static uint64_t mlist_alloc(QGuestAllocator *s, uint64_t size) +{ + MemBlock *node; + + node = mlist_find_space(&s->free, size); + if (!node) { + fprintf(stderr, "Out of guest memory.\n"); + g_assert_not_reached(); + } + return mlist_fulfill(s, node, size); +} + +static void mlist_free(QGuestAllocator *s, uint64_t addr) +{ + MemBlock *node; + + if (addr == 0) { + return; + } + + node = mlist_find_key(&s->used, addr); + if (!node) { + fprintf(stderr, "Error: no record found for an allocation at " + "0x%016" PRIx64 ".\n", + addr); + g_assert_not_reached(); + } + + /* Rip it out of the used list and re-insert back into the free list. */ + QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME); + mlist_sort_insert(&s->free, node); + mlist_coalesce(&s->free, node); +} + +MemBlock *mlist_new(uint64_t addr, uint64_t size) +{ + MemBlock *block; + + if (!size) { + return NULL; + } + block = g_malloc0(sizeof(MemBlock)); + + block->addr = addr; + block->size = size; + + return block; +} + +/* + * Mostly for valgrind happiness, but it does offer + * a chokepoint for debugging guest memory leaks, too. + */ +void alloc_uninit(QGuestAllocator *allocator) +{ + MemBlock *node; + MemBlock *tmp; + QAllocOpts mask; + + /* Check for guest leaks, and destroy the list. */ + QTAILQ_FOREACH_SAFE(node, &allocator->used, MLIST_ENTNAME, tmp) { + if (allocator->opts & (ALLOC_LEAK_WARN | ALLOC_LEAK_ASSERT)) { + fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; " + "size 0x%016" PRIx64 ".\n", + node->addr, node->size); + } + if (allocator->opts & (ALLOC_LEAK_ASSERT)) { + g_assert_not_reached(); + } + g_free(node); + } + + /* If we have previously asserted that there are no leaks, then there + * should be only one node here with a specific address and size. */ + mask = ALLOC_LEAK_ASSERT | ALLOC_PARANOID; + QTAILQ_FOREACH_SAFE(node, &allocator->free, MLIST_ENTNAME, tmp) { + if ((allocator->opts & mask) == mask) { + if ((node->addr != allocator->start) || + (node->size != allocator->end - allocator->start)) { + fprintf(stderr, "Free list is corrupted.\n"); + g_assert_not_reached(); + } + } + + g_free(node); + } + + g_free(allocator); +} + +uint64_t guest_alloc(QGuestAllocator *allocator, size_t size) +{ + uint64_t rsize = size; + uint64_t naddr; + + rsize += (allocator->page_size - 1); + rsize &= -allocator->page_size; + g_assert_cmpint((allocator->start + rsize), <=, allocator->end); + g_assert_cmpint(rsize, >=, size); + + naddr = mlist_alloc(allocator, rsize); + if (allocator->opts & ALLOC_PARANOID) { + mlist_check(allocator); + } + + return naddr; +} + +void guest_free(QGuestAllocator *allocator, uint64_t addr) +{ + mlist_free(allocator, addr); + if (allocator->opts & ALLOC_PARANOID) { + mlist_check(allocator); + } +} diff --git a/tests/libqos/malloc.h b/tests/libqos/malloc.h index 556538121e..465efeb8fb 100644 --- a/tests/libqos/malloc.h +++ b/tests/libqos/malloc.h @@ -15,24 +15,39 @@ #include <stdint.h> #include <sys/types.h> +#include "qemu/queue.h" -typedef struct QGuestAllocator QGuestAllocator; +#define MLIST_ENTNAME entries -struct QGuestAllocator -{ - uint64_t (*alloc)(QGuestAllocator *allocator, size_t size); - void (*free)(QGuestAllocator *allocator, uint64_t addr); -}; +typedef enum { + ALLOC_NO_FLAGS = 0x00, + ALLOC_LEAK_WARN = 0x01, + ALLOC_LEAK_ASSERT = 0x02, + ALLOC_PARANOID = 0x04 +} QAllocOpts; + +typedef QTAILQ_HEAD(MemList, MemBlock) MemList; +typedef struct MemBlock { + QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME; + uint64_t size; + uint64_t addr; +} MemBlock; + +typedef struct QGuestAllocator { + QAllocOpts opts; + uint64_t start; + uint64_t end; + uint32_t page_size; + + MemList used; + MemList free; +} QGuestAllocator; + +MemBlock *mlist_new(uint64_t addr, uint64_t size); +void alloc_uninit(QGuestAllocator *allocator); /* Always returns page aligned values */ -static inline uint64_t guest_alloc(QGuestAllocator *allocator, size_t size) -{ - return allocator->alloc(allocator, size); -} - -static inline void guest_free(QGuestAllocator *allocator, uint64_t addr) -{ - allocator->free(allocator, addr); -} +uint64_t guest_alloc(QGuestAllocator *allocator, size_t size); +void guest_free(QGuestAllocator *allocator, uint64_t addr); #endif diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh index 12af731c68..0e554bb972 100755 --- a/tests/qemu-iotests-quick.sh +++ b/tests/qemu-iotests-quick.sh @@ -3,6 +3,6 @@ cd tests/qemu-iotests ret=0 -./check -T -qcow2 -g quick || ret=1 +TEST_DIR=${TEST_DIR:-/tmp/qemu-iotests-quick-$$} ./check -T -qcow2 -g quick || ret=1 exit $ret diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index 0872444811..e81d4d0d83 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -1,8 +1,8 @@ #!/usr/bin/env python # -# Tests for drive-backup +# Tests for drive-backup and blockdev-backup # -# Copyright (C) 2013 Red Hat, Inc. +# Copyright (C) 2013, 2014 Red Hat, Inc. # # Based on 041. # @@ -27,6 +27,7 @@ from iotests import qemu_img, qemu_io test_img = os.path.join(iotests.test_dir, 'test.img') target_img = os.path.join(iotests.test_dir, 'target.img') +blockdev_target_img = os.path.join(iotests.test_dir, 'blockdev-target.img') class TestSingleDrive(iotests.QMPTestCase): image_len = 64 * 1024 * 1024 # MB @@ -38,34 +39,41 @@ class TestSingleDrive(iotests.QMPTestCase): qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) - self.vm = iotests.VM().add_drive(test_img) + self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) self.vm.launch() def tearDown(self): self.vm.shutdown() os.remove(test_img) + os.remove(blockdev_target_img) try: os.remove(target_img) except OSError: pass - def test_cancel(self): + def do_test_cancel(self, cmd, target): self.assert_no_active_block_jobs() - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full') + result = self.vm.qmp(cmd, device='drive0', target=target, sync='full') self.assert_qmp(result, 'return', {}) event = self.cancel_and_wait() self.assert_qmp(event, 'data/type', 'backup') - def test_pause(self): + def test_cancel_drive_backup(self): + self.do_test_cancel('drive-backup', target_img) + + def test_cancel_blockdev_backup(self): + self.do_test_cancel('blockdev-backup', 'drive1') + + def do_test_pause(self, cmd, target, image): self.assert_no_active_block_jobs() self.vm.pause_drive('drive0') - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full') + result = self.vm.qmp(cmd, device='drive0', + target=target, sync='full') self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-job-pause', device='drive0') @@ -86,14 +94,25 @@ class TestSingleDrive(iotests.QMPTestCase): self.wait_until_completed() self.vm.shutdown() - self.assertTrue(iotests.compare_images(test_img, target_img), + self.assertTrue(iotests.compare_images(test_img, image), 'target image does not match source after backup') + def test_pause_drive_backup(self): + self.do_test_pause('drive-backup', target_img, target_img) + + def test_pause_blockdev_backup(self): + self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img) + def test_medium_not_found(self): result = self.vm.qmp('drive-backup', device='ide1-cd0', target=target_img, sync='full') self.assert_qmp(result, 'error/class', 'GenericError') + def test_medium_not_found_blockdev_backup(self): + result = self.vm.qmp('blockdev-backup', device='ide1-cd0', + target='drive1', sync='full') + self.assert_qmp(result, 'error/class', 'GenericError') + def test_image_not_found(self): result = self.vm.qmp('drive-backup', device='drive0', target=target_img, sync='full', mode='existing') @@ -105,31 +124,53 @@ class TestSingleDrive(iotests.QMPTestCase): format='spaghetti-noodles') self.assert_qmp(result, 'error/class', 'GenericError') - def test_device_not_found(self): - result = self.vm.qmp('drive-backup', device='nonexistent', - target=target_img, sync='full') + def do_test_device_not_found(self, cmd, **args): + result = self.vm.qmp(cmd, **args) self.assert_qmp(result, 'error/class', 'DeviceNotFound') + def test_device_not_found(self): + self.do_test_device_not_found('drive-backup', device='nonexistent', + target=target_img, sync='full') + + self.do_test_device_not_found('blockdev-backup', device='nonexistent', + target='drive0', sync='full') + + self.do_test_device_not_found('blockdev-backup', device='drive0', + target='nonexistent', sync='full') + + self.do_test_device_not_found('blockdev-backup', device='nonexistent', + target='nonexistent', sync='full') + + def test_target_is_source(self): + result = self.vm.qmp('blockdev-backup', device='drive0', + target='drive0', sync='full') + self.assert_qmp(result, 'error/class', 'GenericError') + class TestSetSpeed(iotests.QMPTestCase): image_len = 80 * 1024 * 1024 # MB def setUp(self): qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSetSpeed.image_len)) qemu_io('-f', iotests.imgfmt, '-c', 'write -P1 0 512', test_img) - self.vm = iotests.VM().add_drive(test_img) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) + + self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) self.vm.launch() def tearDown(self): self.vm.shutdown() os.remove(test_img) - os.remove(target_img) + os.remove(blockdev_target_img) + try: + os.remove(target_img) + except OSError: + pass - def test_set_speed(self): + def do_test_set_speed(self, cmd, target): self.assert_no_active_block_jobs() self.vm.pause_drive('drive0') - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full') + result = self.vm.qmp(cmd, device='drive0', target=target, sync='full') self.assert_qmp(result, 'return', {}) # Default speed is 0 @@ -148,10 +189,10 @@ class TestSetSpeed(iotests.QMPTestCase): event = self.cancel_and_wait(resume=True) self.assert_qmp(event, 'data/type', 'backup') - # Check setting speed in drive-backup works + # Check setting speed option works self.vm.pause_drive('drive0') - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full', speed=4*1024*1024) + result = self.vm.qmp(cmd, device='drive0', + target=target, sync='full', speed=4*1024*1024) self.assert_qmp(result, 'return', {}) result = self.vm.qmp('query-block-jobs') @@ -161,18 +202,24 @@ class TestSetSpeed(iotests.QMPTestCase): event = self.cancel_and_wait(resume=True) self.assert_qmp(event, 'data/type', 'backup') - def test_set_speed_invalid(self): + def test_set_speed_drive_backup(self): + self.do_test_set_speed('drive-backup', target_img) + + def test_set_speed_blockdev_backup(self): + self.do_test_set_speed('blockdev-backup', 'drive1') + + def do_test_set_speed_invalid(self, cmd, target): self.assert_no_active_block_jobs() - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full', speed=-1) + result = self.vm.qmp(cmd, device='drive0', + target=target, sync='full', speed=-1) self.assert_qmp(result, 'error/class', 'GenericError') self.assert_no_active_block_jobs() self.vm.pause_drive('drive0') - result = self.vm.qmp('drive-backup', device='drive0', - target=target_img, sync='full') + result = self.vm.qmp(cmd, device='drive0', + target=target, sync='full') self.assert_qmp(result, 'return', {}) result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1) @@ -181,6 +228,12 @@ class TestSetSpeed(iotests.QMPTestCase): event = self.cancel_and_wait(resume=True) self.assert_qmp(event, 'data/type', 'backup') + def test_set_speed_invalid_drive_backup(self): + self.do_test_set_speed_invalid('drive-backup', target_img) + + def test_set_speed_invalid_blockdev_backup(self): + self.do_test_set_speed_invalid('blockdev-backup', 'drive1') + class TestSingleTransaction(iotests.QMPTestCase): image_len = 64 * 1024 * 1024 # MB @@ -190,41 +243,50 @@ class TestSingleTransaction(iotests.QMPTestCase): qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) - self.vm = iotests.VM().add_drive(test_img) + self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) self.vm.launch() def tearDown(self): self.vm.shutdown() os.remove(test_img) + os.remove(blockdev_target_img) try: os.remove(target_img) except OSError: pass - def test_cancel(self): + def do_test_cancel(self, cmd, target): self.assert_no_active_block_jobs() result = self.vm.qmp('transaction', actions=[{ - 'type': 'drive-backup', + 'type': cmd, 'data': { 'device': 'drive0', - 'target': target_img, + 'target': target, 'sync': 'full' }, } ]) + self.assert_qmp(result, 'return', {}) event = self.cancel_and_wait() self.assert_qmp(event, 'data/type', 'backup') - def test_pause(self): + def test_cancel_drive_backup(self): + self.do_test_cancel('drive-backup', target_img) + + def test_cancel_blockdev_backup(self): + self.do_test_cancel('blockdev-backup', 'drive1') + + def do_test_pause(self, cmd, target, image): self.assert_no_active_block_jobs() self.vm.pause_drive('drive0') result = self.vm.qmp('transaction', actions=[{ - 'type': 'drive-backup', + 'type': cmd, 'data': { 'device': 'drive0', - 'target': target_img, + 'target': target, 'sync': 'full' }, } ]) @@ -248,19 +310,31 @@ class TestSingleTransaction(iotests.QMPTestCase): self.wait_until_completed() self.vm.shutdown() - self.assertTrue(iotests.compare_images(test_img, target_img), + self.assertTrue(iotests.compare_images(test_img, image), 'target image does not match source after backup') - def test_medium_not_found(self): + def test_pause_drive_backup(self): + self.do_test_pause('drive-backup', target_img, target_img) + + def test_pause_blockdev_backup(self): + self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img) + + def do_test_medium_not_found(self, cmd, target): result = self.vm.qmp('transaction', actions=[{ - 'type': 'drive-backup', + 'type': cmd, 'data': { 'device': 'ide1-cd0', - 'target': target_img, + 'target': target, 'sync': 'full' }, } ]) self.assert_qmp(result, 'error/class', 'GenericError') + def test_medium_not_found_drive_backup(self): + self.do_test_medium_not_found('drive-backup', target_img) + + def test_medium_not_found_blockdev_backup(self): + self.do_test_medium_not_found('blockdev-backup', 'drive1') + def test_image_not_found(self): result = self.vm.qmp('transaction', actions=[{ 'type': 'drive-backup', @@ -283,6 +357,43 @@ class TestSingleTransaction(iotests.QMPTestCase): ]) self.assert_qmp(result, 'error/class', 'DeviceNotFound') + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'nonexistent', + 'target': 'drive1', + 'sync': 'full' }, + } + ]) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'drive0', + 'target': 'nonexistent', + 'sync': 'full' }, + } + ]) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'nonexistent', + 'target': 'nonexistent', + 'sync': 'full' }, + } + ]) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_target_is_source(self): + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'drive0', + 'target': 'drive0', + 'sync': 'full' }, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + def test_abort(self): result = self.vm.qmp('transaction', actions=[{ 'type': 'drive-backup', @@ -298,5 +409,31 @@ class TestSingleTransaction(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') self.assert_no_active_block_jobs() + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'nonexistent', + 'target': 'drive1', + 'sync': 'full' }, + }, { + 'type': 'Abort', + 'data': {}, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_no_active_block_jobs() + + result = self.vm.qmp('transaction', actions=[{ + 'type': 'blockdev-backup', + 'data': { 'device': 'drive0', + 'target': 'nonexistent', + 'sync': 'full' }, + }, { + 'type': 'Abort', + 'data': {}, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_no_active_block_jobs() + if __name__ == '__main__': iotests.main(supported_fmts=['raw', 'qcow2']) diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out index 6323079e08..42314e9c00 100644 --- a/tests/qemu-iotests/055.out +++ b/tests/qemu-iotests/055.out @@ -1,5 +1,5 @@ -.............. +........................ ---------------------------------------------------------------------- -Ran 14 tests +Ran 24 tests OK diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058 index 2d5ca85ddc..a60b34b46c 100755 --- a/tests/qemu-iotests/058 +++ b/tests/qemu-iotests/058 @@ -87,6 +87,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto file +_supported_os Linux _require_command QEMU_NBD # Use -f raw instead of -f $IMGFMT for the NBD connection diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067 index 29cd6b5aff..0508c696a8 100755 --- a/tests/qemu-iotests/067 +++ b/tests/qemu-iotests/067 @@ -45,7 +45,8 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \ + | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' } size=128M diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071 index 5d61ef6d81..9eaa49b419 100755 --- a/tests/qemu-iotests/071 +++ b/tests/qemu-iotests/071 @@ -51,7 +51,7 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io } IMG_SIZE=64M diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out index 46484ff69c..9205ce2512 100644 --- a/tests/qemu-iotests/071.out +++ b/tests/qemu-iotests/071.out @@ -52,8 +52,8 @@ read failed: Input/output error {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} -qemu-system-x86_64: Failed to flush the L2 table cache: Input/output error -qemu-system-x86_64: Failed to flush the refcount block cache: Input/output error +QEMU_PROG: Failed to flush the L2 table cache: Input/output error +QEMU_PROG: Failed to flush the refcount block cache: Input/output error === Testing blkverify on existing block device === @@ -92,7 +92,7 @@ read failed: Input/output error {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} -qemu-system-x86_64: Failed to flush the L2 table cache: Input/output error -qemu-system-x86_64: Failed to flush the refcount block cache: Input/output error +QEMU_PROG: Failed to flush the L2 table cache: Input/output error +QEMU_PROG: Failed to flush the refcount block cache: Input/output error *** done diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081 index 9ab93ff89e..d9b042cfc7 100755 --- a/tests/qemu-iotests/081 +++ b/tests/qemu-iotests/081 @@ -53,7 +53,7 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io } test_quorum=$($QEMU_IMG --help|grep quorum) diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087 index d7454d13da..8694749947 100755 --- a/tests/qemu-iotests/087 +++ b/tests/qemu-iotests/087 @@ -45,7 +45,8 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \ + | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' } size=128M diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index 7d38cc26c5..91f4ea1a8b 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -21,7 +21,6 @@ QMP_VERSION {"return": {}} {"error": {"class": "GenericError", "desc": "Device with id 'disk' already exists"}} {"error": {"class": "GenericError", "desc": "Device name 'test-node' conflicts with an existing node name"}} -main-loop: WARNING: I/O thread spun for 1000 iterations {"error": {"class": "GenericError", "desc": "could not open disk image disk2: node-name=disk is conflicting with a device id"}} {"error": {"class": "GenericError", "desc": "could not open disk image disk2: Duplicate node name"}} {"error": {"class": "GenericError", "desc": "could not open disk image disk3: node-name=disk3 is conflicting with a device id"}} diff --git a/tests/qemu-iotests/099 b/tests/qemu-iotests/099 index 948afff28b..80f3d9aaf3 100755 --- a/tests/qemu-iotests/099 +++ b/tests/qemu-iotests/099 @@ -57,7 +57,7 @@ function run_qemu() # Get the "file": "foo" entry ($foo may only contain escaped double quotes, # which is how we can extract it) do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_imgfmt | _filter_qmp \ - | grep "drv0" \ + | _filter_qemu | grep "drv0" \ | sed -e 's/^.*"file": "\(\(\\"\|[^"]\)*\)".*$/\1/' -e 's/\\"/"/g' } diff --git a/tests/qemu-iotests/110 b/tests/qemu-iotests/110 new file mode 100755 index 0000000000..a687f9567d --- /dev/null +++ b/tests/qemu-iotests/110 @@ -0,0 +1,94 @@ +#!/bin/bash +# +# Test case for relative backing file names in complex BDS trees +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# Any format supporting backing files +_supported_fmt qed qcow qcow2 vmdk +_supported_proto file +_supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" + +TEST_IMG_REL=$(basename "$TEST_IMG") + +echo +echo '=== Reconstructable filename ===' +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img 64M +_make_test_img -b "$TEST_IMG_REL.base" 64M +# qemu should be able to reconstruct the filename, so relative backing names +# should work +TEST_IMG="json:{'driver':'$IMGFMT','file':{'driver':'file','filename':'$TEST_IMG'}}" \ + _img_info | _filter_img_info + +echo +echo '=== Non-reconstructable filename ===' +echo + +# Across blkdebug without a config file, you cannot reconstruct filenames, so +# qemu is incapable of knowing the directory of the top image +TEST_IMG="json:{ + 'driver': '$IMGFMT', + 'file': { + 'driver': 'blkdebug', + 'image': { + 'driver': 'file', + 'filename': '$TEST_IMG' + }, + 'set-state': [ + { + 'event': 'read_aio', + 'new_state': 42 + } + ] + } +}" _img_info | _filter_img_info + +echo +echo '=== Backing name is always relative to the backed image ===' +echo + +# omit the image size; it should work anyway +_make_test_img -b "$TEST_IMG_REL.base" + + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/110.out b/tests/qemu-iotests/110.out new file mode 100644 index 0000000000..152bacf41e --- /dev/null +++ b/tests/qemu-iotests/110.out @@ -0,0 +1,19 @@ +QA output created by 110 + +=== Reconstructable filename === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='t.IMGFMT.base' +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 64M (67108864 bytes) +backing file: t.IMGFMT.base (actual path: TEST_DIR/t.IMGFMT.base) + +=== Non-reconstructable filename === + +qemu-img: Cannot use relative backing file names for 'json:{"driver": "IMGFMT", "file": {"set-state.0.event": "read_aio", "image": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}, "driver": "blkdebug", "set-state.0.new_state": 42}}' + +=== Backing name is always relative to the backed image === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='t.IMGFMT.base' +*** done diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 8ca40116d7..baeae80f96 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -238,6 +238,7 @@ QEMU_NBD -- $QEMU_NBD IMGFMT -- $FULL_IMGFMT_DETAILS IMGPROTO -- $FULL_IMGPROTO_DETAILS PLATFORM -- $FULL_HOST_DETAILS +TEST_DIR -- $TEST_DIR SOCKET_SCM_HELPER -- $SOCKET_SCM_HELPER EOF diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index 91a5ef696b..a1973ad9d0 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -155,4 +155,4 @@ _readlink() } # make sure this script returns success -/bin/true +true diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 6c14590594..b73c70be95 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -159,6 +159,7 @@ _filter_qemu() { sed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ + -e '/main-loop: WARNING: I\/O thread spun for [0-9]\+ iterations/d' \ -e $'s#\r##' # QEMU monitor uses \r\n line endings } @@ -223,4 +224,4 @@ _filter_qemu_img_map() } # make sure this script returns success -/bin/true +true diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 3b14053790..aa093d9d84 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -490,4 +490,4 @@ _die() } # make sure this script returns success -/bin/true +true diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index a4742c6d01..f8bf354156 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -97,7 +97,7 @@ 088 rw auto quick 089 rw auto quick 090 rw auto quick -091 rw auto quick +091 rw auto 092 rw auto quick 095 rw auto quick 097 rw auto backing @@ -112,6 +112,7 @@ 107 rw auto quick 108 rw auto quick 109 rw auto +110 rw auto backing quick 111 rw auto quick 113 rw auto quick 114 rw auto quick diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index f57f1548ac..87002e0e2c 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -282,12 +282,15 @@ def notrun(reason): print '%s not run: %s' % (seq, reason) sys.exit(0) -def main(supported_fmts=[]): +def main(supported_fmts=[], supported_oses=['linux']): '''Run tests''' if supported_fmts and (imgfmt not in supported_fmts): notrun('not suitable for this image format: %s' % imgfmt) + if sys.platform not in supported_oses: + notrun('not suitable for this OS: %s' % sys.platform) + # We need to filter out the time taken from the output so that qemu-iotest # can reliably diff the results against master output. import StringIO diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c index e22fae170a..27d1b6f8e8 100644 --- a/tests/test-coroutine.c +++ b/tests/test-coroutine.c @@ -337,7 +337,7 @@ static void perf_cost(void) "%luns per coroutine", maxcycles, duration, ops, - (unsigned long)(1000000000 * duration) / maxcycles); + (unsigned long)(1000000000.0 * duration / maxcycles)); } int main(int argc, char **argv) diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index d05a6497e1..41cb23df0c 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -26,6 +26,7 @@ #endif #include "qemu/thread.h" #include "qemu/atomic.h" +#include "qemu/notify.h" static bool name_threads; @@ -401,6 +402,42 @@ void qemu_event_wait(QemuEvent *ev) } } +static pthread_key_t exit_key; + +union NotifierThreadData { + void *ptr; + NotifierList list; +}; +QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *)); + +void qemu_thread_atexit_add(Notifier *notifier) +{ + union NotifierThreadData ntd; + ntd.ptr = pthread_getspecific(exit_key); + notifier_list_add(&ntd.list, notifier); + pthread_setspecific(exit_key, ntd.ptr); +} + +void qemu_thread_atexit_remove(Notifier *notifier) +{ + union NotifierThreadData ntd; + ntd.ptr = pthread_getspecific(exit_key); + notifier_remove(notifier); + pthread_setspecific(exit_key, ntd.ptr); +} + +static void qemu_thread_atexit_run(void *arg) +{ + union NotifierThreadData ntd = { .ptr = arg }; + notifier_list_notify(&ntd.list, NULL); +} + +static void __attribute__((constructor)) qemu_thread_atexit_init(void) +{ + pthread_key_create(&exit_key, qemu_thread_atexit_run); +} + + /* Attempt to set the threads name; note that this is for debug, so * we're not going to fail if we can't set it. */ diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index c405c9bef6..406b52f91d 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -12,6 +12,7 @@ */ #include "qemu-common.h" #include "qemu/thread.h" +#include "qemu/notify.h" #include <process.h> #include <assert.h> #include <limits.h> @@ -268,6 +269,7 @@ struct QemuThreadData { void *(*start_routine)(void *); void *arg; short mode; + NotifierList exit; /* Only used for joinable threads. */ bool exited; @@ -275,18 +277,40 @@ struct QemuThreadData { CRITICAL_SECTION cs; }; +static bool atexit_registered; +static NotifierList main_thread_exit; + static __thread QemuThreadData *qemu_thread_data; +static void run_main_thread_exit(void) +{ + notifier_list_notify(&main_thread_exit, NULL); +} + +void qemu_thread_atexit_add(Notifier *notifier) +{ + if (!qemu_thread_data) { + if (!atexit_registered) { + atexit_registered = true; + atexit(run_main_thread_exit); + } + notifier_list_add(&main_thread_exit, notifier); + } else { + notifier_list_add(&qemu_thread_data->exit, notifier); + } +} + +void qemu_thread_atexit_remove(Notifier *notifier) +{ + notifier_remove(notifier); +} + static unsigned __stdcall win32_start_routine(void *arg) { QemuThreadData *data = (QemuThreadData *) arg; void *(*start_routine)(void *) = data->start_routine; void *thread_arg = data->arg; - if (data->mode == QEMU_THREAD_DETACHED) { - g_free(data); - data = NULL; - } qemu_thread_data = data; qemu_thread_exit(start_routine(thread_arg)); abort(); @@ -296,12 +320,14 @@ void qemu_thread_exit(void *arg) { QemuThreadData *data = qemu_thread_data; - if (data) { - assert(data->mode != QEMU_THREAD_DETACHED); + notifier_list_notify(&data->exit, NULL); + if (data->mode == QEMU_THREAD_JOINABLE) { data->ret = arg; EnterCriticalSection(&data->cs); data->exited = true; LeaveCriticalSection(&data->cs); + } else { + g_free(data); } _endthreadex(0); } @@ -313,9 +339,10 @@ void *qemu_thread_join(QemuThread *thread) HANDLE handle; data = thread->data; - if (!data) { + if (data->mode == QEMU_THREAD_DETACHED) { return NULL; } + /* * Because multiple copies of the QemuThread can exist via * qemu_thread_get_self, we need to store a value that cannot @@ -329,7 +356,6 @@ void *qemu_thread_join(QemuThread *thread) CloseHandle(handle); } ret = data->ret; - assert(data->mode != QEMU_THREAD_DETACHED); DeleteCriticalSection(&data->cs); g_free(data); return ret; @@ -347,6 +373,7 @@ void qemu_thread_create(QemuThread *thread, const char *name, data->arg = arg; data->mode = mode; data->exited = false; + notifier_list_init(&data->exit); if (data->mode != QEMU_THREAD_DETACHED) { InitializeCriticalSection(&data->cs); @@ -358,7 +385,7 @@ void qemu_thread_create(QemuThread *thread, const char *name, error_exit(GetLastError(), __func__); } CloseHandle(hThread); - thread->data = (mode == QEMU_THREAD_DETACHED) ? NULL : data; + thread->data = data; } void qemu_thread_get_self(QemuThread *thread) @@ -373,11 +400,10 @@ HANDLE qemu_thread_get_handle(QemuThread *thread) HANDLE handle; data = thread->data; - if (!data) { + if (data->mode == QEMU_THREAD_DETACHED) { return NULL; } - assert(data->mode != QEMU_THREAD_DETACHED); EnterCriticalSection(&data->cs); if (!data->exited) { handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE, @@ -90,6 +90,12 @@ static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) #endif #define BUFFER_IO_MAX_DELAY 100 +/* Leave some slack so that hvmloader does not complain about lack of + * memory at boot time ("Could not allocate order=0 extent"). + * Once hvmloader is modified to cope with that situation without + * printing warning messages, QEMU_SPARE_PAGES can be removed. + */ +#define QEMU_SPARE_PAGES 16 typedef struct XenPhysmap { hwaddr start_addr; @@ -244,6 +250,8 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr) unsigned long nr_pfn; xen_pfn_t *pfn_list; int i; + xc_domaininfo_t info; + unsigned long free_pages; if (runstate_check(RUN_STATE_INMIGRATE)) { /* RAM already populated in Xen */ @@ -266,6 +274,22 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr) pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; } + if ((xc_domain_getinfolist(xen_xc, xen_domid, 1, &info) != 1) || + (info.domain != xen_domid)) { + hw_error("xc_domain_getinfolist failed"); + } + free_pages = info.max_pages - info.tot_pages; + if (free_pages > QEMU_SPARE_PAGES) { + free_pages -= QEMU_SPARE_PAGES; + } else { + free_pages = 0; + } + if ((free_pages < nr_pfn) && + (xc_domain_setmaxmem(xen_xc, xen_domid, + ((info.max_pages + nr_pfn - free_pages) + << (XC_PAGE_SHIFT - 10))) < 0)) { + hw_error("xc_domain_setmaxmem failed"); + } if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { hw_error("xen: failed to populate ram at " RAM_ADDR_FMT, ram_addr); } |