135 files changed, 4436 insertions, 1106 deletions
diff --git a/.gitignore b/.gitignore
index e32a58417a..090f974cb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,3 +109,4 @@ cscope.*
 tags
 TAGS
 *~
+/tests/qemu-iotests/common.env
diff --git a/MAINTAINERS b/MAINTAINERS
index 7fc3cdb1d6..430688dcab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -538,6 +538,7 @@ S390 Virtio
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: hw/s390x/s390-*.c
+X: hw/s390x/*pci*.[hc]
 
 S390 Virtio-ccw
 M: Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -548,6 +549,7 @@ F: hw/s390x/s390-virtio-ccw.c
 F: hw/s390x/css.[hc]
 F: hw/s390x/sclp*.[hc]
 F: hw/s390x/ipl*.[hc]
+F: hw/s390x/*pci*.[hc]
 F: include/hw/s390x/
 F: pc-bios/s390-ccw/
 T: git git://github.com/cohuck/qemu virtio-ccw-upstr
@@ -755,6 +757,7 @@ F: aio-*.c
 F: block*
 F: block/
 F: hw/block/
+F: migration/block*
 F: qemu-img*
 F: qemu-io*
 F: tests/image-fuzzer/
@@ -1103,7 +1106,6 @@ S: Supported
 F: block/ssh.c
 
 ARCHIPELAGO
-M: Chrysostomos Nanakos <cnanakos@grnet.gr>
 M: Chrysostomos Nanakos <chris@include.gr>
 S: Maintained
 F: block/archipelago.c
diff --git a/arch_init.c b/arch_init.c
index 7680d28be4..cfedbf08af 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -522,7 +522,7 @@ static void migration_bitmap_sync(void)
     address_space_sync_dirty_bitmap(&address_space_memory);
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        migration_bitmap_sync_range(block->mr->ram_addr, block->length);
+        migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
     }
     trace_migration_bitmap_sync_end(migration_dirty_pages
                                     - num_dirty_pages_init);
@@ -668,7 +668,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
             offset >= last_offset) {
             break;
         }
-        if (offset >= block->length) {
+        if (offset >= block->used_length) {
             offset = 0;
             block = QTAILQ_NEXT(block, next);
             if (!block) {
@@ -727,7 +727,7 @@ uint64_t ram_bytes_total(void)
     uint64_t total = 0;
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next)
-        total += block->length;
+        total += block->used_length;
 
     return total;
 }
@@ -831,7 +831,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         uint64_t block_pages;
 
-        block_pages = block->length >> TARGET_PAGE_BITS;
+        block_pages = block->used_length >> TARGET_PAGE_BITS;
         migration_dirty_pages += block_pages;
     }
 
@@ -844,7 +844,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         qemu_put_byte(f, strlen(block->idstr));
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
-        qemu_put_be64(f, block->length);
+        qemu_put_be64(f, block->used_length);
     }
 
     qemu_mutex_unlock_ramlist();
@@ -1015,7 +1015,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     uint8_t len;
 
     if (flags & RAM_SAVE_FLAG_CONTINUE) {
-        if (!block || block->length <= offset) {
+        if (!block || block->max_length <= offset) {
             error_report("Ack, bad migration stream!");
             return NULL;
         }
@@ -1028,7 +1028,8 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     id[len] = 0;
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (!strncmp(id, block->idstr, sizeof(id)) && block->length > offset) {
+        if (!strncmp(id, block->idstr, sizeof(id)) &&
+            block->max_length > offset) {
             return memory_region_get_ram_ptr(block->mr) + offset;
         }
     }
@@ -1085,11 +1086,14 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 
                 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                     if (!strncmp(id, block->idstr, sizeof(id))) {
-                        if (block->length != length) {
-                            error_report("Length mismatch: %s: 0x" RAM_ADDR_FMT
-                                         " in != 0x" RAM_ADDR_FMT, id, length,
-                                         block->length);
-                            ret =  -EINVAL;
+                        if (length != block->used_length) {
+                            Error *local_err = NULL;
+
+                            ret = qemu_ram_resize(block->offset, length, &local_err);
+                            if (local_err) {
+                                error_report("%s", error_get_pretty(local_err));
+                                error_free(local_err);
+                            }
                         }
                         break;
                     }
diff --git a/async.c b/async.c
index 3939b795e5..2be88cc9e9 100644
--- a/async.c
+++ b/async.c
@@ -44,10 +44,12 @@ struct QEMUBH {
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
     QEMUBH *bh;
-    bh = g_new0(QEMUBH, 1);
-    bh->ctx = ctx;
-    bh->cb = cb;
-    bh->opaque = opaque;
+    bh = g_new(QEMUBH, 1);
+    *bh = (QEMUBH){
+        .ctx = ctx,
+        .cb = cb,
+        .opaque = opaque,
+    };
     qemu_mutex_lock(&ctx->bh_lock);
     bh->next = ctx->first_bh;
     /* Make sure that the members are ready before putting bh into list */
@@ -300,6 +302,7 @@ AioContext *aio_context_new(Error **errp)
         error_setg_errno(errp, -ret, "Failed to initialize event notifier");
         return NULL;
     }
+    g_source_set_can_recurse(&ctx->source, true);
     aio_set_event_notifier(ctx, &ctx->notifier,
                            (EventNotifierHandler *)
                            event_notifier_test_and_clear);
diff --git a/block.c b/block.c
index 4165d4265c..cbe4a32a5a 100644
--- a/block.c
+++ b/block.c
@@ -97,6 +97,10 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
     QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
+static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                           int nr_sectors);
+static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+                             int nr_sectors);
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
@@ -303,15 +307,32 @@ void path_combine(char *dest, int dest_size,
     }
 }
 
-void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
+void bdrv_get_full_backing_filename_from_filename(const char *backed,
+                                                  const char *backing,
+                                                  char *dest, size_t sz,
+                                                  Error **errp)
 {
-    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
-        pstrcpy(dest, sz, bs->backing_file);
+    if (backing[0] == '\0' || path_has_protocol(backing) ||
+        path_is_absolute(backing))
+    {
+        pstrcpy(dest, sz, backing);
+    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
+        error_setg(errp, "Cannot use relative backing file names for '%s'",
+                   backed);
     } else {
-        path_combine(dest, sz, bs->filename, bs->backing_file);
+        path_combine(dest, sz, backed, backing);
     }
 }
 
+void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
+                                    Error **errp)
+{
+    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
+
+    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
+                                                 dest, sz, errp);
+}
+
 void bdrv_register(BlockDriver *bdrv)
 {
     /* Block drivers without coroutine functions need emulation */
@@ -1179,7 +1200,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
 
     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
     /* Otherwise we won't be able to commit due to check in bdrv_commit */
-    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
+    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                     bs->backing_blocker);
 out:
     bdrv_refresh_limits(bs, NULL);
@@ -1217,7 +1238,14 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
         QDECREF(options);
         goto free_exit;
     } else {
-        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
+        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
+                                       &local_err);
+        if (local_err) {
+            ret = -EINVAL;
+            error_propagate(errp, local_err);
+            QDECREF(options);
+            goto free_exit;
+        }
     }
 
     if (!bs->drv || !bs->drv->supports_backing) {
@@ -2188,8 +2216,8 @@ int bdrv_commit(BlockDriverState *bs)
         return -ENOTSUP;
     }
 
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
-        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
+        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
         return -EBUSY;
     }
 
@@ -3034,18 +3062,16 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
 
         max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                   align >> BDRV_SECTOR_BITS);
-        if (max_nb_sectors > 0) {
+        if (nb_sectors < max_nb_sectors) {
+            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+        } else if (max_nb_sectors > 0) {
             QEMUIOVector local_qiov;
-            size_t local_sectors;
-
-            max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
-            local_sectors = MIN(max_nb_sectors, nb_sectors);
 
             qemu_iovec_init(&local_qiov, qiov->niov);
             qemu_iovec_concat(&local_qiov, qiov, 0,
-                              local_sectors * BDRV_SECTOR_SIZE);
+                              max_nb_sectors * BDRV_SECTOR_SIZE);
 
-            ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
+            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
                                      &local_qiov);
 
             qemu_iovec_destroy(&local_qiov);
@@ -3218,6 +3244,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
 
         if (ret == -ENOTSUP) {
             /* Fall back to bounce buffer if write zeroes is unsupported */
+            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
+                                            MAX_WRITE_ZEROES_DEFAULT);
+            num = MIN(num, max_xfer_len);
             iov.iov_len = num * BDRV_SECTOR_SIZE;
             if (iov.iov_base == NULL) {
                 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
@@ -3234,7 +3263,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
             /* Keep bounce buffer around if it is big enough for all
              * all future requests.
              */
-            if (num < max_write_zeroes) {
+            if (num < max_xfer_len) {
                 qemu_vfree(iov.iov_base);
                 iov.iov_base = NULL;
             }
@@ -5389,8 +5418,20 @@ void bdrv_dirty_iter_init(BlockDriverState *bs,
     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
 }
 
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
+void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int nr_sectors)
+{
+    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int nr_sectors)
+{
+    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                           int nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
@@ -5398,7 +5439,8 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
     }
 }
 
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
+static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+                             int nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
@@ -5637,16 +5679,26 @@ void bdrv_img_create(const char *filename, const char *fmt,
     if (size == -1) {
         if (backing_file) {
             BlockDriverState *bs;
+            char *full_backing = g_new0(char, PATH_MAX);
             int64_t size;
             int back_flags;
 
+            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+                                                         full_backing, PATH_MAX,
+                                                         &local_err);
+            if (local_err) {
+                g_free(full_backing);
+                goto out;
+            }
+
             /* backing files always opened read-only */
             back_flags =
                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
 
             bs = NULL;
-            ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
+            ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
                             backing_drv, &local_err);
+            g_free(full_backing);
             if (ret < 0) {
                 goto out;
             }
diff --git a/block/backup.c b/block/backup.c
index 792e65514b..1c535b1ab9 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -360,6 +360,7 @@ static void coroutine_fn backup_run(void *opaque)
     hbitmap_free(job->bitmap);
 
     bdrv_iostatus_disable(target);
+    bdrv_op_unblock_all(target, job->common.blocker);
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -379,6 +380,11 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     assert(target);
     assert(cb);
 
+    if (bs == target) {
+        error_setg(errp, "Source and target cannot be the same");
+        return;
+    }
+
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
         !bdrv_iostatus_is_enabled(bs)) {
@@ -386,6 +392,26 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    if (!bdrv_is_inserted(bs)) {
+        error_setg(errp, "Device is not inserted: %s",
+                   bdrv_get_device_name(bs));
+        return;
+    }
+
+    if (!bdrv_is_inserted(target)) {
+        error_setg(errp, "Device is not inserted: %s",
+                   bdrv_get_device_name(target));
+        return;
+    }
+
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+        return;
+    }
+
+    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
+        return;
+    }
+
     len = bdrv_getlength(bs);
     if (len < 0) {
         error_setg_errno(errp, -len, "unable to get length for '%s'",
@@ -399,6 +425,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    bdrv_op_block_all(target, job->common.blocker);
+
     job->on_source_error = on_source_error;
     job->on_target_error = on_target_error;
     job->target = target;
diff --git a/block/block-backend.c b/block/block-backend.c
index ef16d7356a..d00c129f15 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -260,9 +260,6 @@ int blk_attach_dev(BlockBackend *blk, void *dev)
     blk_ref(blk);
     blk->dev = dev;
     bdrv_iostatus_reset(blk->bs);
-
-    /* We're expecting I/O from the device so bump up coroutine pool size */
-    qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
     return 0;
 }
 
@@ -290,7 +287,6 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
     blk->dev_ops = NULL;
     blk->dev_opaque = NULL;
     bdrv_set_guest_block_size(blk->bs, 512);
-    qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
     blk_unref(blk);
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index 2c6dd2a4c1..9019d1ba56 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -128,7 +128,8 @@ static void mirror_write_complete(void *opaque, int ret)
         BlockDriverState *source = s->common.bs;
         BlockErrorAction action;
 
-        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
+                              op->nb_sectors);
         action = mirror_error_action(s, false, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -145,7 +146,8 @@ static void mirror_read_complete(void *opaque, int ret)
         BlockDriverState *source = s->common.bs;
         BlockErrorAction action;
 
-        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
+                              op->nb_sectors);
         action = mirror_error_action(s, true, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -286,7 +288,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         next_sector += sectors_per_chunk;
     }
 
-    bdrv_reset_dirty(source, sector_num, nb_sectors);
+    bdrv_reset_dirty_bitmap(source, s->dirty_bitmap, sector_num,
+                            nb_sectors);
 
     /* Copy the dirty cluster.  */
     s->in_flight++;
@@ -442,7 +445,7 @@ static void coroutine_fn mirror_run(void *opaque)
 
             assert(n > 0);
             if (ret == 1) {
-                bdrv_set_dirty(bs, sector_num, n);
+                bdrv_set_dirty_bitmap(bs, s->dirty_bitmap, sector_num, n);
                 sector_num = next;
             } else {
                 sector_num += n;
diff --git a/block/qapi.c b/block/qapi.c
index fa68ba731f..a6fd6f7ab2 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -214,7 +214,12 @@ void bdrv_query_image_info(BlockDriverState *bs,
         info->backing_filename = g_strdup(backing_filename);
         info->has_backing_filename = true;
         bdrv_get_full_backing_filename(bs, backing_filename2,
-                                       sizeof(backing_filename2));
+                                       sizeof(backing_filename2), &err);
+        if (err) {
+            error_propagate(errp, err);
+            qapi_free_ImageInfo(info);
+            return;
+        }
 
         if (strcmp(backing_filename, backing_filename2) != 0) {
             info->full_backing_filename =
diff --git a/block/vmdk.c b/block/vmdk.c
index bfff900ba6..52cb8888e5 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1891,8 +1891,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
     }
     if (backing_file) {
         BlockDriverState *bs = NULL;
-        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL,
+        char *full_backing = g_new0(char, PATH_MAX);
+        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+                                                     full_backing, PATH_MAX,
+                                                     &local_err);
+        if (local_err) {
+            g_free(full_backing);
+            error_propagate(errp, local_err);
+            ret = -ENOENT;
+            goto exit;
+        }
+        ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL,
                         errp);
+        g_free(full_backing);
         if (ret != 0) {
             goto exit;
         }
diff --git a/blockdev.c b/blockdev.c
index 5651a8e140..d59efd3f15 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1559,6 +1559,79 @@ static void drive_backup_clean(BlkTransactionState *common)
     }
 }
 
+typedef struct BlockdevBackupState {
+    BlkTransactionState common;
+    BlockDriverState *bs;
+    BlockJob *job;
+    AioContext *aio_context;
+} BlockdevBackupState;
+
+static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp)
+{
+    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    BlockdevBackup *backup;
+    BlockDriverState *bs, *target;
+    Error *local_err = NULL;
+
+    assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
+    backup = common->action->blockdev_backup;
+
+    bs = bdrv_find(backup->device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, backup->device);
+        return;
+    }
+
+    target = bdrv_find(backup->target);
+    if (!target) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, backup->target);
+        return;
+    }
+
+    /* AioContext is released in .clean() */
+    state->aio_context = bdrv_get_aio_context(bs);
+    if (state->aio_context != bdrv_get_aio_context(target)) {
+        state->aio_context = NULL;
+        error_setg(errp, "Backup between two IO threads is not implemented");
+        return;
+    }
+    aio_context_acquire(state->aio_context);
+
+    qmp_blockdev_backup(backup->device, backup->target,
+                        backup->sync,
+                        backup->has_speed, backup->speed,
+                        backup->has_on_source_error, backup->on_source_error,
+                        backup->has_on_target_error, backup->on_target_error,
+                        &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    state->bs = bs;
+    state->job = state->bs->job;
+}
+
+static void blockdev_backup_abort(BlkTransactionState *common)
+{
+    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    BlockDriverState *bs = state->bs;
+
+    /* Only cancel if it's the job we started */
+    if (bs && bs->job && bs->job == state->job) {
+        block_job_cancel_sync(bs->job);
+    }
+}
+
+static void blockdev_backup_clean(BlkTransactionState *common)
+{
+    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+
+    if (state->aio_context) {
+        aio_context_release(state->aio_context);
+    }
+}
+
 static void abort_prepare(BlkTransactionState *common, Error **errp)
 {
     error_setg(errp, "Transaction aborted using Abort action");
@@ -1582,6 +1655,12 @@ static const BdrvActionOps actions[] = {
         .abort = drive_backup_abort,
         .clean = drive_backup_clean,
     },
+    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
+        .instance_size = sizeof(BlockdevBackupState),
+        .prepare = blockdev_backup_prepare,
+        .abort = blockdev_backup_abort,
+        .clean = blockdev_backup_clean,
+    },
     [TRANSACTION_ACTION_KIND_ABORT] = {
         .instance_size = sizeof(BlkTransactionState),
         .prepare = abort_prepare,
@@ -2139,7 +2218,7 @@ void qmp_block_commit(const char *device,
     /* drain all i/o before commits */
     bdrv_drain_all();
 
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) {
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
         goto out;
     }
 
@@ -2172,6 +2251,10 @@ void qmp_block_commit(const char *device,
 
     assert(bdrv_get_aio_context(base_bs) == aio_context);
 
+    if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+        goto out;
+    }
+
     /* Do not allow attempts to commit an image into itself */
     if (top_bs == base_bs) {
         error_setg(errp, "cannot commit an image into itself");
@@ -2240,6 +2323,8 @@ void qmp_drive_backup(const char *device, const char *target,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
+    /* Although backup_start has this check too, we need to use bs->drv below,
+     * so do an early check redundantly. */
     if (!bdrv_is_inserted(bs)) {
         error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         goto out;
@@ -2256,6 +2341,7 @@ void qmp_drive_backup(const char *device, const char *target,
         }
     }
 
+    /* Early check to avoid creating target */
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
         goto out;
     }
@@ -2323,6 +2409,57 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
     return bdrv_named_nodes_list();
 }
 
+void qmp_blockdev_backup(const char *device, const char *target,
+                         enum MirrorSyncMode sync,
+                         bool has_speed, int64_t speed,
+                         bool has_on_source_error,
+                         BlockdevOnError on_source_error,
+                         bool has_on_target_error,
+                         BlockdevOnError on_target_error,
+                         Error **errp)
+{
+    BlockDriverState *bs;
+    BlockDriverState *target_bs;
+    Error *local_err = NULL;
+    AioContext *aio_context;
+
+    if (!has_speed) {
+        speed = 0;
+    }
+    if (!has_on_source_error) {
+        on_source_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+    if (!has_on_target_error) {
+        on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
+    target_bs = bdrv_find(target);
+    if (!target_bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, target);
+        goto out;
+    }
+
+    bdrv_ref(target_bs);
+    bdrv_set_aio_context(target_bs, aio_context);
+    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+                 block_job_cb, bs, &local_err);
+    if (local_err != NULL) {
+        bdrv_unref(target_bs);
+        error_propagate(errp, local_err);
+    }
+out:
+    aio_context_release(aio_context);
+}
+
 #define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
 
 void qmp_drive_mirror(const char *device, const char *target,
diff --git a/configure b/configure
index cae588c128..7539645521 100755
--- a/configure
+++ b/configure
@@ -1830,7 +1830,7 @@ if test "$seccomp" != "no" ; then
 	seccomp="yes"
     else
 	if test "$seccomp" = "yes"; then
-            feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.0"
+            feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.1"
 	fi
 	seccomp="no"
     fi
diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c
index 4bf2cde279..259fcb48a4 100644
--- a/coroutine-ucontext.c
+++ b/coroutine-ucontext.c
@@ -25,7 +25,6 @@
 #include <stdlib.h>
 #include <setjmp.h>
 #include <stdint.h>
-#include <pthread.h>
 #include <ucontext.h>
 #include "qemu-common.h"
 #include "block/coroutine_int.h"
@@ -48,15 +47,8 @@ typedef struct {
 /**
  * Per-thread coroutine bookkeeping
  */
-typedef struct {
-    /** Currently executing coroutine */
-    Coroutine *current;
-
-    /** The default coroutine */
-    CoroutineUContext leader;
-} CoroutineThreadState;
-
-static pthread_key_t thread_state_key;
+static __thread CoroutineUContext leader;
+static __thread Coroutine *current;
 
 /*
  * va_args to makecontext() must be type 'int', so passing
@@ -68,36 +60,6 @@ union cc_arg {
     int i[2];
 };
 
-static CoroutineThreadState *coroutine_get_thread_state(void)
-{
-    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
-    if (!s) {
-        s = g_malloc0(sizeof(*s));
-        s->current = &s->leader.base;
-        pthread_setspecific(thread_state_key, s);
-    }
-    return s;
-}
-
-static void qemu_coroutine_thread_cleanup(void *opaque)
-{
-    CoroutineThreadState *s = opaque;
-
-    g_free(s);
-}
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
-    int ret;
-
-    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
-    if (ret != 0) {
-        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
-        abort();
-    }
-}
-
 static void coroutine_trampoline(int i0, int i1)
 {
     union cc_arg arg;
@@ -193,15 +155,23 @@ void qemu_coroutine_delete(Coroutine *co_)
     g_free(co);
 }
 
-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
-                                      CoroutineAction action)
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * sigsetjmp() may be invoked while running in thread A but return
+ * in thread B, and so we might be in a different thread context
+ * each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
 {
     CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
     CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
-    CoroutineThreadState *s = coroutine_get_thread_state();
     int ret;
 
-    s->current = to_;
+    current = to_;
 
     ret = sigsetjmp(from->env, 0);
     if (ret == 0) {
@@ -212,14 +182,13 @@ CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
 
 Coroutine *qemu_coroutine_self(void)
 {
-    CoroutineThreadState *s = coroutine_get_thread_state();
-
-    return s->current;
+    if (!current) {
+        current = &leader.base;
+    }
+    return current;
 }
 
 bool qemu_in_coroutine(void)
 {
-    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
-    return s && s->current->caller;
+    return current && current->caller;
 }
diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak
index 126d88dc15..6ee2ff89ff 100644
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -1,3 +1,4 @@
+include pci.mak
 CONFIG_VIRTIO=y
 CONFIG_SCLPCONSOLE=y
 CONFIG_S390_FLIC=y
diff --git a/device_tree.c b/device_tree.c
index df9eed9cbc..4cb1cd50aa 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -324,6 +324,7 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
     uint64_t value;
     int cellnum, vnum, ncells;
     uint32_t hival;
+    int ret;
 
     propcells = g_new0(uint32_t, numvalues * 2);
 
@@ -331,18 +332,23 @@ int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
     for (vnum = 0; vnum < numvalues; vnum++) {
         ncells = values[vnum * 2];
         if (ncells != 1 && ncells != 2) {
-            return -1;
+            ret = -1;
+            goto out;
         }
         value = values[vnum * 2 + 1];
         hival = cpu_to_be32(value >> 32);
         if (ncells > 1) {
             propcells[cellnum++] = hival;
         } else if (hival != 0) {
-            return -1;
+            ret = -1;
+            goto out;
         }
         propcells[cellnum++] = cpu_to_be32(value);
     }
 
-    return qemu_fdt_setprop(fdt, node_path, property, propcells,
-                            cellnum * sizeof(uint32_t));
+    ret = qemu_fdt_setprop(fdt, node_path, property, propcells,
+                           cellnum * sizeof(uint32_t));
+out:
+    g_free(propcells);
+    return ret;
 }
diff --git a/exec.c b/exec.c
index 9c3f3047d3..081818e6e8 100644
--- a/exec.c
+++ b/exec.c
@@ -75,6 +75,11 @@ static MemoryRegion io_mem_unassigned;
 /* RAM is mmap-ed with MAP_SHARED */
 #define RAM_SHARED     (1 << 1)
 
+/* Only a portion of RAM (used_length) is actually used, and migrated.
+ * This used_length size can change across reboots.
+ */
+#define RAM_RESIZEABLE (1 << 2)
+
 #endif
 
 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
@@ -812,11 +817,11 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 
     /* The list is protected by the iothread lock here.  */
     block = ram_list.mru_block;
-    if (block && addr - block->offset < block->length) {
+    if (block && addr - block->offset < block->max_length) {
         goto found;
     }
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (addr - block->offset < block->length) {
+        if (addr - block->offset < block->max_length) {
             goto found;
         }
     }
@@ -850,7 +855,7 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
 {
     if (length == 0)
         return;
-    cpu_physical_memory_clear_dirty_range(start, length, client);
+    cpu_physical_memory_clear_dirty_range_type(start, length, client);
 
     if (tcg_enabled()) {
         tlb_reset_dirty_range_all(start, length);
@@ -1186,7 +1191,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         ram_addr_t end, next = RAM_ADDR_MAX;
 
-        end = block->offset + block->length;
+        end = block->offset + block->max_length;
 
         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
             if (next_block->offset >= end) {
@@ -1214,7 +1219,7 @@ ram_addr_t last_ram_offset(void)
     ram_addr_t last = 0;
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next)
-        last = MAX(last, block->offset + block->length);
+        last = MAX(last, block->offset + block->max_length);
 
     return last;
 }
@@ -1296,6 +1301,63 @@ static int memory_try_enable_merging(void *addr, size_t len)
     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
 }
 
+/* Change the used length of a RAM block.
+ *
+ * Only legal before guest might have detected the memory size: e.g. on
+ * incoming migration, or right after reset.
+ *
+ * As memory core doesn't know how memory is accessed, it is up to the
+ * resize callback to update device state and/or add assertions to detect
+ * misuse, if necessary.
+ *
+ * newsize is rounded up to a multiple of the target page size, the same
+ * way block sizes are rounded when the block is allocated.
+ *
+ * Returns 0 on success or if the size is unchanged; on failure sets errp
+ * and returns -EINVAL (block not resizeable, or newsize > max_length).
+ */
+int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
+{
+    RAMBlock *block = find_ram_block(base);
+
+    assert(block);
+
+    /* used_length is page aligned at allocation; keep it that way so the
+     * comparisons below don't reject a size that differs only by padding.
+     */
+    newsize = TARGET_PAGE_ALIGN(newsize);
+
+    if (block->used_length == newsize) {
+        return 0;
+    }
+
+    if (!(block->flags & RAM_RESIZEABLE)) {
+        error_setg_errno(errp, EINVAL,
+                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
+                         " in != 0x" RAM_ADDR_FMT, block->idstr,
+                         newsize, block->used_length);
+        return -EINVAL;
+    }
+
+    if (block->max_length < newsize) {
+        error_setg_errno(errp, EINVAL,
+                         "Length too large: %s: 0x" RAM_ADDR_FMT
+                         " > 0x" RAM_ADDR_FMT, block->idstr,
+                         newsize, block->max_length);
+        return -EINVAL;
+    }
+
+    /* Drop dirty state for the old extent, then mark the new one dirty. */
+    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
+    block->used_length = newsize;
+    cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
+    memory_region_set_size(block->mr, newsize);
+    if (block->resized) {
+        block->resized(block->idstr, newsize, block->host);
+    }
+    return 0;
+}
+
 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
 {
     RAMBlock *block;
@@ -1305,13 +1353,14 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
 
     /* This assumes the iothread lock is taken here too.  */
     qemu_mutex_lock_ramlist();
-    new_block->offset = find_ram_offset(new_block->length);
+    new_block->offset = find_ram_offset(new_block->max_length);
 
     if (!new_block->host) {
         if (xen_enabled()) {
-            xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
+            xen_ram_alloc(new_block->offset, new_block->max_length,
+                          new_block->mr);
         } else {
-            new_block->host = phys_mem_alloc(new_block->length,
+            new_block->host = phys_mem_alloc(new_block->max_length,
                                              &new_block->mr->align);
             if (!new_block->host) {
                 error_setg_errno(errp, errno,
@@ -1320,13 +1369,13 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
                 qemu_mutex_unlock_ramlist();
                 return -1;
             }
-            memory_try_enable_merging(new_block->host, new_block->length);
+            memory_try_enable_merging(new_block->host, new_block->max_length);
         }
     }
 
     /* Keep the list sorted from biggest to smallest block.  */
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (block->length < new_block->length) {
+        if (block->max_length < new_block->max_length) {
             break;
         }
     }
@@ -1350,14 +1399,15 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
                                    old_ram_size, new_ram_size);
        }
     }
-    cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
+    cpu_physical_memory_set_dirty_range(new_block->offset,
+                                        new_block->used_length);
 
-    qemu_ram_setup_dump(new_block->host, new_block->length);
-    qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
-    qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
+    qemu_ram_setup_dump(new_block->host, new_block->max_length);
+    qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
+    qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
 
     if (kvm_enabled()) {
-        kvm_setup_guest_memory(new_block->host, new_block->length);
+        kvm_setup_guest_memory(new_block->host, new_block->max_length);
     }
 
     return new_block->offset;
@@ -1391,7 +1441,8 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
     size = TARGET_PAGE_ALIGN(size);
     new_block = g_malloc0(sizeof(*new_block));
     new_block->mr = mr;
-    new_block->length = size;
+    new_block->used_length = size;
+    new_block->max_length = size;
     new_block->flags = share ? RAM_SHARED : 0;
     new_block->host = file_ram_alloc(new_block, size,
                                      mem_path, errp);
@@ -1410,7 +1461,12 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
 }
 #endif
 
-ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
+static
+ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
+                                   void (*resized)(const char*,
+                                                   uint64_t length,
+                                                   void *host),
+                                   void *host, bool resizeable,
                                    MemoryRegion *mr, Error **errp)
 {
     RAMBlock *new_block;
@@ -1418,14 +1474,21 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     Error *local_err = NULL;
 
     size = TARGET_PAGE_ALIGN(size);
+    max_size = TARGET_PAGE_ALIGN(max_size);
     new_block = g_malloc0(sizeof(*new_block));
     new_block->mr = mr;
-    new_block->length = size;
+    new_block->resized = resized;
+    new_block->used_length = size;
+    new_block->max_length = max_size;
+    assert(max_size >= size);
     new_block->fd = -1;
     new_block->host = host;
     if (host) {
         new_block->flags |= RAM_PREALLOC;
     }
+    if (resizeable) {
+        new_block->flags |= RAM_RESIZEABLE;
+    }
     addr = ram_block_add(new_block, &local_err);
     if (local_err) {
         g_free(new_block);
@@ -1435,9 +1498,24 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     return addr;
 }
 
+ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
+                                   MemoryRegion *mr, Error **errp)
+{
+    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
+}
+
 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
 {
-    return qemu_ram_alloc_from_ptr(size, NULL, mr, errp);
+    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
+}
+
+ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
+                                     void (*resized)(const char*,
+                                                     uint64_t length,
+                                                     void *host),
+                                     MemoryRegion *mr, Error **errp)
+{
+    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
 }
 
 void qemu_ram_free_from_ptr(ram_addr_t addr)
@@ -1475,11 +1553,11 @@ void qemu_ram_free(ram_addr_t addr)
                 xen_invalidate_map_cache_entry(block->host);
 #ifndef _WIN32
             } else if (block->fd >= 0) {
-                munmap(block->host, block->length);
+                munmap(block->host, block->max_length);
                 close(block->fd);
 #endif
             } else {
-                qemu_anon_ram_free(block->host, block->length);
+                qemu_anon_ram_free(block->host, block->max_length);
             }
             g_free(block);
             break;
@@ -1499,7 +1577,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         offset = addr - block->offset;
-        if (offset < block->length) {
+        if (offset < block->max_length) {
             vaddr = ramblock_ptr(block, offset);
             if (block->flags & RAM_PREALLOC) {
                 ;
@@ -1575,7 +1653,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
             return xen_map_cache(addr, 0, 0);
         } else if (block->host == NULL) {
             block->host =
-                xen_map_cache(block->offset, block->length, 1);
+                xen_map_cache(block->offset, block->max_length, 1);
         }
     }
     return ramblock_ptr(block, addr - block->offset);
@@ -1594,9 +1672,9 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
         RAMBlock *block;
 
         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-            if (addr - block->offset < block->length) {
-                if (addr - block->offset + *size > block->length)
-                    *size = block->length - addr + block->offset;
+            if (addr - block->offset < block->max_length) {
+                if (addr - block->offset + *size > block->max_length)
+                    *size = block->max_length - addr + block->offset;
                 return ramblock_ptr(block, addr - block->offset);
             }
         }
@@ -1619,7 +1697,7 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
     }
 
     block = ram_list.mru_block;
-    if (block && block->host && host - block->host < block->length) {
+    if (block && block->host && host - block->host < block->max_length) {
         goto found;
     }
 
@@ -1628,7 +1706,7 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         if (block->host == NULL) {
             continue;
         }
-        if (host - block->host < block->length) {
+        if (host - block->host < block->max_length) {
             goto found;
         }
     }
@@ -2882,7 +2960,7 @@ void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
     RAMBlock *block;
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        func(block->host, block->offset, block->length, opaque);
+        func(block->host, block->offset, block->used_length, opaque);
     }
 }
 #endif
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 2a28978cba..39c5d7103c 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -198,7 +198,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_BACKUP_SOURCE, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_CHANGE, s->blocker);
-    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT, s->blocker);
+    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_SOURCE, s->blocker);
+    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_TARGET, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EJECT, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, s->blocker);
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index aa1ed986d2..ce079aefdd 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -476,7 +476,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     switch (dw10) {
     case NVME_NUMBER_OF_QUEUES:
-        req->cqe.result = cpu_to_le32(n->num_queues);
+        req->cqe.result =
+            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
         break;
     default:
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -490,7 +491,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     switch (dw10) {
     case NVME_NUMBER_OF_QUEUES:
-        req->cqe.result = cpu_to_le32(n->num_queues);
+        req->cqe.result =
+            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
         break;
     default:
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -813,7 +815,7 @@ static int nvme_init(PCIDevice *pci_dev)
     NVME_CAP_SET_CSS(n->bar.cap, 1);
     NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
 
-    n->bar.vs = 0x00010001;
+    n->bar.vs = 0x00010100;
     n->bar.intmc = n->bar.intms = 0;
 
     for (i = 0; i < n->num_namespaces; i++) {
diff --git a/hw/core/loader.c b/hw/core/loader.c
index f2b34da240..fcd4705d78 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -724,12 +724,22 @@ static void rom_insert(Rom *rom)
     QTAILQ_INSERT_TAIL(&roms, rom, next);
 }
 
+static void fw_cfg_resized(const char *id, uint64_t length, void *host)
+{
+    if (fw_cfg) {
+        fw_cfg_modify_file(fw_cfg, id + strlen("/rom@"), host, length);
+    }
+}
+
 static void *rom_set_mr(Rom *rom, Object *owner, const char *name)
 {
     void *data;
 
     rom->mr = g_malloc(sizeof(*rom->mr));
-    memory_region_init_ram(rom->mr, owner, name, rom->datasize, &error_abort);
+    memory_region_init_resizeable_ram(rom->mr, owner, name,
+                                      rom->datasize, rom->romsize,
+                                      fw_cfg_resized,
+                                      &error_abort);
     memory_region_set_readonly(rom->mr, true);
     vmstate_register_ram_global(rom->mr);
 
@@ -824,7 +834,7 @@ err:
 }
 
 ram_addr_t rom_add_blob(const char *name, const void *blob, size_t len,
-                   hwaddr addr, const char *fw_file_name,
+                   size_t max_len, hwaddr addr, const char *fw_file_name,
                    FWCfgReadCallback fw_callback, void *callback_opaque)
 {
     Rom *rom;
@@ -833,7 +843,7 @@ ram_addr_t rom_add_blob(const char *name, const void *blob, size_t len,
     rom           = g_malloc0(sizeof(*rom));
     rom->name     = g_strdup(name);
     rom->addr     = addr;
-    rom->romsize  = len;
+    rom->romsize  = max_len ? max_len : len;
     rom->datasize = len;
     rom->data     = g_malloc0(rom->datasize);
     memcpy(rom->data, blob, len);
@@ -853,7 +863,7 @@ ram_addr_t rom_add_blob(const char *name, const void *blob, size_t len,
 
         fw_cfg_add_file_callback(fw_cfg, fw_file_name,
                                  fw_callback, callback_opaque,
-                                 data, rom->romsize);
+                                 data, rom->datasize);
     }
     return ret;
 }
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index a4d0c0c8bf..6a2e9c52bc 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -68,6 +68,9 @@
 
 #define ACPI_BUILD_TABLE_SIZE             0x20000
 
+/* Reserve RAM space for tables: add another order of magnitude. */
+#define ACPI_BUILD_TABLE_MAX_SIZE         0x200000
+
 /* #define DEBUG_ACPI_BUILD */
 #ifdef DEBUG_ACPI_BUILD
 #define ACPI_BUILD_DPRINTF(fmt, ...)        \
@@ -1718,6 +1721,11 @@ static void acpi_build_update(void *build_opaque, uint32_t offset)
     acpi_build(build_state->guest_info, &tables);
 
     assert(acpi_data_len(tables.table_data) == build_state->table_size);
+
+    /* Make sure RAM size is correct - in case it got changed by migration */
+    qemu_ram_resize(build_state->table_ram, build_state->table_size,
+                    &error_abort);
+
     memcpy(qemu_get_ram_ptr(build_state->table_ram), tables.table_data->data,
            build_state->table_size);
 
@@ -1734,10 +1742,10 @@ static void acpi_build_reset(void *build_opaque)
 }
 
 static ram_addr_t acpi_add_rom_blob(AcpiBuildState *build_state, GArray *blob,
-                               const char *name)
+                               const char *name, uint64_t max_size)
 {
-    return rom_add_blob(name, blob->data, acpi_data_len(blob), -1, name,
-                        acpi_build_update, build_state);
+    return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1,
+                        name, acpi_build_update, build_state);
 }
 
 static const VMStateDescription vmstate_acpi_build = {
@@ -1781,11 +1789,12 @@ void acpi_setup(PcGuestInfo *guest_info)
 
     /* Now expose it all to Guest */
     build_state->table_ram = acpi_add_rom_blob(build_state, tables.table_data,
-                                               ACPI_BUILD_TABLE_FILE);
+                                               ACPI_BUILD_TABLE_FILE,
+                                               ACPI_BUILD_TABLE_MAX_SIZE);
     assert(build_state->table_ram != RAM_ADDR_MAX);
     build_state->table_size = acpi_data_len(tables.table_data);
 
-    acpi_add_rom_blob(NULL, tables.linker, "etc/table-loader");
+    acpi_add_rom_blob(NULL, tables.linker, "etc/table-loader", 0);
 
     fw_cfg_add_file(guest_info->fw_cfg, ACPI_BUILD_TPMLOG_FILE,
                     tables.tcpalog->data, acpi_data_len(tables.tcpalog));
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index c63b7e556e..a71e6e014f 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -621,20 +621,107 @@ static void cmd_request_sense(IDEState *s, uint8_t *buf)
 
 static void cmd_inquiry(IDEState *s, uint8_t *buf)
 {
+    uint8_t page_code = buf[2];
     int max_len = buf[4];
 
-    buf[0] = 0x05; /* CD-ROM */
-    buf[1] = 0x80; /* removable */
-    buf[2] = 0x00; /* ISO */
-    buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */
-    buf[4] = 31; /* additional length */
-    buf[5] = 0; /* reserved */
-    buf[6] = 0; /* reserved */
-    buf[7] = 0; /* reserved */
-    padstr8(buf + 8, 8, "QEMU");
-    padstr8(buf + 16, 16, "QEMU DVD-ROM");
-    padstr8(buf + 32, 4, s->version);
-    ide_atapi_cmd_reply(s, 36, max_len);
+    unsigned idx = 0;
+    unsigned size_idx;
+    unsigned preamble_len;
+
+    /* If the EVPD (Enable Vital Product Data) bit is set in byte 1,
+     * we are being asked for a specific page of info indicated by byte 2. */
+    if (buf[1] & 0x01) {
+        preamble_len = 4;
+        size_idx = 3;
+
+        buf[idx++] = 0x05;      /* CD-ROM */
+        buf[idx++] = page_code; /* Page Code */
+        buf[idx++] = 0x00;      /* reserved */
+        idx++;                  /* length (set later) */
+
+        switch (page_code) {
+        case 0x00:
+            /* Supported Pages: List of supported VPD responses. */
+            buf[idx++] = 0x00; /* 0x00: Supported Pages, and: */
+            buf[idx++] = 0x83; /* 0x83: Device Identification. */
+            break;
+
+        case 0x83:
+            /* Device Identification. Each entry is optional, but the entries
+             * included here are modeled after libata's VPD responses.
+             * If the response is given, at least one entry must be present. */
+
+            /* Entry 1: Serial */
+            if (idx + 24 > max_len) {
+                /* Not enough room for even the first entry: */
+                /* 4 byte header + 20 byte string */
+                ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
+                                    ASC_DATA_PHASE_ERROR);
+                return;
+            }
+            buf[idx++] = 0x02; /* Ascii */
+            buf[idx++] = 0x00; /* Vendor Specific */
+            buf[idx++] = 0x00;
+            buf[idx++] = 20;   /* Remaining length */
+            padstr8(buf + idx, 20, s->drive_serial_str);
+            idx += 20;
+
+            /* Entry 2: Drive Model and Serial */
+            if (idx + 72 > max_len) {
+                /* 4 (header) + 8 (vendor) + 60 (model & serial) */
+                goto out;
+            }
+            buf[idx++] = 0x02; /* Ascii */
+            buf[idx++] = 0x01; /* T10 Vendor */
+            buf[idx++] = 0x00;
+            buf[idx++] = 68;
+            padstr8(buf + idx, 8, "ATA"); /* Generic T10 vendor */
+            idx += 8;
+            padstr8(buf + idx, 40, s->drive_model_str);
+            idx += 40;
+            padstr8(buf + idx, 20, s->drive_serial_str);
+            idx += 20;
+
+            /* Entry 3: WWN */
+            if (s->wwn && (idx + 12 <= max_len)) {
+                /* 4 byte header + 8 byte wwn */
+                buf[idx++] = 0x01; /* Binary */
+                buf[idx++] = 0x03; /* NAA */
+                buf[idx++] = 0x00;
+                buf[idx++] = 0x08;
+                stq_be_p(&buf[idx], s->wwn);
+                idx += 8;
+            }
+            break;
+
+        default:
+            /* SPC-3, revision 23 sec. 6.4 */
+            ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
+                                ASC_INV_FIELD_IN_CMD_PACKET);
+            return;
+        }
+    } else {
+        preamble_len = 5;
+        size_idx = 4;
+
+        buf[0] = 0x05; /* CD-ROM */
+        buf[1] = 0x80; /* removable */
+        buf[2] = 0x00; /* ISO */
+        buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */
+        /* buf[size_idx] set below. */
+        buf[5] = 0;    /* reserved */
+        buf[6] = 0;    /* reserved */
+        buf[7] = 0;    /* reserved */
+        padstr8(buf + 8, 8, "QEMU");
+        padstr8(buf + 16, 16, "QEMU DVD-ROM");
+        padstr8(buf + 32, 4, s->version);
+        idx = 36;
+    }
+
+ out:
+    buf[size_idx] = idx - preamble_len;
+    ide_atapi_cmd_reply(s, idx, max_len);
+    return;
 }
 
 static void cmd_get_configuration(IDEState *s, uint8_t *buf)
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 8a3eca40d2..c998003bf3 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -296,6 +296,7 @@ typedef struct IDEDMAOps IDEDMAOps;
 #define ASC_INCOMPATIBLE_FORMAT              0x30
 #define ASC_MEDIUM_NOT_PRESENT               0x3a
 #define ASC_SAVING_PARAMETERS_NOT_SUPPORTED  0x39
+#define ASC_DATA_PHASE_ERROR                 0x4b
 #define ASC_MEDIA_REMOVAL_PREVENTED          0x53
 
 #define CFA_NO_ERROR            0x00
diff --git a/hw/lm32/lm32_hwsetup.h b/hw/lm32/lm32_hwsetup.h
index 9fd5e697a7..838754d5d8 100644
--- a/hw/lm32/lm32_hwsetup.h
+++ b/hw/lm32/lm32_hwsetup.h
@@ -73,7 +73,8 @@ static inline void hwsetup_free(HWSetup *hw)
 static inline void hwsetup_create_rom(HWSetup *hw,
         hwaddr base)
 {
-    rom_add_blob("hwsetup", hw->data, TARGET_PAGE_SIZE, base, NULL, NULL, NULL);
+    rom_add_blob("hwsetup", hw->data, TARGET_PAGE_SIZE,
+                 TARGET_PAGE_SIZE, base, NULL, NULL, NULL);
 }
 
 static inline void hwsetup_add_u8(HWSetup *hw, uint8_t u)
diff --git a/hw/net/allwinner_emac.c b/hw/net/allwinner_emac.c
index d780ba0fcb..0407dee6da 100644
--- a/hw/net/allwinner_emac.c
+++ b/hw/net/allwinner_emac.c
@@ -218,13 +218,6 @@ static ssize_t aw_emac_receive(NetClientState *nc, const uint8_t *buf,
     return size;
 }
 
-static void aw_emac_cleanup(NetClientState *nc)
-{
-    AwEmacState *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static void aw_emac_reset(DeviceState *dev)
 {
     AwEmacState *s = AW_EMAC(dev);
@@ -433,7 +426,6 @@ static NetClientInfo net_aw_emac_info = {
     .size = sizeof(NICState),
     .can_receive = aw_emac_can_receive,
     .receive = aw_emac_receive,
-    .cleanup = aw_emac_cleanup,
     .link_status_changed = aw_emac_set_link,
 };
 
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index de26609c9d..55b629387c 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -1209,14 +1209,6 @@ static const MemoryRegionOps gem_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-static void gem_cleanup(NetClientState *nc)
-{
-    GemState *s = qemu_get_nic_opaque(nc);
-
-    DB_PRINT("\n");
-    s->nic = NULL;
-}
-
 static void gem_set_link(NetClientState *nc)
 {
     DB_PRINT("\n");
@@ -1228,7 +1220,6 @@ static NetClientInfo net_gem_info = {
     .size = sizeof(NICState),
     .can_receive = gem_can_receive,
     .receive = gem_receive,
-    .cleanup = gem_cleanup,
     .link_status_changed = gem_set_link,
 };
 
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index 7eab7ad0cc..7ce13d2b46 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -859,22 +859,11 @@ static void nic_reset(void *opaque)
     dp8393x_update_irq(s);
 }
 
-static void nic_cleanup(NetClientState *nc)
-{
-    dp8393xState *s = qemu_get_nic_opaque(nc);
-
-    timer_del(s->watchdog);
-    timer_free(s->watchdog);
-
-    g_free(s);
-}
-
 static NetClientInfo net_dp83932_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = nic_can_receive,
     .receive = nic_receive,
-    .cleanup = nic_cleanup,
 };
 
 void dp83932_init(NICInfo *nd, hwaddr base, int it_shift,
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 89c5788b1c..a207e21bcf 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1503,14 +1503,6 @@ e1000_mmio_setup(E1000State *d)
 }
 
 static void
-e1000_cleanup(NetClientState *nc)
-{
-    E1000State *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
-static void
 pci_e1000_uninit(PCIDevice *dev)
 {
     E1000State *d = E1000(dev);
@@ -1528,7 +1520,6 @@ static NetClientInfo net_e1000_info = {
     .can_receive = e1000_can_receive,
     .receive = e1000_receive,
     .receive_iov = e1000_receive_iov,
-    .cleanup = e1000_cleanup,
     .link_status_changed = e1000_set_link_status,
 };
 
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index 4877bfd4d3..7a4f9f8a08 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -1832,13 +1832,6 @@ static const VMStateDescription vmstate_eepro100 = {
     }
 };
 
-static void nic_cleanup(NetClientState *nc)
-{
-    EEPRO100State *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static void pci_nic_uninit(PCIDevice *pci_dev)
 {
     EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, pci_dev);
@@ -1853,7 +1846,6 @@ static NetClientInfo net_eepro100_info = {
     .size = sizeof(NICState),
     .can_receive = nic_can_receive,
     .receive = nic_receive,
-    .cleanup = nic_cleanup,
 };
 
 static int e100_nic_init(PCIDevice *pci_dev)
diff --git a/hw/net/etraxfs_eth.c b/hw/net/etraxfs_eth.c
index 6a3c86db48..4773dea927 100644
--- a/hw/net/etraxfs_eth.c
+++ b/hw/net/etraxfs_eth.c
@@ -581,24 +581,11 @@ static const MemoryRegionOps eth_ops = {
     }
 };
 
-static void eth_cleanup(NetClientState *nc)
-{
-    ETRAXFSEthState *eth = qemu_get_nic_opaque(nc);
-
-    /* Disconnect the client.  */
-    eth->dma_out->client.push = NULL;
-    eth->dma_out->client.opaque = NULL;
-    eth->dma_in->client.opaque = NULL;
-    eth->dma_in->client.pull = NULL;
-        g_free(eth);
-}
-
 static NetClientInfo net_etraxfs_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = eth_can_receive,
     .receive = eth_receive,
-    .cleanup = eth_cleanup,
     .link_status_changed = eth_set_link,
 };
 
diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c
index d4b4429446..2fbbc6ccc0 100644
--- a/hw/net/fsl_etsec/etsec.c
+++ b/hw/net/fsl_etsec/etsec.c
@@ -338,11 +338,6 @@ static void etsec_reset(DeviceState *d)
         MII_SR_100X_FD_CAPS     | MII_SR_100T4_CAPS;
 }
 
-static void etsec_cleanup(NetClientState *nc)
-{
-    /* qemu_log("eTSEC cleanup\n"); */
-}
-
 static int etsec_can_receive(NetClientState *nc)
 {
     eTSEC *etsec = qemu_get_nic_opaque(nc);
@@ -377,7 +372,6 @@ static NetClientInfo net_etsec_info = {
     .size = sizeof(NICState),
     .can_receive = etsec_can_receive,
     .receive = etsec_receive,
-    .cleanup = etsec_cleanup,
     .link_status_changed = etsec_set_link_status,
 };
 
diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
index e528290b41..f169c383df 100644
--- a/hw/net/lan9118.c
+++ b/hw/net/lan9118.c
@@ -1309,19 +1309,11 @@ static const MemoryRegionOps lan9118_16bit_mem_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void lan9118_cleanup(NetClientState *nc)
-{
-    lan9118_state *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static NetClientInfo net_lan9118_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = lan9118_can_receive,
     .receive = lan9118_receive,
-    .cleanup = lan9118_cleanup,
     .link_status_changed = lan9118_set_link,
 };
 
diff --git a/hw/net/lance.c b/hw/net/lance.c
index a1c49f1b97..ff7e789563 100644
--- a/hw/net/lance.c
+++ b/hw/net/lance.c
@@ -91,20 +91,12 @@ static const MemoryRegionOps lance_mem_ops = {
     },
 };
 
-static void lance_cleanup(NetClientState *nc)
-{
-    PCNetState *d = qemu_get_nic_opaque(nc);
-
-    pcnet_common_cleanup(d);
-}
-
 static NetClientInfo net_lance_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = pcnet_can_receive,
     .receive = pcnet_receive,
     .link_status_changed = pcnet_set_link_status,
-    .cleanup = lance_cleanup,
 };
 
 static const VMStateDescription vmstate_lance = {
diff --git a/hw/net/mcf_fec.c b/hw/net/mcf_fec.c
index 22cd7cf870..0255612f10 100644
--- a/hw/net/mcf_fec.c
+++ b/hw/net/mcf_fec.c
@@ -439,19 +439,11 @@ static const MemoryRegionOps mcf_fec_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void mcf_fec_cleanup(NetClientState *nc)
-{
-    mcf_fec_state *s = qemu_get_nic_opaque(nc);
-
-    g_free(s);
-}
-
 static NetClientInfo net_mcf_fec_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = mcf_fec_can_receive,
     .receive = mcf_fec_receive,
-    .cleanup = mcf_fec_cleanup,
 };
 
 void mcf_fec_init(MemoryRegion *sysmem, NICInfo *nd,
diff --git a/hw/net/milkymist-minimac2.c b/hw/net/milkymist-minimac2.c
index c6326728eb..f06afaa581 100644
--- a/hw/net/milkymist-minimac2.c
+++ b/hw/net/milkymist-minimac2.c
@@ -425,13 +425,6 @@ static int minimac2_can_rx(NetClientState *nc)
     return 0;
 }
 
-static void minimac2_cleanup(NetClientState *nc)
-{
-    MilkymistMinimac2State *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static void milkymist_minimac2_reset(DeviceState *d)
 {
     MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(d);
@@ -454,7 +447,6 @@ static NetClientInfo net_milkymist_minimac2_info = {
     .size = sizeof(NICState),
     .can_receive = minimac2_can_rx,
     .receive = minimac2_rx,
-    .cleanup = minimac2_cleanup,
 };
 
 static int milkymist_minimac2_init(SysBusDevice *sbd)
diff --git a/hw/net/mipsnet.c b/hw/net/mipsnet.c
index b26c369178..c813e0caa8 100644
--- a/hw/net/mipsnet.c
+++ b/hw/net/mipsnet.c
@@ -211,19 +211,11 @@ static const VMStateDescription vmstate_mipsnet = {
     }
 };
 
-static void mipsnet_cleanup(NetClientState *nc)
-{
-    MIPSnetState *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static NetClientInfo net_mipsnet_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = mipsnet_can_receive,
     .receive = mipsnet_receive,
-    .cleanup = mipsnet_cleanup,
 };
 
 static const MemoryRegionOps mipsnet_ioport_ops = {
diff --git a/hw/net/ne2000-isa.c b/hw/net/ne2000-isa.c
index 82e2ba17c1..17e7199f70 100644
--- a/hw/net/ne2000-isa.c
+++ b/hw/net/ne2000-isa.c
@@ -41,19 +41,11 @@ typedef struct ISANE2000State {
     NE2000State ne2000;
 } ISANE2000State;
 
-static void isa_ne2000_cleanup(NetClientState *nc)
-{
-    NE2000State *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static NetClientInfo net_ne2000_isa_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = ne2000_can_receive,
     .receive = ne2000_receive,
-    .cleanup = isa_ne2000_cleanup,
 };
 
 static const VMStateDescription vmstate_isa_ne2000 = {
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
index 3ab2d03696..4dea70178d 100644
--- a/hw/net/ne2000.c
+++ b/hw/net/ne2000.c
@@ -702,19 +702,11 @@ void ne2000_setup_io(NE2000State *s, DeviceState *dev, unsigned size)
     memory_region_init_io(&s->io, OBJECT(dev), &ne2000_ops, s, "ne2000", size);
 }
 
-static void ne2000_cleanup(NetClientState *nc)
-{
-    NE2000State *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static NetClientInfo net_ne2000_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = ne2000_can_receive,
     .receive = ne2000_receive,
-    .cleanup = ne2000_cleanup,
 };
 
 static int pci_ne2000_init(PCIDevice *pci_dev)
diff --git a/hw/net/opencores_eth.c b/hw/net/opencores_eth.c
index 4a443049dd..3642046efa 100644
--- a/hw/net/opencores_eth.c
+++ b/hw/net/opencores_eth.c
@@ -472,16 +472,11 @@ static ssize_t open_eth_receive(NetClientState *nc,
     return size;
 }
 
-static void open_eth_cleanup(NetClientState *nc)
-{
-}
-
 static NetClientInfo net_open_eth_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = open_eth_can_receive,
     .receive = open_eth_receive,
-    .cleanup = open_eth_cleanup,
     .link_status_changed = open_eth_set_link_status,
 };
 
diff --git a/hw/net/pcnet-pci.c b/hw/net/pcnet-pci.c
index fb5f5d6237..b86bc0d79b 100644
--- a/hw/net/pcnet-pci.c
+++ b/hw/net/pcnet-pci.c
@@ -271,13 +271,6 @@ static void pci_physical_memory_read(void *dma_opaque, hwaddr addr,
     pci_dma_read(dma_opaque, addr, buf, len);
 }
 
-static void pci_pcnet_cleanup(NetClientState *nc)
-{
-    PCNetState *d = qemu_get_nic_opaque(nc);
-
-    pcnet_common_cleanup(d);
-}
-
 static void pci_pcnet_uninit(PCIDevice *dev)
 {
     PCIPCNetState *d = PCI_PCNET(dev);
@@ -294,7 +287,6 @@ static NetClientInfo net_pci_pcnet_info = {
     .can_receive = pcnet_can_receive,
     .receive = pcnet_receive,
     .link_status_changed = pcnet_set_link_status,
-    .cleanup = pci_pcnet_cleanup,
 };
 
 static int pci_pcnet_init(PCIDevice *pci_dev)
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
index f409b9293d..8a1c8f17b0 100644
--- a/hw/net/pcnet.c
+++ b/hw/net/pcnet.c
@@ -1724,11 +1724,6 @@ const VMStateDescription vmstate_pcnet = {
     }
 };
 
-void pcnet_common_cleanup(PCNetState *d)
-{
-    d->nic = NULL;
-}
-
 int pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info)
 {
     int i;
diff --git a/hw/net/pcnet.h b/hw/net/pcnet.h
index f8e8a6f6ba..3f12fe3c13 100644
--- a/hw/net/pcnet.h
+++ b/hw/net/pcnet.h
@@ -63,7 +63,6 @@ uint32_t pcnet_bcr_readw(PCNetState *s, uint32_t rap);
 int pcnet_can_receive(NetClientState *nc);
 ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_);
 void pcnet_set_link_status(NetClientState *nc);
-void pcnet_common_cleanup(PCNetState *d);
 int pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info);
 extern const VMStateDescription vmstate_pcnet;
 #endif
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 5f0197c9df..6fa9e0aa15 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -3455,13 +3455,6 @@ static void rtl8139_timer(void *opaque)
     rtl8139_set_next_tctr_time(s, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
 }
 
-static void rtl8139_cleanup(NetClientState *nc)
-{
-    RTL8139State *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static void pci_rtl8139_uninit(PCIDevice *dev)
 {
     RTL8139State *s = RTL8139(dev);
@@ -3494,7 +3487,6 @@ static NetClientInfo net_rtl8139_info = {
     .size = sizeof(NICState),
     .can_receive = rtl8139_can_receive,
     .receive = rtl8139_receive,
-    .cleanup = rtl8139_cleanup,
     .link_status_changed = rtl8139_set_link_status,
 };
 
diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c
index d1dca8f4e2..74e06e6c77 100644
--- a/hw/net/smc91c111.c
+++ b/hw/net/smc91c111.c
@@ -736,19 +736,11 @@ static const MemoryRegionOps smc91c111_mem_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void smc91c111_cleanup(NetClientState *nc)
-{
-    smc91c111_state *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static NetClientInfo net_smc91c111_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = smc91c111_can_receive,
     .receive = smc91c111_receive,
-    .cleanup = smc91c111_cleanup,
 };
 
 static int smc91c111_init1(SysBusDevice *sbd)
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 2c8b038227..c255d925a7 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -187,19 +187,11 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
     return size;
 }
 
-static void spapr_vlan_cleanup(NetClientState *nc)
-{
-    VIOsPAPRVLANDevice *dev = qemu_get_nic_opaque(nc);
-
-    dev->nic = NULL;
-}
-
 static NetClientInfo net_spapr_vlan_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = spapr_vlan_can_receive,
     .receive = spapr_vlan_receive,
-    .cleanup = spapr_vlan_cleanup,
 };
 
 static void spapr_vlan_reset(VIOsPAPRDevice *sdev)
diff --git a/hw/net/stellaris_enet.c b/hw/net/stellaris_enet.c
index c07e5137c2..278a6545c3 100644
--- a/hw/net/stellaris_enet.c
+++ b/hw/net/stellaris_enet.c
@@ -451,19 +451,11 @@ static void stellaris_enet_reset(stellaris_enet_state *s)
     s->tx_fifo_len = 0;
 }
 
-static void stellaris_enet_cleanup(NetClientState *nc)
-{
-    stellaris_enet_state *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static NetClientInfo net_stellaris_enet_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = stellaris_enet_can_receive,
     .receive = stellaris_enet_receive,
-    .cleanup = stellaris_enet_cleanup,
 };
 
 static int stellaris_enet_init(SysBusDevice *sbd)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index e574bd4322..45da34ad61 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1522,19 +1522,11 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
     return 0;
 }
 
-static void virtio_net_cleanup(NetClientState *nc)
-{
-    VirtIONet *n = qemu_get_nic_opaque(nc);
-
-    n->nic = NULL;
-}
-
 static NetClientInfo net_virtio_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = virtio_net_can_receive,
     .receive = virtio_net_receive,
-    .cleanup = virtio_net_cleanup,
     .link_status_changed = virtio_net_set_link_status,
     .query_rx_filter = virtio_net_query_rxfilter,
 };
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 8eea58989b..a83d2a11ff 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1912,12 +1912,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
     return bytes_indicated;
 }
 
-static void vmxnet3_cleanup(NetClientState *nc)
-{
-    VMXNET3State *s = qemu_get_nic_opaque(nc);
-    s->nic = NULL;
-}
-
 static void vmxnet3_set_link_status(NetClientState *nc)
 {
     VMXNET3State *s = qemu_get_nic_opaque(nc);
@@ -1937,7 +1931,6 @@ static NetClientInfo net_vmxnet3_info = {
         .size = sizeof(NICState),
         .can_receive = vmxnet3_can_receive,
         .receive = vmxnet3_receive,
-        .cleanup = vmxnet3_cleanup,
         .link_status_changed = vmxnet3_set_link_status,
 };
 
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 63918ae1a0..19ecfc4ccf 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -370,11 +370,16 @@ static int net_connect(struct XenDevice *xendev)
                                           netdev->xendev.dom,
                                           netdev->tx_ring_ref,
                                           PROT_READ | PROT_WRITE);
+    if (!netdev->txs) {
+        return -1;
+    }
     netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev,
                                           netdev->xendev.dom,
                                           netdev->rx_ring_ref,
                                           PROT_READ | PROT_WRITE);
-    if (!netdev->txs || !netdev->rxs) {
+    if (!netdev->rxs) {
+        xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1);
+        netdev->txs = NULL;
         return -1;
     }
     BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
@@ -405,10 +410,6 @@ static void net_disconnect(struct XenDevice *xendev)
         xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1);
         netdev->rxs = NULL;
     }
-    if (netdev->nic) {
-        qemu_del_nic(netdev->nic);
-        netdev->nic = NULL;
-    }
 }
 
 static void net_event(struct XenDevice *xendev)
@@ -422,7 +423,12 @@ static int net_free(struct XenDevice *xendev)
 {
     struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
 
+    if (netdev->nic) {
+        qemu_del_nic(netdev->nic);
+        netdev->nic = NULL;
+    }
     g_free(netdev->mac);
+    netdev->mac = NULL;
     return 0;
 }
 
diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c
index aeffcb58b7..b068f3a0d6 100644
--- a/hw/net/xgmac.c
+++ b/hw/net/xgmac.c
@@ -368,19 +368,11 @@ out:
     return ret;
 }
 
-static void eth_cleanup(NetClientState *nc)
-{
-    XgmacState *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static NetClientInfo net_xgmac_enet_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = eth_can_rx,
     .receive = eth_rx,
-    .cleanup = eth_cleanup,
 };
 
 static int xgmac_enet_init(SysBusDevice *sbd)
diff --git a/hw/net/xilinx_axienet.c b/hw/net/xilinx_axienet.c
index cd952d2514..21efedfc3e 100644
--- a/hw/net/xilinx_axienet.c
+++ b/hw/net/xilinx_axienet.c
@@ -857,14 +857,6 @@ static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size)
     return size;
 }
 
-static void eth_cleanup(NetClientState *nc)
-{
-    /* FIXME.  */
-    XilinxAXIEnet *s = qemu_get_nic_opaque(nc);
-    g_free(s->rxmem);
-    g_free(s);
-}
-
 static size_t
 xilinx_axienet_control_stream_push(StreamSlave *obj, uint8_t *buf, size_t len)
 {
@@ -936,7 +928,6 @@ static NetClientInfo net_xilinx_enet_info = {
     .size = sizeof(NICState),
     .can_receive = eth_can_rx,
     .receive = eth_rx,
-    .cleanup = eth_cleanup,
 };
 
 static void xilinx_enet_realize(DeviceState *dev, Error **errp)
diff --git a/hw/net/xilinx_ethlite.c b/hw/net/xilinx_ethlite.c
index 1b177b3dae..9536f64584 100644
--- a/hw/net/xilinx_ethlite.c
+++ b/hw/net/xilinx_ethlite.c
@@ -212,19 +212,11 @@ static void xilinx_ethlite_reset(DeviceState *dev)
     s->rxbuf = 0;
 }
 
-static void eth_cleanup(NetClientState *nc)
-{
-    struct xlx_ethlite *s = qemu_get_nic_opaque(nc);
-
-    s->nic = NULL;
-}
-
 static NetClientInfo net_xilinx_ethlite_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
     .can_receive = eth_can_rx,
     .receive = eth_rx,
-    .cleanup = eth_cleanup,
 };
 
 static void xilinx_ethlite_realize(DeviceState *dev, Error **errp)
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index 1b4c0f0023..574f8b2efb 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -62,11 +62,19 @@
 #define PPCE500_PCI_NR_POBS     5
 #define PPCE500_PCI_NR_PIBS     3
 
+#define PIWAR_EN                0x80000000      /* Enable */
+#define PIWAR_PF                0x20000000      /* prefetch */
+#define PIWAR_TGI_LOCAL         0x00f00000      /* target - local memory */
+#define PIWAR_READ_SNOOP        0x00050000
+#define PIWAR_WRITE_SNOOP       0x00005000
+#define PIWAR_SZ_MASK           0x0000003f
+
 struct  pci_outbound {
     uint32_t potar;
     uint32_t potear;
     uint32_t powbar;
     uint32_t powar;
+    MemoryRegion mem;
 };
 
 struct pci_inbound {
@@ -74,6 +82,7 @@ struct pci_inbound {
     uint32_t piwbar;
     uint32_t piwbear;
     uint32_t piwar;
+    MemoryRegion mem;
 };
 
 #define TYPE_PPC_E500_PCI_HOST_BRIDGE "e500-pcihost"
@@ -91,10 +100,13 @@ struct PPCE500PCIState {
     uint32_t irq_num[PCI_NUM_PINS];
     uint32_t first_slot;
     uint32_t first_pin_irq;
+    AddressSpace bm_as;
+    MemoryRegion bm;
     /* mmio maps */
     MemoryRegion container;
     MemoryRegion iomem;
     MemoryRegion pio;
+    MemoryRegion busmem;
 };
 
 #define TYPE_PPC_E500_PCI_BRIDGE "e500-host-bridge"
@@ -181,6 +193,71 @@ static uint64_t pci_reg_read4(void *opaque, hwaddr addr,
     return value;
 }
 
+/* DMA mapping: (re)create inbound window idx as a system-memory alias in bm */
+static void e500_update_piw(PPCE500PCIState *pci, int idx)
+{
+    uint64_t tar = ((uint64_t)pci->pib[idx].pitar) << 12;
+    uint64_t wbar = ((uint64_t)pci->pib[idx].piwbar) << 12;
+    uint64_t war = pci->pib[idx].piwar;
+    uint64_t size = 2ULL << (war & PIWAR_SZ_MASK);
+    MemoryRegion *address_space_mem = get_system_memory();
+    MemoryRegion *mem = &pci->pib[idx].mem;
+    MemoryRegion *bm = &pci->bm;
+    char *name;
+
+    if (memory_region_is_mapped(mem)) {
+        /* Before we modify anything, unmap and destroy the region */
+        memory_region_del_subregion(bm, mem);
+        object_unparent(OBJECT(mem));
+    }
+
+    if (!(war & PIWAR_EN)) {
+        /* Window disabled: leave it unmapped */
+        return;
+    }
+
+    name = g_strdup_printf("PCI Inbound Window %d", idx);
+    memory_region_init_alias(mem, OBJECT(pci), name, address_space_mem, tar,
+                             size);
+    memory_region_add_subregion_overlap(bm, wbar, mem, -1);
+    g_free(name);
+
+    pci_debug("%s: Added window of size=%#" PRIx64 " from PCI=%#" PRIx64
+              " to CPU=%#" PRIx64 "\n", __func__, size, wbar, tar);
+}
+
+/* BAR mapping: (re)create outbound window idx as a busmem alias in sysmem */
+static void e500_update_pow(PPCE500PCIState *pci, int idx)
+{
+    uint64_t tar = ((uint64_t)pci->pob[idx].potar) << 12;
+    uint64_t wbar = ((uint64_t)pci->pob[idx].powbar) << 12;
+    uint64_t war = pci->pob[idx].powar;
+    uint64_t size = 2ULL << (war & PIWAR_SZ_MASK);
+    MemoryRegion *mem = &pci->pob[idx].mem;
+    MemoryRegion *address_space_mem = get_system_memory();
+    char *name;
+
+    if (memory_region_is_mapped(mem)) {
+        /* Before we modify anything, unmap and destroy the region */
+        memory_region_del_subregion(address_space_mem, mem);
+        object_unparent(OBJECT(mem));
+    }
+
+    if (!(war & PIWAR_EN)) {
+        /* Window disabled: leave it unmapped */
+        return;
+    }
+
+    name = g_strdup_printf("PCI Outbound Window %d", idx);
+    memory_region_init_alias(mem, OBJECT(pci), name, &pci->busmem, tar,
+                             size);
+    memory_region_add_subregion(address_space_mem, wbar, mem);
+    g_free(name);
+
+    pci_debug("%s: Added window of size=%#" PRIx64 " from CPU=%#" PRIx64
+              " to PCI=%#" PRIx64 "\n", __func__, size, wbar, tar);
+}
+
 static void pci_reg_write4(void *opaque, hwaddr addr,
                            uint64_t value, unsigned size)
 {
@@ -199,18 +276,22 @@ static void pci_reg_write4(void *opaque, hwaddr addr,
     case PPCE500_PCI_OW3:
     case PPCE500_PCI_OW4:
         idx = (addr >> 5) & 0x7;
-        switch (addr & 0xC) {
+        switch (addr & 0x1F) {
         case PCI_POTAR:
             pci->pob[idx].potar = value;
+            e500_update_pow(pci, idx);
             break;
         case PCI_POTEAR:
             pci->pob[idx].potear = value;
+            e500_update_pow(pci, idx);
             break;
         case PCI_POWBAR:
             pci->pob[idx].powbar = value;
+            e500_update_pow(pci, idx);
             break;
         case PCI_POWAR:
             pci->pob[idx].powar = value;
+            e500_update_pow(pci, idx);
             break;
         default:
             break;
@@ -221,18 +302,22 @@ static void pci_reg_write4(void *opaque, hwaddr addr,
     case PPCE500_PCI_IW2:
     case PPCE500_PCI_IW1:
         idx = ((addr >> 5) & 0x3) - 1;
-        switch (addr & 0xC) {
+        switch (addr & 0x1F) {
         case PCI_PITAR:
             pci->pib[idx].pitar = value;
+            e500_update_piw(pci, idx);
             break;
         case PCI_PIWBAR:
             pci->pib[idx].piwbar = value;
+            e500_update_piw(pci, idx);
             break;
         case PCI_PIWBEAR:
             pci->pib[idx].piwbear = value;
+            e500_update_piw(pci, idx);
             break;
         case PCI_PIWAR:
             pci->pib[idx].piwar = value;
+            e500_update_piw(pci, idx);
             break;
         default:
             break;
@@ -349,13 +434,20 @@ static int e500_pcihost_bridge_initfn(PCIDevice *d)
     return 0;
 }
 
+static AddressSpace *e500_pcihost_set_iommu(PCIBus *bus, void *opaque,
+                                            int devfn)
+{
+    PPCE500PCIState *s = opaque;
+
+    return &s->bm_as;
+}
+
 static int e500_pcihost_initfn(SysBusDevice *dev)
 {
     PCIHostState *h;
     PPCE500PCIState *s;
     PCIBus *b;
     int i;
-    MemoryRegion *address_space_mem = get_system_memory();
 
     h = PCI_HOST_BRIDGE(dev);
     s = PPC_E500_PCI_HOST_BRIDGE(dev);
@@ -369,12 +461,22 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
     }
 
     memory_region_init(&s->pio, OBJECT(s), "pci-pio", PCIE500_PCI_IOLEN);
+    memory_region_init(&s->busmem, OBJECT(s), "pci bus memory", UINT64_MAX);
+
+    /* PIO lives at the bottom of our bus space */
+    memory_region_add_subregion_overlap(&s->busmem, 0, &s->pio, -2);
 
     b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq,
-                         mpc85xx_pci_map_irq, s, address_space_mem,
-                         &s->pio, PCI_DEVFN(s->first_slot, 0), 4, TYPE_PCI_BUS);
+                         mpc85xx_pci_map_irq, s, &s->busmem, &s->pio,
+                         PCI_DEVFN(s->first_slot, 0), 4, TYPE_PCI_BUS);
     h->bus = b;
 
+    /* Set up PCI view of memory */
+    memory_region_init(&s->bm, OBJECT(s), "bm-e500", UINT64_MAX);
+    memory_region_add_subregion(&s->bm, 0x0, &s->busmem);
+    address_space_init(&s->bm_as, &s->bm, "pci-bm");
+    pci_setup_iommu(b, e500_pcihost_set_iommu, s);
+
     pci_create_simple(b, 0, "e500-host-bridge");
 
     memory_region_init(&s->container, OBJECT(h), "pci-container", PCIE500_ALL_SIZE);
@@ -388,7 +490,6 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
     memory_region_add_subregion(&s->container, PCIE500_CFGDATA, &h->data_mem);
     memory_region_add_subregion(&s->container, PCIE500_REG_BASE, &s->iomem);
     sysbus_init_mmio(dev, &s->container);
-    sysbus_init_mmio(dev, &s->pio);
     pci_bus_set_route_irq_fn(b, e500_route_intx_pin_to_irq);
 
     return 0;
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 2832fc0da4..7e17d180c6 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -51,21 +51,16 @@
 #define RAM_SIZES_ALIGN            (64UL << 20)
 
 /* TODO: parameterize */
-#define MPC8544_CCSRBAR_BASE       0xE0000000ULL
 #define MPC8544_CCSRBAR_SIZE       0x00100000ULL
 #define MPC8544_MPIC_REGS_OFFSET   0x40000ULL
 #define MPC8544_MSI_REGS_OFFSET   0x41600ULL
 #define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL
 #define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL
 #define MPC8544_PCI_REGS_OFFSET    0x8000ULL
-#define MPC8544_PCI_REGS_BASE      (MPC8544_CCSRBAR_BASE + \
-                                    MPC8544_PCI_REGS_OFFSET)
 #define MPC8544_PCI_REGS_SIZE      0x1000ULL
-#define MPC8544_PCI_IO             0xE1000000ULL
 #define MPC8544_UTIL_OFFSET        0xe0000ULL
-#define MPC8544_SPIN_BASE          0xEF000000ULL
 #define MPC8XXX_GPIO_OFFSET        0x000FF000ULL
-#define MPC8XXX_GPIO_IRQ           43
+#define MPC8XXX_GPIO_IRQ           47
 
 struct boot_info
 {
@@ -293,12 +288,12 @@ static int ppce500_load_device_tree(MachineState *machine,
     int len;
     uint32_t pci_ranges[14] =
         {
-            0x2000000, 0x0, 0xc0000000,
-            0x0, 0xc0000000,
+            0x2000000, 0x0, params->pci_mmio_bus_base,
+            params->pci_mmio_base >> 32, params->pci_mmio_base,
             0x0, 0x20000000,
 
             0x1000000, 0x0, 0x0,
-            0x0, 0xe1000000,
+            params->pci_pio_base >> 32, params->pci_pio_base,
             0x0, 0x10000,
         };
     QemuOpts *machine_opts = qemu_get_machine_opts();
@@ -389,7 +384,7 @@ static int ppce500_load_device_tree(MachineState *machine,
         CPUState *cpu;
         PowerPCCPU *pcpu;
         char cpu_name[128];
-        uint64_t cpu_release_addr = MPC8544_SPIN_BASE + (i * 0x20);
+        uint64_t cpu_release_addr = params->spin_base + (i * 0x20);
 
         cpu = qemu_get_cpu(i);
         if (cpu == NULL) {
@@ -426,7 +421,7 @@ static int ppce500_load_device_tree(MachineState *machine,
 
     qemu_fdt_add_subnode(fdt, "/aliases");
     /* XXX These should go into their respective devices' code */
-    snprintf(soc, sizeof(soc), "/soc@%llx", MPC8544_CCSRBAR_BASE);
+    snprintf(soc, sizeof(soc), "/soc@%"PRIx64, params->ccsrbar_base);
     qemu_fdt_add_subnode(fdt, soc);
     qemu_fdt_setprop_string(fdt, soc, "device_type", "soc");
     qemu_fdt_setprop(fdt, soc, "compatible", compatible_sb,
@@ -434,7 +429,7 @@ static int ppce500_load_device_tree(MachineState *machine,
     qemu_fdt_setprop_cell(fdt, soc, "#address-cells", 1);
     qemu_fdt_setprop_cell(fdt, soc, "#size-cells", 1);
     qemu_fdt_setprop_cells(fdt, soc, "ranges", 0x0,
-                           MPC8544_CCSRBAR_BASE >> 32, MPC8544_CCSRBAR_BASE,
+                           params->ccsrbar_base >> 32, params->ccsrbar_base,
                            MPC8544_CCSRBAR_SIZE);
     /* XXX should contain a reasonable value */
     qemu_fdt_setprop_cell(fdt, soc, "bus-frequency", 0);
@@ -493,7 +488,8 @@ static int ppce500_load_device_tree(MachineState *machine,
     qemu_fdt_setprop_cell(fdt, msi, "phandle", msi_ph);
     qemu_fdt_setprop_cell(fdt, msi, "linux,phandle", msi_ph);
 
-    snprintf(pci, sizeof(pci), "/pci@%llx", MPC8544_PCI_REGS_BASE);
+    snprintf(pci, sizeof(pci), "/pci@%llx",
+             params->ccsrbar_base + MPC8544_PCI_REGS_OFFSET);
     qemu_fdt_add_subnode(fdt, pci);
     qemu_fdt_setprop_cell(fdt, pci, "cell-index", 0);
     qemu_fdt_setprop_string(fdt, pci, "compatible", "fsl,mpc8540-pci");
@@ -512,8 +508,10 @@ static int ppce500_load_device_tree(MachineState *machine,
     }
     qemu_fdt_setprop_cell(fdt, pci, "fsl,msi", msi_ph);
     qemu_fdt_setprop(fdt, pci, "ranges", pci_ranges, sizeof(pci_ranges));
-    qemu_fdt_setprop_cells(fdt, pci, "reg", MPC8544_PCI_REGS_BASE >> 32,
-                           MPC8544_PCI_REGS_BASE, 0, 0x1000);
+    qemu_fdt_setprop_cells(fdt, pci, "reg",
+                           (params->ccsrbar_base + MPC8544_PCI_REGS_OFFSET) >> 32,
+                           (params->ccsrbar_base + MPC8544_PCI_REGS_OFFSET),
+                           0, 0x1000);
     qemu_fdt_setprop_cell(fdt, pci, "clock-frequency", 66666666);
     qemu_fdt_setprop_cell(fdt, pci, "#interrupt-cells", 1);
     qemu_fdt_setprop_cell(fdt, pci, "#size-cells", 2);
@@ -841,7 +839,7 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
         irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
         irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
         env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i;
-        env->mpic_iack = MPC8544_CCSRBAR_BASE +
+        env->mpic_iack = params->ccsrbar_base +
                          MPC8544_MPIC_REGS_OFFSET + 0xa0;
 
         ppc_booke_timers_init(cpu, 400000000, PPC_TIMER_E500);
@@ -875,7 +873,7 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
     qdev_init_nofail(dev);
     ccsr = CCSR(dev);
     ccsr_addr_space = &ccsr->ccsr_space;
-    memory_region_add_subregion(address_space_mem, MPC8544_CCSRBAR_BASE,
+    memory_region_add_subregion(address_space_mem, params->ccsrbar_base,
                                 ccsr_addr_space);
 
     mpic = ppce500_init_mpic(params, ccsr_addr_space, irqs);
@@ -917,8 +915,6 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
     if (!pci_bus)
         printf("couldn't create PCI controller!\n");
 
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, MPC8544_PCI_IO);
-
     if (pci_bus) {
         /* Register network interfaces. */
         for (i = 0; i < nb_nics; i++) {
@@ -927,7 +923,7 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
     }
 
     /* Register spinning region */
-    sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL);
+    sysbus_create_simple("e500-spin", params->spin_base, NULL);
 
     if (cur_base < (32 * 1024 * 1024)) {
         /* u-boot occupies memory up to 32MB, so load blobs above */
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
index 9f61ab2b1c..ef224ea5e6 100644
--- a/hw/ppc/e500.h
+++ b/hw/ppc/e500.h
@@ -17,6 +17,11 @@ typedef struct PPCE500Params {
     hwaddr platform_bus_size;
     int platform_bus_first_irq;
     int platform_bus_num_irqs;
+    hwaddr ccsrbar_base;
+    hwaddr pci_pio_base;
+    hwaddr pci_mmio_base;
+    hwaddr pci_mmio_bus_base;
+    hwaddr spin_base;
 } PPCE500Params;
 
 void ppce500_init(MachineState *machine, PPCE500Params *params);
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index d50ae000ee..14b14eaa7d 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -41,6 +41,11 @@ static void e500plat_init(MachineState *machine)
         .platform_bus_size = (128ULL * 1024 * 1024),
         .platform_bus_first_irq = 5,
         .platform_bus_num_irqs = 10,
+        .ccsrbar_base = 0xFE0000000ULL,
+        .pci_pio_base = 0xFE1000000ULL,
+        .pci_mmio_base = 0xC00000000ULL,
+        .pci_mmio_bus_base = 0xE0000000ULL,
+        .spin_base = 0xFEF000000ULL,
     };
 
     /* Older KVM versions don't support EPR which breaks guests when we announce
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
index b99f74af75..3a3b141e43 100644
--- a/hw/ppc/mpc8544ds.c
+++ b/hw/ppc/mpc8544ds.c
@@ -15,6 +15,7 @@
 #include "hw/boards.h"
 #include "sysemu/device_tree.h"
 #include "hw/ppc/openpic.h"
+#include "qemu/error-report.h"
 
 static void mpc8544ds_fixup_devtree(PPCE500Params *params, void *fdt)
 {
@@ -33,8 +34,18 @@ static void mpc8544ds_init(MachineState *machine)
         .pci_nr_slots = 2,
         .fixup_devtree = mpc8544ds_fixup_devtree,
         .mpic_version = OPENPIC_MODEL_FSL_MPIC_20,
+        .ccsrbar_base = 0xE0000000ULL,
+        .pci_mmio_base = 0xC0000000ULL,
+        .pci_mmio_bus_base = 0xC0000000ULL,
+        .pci_pio_base = 0xE1000000ULL,
+        .spin_base = 0xEF000000ULL,
     };
 
+    if (machine->ram_size > 0xc0000000) {
+        error_report("The MPC8544DS board only supports up to 3GB of RAM");
+        exit(1);
+    }
+
     ppce500_init(machine, &params);
 }
 
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index bec82cd7a9..5ce565d5ec 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -844,7 +844,7 @@ static void timebase_pre_save(void *opaque)
         return;
     }
 
-    tb->time_of_the_day_ns = get_clock_realtime();
+    tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST);
     /*
      * tb_offset is only expected to be changed by migration so
      * there is no need to update it from KVM here
@@ -873,7 +873,7 @@ static int timebase_post_load(void *opaque, int version_id)
      * We try to adjust timebase by downtime if host clocks are not
      * too much out of sync (1 second for now).
      */
-    host_ns = get_clock_realtime();
+    host_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST);
     ns_diff = MAX(0, host_ns - tb_remote->time_of_the_day_ns);
     migration_duration_ns = MIN(NSEC_PER_SEC, ns_diff);
     migration_duration_tb = muldiv64(migration_duration_ns, freq, NSEC_PER_SEC);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 53c4116ed3..b560459e83 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -819,9 +819,16 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu)
     }
 }
 
+#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
+#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
+#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
+#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
+#define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
+
 static void spapr_reset_htab(sPAPREnvironment *spapr)
 {
     long shift;
+    int index;
 
     /* allocate hash page table.  For now we always make this 16mb,
      * later we should probably make it scale to the size of guest
@@ -833,6 +840,11 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
         /* Kernel handles htab, we don't need to allocate one */
         spapr->htab_shift = shift;
         kvmppc_kern_htab = true;
+
+        /* Tell readers to update their file descriptor */
+        if (spapr->htab_fd >= 0) {
+            spapr->htab_fd_stale = true;
+        }
     } else {
         if (!spapr->htab) {
             /* Allocate an htab if we don't yet have one */
@@ -841,6 +853,10 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
 
         /* And clear it */
         memset(spapr->htab, 0, HTAB_SIZE(spapr));
+
+        for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
+            DIRTY_HPTE(HPTE(spapr->htab, index));
+        }
     }
 
     /* Update the RMA size if necessary */
@@ -867,6 +883,28 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
     return 0;
 }
 
+/*
+ * A guest reset makes an in-use spapr->htab_fd stale: close it and reopen it
+ * so the whole HTAB is read again. Returns 0 on success, -1 if reopen fails.
+ */
+static int spapr_check_htab_fd(sPAPREnvironment *spapr)
+{
+    int rc = 0;
+
+    if (spapr->htab_fd_stale) {
+        close(spapr->htab_fd);
+        spapr->htab_fd = kvmppc_get_htab_fd(false);
+        if (spapr->htab_fd < 0) {
+            error_report("Unable to open fd for reading hash table from KVM: "
+                    "%s", strerror(errno));
+            rc = -1;
+        }
+        spapr->htab_fd_stale = false;
+    }
+
+    return rc;
+}
+
 static void ppc_spapr_reset(void)
 {
     PowerPCCPU *first_ppc_cpu;
@@ -986,11 +1024,6 @@ static const VMStateDescription vmstate_spapr = {
     },
 };
 
-#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
-#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
-#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
-#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
-
 static int htab_save_setup(QEMUFile *f, void *opaque)
 {
     sPAPREnvironment *spapr = opaque;
@@ -1005,6 +1038,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque)
         assert(kvm_enabled());
 
         spapr->htab_fd = kvmppc_get_htab_fd(false);
+        spapr->htab_fd_stale = false;
         if (spapr->htab_fd < 0) {
             fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
                     strerror(errno));
@@ -1037,7 +1071,7 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
 
         /* Consume valid HPTEs */
         chunkstart = index;
-        while ((index < htabslots)
+        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
                && HPTE_VALID(HPTE(spapr->htab, index))) {
             index++;
             CLEAN_HPTE(HPTE(spapr->htab, index));
@@ -1089,7 +1123,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
 
         chunkstart = index;
         /* Consume valid dirty HPTEs */
-        while ((index < htabslots)
+        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
                && HPTE_DIRTY(HPTE(spapr->htab, index))
                && HPTE_VALID(HPTE(spapr->htab, index))) {
             CLEAN_HPTE(HPTE(spapr->htab, index));
@@ -1099,7 +1133,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
 
         invalidstart = index;
         /* Consume invalid dirty HPTEs */
-        while ((index < htabslots)
+        while ((index < htabslots) && (index - invalidstart < USHRT_MAX)
                && HPTE_DIRTY(HPTE(spapr->htab, index))
                && !HPTE_VALID(HPTE(spapr->htab, index))) {
             CLEAN_HPTE(HPTE(spapr->htab, index));
@@ -1157,6 +1191,11 @@ static int htab_save_iterate(QEMUFile *f, void *opaque)
     if (!spapr->htab) {
         assert(kvm_enabled());
 
+        rc = spapr_check_htab_fd(spapr);
+        if (rc < 0) {
+            return rc;
+        }
+
         rc = kvmppc_save_htab(f, spapr->htab_fd,
                               MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
         if (rc < 0) {
@@ -1188,6 +1227,11 @@ static int htab_save_complete(QEMUFile *f, void *opaque)
 
         assert(kvm_enabled());
 
+        rc = spapr_check_htab_fd(spapr);
+        if (rc < 0) {
+            return rc;
+        }
+
         rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
         if (rc < 0) {
             return rc;
@@ -1438,7 +1482,7 @@ static void ppc_spapr_init(MachineState *machine)
     }
     if (spapr->rtas_size > RTAS_MAX_SIZE) {
         hw_error("RTAS too big ! 0x%zx bytes (max is 0x%x)\n",
-                 spapr->rtas_size, RTAS_MAX_SIZE);
+                 (size_t)spapr->rtas_size, RTAS_MAX_SIZE);
         exit(1);
     }
     g_free(filename);
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 6c91d8edd8..da474740c0 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -173,9 +173,9 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
     return tcet;
 }
 
-static void spapr_tce_table_finalize(Object *obj)
+static void spapr_tce_table_unrealize(DeviceState *dev, Error **errp)
 {
-    sPAPRTCETable *tcet = SPAPR_TCE_TABLE(obj);
+    sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
 
     QLIST_REMOVE(tcet, list);
 
@@ -420,6 +420,7 @@ static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     dc->init = spapr_tce_table_realize;
     dc->reset = spapr_tce_reset;
+    dc->unrealize = spapr_tce_table_unrealize;
 
     QLIST_INIT(&spapr_tce_tables);
 
@@ -435,7 +436,6 @@ static TypeInfo spapr_tce_table_info = {
     .parent = TYPE_DEVICE,
     .instance_size = sizeof(sPAPRTCETable),
     .class_init = spapr_tce_table_class_init,
-    .instance_finalize = spapr_tce_table_finalize,
 };
 
 static void register_types(void)
diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index 1ba6c3ab70..27cd75a932 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -8,3 +8,4 @@ obj-y += ipl.o
 obj-y += css.o
 obj-y += s390-virtio-ccw.o
 obj-y += virtio-ccw.o
+obj-y += s390-pci-bus.o s390-pci-inst.o
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index b67c039a70..d0c5ddeece 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -1299,6 +1299,11 @@ void css_generate_chp_crws(uint8_t cssid, uint8_t chpid)
     /* TODO */
 }
 
+void css_generate_css_crws(uint8_t cssid)
+{
+    css_queue_crw(CRW_RSC_CSS, 0, 0, cssid);
+}
+
 int css_enable_mcsse(void)
 {
     trace_css_enable_facility("mcsse");
diff --git a/hw/s390x/css.h b/hw/s390x/css.h
index 33104ac58e..7e53148700 100644
--- a/hw/s390x/css.h
+++ b/hw/s390x/css.h
@@ -101,6 +101,7 @@ void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid);
 void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
                            int hotplugged, int add);
 void css_generate_chp_crws(uint8_t cssid, uint8_t chpid);
+void css_generate_css_crws(uint8_t cssid);
 void css_adapter_interrupt(uint8_t isc);
 
 #define CSS_IO_ADAPTER_VIRTIO 1
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
new file mode 100644
index 0000000000..1201b8d57c
--- /dev/null
+++ b/hw/s390x/s390-pci-bus.c
@@ -0,0 +1,591 @@
+/*
+ * s390 PCI BUS
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
+ *            Hong Bo Li <lihbbj@cn.ibm.com>
+ *            Yi Min Zhao <zyimin@cn.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "s390-pci-bus.h"
+#include <hw/pci/pci_bus.h>
+#include <hw/pci/msi.h>
+#include <qemu/error-report.h>
+
+/* #define DEBUG_S390PCI_BUS */
+#ifdef DEBUG_S390PCI_BUS
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+int chsc_sei_nt2_get_event(void *res)
+{
+    ChscSeiNt2Res *nt2 = (ChscSeiNt2Res *)res;
+    SeiContainer *ev;
+    S390pciState *phb = S390_PCI_HOST_BRIDGE(
+        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+
+    /*
+     * Result is 1 while nothing could be delivered (no host bridge found or
+     * empty queue) and 0 once one pending event has been copied into res.
+     */
+    if (!phb) {
+        return 1;
+    }
+    ev = QTAILQ_FIRST(&phb->pending_sei);
+    if (!ev) {
+        return 1;
+    }
+
+    /* Unlink the head of the queue; its storage is released further down. */
+    QTAILQ_REMOVE(&phb->pending_sei, ev, link);
+    nt2->nt = 2;
+    nt2->cc = ev->cc;
+    if (ev->cc == 1) { /* error event */
+        PciCcdfErr *err = (PciCcdfErr *)nt2->ccdf;
+
+        err->fid = cpu_to_be32(ev->fid);
+        err->fh = cpu_to_be32(ev->fh);
+        err->e = cpu_to_be32(ev->e);
+        err->faddr = cpu_to_be64(ev->faddr);
+        err->pec = cpu_to_be16(ev->pec);
+    } else if (ev->cc == 2) { /* availability event */
+        PciCcdfAvail *avail = (PciCcdfAvail *)nt2->ccdf;
+
+        avail->fid = cpu_to_be32(ev->fid);
+        avail->fh = cpu_to_be32(ev->fh);
+        avail->pec = cpu_to_be16(ev->pec);
+    } else {
+        abort();
+    }
+    g_free(ev);
+    return 0;
+}
+
+int chsc_sei_nt2_have_event(void)
+{
+    S390pciState *phb = S390_PCI_HOST_BRIDGE(
+        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+
+    /* No host bridge means no event queue, so nothing can be pending. */
+    if (phb == NULL) {
+        return 0;
+    }
+    return QTAILQ_EMPTY(&phb->pending_sei) ? 0 : 1;
+}
+
+S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
+{
+    S390pciState *phb = S390_PCI_HOST_BRIDGE(
+        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+    int slot;
+
+    if (phb == NULL) {
+        return NULL;
+    }
+    /* Slots whose function handle is 0 are skipped even if the fid matches. */
+    for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+        S390PCIBusDevice *cand = &phb->pbdev[slot];
+
+        if (cand->fh == 0 || cand->fid != fid) {
+            continue;
+        }
+        return cand;
+    }
+    return NULL;
+}
+
+void s390_pci_sclp_configure(int configure, SCCB *sccb)
+{
+    PciCfgSccb *cfg = (PciCfgSccb *)sccb;
+    uint32_t fid = be32_to_cpu(cfg->aid);
+    S390PCIBusDevice *dev = s390_pci_find_dev_by_fid(fid);
+    uint16_t code;
+
+    if (!dev) {
+        DPRINTF("sclp config %d no dev found\n", configure);
+        code = SCLP_RC_ADAPTER_ID_NOT_RECOGNIZED;
+    } else if (dev->configured ? configure == 1 : configure == 0) {
+        /* The function is already in the state that was asked for. */
+        code = SCLP_RC_NO_ACTION_REQUIRED;
+    } else {
+        /* Any other combination flips the configured flag. */
+        dev->configured = !dev->configured;
+        code = SCLP_RC_NORMAL_COMPLETION;
+    }
+
+    /* The response code is stored big-endian in the SCCB header. */
+    cfg->header.response_code = cpu_to_be16(code);
+}
+
+static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
+{
+    return PCI_SLOT(pdev->devfn);
+}
+
+static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
+{
+    return PCI_SLOT(pdev->devfn) | FH_VIRT;
+}
+
+S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
+{
+    S390pciState *phb = S390_PCI_HOST_BRIDGE(
+        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+    uint32_t skip = idx; /* occupied slots still to pass over */
+    int slot;
+
+    if (phb == NULL) {
+        return NULL;
+    }
+
+    /*
+     * idx counts only occupied slots (fh != 0), in ascending slot order,
+     * so empty slots are ignored rather than consuming an index.
+     */
+    for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+        S390PCIBusDevice *cand = &phb->pbdev[slot];
+
+        if (cand->fh != 0) {
+            if (skip == 0) {
+                return cand;
+            }
+            skip--;
+        }
+    }
+    return NULL;
+}
+
+S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
+{
+    S390PCIBusDevice *pbdev;
+    int i;
+    S390pciState *s = S390_PCI_HOST_BRIDGE(
+        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+
+    /* fh 0 is what an empty slot holds; never report such a slot as a hit. */
+    if (!s || !fh) {
+        return NULL;
+    }
+
+    for (i = 0; i < PCI_SLOT_MAX; i++) {
+        pbdev = &s->pbdev[i];
+        if (pbdev->fh == fh) {
+            return pbdev;
+        }
+    }
+    return NULL;
+}
+
+static void s390_pci_generate_event(uint8_t cc, uint16_t pec, uint32_t fh,
+                                    uint32_t fid, uint64_t faddr, uint32_t e)
+{
+    SeiContainer *sei_cont;
+    S390pciState *s = S390_PCI_HOST_BRIDGE(
+        object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+
+    if (!s) {
+        return;
+    }
+    /* Allocate only after the lookup so the early return cannot leak. */
+    sei_cont = g_malloc0(sizeof(SeiContainer));
+    sei_cont->fh = fh;
+    sei_cont->fid = fid;
+    sei_cont->cc = cc;
+    sei_cont->pec = pec;
+    sei_cont->faddr = faddr;
+    sei_cont->e = e;
+    QTAILQ_INSERT_TAIL(&s->pending_sei, sei_cont, link);
+    css_generate_css_crws(0);
+}
+
+static void s390_pci_generate_plug_event(uint16_t pec, uint32_t fh,
+                                         uint32_t fid)
+{
+    s390_pci_generate_event(2, pec, fh, fid, 0, 0);
+}
+
+static void s390_pci_generate_error_event(uint16_t pec, uint32_t fh,
+                                          uint32_t fid, uint64_t faddr,
+                                          uint32_t e)
+{
+    s390_pci_generate_event(1, pec, fh, fid, faddr, e);
+}
+
+static void s390_pci_set_irq(void *opaque, int irq, int level)
+{
+    /* nothing to do */
+}
+
+static int s390_pci_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    /* nothing to do */
+    return 0;
+}
+
+static uint64_t s390_pci_get_table_origin(uint64_t iota)
+{
+    return iota & ~ZPCI_IOTA_RTTO_FLAG;
+}
+
+static unsigned int calc_rtx(dma_addr_t ptr)
+{
+    return ((uint64_t)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; /* bits 31-41 */
+}
+
+static unsigned int calc_sx(dma_addr_t ptr)
+{
+    return (unsigned int)((ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK);
+}
+
+static unsigned int calc_px(dma_addr_t ptr)
+{
+    return (unsigned int)((ptr >> PAGE_SHIFT) & ZPCI_PT_MASK);
+}
+
+static uint64_t get_rt_sto(uint64_t entry)
+{
+    bool is_rtx = (entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX;
+
+    return is_rtx ? (entry & ZPCI_RTE_ADDR_MASK) : 0; /* 0: other type */
+}
+
+static uint64_t get_st_pto(uint64_t entry)
+{
+    bool is_sx = (entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX;
+
+    return is_sx ? (entry & ZPCI_STE_ADDR_MASK) : 0; /* 0: other type */
+}
+
+static uint64_t s390_guest_io_table_walk(uint64_t guest_iota,
+                                  uint64_t guest_dma_address)
+{
+    const uint32_t region_idx = calc_rtx(guest_dma_address);
+    const uint32_t segment_idx = calc_sx(guest_dma_address);
+    const uint32_t page_idx = calc_px(guest_dma_address);
+    uint64_t entry_addr, seg_table, page_table;
+
+    /*
+     * Three chained 8-byte loads from guest memory.  Each of the first two
+     * entries is reduced to the origin of the next table; if that origin
+     * comes back as 0 the walk stops and 0 is returned.  Otherwise the
+     * third entry is returned exactly as loaded.
+     */
+
+    /* Level 1: entry selected by the calc_rtx() index. */
+    entry_addr = guest_iota + region_idx * sizeof(uint64_t);
+    seg_table = get_rt_sto(ldq_phys(&address_space_memory, entry_addr));
+    if (seg_table == 0) {
+        return 0;
+    }
+
+    /* Level 2: entry selected by the calc_sx() index. */
+    entry_addr = seg_table + segment_idx * sizeof(uint64_t);
+    page_table = get_st_pto(ldq_phys(&address_space_memory, entry_addr));
+    if (page_table == 0) {
+        return 0;
+    }
+
+    /* Level 3: entry selected by the calc_px() index is the result. */
+    entry_addr = page_table + page_idx * sizeof(uint64_t);
+    return ldq_phys(&address_space_memory, entry_addr);
+}
+
+static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr,
+                                          bool is_write)
+{
+    uint64_t pte;
+    uint32_t flags;
+    S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, mr);
+    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)
+                                           ->qbus.parent);
+    IOMMUTLBEntry ret = {
+        .target_as = &address_space_memory,
+        .iova = 0,
+        .translated_addr = 0,
+        .addr_mask = ~(hwaddr)0,
+        .perm = IOMMU_NONE, /* default kept by every error path */
+    };
+
+    DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
+
+    /* s390 does not have an APIC mapped to main storage so we use
+     * a separate AddressSpace only for msix notifications
+     */
+    if (addr == ZPCI_MSI_ADDR) {
+        ret.target_as = &s->msix_notify_as;
+        ret.iova = addr;
+        ret.translated_addr = addr; /* identity mapping into the MSI-X AS */
+        ret.addr_mask = 0xfff;
+        ret.perm = IOMMU_RW;
+        return ret;
+    }
+
+    if (!pbdev->g_iota) { /* g_iota is zero: no table to walk */
+        pbdev->error_state = true;
+        pbdev->lgstg_blocked = true;
+        s390_pci_generate_error_event(ERR_EVENT_INVALAS, pbdev->fh, pbdev->fid,
+                                      addr, 0);
+        return ret;
+    }
+
+    if (addr < pbdev->pba || addr > pbdev->pal) { /* outside [pba, pal] */
+        pbdev->error_state = true;
+        pbdev->lgstg_blocked = true;
+        s390_pci_generate_error_event(ERR_EVENT_OORANGE, pbdev->fh, pbdev->fid,
+                                      addr, 0);
+        return ret;
+    }
+
+    pte = s390_guest_io_table_walk(s390_pci_get_table_origin(pbdev->g_iota),
+                                   addr);
+
+    if (!pte) { /* the table walk produced 0 */
+        pbdev->error_state = true;
+        pbdev->lgstg_blocked = true;
+        s390_pci_generate_error_event(ERR_EVENT_SERR, pbdev->fh, pbdev->fid,
+                                      addr, ERR_EVENT_Q_BIT);
+        return ret;
+    }
+
+    flags = pte & ZPCI_PTE_FLAG_MASK;
+    ret.iova = addr;
+    ret.translated_addr = pte & ZPCI_PTE_ADDR_MASK;
+    ret.addr_mask = 0xfff;
+
+    if (flags & ZPCI_PTE_INVALID) { /* entry flagged invalid: no access */
+        ret.perm = IOMMU_NONE;
+    } else {
+        ret.perm = IOMMU_RW;
+    }
+
+    return ret;
+}
+
+static const MemoryRegionIOMMUOps s390_iommu_ops = {
+    .translate = s390_translate_iommu,
+};
+
+static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    S390pciState *s = opaque;
+
+    return &s->pbdev[PCI_SLOT(devfn)].as;
+}
+
+static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
+{
+    uint8_t ind_old, ind_new;
+    hwaddr len = 1; /* a single guest byte is mapped and updated */
+    uint8_t *ind_addr;
+
+    ind_addr = cpu_physical_memory_map(ind_loc, &len, 1);
+    if (!ind_addr) {
+        s390_pci_generate_error_event(ERR_EVENT_AIRERR, 0, 0, 0, 0);
+        return -1; /* converted to 0xff by the uint8_t return type */
+    }
+    do { /* retry until the OR is applied to an unchanged byte */
+        ind_old = *ind_addr;
+        ind_new = ind_old | to_be_set;
+    } while (atomic_cmpxchg(ind_addr, ind_old, ind_new) != ind_old);
+    cpu_physical_memory_unmap(ind_addr, len, 1, len);
+
+    return ind_old; /* byte value from before to_be_set was ORed in */
+}
+
+static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data,
+                                unsigned int size)
+{
+    S390PCIBusDevice *pbdev;
+    uint32_t io_int_word;
+    uint32_t fid = data >> ZPCI_MSI_VEC_BITS; /* bits above the vector field */
+    uint32_t vec = data & ZPCI_MSI_VEC_MASK; /* low ZPCI_MSI_VEC_BITS bits */
+    uint64_t ind_bit;
+    uint32_t sum_bit;
+    uint32_t e = 0;
+
+    DPRINTF("write_msix data 0x%" PRIx64 " fid %d vec 0x%x\n", data, fid, vec);
+
+    pbdev = s390_pci_find_dev_by_fid(fid);
+    if (!pbdev) { /* unknown fid: queue an error event and drop the write */
+        e |= (vec << ERR_EVENT_MVN_OFFSET);
+        s390_pci_generate_error_event(ERR_EVENT_NOMSI, 0, fid, addr, e);
+        return;
+    }
+
+    ind_bit = pbdev->routes.adapter.ind_offset;
+    sum_bit = pbdev->routes.adapter.summary_offset;
+
+    set_ind_atomic(pbdev->routes.adapter.ind_addr + (ind_bit + vec) / 8,
+                   0x80 >> ((ind_bit + vec) % 8));
+    if (!set_ind_atomic(pbdev->routes.adapter.summary_addr + sum_bit / 8,
+                                       0x80 >> (sum_bit % 8))) {
+        io_int_word = (pbdev->isc << 27) | IO_INT_WORD_AI;
+        s390_io_interrupt(0, 0, 0, io_int_word); /* old summary byte was 0 */
+    }
+
+    return;
+}
+
+static uint64_t s390_msi_ctrl_read(void *opaque, hwaddr addr, unsigned size)
+{
+    return 0xffffffff;
+}
+
+static const MemoryRegionOps s390_msi_ctrl_ops = {
+    .write = s390_msi_ctrl_write,
+    .read = s390_msi_ctrl_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void s390_pcihost_init_as(S390pciState *s)
+{
+    int slot;
+
+    for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+        S390PCIBusDevice *fn = &s->pbdev[slot]; /* per-slot IOMMU + AS */
+        memory_region_init_iommu(&fn->mr, OBJECT(s), &s390_iommu_ops,
+                                 "iommu-s390", UINT64_MAX);
+        address_space_init(&fn->as, &fn->mr, "iommu-pci");
+    }
+    memory_region_init_io(&s->msix_notify_mr, OBJECT(s),
+                          &s390_msi_ctrl_ops, s, "msix-s390", UINT64_MAX);
+    address_space_init(&s->msix_notify_as, &s->msix_notify_mr, "msix-pci");
+}
+
+static int s390_pcihost_init(SysBusDevice *dev)
+{
+    PCIBus *b;
+    BusState *bus;
+    PCIHostState *phb = PCI_HOST_BRIDGE(dev);
+    S390pciState *s = S390_PCI_HOST_BRIDGE(dev);
+
+    DPRINTF("host_init\n");
+
+    b = pci_register_bus(DEVICE(dev), NULL,
+                         s390_pci_set_irq, s390_pci_map_irq, NULL,
+                         get_system_memory(), get_system_io(), 0, 64,
+                         TYPE_PCI_BUS);
+    s390_pcihost_init_as(s);
+    pci_setup_iommu(b, s390_pci_dma_iommu, s);
+
+    bus = BUS(b);
+    qbus_set_hotplug_handler(bus, DEVICE(dev), NULL);
+    phb->bus = b;
+    QTAILQ_INIT(&s->pending_sei);
+    return 0;
+}
+
+static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
+{
+    uint8_t pos;
+    uint16_t ctrl;
+    uint32_t table, pba;
+
+    pos = pci_find_capability(pbdev->pdev, PCI_CAP_ID_MSIX);
+    if (!pos) {
+        pbdev->msix.available = false;
+        return 0;
+    }
+
+    ctrl = pci_host_config_read_common(pbdev->pdev, pos + PCI_CAP_FLAGS,
+             pci_config_size(pbdev->pdev), sizeof(ctrl));
+    table = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_TABLE,
+             pci_config_size(pbdev->pdev), sizeof(table));
+    pba = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_PBA,
+             pci_config_size(pbdev->pdev), sizeof(pba));
+
+    pbdev->msix.table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
+    pbdev->msix.table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
+    pbdev->msix.pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
+    pbdev->msix.pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
+    pbdev->msix.entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
+    pbdev->msix.available = true;
+    return 0;
+}
+
+static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,
+                                  DeviceState *dev, Error **errp)
+{
+    PCIDevice *pci_dev = PCI_DEVICE(dev);
+    S390PCIBusDevice *pbdev;
+    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
+                                           ->qbus.parent);
+
+    pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
+
+    pbdev->fid = s390_pci_get_pfid(pci_dev);
+    pbdev->pdev = pci_dev;
+    pbdev->configured = true;
+    pbdev->fh = s390_pci_get_pfh(pci_dev);
+
+    s390_pcihost_setup_msix(pbdev);
+
+    if (dev->hotplugged) {
+        s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY,
+                                     pbdev->fh, pbdev->fid);
+        s390_pci_generate_plug_event(HP_EVENT_TO_CONFIGURED,
+                                     pbdev->fh, pbdev->fid);
+    }
+    return;
+}
+
+static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev,
+                                    DeviceState *dev, Error **errp)
+{
+    PCIDevice *pci_dev = PCI_DEVICE(dev);
+    S390pciState *s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pci_dev)
+                                           ->qbus.parent);
+    S390PCIBusDevice *pbdev = &s->pbdev[PCI_SLOT(pci_dev->devfn)];
+
+    if (pbdev->configured) {
+        pbdev->configured = false;
+        s390_pci_generate_plug_event(HP_EVENT_CONFIGURED_TO_STBRES,
+                                     pbdev->fh, pbdev->fid);
+    }
+
+    s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED,
+                                 pbdev->fh, pbdev->fid);
+    pbdev->fh = 0;
+    pbdev->fid = 0;
+    pbdev->pdev = NULL;
+    object_unparent(OBJECT(pci_dev));
+}
+
+static void s390_pcihost_class_init(ObjectClass *klass, void *data)
+{
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
+
+    dc->cannot_instantiate_with_device_add_yet = true;
+    k->init = s390_pcihost_init;
+    hc->plug = s390_pcihost_hot_plug;
+    hc->unplug = s390_pcihost_hot_unplug;
+    msi_supported = true;
+}
+
+static const TypeInfo s390_pcihost_info = {
+    .name          = TYPE_S390_PCI_HOST_BRIDGE,
+    .parent        = TYPE_PCI_HOST_BRIDGE,
+    .instance_size = sizeof(S390pciState),
+    .class_init    = s390_pcihost_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
+};
+
+static void s390_pci_register_types(void)
+{
+    type_register_static(&s390_pcihost_info);
+}
+
+type_init(s390_pci_register_types)
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
new file mode 100644
index 0000000000..464a92eedf
--- /dev/null
+++ b/hw/s390x/s390-pci-bus.h
@@ -0,0 +1,251 @@
+/*
+ * s390 PCI BUS definitions
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
+ *            Hong Bo Li <lihbbj@cn.ibm.com>
+ *            Yi Min Zhao <zyimin@cn.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PCI_BUS_H
+#define HW_S390_PCI_BUS_H
+
+#include <hw/pci/pci.h>
+#include <hw/pci/pci_host.h>
+#include "hw/s390x/sclp.h"
+#include "hw/s390x/s390_flic.h"
+#include "hw/s390x/css.h"
+
+#define TYPE_S390_PCI_HOST_BRIDGE "s390-pcihost"
+#define FH_VIRT 0x00ff0000
+#define ENABLE_BIT_OFFSET 31
+#define S390_PCIPT_ADAPTER 2
+
+#define S390_PCI_HOST_BRIDGE(obj) \
+    OBJECT_CHECK(S390pciState, (obj), TYPE_S390_PCI_HOST_BRIDGE)
+
+#define HP_EVENT_TO_CONFIGURED        0x0301
+#define HP_EVENT_RESERVED_TO_STANDBY  0x0302
+#define HP_EVENT_CONFIGURED_TO_STBRES 0x0304
+#define HP_EVENT_STANDBY_TO_RESERVED  0x0308
+
+#define ERR_EVENT_INVALAS 0x1
+#define ERR_EVENT_OORANGE 0x2
+#define ERR_EVENT_INVALTF 0x3
+#define ERR_EVENT_TPROTE  0x4
+#define ERR_EVENT_APROTE  0x5
+#define ERR_EVENT_KEYE    0x6
+#define ERR_EVENT_INVALTE 0x7
+#define ERR_EVENT_INVALTL 0x8
+#define ERR_EVENT_TT      0x9
+#define ERR_EVENT_INVALMS 0xa
+#define ERR_EVENT_SERR    0xb
+#define ERR_EVENT_NOMSI   0x10
+#define ERR_EVENT_INVALBV 0x11
+#define ERR_EVENT_AIBV    0x12
+#define ERR_EVENT_AIRERR  0x13
+#define ERR_EVENT_FMBA    0x2a
+#define ERR_EVENT_FMBUP   0x2b
+#define ERR_EVENT_FMBPRO  0x2c
+#define ERR_EVENT_CCONF   0x30
+#define ERR_EVENT_SERVAC  0x3a
+#define ERR_EVENT_PERMERR 0x3b
+
+#define ERR_EVENT_Q_BIT 0x2
+#define ERR_EVENT_MVN_OFFSET 16
+
+#define ZPCI_MSI_VEC_BITS 11
+#define ZPCI_MSI_VEC_MASK 0x7ff
+
+#define ZPCI_MSI_ADDR  0xfe00000000000000ULL
+#define ZPCI_SDMA_ADDR 0x100000000ULL
+#define ZPCI_EDMA_ADDR 0x1ffffffffffffffULL
+
+#define PAGE_SHIFT      12
+#define PAGE_MASK       (~(PAGE_SIZE-1))
+#define PAGE_DEFAULT_ACC        0
+#define PAGE_DEFAULT_KEY        (PAGE_DEFAULT_ACC << 4)
+
+/* I/O Translation Anchor (IOTA) */
+enum ZpciIoatDtype {
+    ZPCI_IOTA_STO = 0,
+    ZPCI_IOTA_RTTO = 1,
+    ZPCI_IOTA_RSTO = 2,
+    ZPCI_IOTA_RFTO = 3,
+    ZPCI_IOTA_PFAA = 4,
+    ZPCI_IOTA_IOPFAA = 5,
+    ZPCI_IOTA_IOPTO = 7
+};
+
+#define ZPCI_IOTA_IOT_ENABLED           0x800ULL
+#define ZPCI_IOTA_DT_ST                 (ZPCI_IOTA_STO  << 2)
+#define ZPCI_IOTA_DT_RT                 (ZPCI_IOTA_RTTO << 2)
+#define ZPCI_IOTA_DT_RS                 (ZPCI_IOTA_RSTO << 2)
+#define ZPCI_IOTA_DT_RF                 (ZPCI_IOTA_RFTO << 2)
+#define ZPCI_IOTA_DT_PF                 (ZPCI_IOTA_PFAA << 2)
+#define ZPCI_IOTA_FS_4K                 0
+#define ZPCI_IOTA_FS_1M                 1
+#define ZPCI_IOTA_FS_2G                 2
+#define ZPCI_KEY                        (PAGE_DEFAULT_KEY << 5)
+
+#define ZPCI_IOTA_STO_FLAG  (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
+#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
+#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
+#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF)
+#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY |\
+                             ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G)
+
+/* I/O Region and segment tables */
+#define ZPCI_INDEX_MASK         0x7ffULL
+
+#define ZPCI_TABLE_TYPE_MASK    0xc
+#define ZPCI_TABLE_TYPE_RFX     0xc
+#define ZPCI_TABLE_TYPE_RSX     0x8
+#define ZPCI_TABLE_TYPE_RTX     0x4
+#define ZPCI_TABLE_TYPE_SX      0x0
+
+#define ZPCI_TABLE_LEN_RFX      0x3
+#define ZPCI_TABLE_LEN_RSX      0x3
+#define ZPCI_TABLE_LEN_RTX      0x3
+
+#define ZPCI_TABLE_OFFSET_MASK  0xc0
+#define ZPCI_TABLE_SIZE         0x4000
+#define ZPCI_TABLE_ALIGN        ZPCI_TABLE_SIZE
+#define ZPCI_TABLE_ENTRY_SIZE   (sizeof(uint64_t)) /* 8-byte guest entry */
+#define ZPCI_TABLE_ENTRIES      (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+
+#define ZPCI_TABLE_BITS         11
+#define ZPCI_PT_BITS            8
+#define ZPCI_ST_SHIFT           (ZPCI_PT_BITS + PAGE_SHIFT)
+#define ZPCI_RT_SHIFT           (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+
+#define ZPCI_RTE_FLAG_MASK      0x3fffULL
+#define ZPCI_RTE_ADDR_MASK      (~ZPCI_RTE_FLAG_MASK)
+#define ZPCI_STE_FLAG_MASK      0x7ffULL
+#define ZPCI_STE_ADDR_MASK      (~ZPCI_STE_FLAG_MASK)
+
+/* I/O Page tables */
+#define ZPCI_PTE_VALID_MASK             0x400
+#define ZPCI_PTE_INVALID                0x400
+#define ZPCI_PTE_VALID                  0x000
+#define ZPCI_PT_SIZE                    0x800
+#define ZPCI_PT_ALIGN                   ZPCI_PT_SIZE
+#define ZPCI_PT_ENTRIES                 (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_PT_MASK                    (ZPCI_PT_ENTRIES - 1)
+
+#define ZPCI_PTE_FLAG_MASK              0xfffULL
+#define ZPCI_PTE_ADDR_MASK              (~ZPCI_PTE_FLAG_MASK)
+
+/* Shared bits */
+#define ZPCI_TABLE_VALID                0x00
+#define ZPCI_TABLE_INVALID              0x20
+#define ZPCI_TABLE_PROTECTED            0x200
+#define ZPCI_TABLE_UNPROTECTED          0x000
+
+#define ZPCI_TABLE_VALID_MASK           0x20
+#define ZPCI_TABLE_PROT_MASK            0x200
+
+typedef struct SeiContainer {
+    QTAILQ_ENTRY(SeiContainer) link;
+    uint32_t fid;
+    uint32_t fh;
+    uint8_t cc;
+    uint16_t pec;
+    uint64_t faddr;
+    uint32_t e;
+} SeiContainer;
+
+typedef struct PciCcdfErr {
+    uint32_t reserved1;
+    uint32_t fh;
+    uint32_t fid;
+    uint32_t e;
+    uint64_t faddr;
+    uint32_t reserved3;
+    uint16_t reserved4;
+    uint16_t pec;
+} QEMU_PACKED PciCcdfErr;
+
+typedef struct PciCcdfAvail {
+    uint32_t reserved1;
+    uint32_t fh;
+    uint32_t fid;
+    uint32_t reserved2;
+    uint32_t reserved3;
+    uint32_t reserved4;
+    uint32_t reserved5;
+    uint16_t reserved6;
+    uint16_t pec;
+} QEMU_PACKED PciCcdfAvail;
+
+typedef struct ChscSeiNt2Res {
+    uint16_t length;
+    uint16_t code;
+    uint16_t reserved1;
+    uint8_t reserved2;
+    uint8_t nt;
+    uint8_t flags;
+    uint8_t reserved3;
+    uint8_t reserved4;
+    uint8_t cc;
+    uint32_t reserved5[13];
+    uint8_t ccdf[4016];
+} QEMU_PACKED ChscSeiNt2Res;
+
+typedef struct PciCfgSccb {
+        SCCBHeader header;
+        uint8_t atype;
+        uint8_t reserved1;
+        uint16_t reserved2;
+        uint32_t aid;
+} QEMU_PACKED PciCfgSccb;
+
+typedef struct S390MsixInfo {
+    bool available;
+    uint8_t table_bar;
+    uint8_t pba_bar;
+    uint16_t entries;
+    uint32_t table_offset;
+    uint32_t pba_offset;
+} S390MsixInfo;
+
+typedef struct S390PCIBusDevice {
+    PCIDevice *pdev;
+    bool configured;
+    bool error_state;
+    bool lgstg_blocked;
+    uint32_t fh;
+    uint32_t fid;
+    uint64_t g_iota;
+    uint64_t pba;
+    uint64_t pal;
+    uint64_t fmb_addr;
+    uint8_t isc;
+    uint16_t noi;
+    uint8_t sum;
+    S390MsixInfo msix;
+    AdapterRoutes routes;
+    AddressSpace as;
+    MemoryRegion mr;
+} S390PCIBusDevice;
+
+typedef struct S390pciState {
+    PCIHostState parent_obj;
+    S390PCIBusDevice pbdev[PCI_SLOT_MAX];
+    AddressSpace msix_notify_as;
+    MemoryRegion msix_notify_mr;
+    QTAILQ_HEAD(, SeiContainer) pending_sei;
+} S390pciState;
+
+int chsc_sei_nt2_get_event(void *res);
+int chsc_sei_nt2_have_event(void);
+void s390_pci_sclp_configure(int configure, SCCB *sccb);
+S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx);
+S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh);
+S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid);
+
+#endif
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
new file mode 100644
index 0000000000..5ea13e5d79
--- /dev/null
+++ b/hw/s390x/s390-pci-inst.c
@@ -0,0 +1,811 @@
+/*
+ * s390 PCI instructions
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
+ *            Hong Bo Li <lihbbj@cn.ibm.com>
+ *            Yi Min Zhao <zyimin@cn.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "s390-pci-inst.h"
+#include "s390-pci-bus.h"
+#include <exec/memory-internal.h>
+#include <qemu/error-report.h>
+
+/* #define DEBUG_S390PCI_INST */
+#ifdef DEBUG_S390PCI_INST
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stderr, "s390pci-inst: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+static void s390_set_status_code(CPUS390XState *env,
+                                 uint8_t r, uint64_t status_code)
+{
+    env->regs[r] &= ~0xff000000ULL;             /* clear bits 24-31 of reg r */
+    env->regs[r] |= (status_code & 0xff) << 24; /* insert low status byte */
+}
+
+/*
+ * CLP "List PCI functions": fill rrb->response with one fh_list entry per
+ * function, starting at the index in the request's resume token.  Returns 0,
+ * or -EINVAL after storing a CLP error code in the response header.
+ */
+static int list_pci(ClpReqRspListPci *rrb, uint8_t *cc)
+{
+    S390PCIBusDevice *pbdev;
+    uint32_t res_code, initial_l2, g_l2, finish, idx;
+    int rc = 0;
+    uint64_t resume_token;
+
+    if (lduw_p(&rrb->request.hdr.len) != 32) {
+        res_code = CLP_RC_LEN;
+        rc = -EINVAL;
+        goto out;
+    }
+
+    if ((ldl_p(&rrb->request.fmt) & CLP_MASK_FMT) != 0) {
+        res_code = CLP_RC_FMT;
+        rc = -EINVAL;
+        goto out;
+    }
+
+    if ((ldl_p(&rrb->request.fmt) & ~CLP_MASK_FMT) != 0 ||
+        ldq_p(&rrb->request.reserved1) != 0 ||
+        ldq_p(&rrb->request.reserved2) != 0) {
+        res_code = CLP_RC_RESNOT0;
+        rc = -EINVAL;
+        goto out;
+    }
+
+    resume_token = ldq_p(&rrb->request.resume_token);
+    /* idx is 32 bit; a wider token would wrap the fh_list index below */
+    if (resume_token) {
+        pbdev = s390_pci_find_dev_by_idx(resume_token);
+        if (!pbdev || (resume_token >> 32)) {
+            res_code = CLP_RC_LISTPCI_BADRT;
+            rc = -EINVAL;
+            goto out;
+        }
+    }
+
+    if (lduw_p(&rrb->response.hdr.len) < 48) {
+        res_code = CLP_RC_8K;
+        rc = -EINVAL;
+        goto out;
+    }
+
+    initial_l2 = lduw_p(&rrb->response.hdr.len);
+    if ((initial_l2 - LIST_PCI_HDR_LEN) % sizeof(ClpFhListEntry) != 0) {
+        res_code = CLP_RC_LEN;
+        rc = -EINVAL;
+        *cc = 3;
+        goto out;
+    }
+
+    stl_p(&rrb->response.fmt, 0);
+    stq_p(&rrb->response.reserved1, 0);
+    stq_p(&rrb->response.reserved2, 0);
+    stl_p(&rrb->response.mdd, FH_VIRT);
+    stw_p(&rrb->response.max_fn, PCI_MAX_FUNCTIONS);
+    rrb->response.entry_size = sizeof(ClpFhListEntry);
+    finish = 0;
+    idx = resume_token;
+    g_l2 = LIST_PCI_HDR_LEN;
+    do {
+        pbdev = s390_pci_find_dev_by_idx(idx);
+        if (!pbdev) {
+            finish = 1;
+            break;
+        }
+        stw_p(&rrb->response.fh_list[idx - resume_token].device_id,
+            pci_get_word(pbdev->pdev->config + PCI_DEVICE_ID));
+        stw_p(&rrb->response.fh_list[idx - resume_token].vendor_id,
+            pci_get_word(pbdev->pdev->config + PCI_VENDOR_ID));
+        stl_p(&rrb->response.fh_list[idx - resume_token].config, 0x80000000);
+        stl_p(&rrb->response.fh_list[idx - resume_token].fid, pbdev->fid);
+        stl_p(&rrb->response.fh_list[idx - resume_token].fh, pbdev->fh);
+
+        g_l2 += sizeof(ClpFhListEntry);
+        /* Add endian check for DPRINTF? */
+        DPRINTF("g_l2 %d vendor id 0x%x device id 0x%x fid 0x%x fh 0x%x\n",
+            g_l2,
+            lduw_p(&rrb->response.fh_list[idx - resume_token].vendor_id),
+            lduw_p(&rrb->response.fh_list[idx - resume_token].device_id),
+            ldl_p(&rrb->response.fh_list[idx - resume_token].fid),
+            ldl_p(&rrb->response.fh_list[idx - resume_token].fh));
+        idx++;
+    } while (g_l2 < initial_l2);
+
+    /* token 0 once the device list is exhausted, else the next index */
+    resume_token = finish ? 0 : idx;
+    stq_p(&rrb->response.resume_token, resume_token);
+    stw_p(&rrb->response.hdr.len, g_l2);
+    stw_p(&rrb->response.hdr.rsp, CLP_RC_OK);
+out:
+    if (rc) {
+        DPRINTF("list pci failed rc 0x%x\n", rc);
+        stw_p(&rrb->response.hdr.rsp, res_code);
+    }
+    return rc;
+}
+
+/*
+ * CLP service call: read the request/response block at guest address
+ * regs[r2], execute the requested command and write the block back.
+ */
+int clp_service_call(S390CPU *cpu, uint8_t r2)
+{
+    ClpReqHdr *reqh;
+    ClpRspHdr *resh;
+    S390PCIBusDevice *pbdev;
+    uint32_t req_len, res_len;
+    uint8_t buffer[4096 * 2];
+    uint8_t cc = 0;
+    CPUS390XState *env = &cpu->env;
+    int i;
+
+    cpu_synchronize_state(CPU(cpu));
+
+    if (env->psw.mask & PSW_MASK_PSTATE) {
+        program_interrupt(env, PGM_PRIVILEGED, 4);
+        return 0;
+    }
+
+    cpu_physical_memory_read(env->regs[r2], buffer, sizeof(*reqh));
+    reqh = (ClpReqHdr *)buffer;
+    req_len = lduw_p(&reqh->len);
+    if (req_len < 16 || req_len > 8184 || (req_len % 8 != 0)) {
+        program_interrupt(env, PGM_OPERAND, 4);
+        return 0;
+    }
+
+    cpu_physical_memory_read(env->regs[r2], buffer, req_len + sizeof(*resh));
+    resh = (ClpRspHdr *)(buffer + req_len);
+    res_len = lduw_p(&resh->len);
+    if (res_len < 8 || res_len > 8176 || (res_len % 8 != 0) ||
+        (req_len + res_len) > 8192) {    /* must fit in buffer[] */
+        program_interrupt(env, PGM_OPERAND, 4);
+        return 0;
+    }
+
+    cpu_physical_memory_read(env->regs[r2], buffer, req_len + res_len);
+
+    if (req_len != 32) {
+        stw_p(&resh->rsp, CLP_RC_LEN);
+        goto out;
+    }
+
+    switch (lduw_p(&reqh->cmd)) {
+    case CLP_LIST_PCI: {
+        ClpReqRspListPci *rrb = (ClpReqRspListPci *)buffer;
+        list_pci(rrb, &cc);
+        break;
+    }
+    case CLP_SET_PCI_FN: {
+        ClpReqSetPci *reqsetpci = (ClpReqSetPci *)reqh;
+        ClpRspSetPci *ressetpci = (ClpRspSetPci *)resh;
+
+        pbdev = s390_pci_find_dev_by_fh(ldl_p(&reqsetpci->fh));
+        if (!pbdev) {
+            stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_FH);
+            goto out;
+        }
+
+        switch (reqsetpci->oc) {
+        case CLP_SET_ENABLE_PCI_FN:
+            pbdev->fh |= 1U << ENABLE_BIT_OFFSET;   /* unsigned shift */
+            stl_p(&ressetpci->fh, pbdev->fh);
+            stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
+            break;
+        case CLP_SET_DISABLE_PCI_FN:
+            pbdev->fh &= ~(1U << ENABLE_BIT_OFFSET);
+            pbdev->error_state = false;
+            pbdev->lgstg_blocked = false;
+            stl_p(&ressetpci->fh, pbdev->fh);
+            stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
+            break;
+        default:
+            DPRINTF("unknown set pci command\n");
+            stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_FHOP);
+            break;
+        }
+        break;
+    }
+    case CLP_QUERY_PCI_FN: {
+        ClpReqQueryPci *reqquery = (ClpReqQueryPci *)reqh;
+        ClpRspQueryPci *resquery = (ClpRspQueryPci *)resh;
+
+        pbdev = s390_pci_find_dev_by_fh(ldl_p(&reqquery->fh));
+        if (!pbdev) {
+            DPRINTF("query pci no pci dev\n");
+            stw_p(&resquery->hdr.rsp, CLP_RC_SETPCIFN_FH);
+            goto out;
+        }
+
+        for (i = 0; i < PCI_BAR_COUNT; i++) {
+            uint32_t data = pci_get_long(pbdev->pdev->config +
+                PCI_BASE_ADDRESS_0 + (i * 4));
+
+            stl_p(&resquery->bar[i], data);
+            resquery->bar_size[i] = pbdev->pdev->io_regions[i].size ?
+                                    ctz64(pbdev->pdev->io_regions[i].size) : 0;
+            DPRINTF("bar %d addr 0x%x size 0x%" PRIx64 "barsize 0x%x\n", i,
+                    ldl_p(&resquery->bar[i]),
+                    pbdev->pdev->io_regions[i].size,
+                    resquery->bar_size[i]);
+        }
+
+        stq_p(&resquery->sdma, ZPCI_SDMA_ADDR);
+        stq_p(&resquery->edma, ZPCI_EDMA_ADDR);
+        stw_p(&resquery->pchid, 0);
+        stw_p(&resquery->ug, 1);
+        stl_p(&resquery->uid, pbdev->fid);
+        stw_p(&resquery->hdr.rsp, CLP_RC_OK);
+        break;
+    }
+    case CLP_QUERY_PCI_FNGRP: {
+        ClpRspQueryPciGrp *resgrp = (ClpRspQueryPciGrp *)resh;
+        resgrp->fr = 1;
+        stq_p(&resgrp->dasm, 0);
+        stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
+        stw_p(&resgrp->mui, 0);
+        stw_p(&resgrp->i, 128);
+        resgrp->version = 0;
+
+        stw_p(&resgrp->hdr.rsp, CLP_RC_OK);
+        break;
+    }
+    default:
+        DPRINTF("unknown clp command\n");
+        stw_p(&resh->rsp, CLP_RC_CMD);
+        break;
+    }
+
+out:
+    cpu_physical_memory_write(env->regs[r2], buffer, req_len + res_len);
+    setcc(cpu, cc);
+    return 0;
+}
+
+/*
+ * PCILG: load len bytes at offset regs[r2 + 1] from a BAR (pcias 0-5) or from
+ * config space (pcias 15) of the function given by regs[r2] into regs[r1].
+ */
+int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
+{
+    CPUS390XState *env = &cpu->env;
+    S390PCIBusDevice *pbdev;
+    uint64_t offset, data;
+    uint8_t len;
+    uint32_t fh;
+    uint8_t pcias;
+
+    cpu_synchronize_state(CPU(cpu));
+
+    if (env->psw.mask & PSW_MASK_PSTATE) {
+        program_interrupt(env, PGM_PRIVILEGED, 4);
+        return 0;
+    }
+
+    if (r2 & 0x1) {
+        program_interrupt(env, PGM_SPECIFICATION, 4);
+        return 0;
+    }
+
+    fh = env->regs[r2] >> 32;
+    pcias = (env->regs[r2] >> 16) & 0xf;
+    len = env->regs[r2] & 0xf;
+    offset = env->regs[r2 + 1];
+
+    pbdev = s390_pci_find_dev_by_fh(fh);
+    if (!pbdev) {
+        DPRINTF("pcilg no pci dev\n");
+        setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE);
+        return 0;
+    }
+
+    if (pbdev->lgstg_blocked) {
+        setcc(cpu, ZPCI_PCI_LS_ERR);
+        s390_set_status_code(env, r2, ZPCI_PCI_ST_BLOCKED);
+        return 0;
+    }
+
+    if (pcias < 6) {
+        /* reject empty accesses and accesses crossing an 8 byte boundary */
+        if (!len || (8 - (offset & 0x7)) < len) {
+            program_interrupt(env, PGM_OPERAND, 4);
+            return 0;
+        }
+        MemoryRegion *mr = pbdev->pdev->io_regions[pcias].memory;
+        io_mem_read(mr, offset, &data, len);
+    } else if (pcias == 15) {
+        if ((4 - (offset & 0x3)) < len) {
+            program_interrupt(env, PGM_OPERAND, 4);
+            return 0;
+        }
+        data =  pci_host_config_read_common(
+                   pbdev->pdev, offset, pci_config_size(pbdev->pdev), len);
+
+        switch (len) {      /* len is at most 4 here, see the check above */
+        case 1:
+            break;
+        case 2:
+            data = bswap16(data);
+            break;
+        case 4:
+            data = bswap32(data);
+            break;
+        default:
+            program_interrupt(env, PGM_OPERAND, 4);
+            return 0;
+        }
+    } else {
+        DPRINTF("invalid space\n");
+        setcc(cpu, ZPCI_PCI_LS_ERR);
+        s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS);
+        return 0;
+    }
+
+    env->regs[r1] = data;
+    setcc(cpu, ZPCI_PCI_LS_OK);
+    return 0;
+}
+
+static void update_msix_table_msg_data(S390PCIBusDevice *pbdev, uint64_t offset,
+                                       uint64_t *data, uint8_t len)
+{
+    uint32_t val;
+    uint8_t *msg_data;
+
+    if (offset % PCI_MSIX_ENTRY_SIZE != 8) { /* only entry offset 8 is handled */
+        return;
+    }
+
+    if (len != 4) {
+        DPRINTF("access msix table msg data but len is %d\n", len);
+        return;
+    }
+
+    /* NOTE(review): byte pointer derived from data; layout looks host-endian */
+    msg_data = (uint8_t *)data - offset % PCI_MSIX_ENTRY_SIZE +
+               PCI_MSIX_ENTRY_VECTOR_CTRL;
+    val = pci_get_long(msg_data) | (pbdev->fid << ZPCI_MSI_VEC_BITS);
+    pci_set_long(msg_data, val);    /* store back with the fid bits OR-ed in */
+    DPRINTF("update msix msg_data to 0x%" PRIx64 "\n", *data);
+}
+
+static int trap_msix(S390PCIBusDevice *pbdev, uint64_t offset, uint8_t pcias)
+{
+    S390MsixInfo *msix = &pbdev->msix;
+
+    /* Only an access to the BAR named by table_bar can hit the MSI-X table. */
+    if (!msix->available || msix->table_bar != pcias) {
+        return 0;
+    }
+    return offset >= msix->table_offset && offset <=
+           msix->table_offset + (msix->entries - 1) * PCI_MSIX_ENTRY_SIZE;
+}
+
+/*
+ * PCISTG: store len bytes taken from regs[r1] at offset regs[r2 + 1] in a BAR
+ * (pcias 0-5) or in config space (pcias 15) of the function given by regs[r2].
+ */
+int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
+{
+    CPUS390XState *env = &cpu->env;
+    uint64_t offset, data;
+    S390PCIBusDevice *pbdev;
+    uint32_t fh;
+    uint8_t len, pcias;
+
+    cpu_synchronize_state(CPU(cpu));
+
+    if (env->psw.mask & PSW_MASK_PSTATE) {
+        program_interrupt(env, PGM_PRIVILEGED, 4);
+        return 0;
+    }
+
+    if (r2 & 0x1) {
+        program_interrupt(env, PGM_SPECIFICATION, 4);
+        return 0;
+    }
+
+    fh = env->regs[r2] >> 32;
+    pcias = (env->regs[r2] >> 16) & 0xf;
+    len = env->regs[r2] & 0xf;
+    offset = env->regs[r2 + 1];
+
+    pbdev = s390_pci_find_dev_by_fh(fh);
+    if (!pbdev) {
+        DPRINTF("pcistg no pci dev\n");
+        setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE);
+        return 0;
+    }
+
+    if (pbdev->lgstg_blocked) {
+        setcc(cpu, ZPCI_PCI_LS_ERR);
+        s390_set_status_code(env, r2, ZPCI_PCI_ST_BLOCKED);
+        return 0;
+    }
+
+    data = env->regs[r1];
+    if (pcias < 6) {
+        /* reject empty accesses and accesses crossing an 8 byte boundary */
+        if (!len || (8 - (offset & 0x7)) < len) {
+            program_interrupt(env, PGM_OPERAND, 4);
+            return 0;
+        }
+        MemoryRegion *mr;
+        if (trap_msix(pbdev, offset, pcias)) {
+            offset = offset - pbdev->msix.table_offset;
+            mr = &pbdev->pdev->msix_table_mmio;
+            update_msix_table_msg_data(pbdev, offset, &data, len);
+        } else {
+            mr = pbdev->pdev->io_regions[pcias].memory;
+        }
+
+        io_mem_write(mr, offset, data, len);
+    } else if (pcias == 15) {
+        if ((4 - (offset & 0x3)) < len) {
+            program_interrupt(env, PGM_OPERAND, 4);
+            return 0;
+        }
+        switch (len) {      /* len is at most 4 here, see the check above */
+        case 1:
+            break;
+        case 2:
+            data = bswap16(data);
+            break;
+        case 4:
+            data = bswap32(data);
+            break;
+        default:
+            program_interrupt(env, PGM_OPERAND, 4);
+            return 0;
+        }
+
+        pci_host_config_write_common(pbdev->pdev, offset,
+                                     pci_config_size(pbdev->pdev),
+                                     data, len);
+    } else {
+        DPRINTF("pcistg invalid space\n");
+        setcc(cpu, ZPCI_PCI_LS_ERR);
+        s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS);
+        return 0;
+    }
+
+    setcc(cpu, ZPCI_PCI_LS_OK);
+    return 0;
+}
+
+int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
+{
+    CPUS390XState *env = &cpu->env;
+    uint32_t fh;
+    S390PCIBusDevice *pbdev;
+    ram_addr_t size;
+    IOMMUTLBEntry entry;
+    MemoryRegion *mr;
+
+    cpu_synchronize_state(CPU(cpu));
+
+    if (env->psw.mask & PSW_MASK_PSTATE) {
+        program_interrupt(env, PGM_PRIVILEGED, 4);
+        goto out;
+    }
+    /* r2 must be even: regs[r2] and regs[r2 + 1] are used as a pair */
+    if (r2 & 0x1) {
+        program_interrupt(env, PGM_SPECIFICATION, 4);
+        goto out;
+    }
+
+    fh = env->regs[r1] >> 32;       /* function handle: upper 32 bits of r1 */
+    size = env->regs[r2 + 1];
+
+    pbdev = s390_pci_find_dev_by_fh(fh);
+
+    if (!pbdev) {
+        DPRINTF("rpcit no pci dev\n");
+        setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE);
+        goto out;
+    }
+    /* translate the address in regs[r2] through the device's IOMMU region */
+    mr = pci_device_iommu_address_space(pbdev->pdev)->root;
+    entry = mr->iommu_ops->translate(mr, env->regs[r2], 0);
+
+    if (!entry.translated_addr) {
+        setcc(cpu, ZPCI_PCI_LS_ERR);
+        goto out;
+    }
+    /* pass the entry, with a mask derived from size, to the IOMMU notifiers */
+    entry.addr_mask = size - 1;
+    memory_region_notify_iommu(mr, entry);
+    setcc(cpu, ZPCI_PCI_LS_OK);
+out:
+    return 0;
+}
+
+int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr)
+{
+    CPUS390XState *env = &cpu->env;
+    S390PCIBusDevice *pbdev;
+    MemoryRegion *mr;
+    int i;
+    uint64_t val;
+    uint32_t fh;
+    uint8_t pcias;
+    uint8_t len;
+
+    if (env->psw.mask & PSW_MASK_PSTATE) {
+        program_interrupt(env, PGM_PRIVILEGED, 6);
+        return 0;
+    }
+
+    fh = env->regs[r1] >> 32;
+    pcias = (env->regs[r1] >> 16) & 0xf;
+    len = env->regs[r1] & 0xff;
+    /* block stores are only accepted for the BAR spaces 0-5 */
+    if (pcias > 5) {
+        DPRINTF("pcistb invalid space\n");
+        setcc(cpu, ZPCI_PCI_LS_ERR);
+        s390_set_status_code(env, r1, ZPCI_PCI_ST_INVAL_AS);
+        return 0;
+    }
+    /* only these four block lengths (in bytes) are accepted */
+    switch (len) {
+    case 16:
+    case 32:
+    case 64:
+    case 128:
+        break;
+    default:
+        program_interrupt(env, PGM_SPECIFICATION, 6);
+        return 0;
+    }
+
+    pbdev = s390_pci_find_dev_by_fh(fh);
+    if (!pbdev) {
+        DPRINTF("pcistb no pci dev fh 0x%x\n", fh);
+        setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE);
+        return 0;
+    }
+
+    if (pbdev->lgstg_blocked) {
+        setcc(cpu, ZPCI_PCI_LS_ERR);
+        s390_set_status_code(env, r1, ZPCI_PCI_ST_BLOCKED);
+        return 0;
+    }
+
+    mr = pbdev->pdev->io_regions[pcias].memory;
+    if (!memory_region_access_valid(mr, env->regs[r3], len, true)) {
+        program_interrupt(env, PGM_ADDRESSING, 6);
+        return 0;
+    }
+    /* copy len bytes, 8 at a time, from guest memory at gaddr into the BAR */
+    for (i = 0; i < len / 8; i++) {
+        val = ldq_phys(&address_space_memory, gaddr + i * 8);
+        io_mem_write(mr, env->regs[r3] + i * 8, val, 8);
+    }
+
+    setcc(cpu, ZPCI_PCI_LS_OK);
+    return 0;
+}
+
+static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib)
+{
+    int ret;
+    S390FLICState *fs = s390_get_flic();
+    S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+
+    ret = css_register_io_adapter(S390_PCIPT_ADAPTER,
+                                  FIB_DATA_ISC(ldl_p(&fib.data)), true, false,
+                                  &pbdev->routes.adapter.adapter_id);
+    assert(ret == 0);   /* NOTE(review): aborts instead of returning ret */
+    /* map both indicator areas named by the FIB: aisb and aibv */
+    fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id,
+        ldq_p(&fib.aisb), true);
+    fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id,
+        ldq_p(&fib.aibv), true);
+    /* remember the routing and the fields unpacked from fib.data */
+    pbdev->routes.adapter.summary_addr = ldq_p(&fib.aisb);
+    pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(ldl_p(&fib.data));
+    pbdev->routes.adapter.ind_addr = ldq_p(&fib.aibv);
+    pbdev->routes.adapter.ind_offset = FIB_DATA_AIBVO(ldl_p(&fib.data));
+    pbdev->isc = FIB_DATA_ISC(ldl_p(&fib.data));
+    pbdev->noi = FIB_DATA_NOI(ldl_p(&fib.data));
+    pbdev->sum = FIB_DATA_SUM(ldl_p(&fib.data));
+
+    DPRINTF("reg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id);
+    return 0;
+}
+
+static int dereg_irqs(S390PCIBusDevice *pbdev)
+{
+    S390FLICState *fs = s390_get_flic();
+    S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+
+    fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id,
+                        pbdev->routes.adapter.ind_addr, false);
+    /* NOTE(review): only ind_addr is unmapped here; summary_addr is not */
+    pbdev->routes.adapter.summary_addr = 0;
+    pbdev->routes.adapter.summary_offset = 0;
+    pbdev->routes.adapter.ind_addr = 0;
+    pbdev->routes.adapter.ind_offset = 0;
+    pbdev->isc = 0;
+    pbdev->noi = 0;
+    pbdev->sum = 0;
+
+    DPRINTF("dereg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id);
+    return 0;
+}
+
+static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib)
+{
+    uint64_t pba = ldq_p(&fib.pba);
+    uint64_t pal = ldq_p(&fib.pal);
+    uint64_t g_iota = ldq_p(&fib.iota);
+    uint8_t dt = (g_iota >> 2) & 0x7;   /* bits 2-4 of the iota value */
+    uint8_t t = (g_iota >> 11) & 0x1;   /* bit 11 of the iota value */
+    /* the range [pba, pal] must lie within ZPCI_SDMA_ADDR..ZPCI_EDMA_ADDR */
+    if (pba > pal || pba < ZPCI_SDMA_ADDR || pal > ZPCI_EDMA_ADDR) {
+        program_interrupt(env, PGM_OPERAND, 6);
+        return -EINVAL;
+    }
+
+    /* currently we only support designation type 1 with translation */
+    if (!(dt == ZPCI_IOTA_RTTO && t)) {
+        error_report("unsupported ioat dt %d t %d", dt, t);
+        program_interrupt(env, PGM_OPERAND, 6);
+        return -EINVAL;
+    }
+
+    pbdev->pba = pba;
+    pbdev->pal = pal;
+    pbdev->g_iota = g_iota;
+    return 0;
+}
+
+static void dereg_ioat(S390PCIBusDevice *pbdev)
+{
+    pbdev->pba = 0;     /* reset the values stored by reg_ioat() */
+    pbdev->pal = 0;
+    pbdev->g_iota = 0;
+}
+
+int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba)
+{
+    CPUS390XState *env = &cpu->env;
+    uint8_t oc;
+    uint32_t fh;
+    ZpciFib fib;
+    S390PCIBusDevice *pbdev;
+    uint64_t cc = ZPCI_PCI_LS_OK;
+
+    if (env->psw.mask & PSW_MASK_PSTATE) {
+        program_interrupt(env, PGM_PRIVILEGED, 6);
+        return 0;
+    }
+
+    oc = env->regs[r1] & 0xff;      /* low byte selects the operation */
+    fh = env->regs[r1] >> 32;       /* function handle: upper 32 bits */
+    /* the FIB address must be 8 byte aligned */
+    if (fiba & 0x7) {
+        program_interrupt(env, PGM_SPECIFICATION, 6);
+        return 0;
+    }
+
+    pbdev = s390_pci_find_dev_by_fh(fh);
+    if (!pbdev) {
+        DPRINTF("mpcifc no pci dev fh 0x%x\n", fh);
+        setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE);
+        return 0;
+    }
+    /* copy the FIB from guest memory at fiba */
+    cpu_physical_memory_read(fiba, (uint8_t *)&fib, sizeof(fib));
+
+    switch (oc) {
+    case ZPCI_MOD_FC_REG_INT:
+        if (reg_irqs(env, pbdev, fib)) {
+            cc = ZPCI_PCI_LS_ERR;
+        }
+        break;
+    case ZPCI_MOD_FC_DEREG_INT:
+        dereg_irqs(pbdev);
+        break;
+    case ZPCI_MOD_FC_REG_IOAT:
+        if (reg_ioat(env, pbdev, fib)) {
+            cc = ZPCI_PCI_LS_ERR;
+        }
+        break;
+    case ZPCI_MOD_FC_DEREG_IOAT:
+        dereg_ioat(pbdev);
+        break;
+    case ZPCI_MOD_FC_REREG_IOAT:
+        dereg_ioat(pbdev);
+        if (reg_ioat(env, pbdev, fib)) {
+            cc = ZPCI_PCI_LS_ERR;
+        }
+        break;
+    case ZPCI_MOD_FC_RESET_ERROR:
+        pbdev->error_state = false;
+        pbdev->lgstg_blocked = false;
+        break;
+    case ZPCI_MOD_FC_RESET_BLOCK:
+        pbdev->lgstg_blocked = false;
+        break;
+    case ZPCI_MOD_FC_SET_MEASURE:
+        pbdev->fmb_addr = ldq_p(&fib.fmb_addr);
+        break;
+    default:
+        program_interrupt(&cpu->env, PGM_OPERAND, 6);
+        cc = ZPCI_PCI_LS_ERR;
+    }
+
+    setcc(cpu, cc);
+    return 0;
+}
+
+/* STPCIFC: build a FIB describing the function whose handle is in the upper
+ * 32 bits of regs[r1] and write it to guest physical address fiba. */
+int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba)
+{
+    CPUS390XState *env = &cpu->env;
+    uint32_t fh, data;
+    ZpciFib fib;
+    S390PCIBusDevice *pbdev;
+
+    if (env->psw.mask & PSW_MASK_PSTATE) {
+        program_interrupt(env, PGM_PRIVILEGED, 6);
+        return 0;
+    }
+
+    fh = env->regs[r1] >> 32;
+
+    if (fiba & 0x7) {
+        program_interrupt(env, PGM_SPECIFICATION, 6);
+        return 0;
+    }
+
+    pbdev = s390_pci_find_dev_by_fh(fh);
+    if (!pbdev) {
+        setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE);
+        return 0;
+    }
+
+    memset(&fib, 0, sizeof(fib));
+    stq_p(&fib.pba, pbdev->pba);
+    stq_p(&fib.pal, pbdev->pal);
+    stq_p(&fib.iota, pbdev->g_iota);
+    stq_p(&fib.aibv, pbdev->routes.adapter.ind_addr);
+    stq_p(&fib.aisb, pbdev->routes.adapter.summary_addr);
+    stq_p(&fib.fmb_addr, pbdev->fmb_addr);
+
+    data = (pbdev->isc << 28) | (pbdev->noi << 16) |
+           (pbdev->routes.adapter.ind_offset << 8) | (pbdev->sum << 7) |
+           pbdev->routes.adapter.summary_offset;
+    stl_p(&fib.data, data);     /* fib.data is 32 bits wide */
+
+    if (pbdev->fh >> ENABLE_BIT_OFFSET) {
+        fib.fc |= ZPCI_FIB_FC_ENABLED;
+    }
+
+    if (pbdev->error_state) {
+        fib.fc |= ZPCI_FIB_FC_ERROR;
+    }
+
+    if (pbdev->lgstg_blocked) {
+        fib.fc |= ZPCI_FIB_FC_LS_BLOCKED;
+    }
+
+    if (pbdev->g_iota) {
+        fib.fc |= ZPCI_FIB_FC_DMAAS_REG;
+    }
+
+    cpu_physical_memory_write(fiba, (uint8_t *)&fib, sizeof(fib));
+    setcc(cpu, ZPCI_PCI_LS_OK);
+    return 0;
+}
diff --git a/hw/s390x/s390-pci-inst.h b/hw/s390x/s390-pci-inst.h
new file mode 100644
index 0000000000..7e6c804737
--- /dev/null
+++ b/hw/s390x/s390-pci-inst.h
@@ -0,0 +1,288 @@
+/*
+ * s390 PCI instruction definitions
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com>
+ *            Hong Bo Li <lihbbj@cn.ibm.com>
+ *            Yi Min Zhao <zyimin@cn.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PCI_INST_H
+#define HW_S390_PCI_INST_H
+
+#include <sysemu/dma.h>
+
+/* CLP common request & response block size */
+#define CLP_BLK_SIZE 4096
+#define PCI_BAR_COUNT 6
+#define PCI_MAX_FUNCTIONS 4096
+
+typedef struct ClpReqHdr {
+    uint16_t len;
+    uint16_t cmd;
+} QEMU_PACKED ClpReqHdr;
+
+typedef struct ClpRspHdr {
+    uint16_t len;
+    uint16_t rsp;
+} QEMU_PACKED ClpRspHdr;
+
+/* CLP Response Codes */
+#define CLP_RC_OK         0x0010  /* Command request successfully */
+#define CLP_RC_CMD        0x0020  /* Command code not recognized */
+#define CLP_RC_PERM       0x0030  /* Command not authorized */
+#define CLP_RC_FMT        0x0040  /* Invalid command request format */
+#define CLP_RC_LEN        0x0050  /* Invalid command request length */
+#define CLP_RC_8K         0x0060  /* Command requires 8K LPCB */
+#define CLP_RC_RESNOT0    0x0070  /* Reserved field not zero */
+#define CLP_RC_NODATA     0x0080  /* No data available */
+#define CLP_RC_FC_UNKNOWN 0x0100  /* Function code not recognized */
+
+/*
+ * Call Logical Processor - Command Codes
+ */
+#define CLP_LIST_PCI            0x0002
+#define CLP_QUERY_PCI_FN        0x0003
+#define CLP_QUERY_PCI_FNGRP     0x0004
+#define CLP_SET_PCI_FN          0x0005
+
+/* PCI function handle list entry */
+typedef struct ClpFhListEntry {
+    uint16_t device_id;
+    uint16_t vendor_id;
+#define CLP_FHLIST_MASK_CONFIG 0x80000000
+    uint32_t config;
+    uint32_t fid;
+    uint32_t fh;
+} QEMU_PACKED ClpFhListEntry;
+
+#define CLP_RC_SETPCIFN_FH      0x0101 /* Invalid PCI fn handle */
+#define CLP_RC_SETPCIFN_FHOP    0x0102 /* Fn handle not valid for op */
+#define CLP_RC_SETPCIFN_DMAAS   0x0103 /* Invalid DMA addr space */
+#define CLP_RC_SETPCIFN_RES     0x0104 /* Insufficient resources */
+#define CLP_RC_SETPCIFN_ALRDY   0x0105 /* Fn already in requested state */
+#define CLP_RC_SETPCIFN_ERR     0x0106 /* Fn in permanent error state */
+#define CLP_RC_SETPCIFN_RECPND  0x0107 /* Error recovery pending */
+#define CLP_RC_SETPCIFN_BUSY    0x0108 /* Fn busy */
+#define CLP_RC_LISTPCI_BADRT    0x010a /* Resume token not recognized */
+#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */
+
+/* request or response block header length */
+#define LIST_PCI_HDR_LEN 32
+
+/* Number of function handles fitting in response block */
+#define CLP_FH_LIST_NR_ENTRIES \
+    ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \
+        / sizeof(ClpFhListEntry))
+
+#define CLP_SET_ENABLE_PCI_FN  0 /* Yes, 0 enables it */
+#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
+
+#define CLP_UTIL_STR_LEN 64
+
+#define CLP_MASK_FMT 0xf0000000
+
+/* List PCI functions request */
+typedef struct ClpReqListPci {
+    ClpReqHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint64_t resume_token;
+    uint64_t reserved2;
+} QEMU_PACKED ClpReqListPci;
+
+/* List PCI functions response */
+typedef struct ClpRspListPci {
+    ClpRspHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint64_t resume_token;
+    uint32_t mdd;
+    uint16_t max_fn;
+    uint8_t reserved2;
+    uint8_t entry_size;
+    ClpFhListEntry fh_list[CLP_FH_LIST_NR_ENTRIES];
+} QEMU_PACKED ClpRspListPci;
+
+/* Query PCI function request */
+typedef struct ClpReqQueryPci {
+    ClpReqHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint32_t fh; /* function handle */
+    uint32_t reserved2;
+    uint64_t reserved3;
+} QEMU_PACKED ClpReqQueryPci;
+
+/* Query PCI function response */
+typedef struct ClpRspQueryPci {
+    ClpRspHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint16_t vfn; /* virtual fn number */
+#define CLP_RSP_QPCI_MASK_UTIL  0x100
+#define CLP_RSP_QPCI_MASK_PFGID 0xff
+    uint16_t ug;
+    uint32_t fid; /* pci function id */
+    uint8_t bar_size[PCI_BAR_COUNT];
+    uint16_t pchid;
+    uint32_t bar[PCI_BAR_COUNT];
+    uint64_t reserved2;
+    uint64_t sdma; /* start dma as */
+    uint64_t edma; /* end dma as */
+    uint32_t reserved3[11];
+    uint32_t uid;
+    uint8_t util_str[CLP_UTIL_STR_LEN]; /* utility string */
+} QEMU_PACKED ClpRspQueryPci;
+
+/* Query PCI function group request */
+typedef struct ClpReqQueryPciGrp {
+    ClpReqHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+#define CLP_REQ_QPCIG_MASK_PFGID 0xff
+    uint32_t g;
+    uint32_t reserved2;
+    uint64_t reserved3;
+} QEMU_PACKED ClpReqQueryPciGrp;
+
+/* Query PCI function group response */
+typedef struct ClpRspQueryPciGrp {
+    ClpRspHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+#define CLP_RSP_QPCIG_MASK_NOI 0xfff
+    uint16_t i;
+    uint8_t version;
+#define CLP_RSP_QPCIG_MASK_FRAME   0x2
+#define CLP_RSP_QPCIG_MASK_REFRESH 0x1
+    uint8_t fr;
+    uint16_t reserved2;
+    uint16_t mui;
+    uint64_t reserved3;
+    uint64_t dasm; /* dma address space mask */
+    uint64_t msia; /* MSI address */
+    uint64_t reserved4;
+    uint64_t reserved5;
+} QEMU_PACKED ClpRspQueryPciGrp;
+
+/* Set PCI function request */
+typedef struct ClpReqSetPci {
+    ClpReqHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint32_t fh; /* function handle */
+    uint16_t reserved2;
+    uint8_t oc; /* operation controls */
+    uint8_t ndas; /* number of dma spaces */
+    uint64_t reserved3;
+} QEMU_PACKED ClpReqSetPci;
+
+/* Set PCI function response */
+typedef struct ClpRspSetPci {
+    ClpRspHdr hdr;
+    uint32_t fmt;
+    uint64_t reserved1;
+    uint32_t fh; /* function handle */
+    uint32_t reserved3;
+    uint64_t reserved4;
+} QEMU_PACKED ClpRspSetPci;
+
+typedef struct ClpReqRspListPci {
+    ClpReqListPci request;
+    ClpRspListPci response;
+} QEMU_PACKED ClpReqRspListPci;
+
+typedef struct ClpReqRspSetPci {
+    ClpReqSetPci request;
+    ClpRspSetPci response;
+} QEMU_PACKED ClpReqRspSetPci;
+
+typedef struct ClpReqRspQueryPci {
+    ClpReqQueryPci request;
+    ClpRspQueryPci response;
+} QEMU_PACKED ClpReqRspQueryPci;
+
+typedef struct ClpReqRspQueryPciGrp {
+    ClpReqQueryPciGrp request;
+    ClpRspQueryPciGrp response;
+} QEMU_PACKED ClpReqRspQueryPciGrp;
+
+/* Load/Store status codes */
+#define ZPCI_PCI_ST_FUNC_NOT_ENABLED        4
+#define ZPCI_PCI_ST_FUNC_IN_ERR             8
+#define ZPCI_PCI_ST_BLOCKED                 12
+#define ZPCI_PCI_ST_INSUF_RES               16
+#define ZPCI_PCI_ST_INVAL_AS                20
+#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED    24
+#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED      28
+#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS        36
+#define ZPCI_PCI_ST_FUNC_NOT_AVAIL          40
+#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE     44
+
+/* Load/Store return codes */
+#define ZPCI_PCI_LS_OK              0
+#define ZPCI_PCI_LS_ERR             1
+#define ZPCI_PCI_LS_BUSY            2
+#define ZPCI_PCI_LS_INVAL_HANDLE    3
+
+/* Modify PCI Function Controls */
+#define ZPCI_MOD_FC_REG_INT     2
+#define ZPCI_MOD_FC_DEREG_INT   3
+#define ZPCI_MOD_FC_REG_IOAT    4
+#define ZPCI_MOD_FC_DEREG_IOAT  5
+#define ZPCI_MOD_FC_REREG_IOAT  6
+#define ZPCI_MOD_FC_RESET_ERROR 7
+#define ZPCI_MOD_FC_RESET_BLOCK 9
+#define ZPCI_MOD_FC_SET_MEASURE 10
+
+/* FIB function controls */
+#define ZPCI_FIB_FC_ENABLED     0x80
+#define ZPCI_FIB_FC_ERROR       0x40
+#define ZPCI_FIB_FC_LS_BLOCKED  0x20
+#define ZPCI_FIB_FC_DMAAS_REG   0x10
+
+/* FIB function controls */
+#define ZPCI_FIB_FC_ENABLED     0x80
+#define ZPCI_FIB_FC_ERROR       0x40
+#define ZPCI_FIB_FC_LS_BLOCKED  0x20
+#define ZPCI_FIB_FC_DMAAS_REG   0x10
+
+/* Function Information Block */
+typedef struct ZpciFib {
+    uint8_t fmt;   /* format */
+    uint8_t reserved1[7];
+    uint8_t fc;                  /* function controls */
+    uint8_t reserved2;
+    uint16_t reserved3;
+    uint32_t reserved4;
+    uint64_t pba;                /* PCI base address */
+    uint64_t pal;                /* PCI address limit */
+    uint64_t iota;               /* I/O Translation Anchor */
+#define FIB_DATA_ISC(x)    (((x) >> 28) & 0x7)
+#define FIB_DATA_NOI(x)    (((x) >> 16) & 0xfff)
+#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f)
+#define FIB_DATA_SUM(x)    (((x) >> 7) & 0x1)
+#define FIB_DATA_AISBO(x)  ((x) & 0x3f)
+    uint32_t data;
+    uint32_t reserved5;
+    uint64_t aibv;               /* Adapter int bit vector address */
+    uint64_t aisb;               /* Adapter int summary bit address */
+    uint64_t fmb_addr;           /* Function measurement address and key */
+    uint32_t reserved6;
+    uint32_t gd;
+} QEMU_PACKED ZpciFib;
+
+int clp_service_call(S390CPU *cpu, uint8_t r2);
+int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2);
+int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2);
+int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2);
+int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr);
+int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba);
+int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba);
+
+#endif
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index bc4dc2ae8a..71bafe06ee 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -18,6 +18,7 @@
 #include "css.h"
 #include "virtio-ccw.h"
 #include "qemu/config-file.h"
+#include "s390-pci-bus.h"
 
 #define TYPE_S390_CCW_MACHINE               "s390-ccw-machine"
 
@@ -91,6 +92,7 @@ static void ccw_init(MachineState *machine)
     uint8_t *storage_keys;
     int ret;
     VirtualCssBus *css_bus;
+    DeviceState *dev;
     QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory"), NULL);
     ram_addr_t pad_size = 0;
     ram_addr_t maxmem = qemu_opt_get_size(opts, "maxmem", my_ram_size);
@@ -127,6 +129,11 @@ static void ccw_init(MachineState *machine)
                       machine->initrd_filename, "s390-ccw.img");
     s390_flic_init();
 
+    dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
+    object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
+                              OBJECT(dev), NULL);
+    qdev_init_nofail(dev);
+
     /* register hypercalls */
     virtio_ccw_register_hcalls();
 
@@ -181,7 +188,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
     mc->no_serial = 1;
     mc->no_parallel = 1;
     mc->no_sdcard = 1;
-    mc->use_sclp = 1,
+    mc->use_sclp = 1;
     mc->max_cpus = 255;
     nc->nmi_monitor_handler = s390_nmi;
 }
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index a759da7f34..a969975a78 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -20,6 +20,7 @@
 #include "qemu/config-file.h"
 #include "hw/s390x/sclp.h"
 #include "hw/s390x/event-facility.h"
+#include "hw/s390x/s390-pci-bus.h"
 
 static inline SCLPEventFacility *get_event_facility(void)
 {
@@ -62,7 +63,8 @@ static void read_SCP_info(SCCB *sccb)
         read_info->entries[i].type = 0;
     }
 
-    read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO);
+    read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO |
+                                        SCLP_HAS_PCI_RECONFIG);
 
     /*
      * The storage increment size is a multiple of 1M and is a power of 2.
@@ -350,6 +352,12 @@ static void sclp_execute(SCCB *sccb, uint32_t code)
     case SCLP_UNASSIGN_STORAGE:
         unassign_storage(sccb);
         break;
+    case SCLP_CMDW_CONFIGURE_PCI:
+        s390_pci_sclp_configure(1, sccb);
+        break;
+    case SCLP_CMDW_DECONFIGURE_PCI:
+        s390_pci_sclp_configure(0, sccb);
+        break;
     default:
         efc->command_handler(ef, sccb, code);
         break;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index b4e73d1f35..014a92ce5f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2129,16 +2129,19 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
  */
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev)
 {
-    switch (vdev->interrupt) {
-    case VFIO_INT_INTx:
-        vfio_disable_intx(vdev);
-        break;
-    case VFIO_INT_MSI:
-        vfio_disable_msi(vdev);
-        break;
-    case VFIO_INT_MSIX:
+    /*
+     * More complicated than it looks.  Disabling MSI/X transitions the
+     * device to INTx mode (if supported).  Therefore we need to first
+     * disable MSI/X and then clean up by disabling INTx.
+     */
+    if (vdev->interrupt == VFIO_INT_MSIX) {
         vfio_disable_msix(vdev);
-        break;
+    } else if (vdev->interrupt == VFIO_INT_MSI) {
+        vfio_disable_msi(vdev);
+    }
+
+    if (vdev->interrupt == VFIO_INT_INTx) {
+        vfio_disable_intx(vdev);
     }
 }
 
@@ -2301,7 +2304,7 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr)
 static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
 {
     VFIOBAR *bar = &vdev->bars[nr];
-    unsigned size = bar->region.size;
+    uint64_t size = bar->region.size;
     char name[64];
     uint32_t pci_bar;
     uint8_t type;
@@ -2351,7 +2354,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
     }
 
     if (vdev->msix && vdev->msix->table_bar == nr) {
-        unsigned start;
+        uint64_t start;
 
         start = HOST_PAGE_ALIGN(vdev->msix->table_offset +
                                 (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index c1bf357154..f2893b28aa 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -736,7 +736,7 @@ static int xen_pt_initfn(PCIDevice *d)
     }
 
 out:
-    memory_listener_register(&s->memory_listener, &address_space_memory);
+    memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
     memory_listener_register(&s->io_listener, &address_space_io);
     XEN_PT_LOG(d,
                "Real physical device %02x:%02x.%d registered successfully!\n",
diff --git a/include/block/block.h b/include/block/block.h
index 6e7275d95b..3082d2b80e 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -133,7 +133,8 @@ typedef enum BlockOpType {
     BLOCK_OP_TYPE_BACKUP_SOURCE,
     BLOCK_OP_TYPE_BACKUP_TARGET,
     BLOCK_OP_TYPE_CHANGE,
-    BLOCK_OP_TYPE_COMMIT,
+    BLOCK_OP_TYPE_COMMIT_SOURCE,
+    BLOCK_OP_TYPE_COMMIT_TARGET,
     BLOCK_OP_TYPE_DATAPLANE,
     BLOCK_OP_TYPE_DRIVE_DEL,
     BLOCK_OP_TYPE_EJECT,
@@ -396,7 +397,11 @@ const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
                                char *filename, int filename_size);
 void bdrv_get_full_backing_filename(BlockDriverState *bs,
-                                    char *dest, size_t sz);
+                                    char *dest, size_t sz, Error **errp);
+void bdrv_get_full_backing_filename_from_filename(const char *backed,
+                                                  const char *backing,
+                                                  char *dest, size_t sz,
+                                                  Error **errp);
 int bdrv_is_snapshot(BlockDriverState *bs);
 
 int path_has_protocol(const char *path);
@@ -434,8 +439,10 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int nr_sectors);
 void bdrv_dirty_iter_init(BlockDriverState *bs,
                           BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
 int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index 793df0ef8b..20c027a7fd 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -216,14 +216,4 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
  */
 void coroutine_fn yield_until_fd_readable(int fd);
 
-/**
- * Add or subtract from the coroutine pool size
- *
- * The coroutine implementation keeps a pool of coroutines to be reused by
- * qemu_coroutine_create().  This makes coroutine creation cheap.  Heavy
- * coroutine users should call this to reserve pool space.  Call it again with
- * a negative number to release pool space.
- */
-void qemu_coroutine_adjust_pool_size(int n);
-
 #endif /* QEMU_COROUTINE_H */
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index f0ce18725c..5fdd2fee80 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -299,11 +299,15 @@ CPUArchState *cpu_copy(CPUArchState *env);
 
 /* memory API */
 
-typedef struct RAMBlock {
+typedef struct RAMBlock RAMBlock;
+
+struct RAMBlock {
     struct MemoryRegion *mr;
     uint8_t *host;
     ram_addr_t offset;
-    ram_addr_t length;
+    ram_addr_t used_length;
+    ram_addr_t max_length;
+    void (*resized)(const char*, uint64_t length, void *host);
     uint32_t flags;
     char idstr[256];
     /* Reads can take either the iothread or the ramlist lock.
@@ -311,11 +315,11 @@ typedef struct RAMBlock {
      */
     QTAILQ_ENTRY(RAMBlock) next;
     int fd;
-} RAMBlock;
+};
 
 static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
 {
-    assert(offset < block->length);
+    assert(offset < block->used_length);
     assert(block->host);
     return (char *)block->host + offset;
 }
diff --git a/include/exec/memory.h b/include/exec/memory.h
index f64ab5e3e5..0cd96b152e 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -321,6 +321,30 @@ void memory_region_init_ram(MemoryRegion *mr,
                             uint64_t size,
                             Error **errp);
 
+/**
+ * memory_region_init_resizeable_ram:  Initialize memory region with resizeable
+ *                                     RAM.  Accesses into the region will
+ *                                     modify memory directly.  Only an initial
+ *                                     portion of this RAM is actually used.
+ *                                     The used size can change across reboots.
+ *
+ * @mr: the #MemoryRegion to be initialized.
+ * @owner: the object that tracks the region's reference count
+ * @name: the name of the region.
+ * @size: used size of the region.
+ * @max_size: max size of the region.
+ * @resized: callback to notify owner about used size change.
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+void memory_region_init_resizeable_ram(MemoryRegion *mr,
+                                       struct Object *owner,
+                                       const char *name,
+                                       uint64_t size,
+                                       uint64_t max_size,
+                                       void (*resized)(const char*,
+                                                       uint64_t length,
+                                                       void *host),
+                                       Error **errp);
 #ifdef __linux__
 /**
  * memory_region_init_ram_from_file:  Initialize RAM memory region with a
@@ -878,6 +902,16 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled);
 void memory_region_set_address(MemoryRegion *mr, hwaddr addr);
 
 /*
+ * memory_region_set_size: dynamically update the size of a region.
+ *
+ * Resizes @mr inside a memory transaction; a no-op if the size is unchanged.
+ *
+ * @mr: the region to be updated
+ * @size: new size of the region; UINT64_MAX selects a size of 2^64.
+ */
+void memory_region_set_size(MemoryRegion *mr, uint64_t size);
+
+/*
  * memory_region_set_alias_offset: dynamically update a memory alias's offset
  *
  * Dynamically updates the offset into the target region that an alias points
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 8fc75cdd2b..ff558a4734 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -28,12 +28,19 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr, Error **errp);
 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp);
+ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
+                                     void (*resized)(const char*,
+                                                     uint64_t length,
+                                                     void *host),
+                                     MemoryRegion *mr, Error **errp);
 int qemu_get_ram_fd(ram_addr_t addr);
 void *qemu_get_ram_block_host_ptr(ram_addr_t addr);
 void *qemu_get_ram_ptr(ram_addr_t addr);
 void qemu_ram_free(ram_addr_t addr);
 void qemu_ram_free_from_ptr(ram_addr_t addr);
 
+int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp);
+
 static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                  ram_addr_t length,
                                                  unsigned client)
@@ -172,9 +179,9 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
 }
 #endif /* not _WIN32 */
 
-static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
-                                                         ram_addr_t length,
-                                                         unsigned client)
+static inline void cpu_physical_memory_clear_dirty_range_type(ram_addr_t start,
+                                                              ram_addr_t length,
+                                                              unsigned client)
 {
     unsigned long end, page;
 
@@ -184,6 +191,15 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
     bitmap_clear(ram_list.dirty_memory[client], page, end - page);
 }
 
+static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
+                                                         ram_addr_t length)
+{
+    cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_MIGRATION);
+    cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_VGA);
+    cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_CODE);
+}
+
+
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
                                      unsigned client);
 
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 899762019f..8593045d04 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -69,7 +69,7 @@ int rom_add_file(const char *file, const char *fw_dir,
                  hwaddr addr, int32_t bootindex,
                  bool option_rom);
 ram_addr_t rom_add_blob(const char *name, const void *blob, size_t len,
-                   hwaddr addr, const char *fw_file_name,
+                   size_t max_len, hwaddr addr, const char *fw_file_name,
                    FWCfgReadCallback fw_callback, void *callback_opaque);
 int rom_add_elf_program(const char *name, void *data, size_t datasize,
                         size_t romsize, hwaddr addr);
@@ -83,7 +83,7 @@ void do_info_roms(Monitor *mon, const QDict *qdict);
 #define rom_add_file_fixed(_f, _a, _i)          \
     rom_add_file(_f, NULL, _a, _i, false)
 #define rom_add_blob_fixed(_f, _b, _l, _a)      \
-    rom_add_blob(_f, _b, _l, _a, NULL, NULL, NULL)
+    rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL)
 
 #define PC_ROM_MIN_VGA     0xc0000
 #define PC_ROM_MIN_OPTION  0xc8000
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 749daf4dd7..716bff43bf 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -37,6 +37,7 @@ typedef struct sPAPREnvironment {
     int htab_save_index;
     bool htab_first_pass;
     int htab_fd;
+    bool htab_fd_stale;
 } sPAPREnvironment;
 
 #define H_SUCCESS         0
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index ec07a118f2..e8a64e25b7 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -43,14 +43,22 @@
 #define SCLP_CMDW_CONFIGURE_CPU                 0x00110001
 #define SCLP_CMDW_DECONFIGURE_CPU               0x00100001
 
+/* SCLP PCI codes */
+#define SCLP_HAS_PCI_RECONFIG                   0x0000000040000000ULL
+#define SCLP_CMDW_CONFIGURE_PCI                 0x001a0001
+#define SCLP_CMDW_DECONFIGURE_PCI               0x001b0001
+#define SCLP_RECONFIG_PCI_ATPYE                 2
+
 /* SCLP response codes */
 #define SCLP_RC_NORMAL_READ_COMPLETION          0x0010
 #define SCLP_RC_NORMAL_COMPLETION               0x0020
 #define SCLP_RC_SCCB_BOUNDARY_VIOLATION         0x0100
+#define SCLP_RC_NO_ACTION_REQUIRED              0x0120
 #define SCLP_RC_INVALID_SCLP_COMMAND            0x01f0
 #define SCLP_RC_CONTAINED_EQUIPMENT_CHECK       0x0340
 #define SCLP_RC_INSUFFICIENT_SCCB_LENGTH        0x0300
 #define SCLP_RC_STANDBY_READ_COMPLETION         0x0410
+#define SCLP_RC_ADAPTER_ID_NOT_RECOGNIZED       0x09f0
 #define SCLP_RC_INVALID_FUNCTION                0x40f0
 #define SCLP_RC_NO_EVENT_BUFFERS_STORED         0x60f0
 #define SCLP_RC_INVALID_SELECTION_MASK          0x70f0
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index 42bcadfbb1..a98eb3ad79 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -191,8 +191,19 @@ struct {                                                                \
 } while (/*CONSTCOND*/0)
 
 #define QSLIST_INSERT_HEAD(head, elm, field) do {                        \
-        (elm)->field.sle_next = (head)->slh_first;                      \
-        (head)->slh_first = (elm);                                      \
+        (elm)->field.sle_next = (head)->slh_first;                       \
+        (head)->slh_first = (elm);                                       \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do {                   \
+        do {                                                               \
+            (elm)->field.sle_next = (head)->slh_first;                     \
+        } while (atomic_cmpxchg(&(head)->slh_first, (elm)->field.sle_next, \
+                               (elm)) != (elm)->field.sle_next);           \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_MOVE_ATOMIC(dest, src) do {                               \
+        (dest)->slh_first = atomic_xchg(&(src)->slh_first, NULL);        \
 } while (/*CONSTCOND*/0)
 
 #define QSLIST_REMOVE_HEAD(head, field) do {                             \
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index f7e3b9b290..e89fdc9785 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -61,4 +61,8 @@ bool qemu_thread_is_self(QemuThread *thread);
 void qemu_thread_exit(void *retval);
 void qemu_thread_naming(bool enable);
 
+struct Notifier;
+void qemu_thread_atexit_add(struct Notifier *notifier);
+void qemu_thread_atexit_remove(struct Notifier *notifier);
+
 #endif
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 104cf3535e..30cb84d2b8 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -158,6 +158,7 @@ extern bool kvm_readonly_mem_allowed;
 
 struct kvm_run;
 struct kvm_lapic_state;
+struct kvm_irq_routing_entry;
 
 typedef struct KVMCapabilityInfo {
     const char *name;
@@ -270,6 +271,9 @@ int kvm_arch_on_sigbus(int code, void *addr);
 
 void kvm_arch_init_irq_routing(KVMState *s);
 
+int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
+                             uint64_t address, uint32_t data);
+
 int kvm_set_irq(KVMState *s, int irq, int level);
 int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg);
 
diff --git a/kvm-all.c b/kvm-all.c
index 18cc6b4d3d..2f21a4e6fe 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1225,6 +1225,10 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
     kroute.u.msi.address_lo = (uint32_t)msg.address;
     kroute.u.msi.address_hi = msg.address >> 32;
     kroute.u.msi.data = le32_to_cpu(msg.data);
+    if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data)) {
+        kvm_irqchip_release_virq(s, virq);
+        return -EINVAL;
+    }
 
     kvm_add_routing_entry(s, &kroute);
     kvm_irqchip_commit_routes(s);
@@ -1250,6 +1254,9 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
     kroute.u.msi.address_lo = (uint32_t)msg.address;
     kroute.u.msi.address_hi = msg.address >> 32;
     kroute.u.msi.data = le32_to_cpu(msg.data);
+    if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data)) {
+        return -EINVAL;
+    }
 
     return kvm_update_routing_entry(s, &kroute);
 }
diff --git a/memory.c b/memory.c
index 15cf9ebd84..c343bf37df 100644
--- a/memory.c
+++ b/memory.c
@@ -1152,6 +1152,23 @@ void memory_region_init_ram(MemoryRegion *mr,
     mr->ram_addr = qemu_ram_alloc(size, mr, errp);
 }
 
+void memory_region_init_resizeable_ram(MemoryRegion *mr,
+                                       Object *owner,
+                                       const char *name,
+                                       uint64_t size,
+                                       uint64_t max_size,
+                                       void (*resized)(const char*,
+                                                       uint64_t length,
+                                                       void *host),
+                                       Error **errp)
+{
+    memory_region_init(mr, owner, name, size);
+    mr->ram = true;
+    mr->terminates = true;
+    mr->destructor = memory_region_destructor_ram;
+    mr->ram_addr = qemu_ram_alloc_resizeable(size, max_size, resized, mr, errp);
+}
+
 #ifdef __linux__
 void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       struct Object *owner,
@@ -1707,6 +1724,22 @@ void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
     memory_region_transaction_commit();
 }
 
+void memory_region_set_size(MemoryRegion *mr, uint64_t size)
+{
+    Int128 s = int128_make64(size);
+
+    if (size == UINT64_MAX) {
+        s = int128_2_64();
+    }
+    if (int128_eq(s, mr->size)) {
+        return;
+    }
+    memory_region_transaction_begin();
+    mr->size = s;
+    memory_region_update_pending = true;
+    memory_region_transaction_commit();
+}
+
 static void memory_region_readd_subregion(MemoryRegion *mr)
 {
     MemoryRegion *container = mr->container;
diff --git a/migration/block.c b/migration/block.c
index 74d9eb125c..0c7610600b 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -303,7 +303,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                 nr_sectors, blk_mig_read_cb, blk);
 
-    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
+    bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors);
     qemu_mutex_unlock_iothread();
 
     bmds->cur_sector = cur_sector + nr_sectors;
@@ -496,7 +496,8 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                 g_free(blk);
             }
 
-            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
+            bdrv_reset_dirty_bitmap(bmds->bs, bmds->dirty_bitmap, sector,
+                                    nr_sectors);
             break;
         }
         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -765,8 +766,8 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
                        block_mig_state.read_done * BLOCK_SIZE;
 
     /* Report at least one block pending during bulk phase */
-    if (pending == 0 && !block_mig_state.bulk_completed) {
-        pending = BLOCK_SIZE;
+    if (pending <= max_size && !block_mig_state.bulk_completed) {
+        pending = max_size + BLOCK_SIZE;
     }
     blk_mig_unlock();
     qemu_mutex_unlock_iothread();
diff --git a/pc-bios/README b/pc-bios/README
index edfadd7d38..8a85e69d3b 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -17,7 +17,7 @@
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
   https://github.com/aik/SLOF, and the image currently in qemu is
-  built from git tag qemu-slof-20140630.
+  built from git tag qemu-slof-20141202.
 
 - sgabios (the Serial Graphics Adapter option ROM) provides a means for
   legacy x86 software to communicate with an attached serial console as
diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin
index 69b0a5dbc3..031e3063a2 100644
--- a/pc-bios/slof.bin
+++ b/pc-bios/slof.bin
diff --git a/qapi-schema.json b/qapi-schema.json
index 563b4ad98a..fbfc52f94d 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1254,11 +1254,19 @@
 #
 # A discriminated record of operations that can be performed with
 # @transaction.
+#
+# Since: 1.1
+#
+# drive-backup since 1.6
+# abort since 1.6
+# blockdev-snapshot-internal-sync since 1.7
+# blockdev-backup since 2.3
 ##
 { 'union': 'TransactionAction',
   'data': {
        'blockdev-snapshot-sync': 'BlockdevSnapshot',
        'drive-backup': 'DriveBackup',
+       'blockdev-backup': 'BlockdevBackup',
        'abort': 'Abort',
        'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal'
    } }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6e8db15861..80984d1660 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -423,7 +423,7 @@
 # @device: #optional If the stats are for a virtual block device, the name
 #          corresponding to the virtual block device.
 #
-# @device: #optional The node name of the device. (Since 2.3)
+# @node-name: #optional The node name of the device. (Since 2.3)
 #
 # @stats:  A @BlockDeviceStats for the device.
 #
@@ -703,6 +703,41 @@
             '*on-target-error': 'BlockdevOnError' } }
 
 ##
+# @BlockdevBackup
+#
+# @device: the name of the device which should be copied.
+#
+# @target: the name of the backup target device.
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# @speed: #optional the maximum speed, in bytes per second. The default is 0,
+#         for unlimited.
+#
+# @on-source-error: #optional the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
+# Note that @on-source-error and @on-target-error only affect background I/O.
+# If an error occurs during a guest write request, the device's rerror/werror
+# actions will be used.
+#
+# Since: 2.3
+##
+{ 'type': 'BlockdevBackup',
+  'data': { 'device': 'str', 'target': 'str',
+            'sync': 'MirrorSyncMode',
+            '*speed': 'int',
+            '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError' } }
+
+##
 # @blockdev-snapshot-sync
 #
 # Generates a synchronous snapshot of a block device.
@@ -822,6 +857,25 @@
 { 'command': 'drive-backup', 'data': 'DriveBackup' }
 
 ##
+# @blockdev-backup
+#
+# Start a point-in-time copy of a block device to a new destination.  The
+# status of ongoing blockdev-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
+# The operation can be stopped before it has completed using the
+# block-job-cancel command.
+#
+# For the arguments, see the documentation of BlockdevBackup.
+#
+# Returns: Nothing on success.
+#          If @device or @target is not a valid block device, DeviceNotFound.
+#
+# Since: 2.3
+##
+{ 'command': 'blockdev-backup', 'data': 'BlockdevBackup' }
+
+
+##
 # @query-named-block-nodes
 #
 # Get the named block driver list
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
index bd574aa1b5..525247b050 100644
--- a/qemu-coroutine.c
+++ b/qemu-coroutine.c
@@ -15,31 +15,59 @@
 #include "trace.h"
 #include "qemu-common.h"
 #include "qemu/thread.h"
+#include "qemu/atomic.h"
 #include "block/coroutine.h"
 #include "block/coroutine_int.h"
 
 enum {
-    POOL_DEFAULT_SIZE = 64,
+    POOL_BATCH_SIZE = 64,
 };
 
 /** Free list to speed up creation */
-static QemuMutex pool_lock;
-static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_size;
-static unsigned int pool_max_size = POOL_DEFAULT_SIZE;
+static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int release_pool_size;
+static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
+static __thread unsigned int alloc_pool_size;
+static __thread Notifier coroutine_pool_cleanup_notifier;
+
+static void coroutine_pool_cleanup(Notifier *n, void *value)
+{
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
+        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+        qemu_coroutine_delete(co);
+    }
+}
 
 Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
 {
     Coroutine *co = NULL;
 
     if (CONFIG_COROUTINE_POOL) {
-        qemu_mutex_lock(&pool_lock);
-        co = QSLIST_FIRST(&pool);
+        co = QSLIST_FIRST(&alloc_pool);
+        if (!co) {
+            if (release_pool_size > POOL_BATCH_SIZE) {
+                /* Slow path; a good place to register the destructor, too.  */
+                if (!coroutine_pool_cleanup_notifier.notify) {
+                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
+                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+                }
+
+                /* This is not exact; there could be a little skew between
+                 * release_pool_size and the actual size of release_pool.  But
+                 * it is just a heuristic, it does not need to be perfect.
+                 */
+                alloc_pool_size = atomic_xchg(&release_pool_size, 0);
+                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
+                co = QSLIST_FIRST(&alloc_pool);
+            }
+        }
         if (co) {
-            QSLIST_REMOVE_HEAD(&pool, pool_next);
-            pool_size--;
+            QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+            alloc_pool_size--;
         }
-        qemu_mutex_unlock(&pool_lock);
     }
 
     if (!co) {
@@ -53,39 +81,24 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
 
 static void coroutine_delete(Coroutine *co)
 {
+    co->caller = NULL;
+
     if (CONFIG_COROUTINE_POOL) {
-        qemu_mutex_lock(&pool_lock);
-        if (pool_size < pool_max_size) {
-            QSLIST_INSERT_HEAD(&pool, co, pool_next);
-            co->caller = NULL;
-            pool_size++;
-            qemu_mutex_unlock(&pool_lock);
+        if (release_pool_size < POOL_BATCH_SIZE * 2) {
+            QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
+            atomic_inc(&release_pool_size);
+            return;
+        }
+        if (alloc_pool_size < POOL_BATCH_SIZE) {
+            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
+            alloc_pool_size++;
             return;
         }
-        qemu_mutex_unlock(&pool_lock);
     }
 
     qemu_coroutine_delete(co);
 }
 
-static void __attribute__((constructor)) coroutine_pool_init(void)
-{
-    qemu_mutex_init(&pool_lock);
-}
-
-static void __attribute__((destructor)) coroutine_pool_cleanup(void)
-{
-    Coroutine *co;
-    Coroutine *tmp;
-
-    QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
-        QSLIST_REMOVE_HEAD(&pool, pool_next);
-        qemu_coroutine_delete(co);
-    }
-
-    qemu_mutex_destroy(&pool_lock);
-}
-
 static void coroutine_swap(Coroutine *from, Coroutine *to)
 {
     CoroutineAction ret;
@@ -137,23 +150,3 @@ void coroutine_fn qemu_coroutine_yield(void)
     self->caller = NULL;
     coroutine_swap(self, to);
 }
-
-void qemu_coroutine_adjust_pool_size(int n)
-{
-    qemu_mutex_lock(&pool_lock);
-
-    pool_max_size += n;
-
-    /* Callers should never take away more than they added */
-    assert(pool_max_size >= POOL_DEFAULT_SIZE);
-
-    /* Trim oversized pool down to new max */
-    while (pool_size > pool_max_size) {
-        Coroutine *co = QSLIST_FIRST(&pool);
-        QSLIST_REMOVE_HEAD(&pool, pool_next);
-        pool_size--;
-        qemu_coroutine_delete(co);
-    }
-
-    qemu_mutex_unlock(&pool_lock);
-}
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index af6a375127..b0c626984f 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -235,7 +235,8 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
     { SCMP_SYS(fallocate), 240 },
     { SCMP_SYS(fadvise64), 240 },
     { SCMP_SYS(inotify_init1), 240 },
-    { SCMP_SYS(inotify_add_watch), 240 }
+    { SCMP_SYS(inotify_add_watch), 240 },
+    { SCMP_SYS(mbind), 240 }
 };
 
 int seccomp_start(void)
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6945d30198..8957201f73 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1094,6 +1094,48 @@ Example:
                                                "sync": "full",
                                                "target": "backup.img" } }
 <- { "return": {} }
+
+EQMP
+
+    {
+        .name       = "blockdev-backup",
+        .args_type  = "sync:s,device:B,target:B,speed:i?,"
+                      "on-source-error:s?,on-target-error:s?",
+        .mhandler.cmd_new = qmp_marshal_input_blockdev_backup,
+    },
+
+SQMP
+blockdev-backup
+---------------
+
+The device version of drive-backup: this command takes an existing named device
+as backup target.
+
+Arguments:
+
+- "device": the name of the device which should be copied.
+            (json-string)
+- "target": the name of the backup target device. (json-string)
+- "sync": what parts of the disk image should be copied to the destination;
+          possibilities include "full" for all the disk, "top" for only the
+          sectors allocated in the topmost image, or "none" to only replicate
+          new I/O (MirrorSyncMode).
+- "speed": the maximum speed, in bytes per second (json-int, optional)
+- "on-source-error": the action to take on an error on the source, default
+                     'report'.  'stop' and 'enospc' can only be used
+                     if the block device supports io-status.
+                     (BlockdevOnError, optional)
+- "on-target-error": the action to take on an error on the target, default
+                     'report' (no limitations, since this applies to
+                     a different block device than device).
+                     (BlockdevOnError, optional)
+
+Example:
+-> { "execute": "blockdev-backup", "arguments": { "device": "src-id",
+                                                  "sync": "full",
+                                                  "target": "tgt-id" } }
+<- { "return": {} }
+
 EQMP
 
     {
diff --git a/roms/SLOF b/roms/SLOF
-Subproject f284ab3f03ae69a20e1ae966f6ddf76da33cbf7
+Subproject a70dbda2e21f6e438b3617c44ff180c3418dc30
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 053e4320fc..5df61f9aa9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1639,7 +1639,13 @@ sub process {
 			#print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
 			#print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
 
-			if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
+			# The length of the "previous line" is checked against 80 because it
+			# includes the + at the beginning of the line (if the actual line has
+			# 79 or 80 characters, it is no longer possible to add a space and an
+			# opening brace there)
+			if ($#ctx == 0 && $ctx !~ /{\s*/ &&
+			    defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/ &&
+			    defined($lines[$ctx_ln - 2]) && length($lines[$ctx_ln - 2]) < 80) {
 				ERROR("that open brace { should be on the previous line\n" .
 					"$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
 			}
@@ -2542,7 +2548,10 @@ sub process {
 
 					substr($block, 0, length($cond), '');
 
-					$seen++ if ($block =~ /^\s*{/);
+					my $spaced_block = $block;
+					$spaced_block =~ s/\n\+/ /g;
+
+					$seen++ if ($spaced_block =~ /^\s*{/);
 
                                         print "APW: cond<$cond> block<$block> allowed<$allowed>\n"
                                             if $dbg_adv_apw;
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 4d81f3d765..23cefe98b4 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -548,3 +548,9 @@ int kvm_arch_irqchip_create(KVMState *s)
 
     return 0;
 }
+
+int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
+                             uint64_t address, uint32_t data)
+{
+    return 0;
+}
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index cf9f3319c2..36b1519f34 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -2751,3 +2751,9 @@ int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id)
     return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSIX |
                                                 KVM_DEV_IRQ_HOST_MSIX);
 }
+
+int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
+                             uint64_t address, uint32_t data)
+{
+    return 0;
+}
diff --git a/target-mips/kvm.c b/target-mips/kvm.c
index a761ea5b32..b68191c88e 100644
--- a/target-mips/kvm.c
+++ b/target-mips/kvm.c
@@ -688,3 +688,9 @@ int kvm_arch_get_registers(CPUState *cs)
 
     return ret;
 }
+
+int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
+                             uint64_t address, uint32_t data)
+{
+    return 0;
+}
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index f42589c478..c62097bb8a 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -559,6 +559,26 @@ struct ppc_slb_t {
 #define ESR_VLEMI (1 << (63 - 58)) /* VLE operation                          */
 #define ESR_MIF   (1 << (63 - 62)) /* Misaligned instruction (VLE)           */
 
+/* Transaction EXception And Summary Register bits                           */
+#define TEXASR_FAILURE_PERSISTENT                (63 - 7)
+#define TEXASR_DISALLOWED                        (63 - 8)
+#define TEXASR_NESTING_OVERFLOW                  (63 - 9)
+#define TEXASR_FOOTPRINT_OVERFLOW                (63 - 10)
+#define TEXASR_SELF_INDUCED_CONFLICT             (63 - 11)
+#define TEXASR_NON_TRANSACTIONAL_CONFLICT        (63 - 12)
+#define TEXASR_TRANSACTION_CONFLICT              (63 - 13)
+#define TEXASR_TRANSLATION_INVALIDATION_CONFLICT (63 - 14)
+#define TEXASR_IMPLEMENTATION_SPECIFIC           (63 - 15)
+#define TEXASR_INSTRUCTION_FETCH_CONFLICT        (63 - 16)
+#define TEXASR_ABORT                             (63 - 31)
+#define TEXASR_SUSPENDED                         (63 - 32)
+#define TEXASR_PRIVILEGE_HV                      (63 - 34)
+#define TEXASR_PRIVILEGE_PR                      (63 - 35)
+#define TEXASR_FAILURE_SUMMARY                   (63 - 36)
+#define TEXASR_TFIAR_EXACT                       (63 - 37)
+#define TEXASR_ROT                               (63 - 38)
+#define TEXASR_TRANSACTION_LEVEL                 (63 - 52) /* 12 bits */
+
 enum {
     POWERPC_FLAG_NONE     = 0x00000000,
     /* Flag for MSR bit 25 signification (VRE/SPE)                           */
@@ -585,6 +605,8 @@ enum {
     POWERPC_FLAG_CFAR     = 0x00040000,
     /* Has VSX                                                               */
     POWERPC_FLAG_VSX      = 0x00080000,
+    /* Has Transactional Memory (ISA 2.07)                                   */
+    POWERPC_FLAG_TM       = 0x00100000,
 };
 
 /*****************************************************************************/
@@ -2011,6 +2033,8 @@ enum {
     PPC2_ISA207S       = 0x0000000000008000ULL,
     /* Double precision floating point conversion for signed integer 64      */
     PPC2_FP_CVT_S64    = 0x0000000000010000ULL,
+    /* Transactional Memory (ISA 2.07, Book II)                              */
+    PPC2_TM            = 0x0000000000020000ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
                         PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
@@ -2018,7 +2042,7 @@ enum {
                         PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
                         PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
                         PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
-                        PPC2_FP_CVT_S64)
+                        PPC2_FP_CVT_S64 | PPC2_TM)
 };
 
 /*****************************************************************************/
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 7f74466f32..6cceffc556 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -19,6 +19,9 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 
+#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL)
+#define float32_snan_to_qnan(x) ((x) | 0x00400000)
+
 /*****************************************************************************/
 /* Floating point operations helpers */
 uint64_t helper_float32_to_float64(CPUPPCState *env, uint32_t arg)
@@ -60,59 +63,55 @@ static inline int ppc_float64_get_unbiased_exp(float64 f)
     return ((f >> 52) & 0x7FF) - 1023;
 }
 
-uint32_t helper_compute_fprf(CPUPPCState *env, uint64_t arg, uint32_t set_fprf)
+void helper_compute_fprf(CPUPPCState *env, uint64_t arg)
 {
     CPU_DoubleU farg;
     int isneg;
-    int ret;
+    int fprf;
 
     farg.ll = arg;
     isneg = float64_is_neg(farg.d);
     if (unlikely(float64_is_any_nan(farg.d))) {
         if (float64_is_signaling_nan(farg.d)) {
             /* Signaling NaN: flags are undefined */
-            ret = 0x00;
+            fprf = 0x00;
         } else {
             /* Quiet NaN */
-            ret = 0x11;
+            fprf = 0x11;
         }
     } else if (unlikely(float64_is_infinity(farg.d))) {
         /* +/- infinity */
         if (isneg) {
-            ret = 0x09;
+            fprf = 0x09;
         } else {
-            ret = 0x05;
+            fprf = 0x05;
         }
     } else {
         if (float64_is_zero(farg.d)) {
             /* +/- zero */
             if (isneg) {
-                ret = 0x12;
+                fprf = 0x12;
             } else {
-                ret = 0x02;
+                fprf = 0x02;
             }
         } else {
             if (isden(farg.d)) {
                 /* Denormalized numbers */
-                ret = 0x10;
+                fprf = 0x10;
             } else {
                 /* Normalized numbers */
-                ret = 0x00;
+                fprf = 0x00;
             }
             if (isneg) {
-                ret |= 0x08;
+                fprf |= 0x08;
             } else {
-                ret |= 0x04;
+                fprf |= 0x04;
             }
         }
     }
-    if (set_fprf) {
-        /* We update FPSCR_FPRF */
-        env->fpscr &= ~(0x1F << FPSCR_FPRF);
-        env->fpscr |= ret << FPSCR_FPRF;
-    }
-    /* We just need fpcc to update Rc1 */
-    return ret & 0xF;
+    /* We update FPSCR_FPRF */
+    env->fpscr &= ~(0x1F << FPSCR_FPRF);
+    env->fpscr |= fprf << FPSCR_FPRF;
 }
 
 /* Floating-point invalid operations exception */
@@ -920,14 +919,16 @@ uint64_t helper_fsqrt(CPUPPCState *env, uint64_t arg)
 
     farg.ll = arg;
 
-    if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) {
-        /* Square root of a negative nonzero number */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
-    } else {
+    if (unlikely(float64_is_any_nan(farg.d))) {
         if (unlikely(float64_is_signaling_nan(farg.d))) {
-            /* sNaN square root */
+            /* sNaN square root */
             fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
+            farg.ll = float64_snan_to_qnan(farg.ll);
         }
+    } else if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) {
+        /* Square root of a negative nonzero number */
+        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
+    } else {
         farg.d = float64_sqrt(farg.d, &env->fp_status);
     }
     return farg.ll;
@@ -974,17 +975,20 @@ uint64_t helper_frsqrte(CPUPPCState *env, uint64_t arg)
 
     farg.ll = arg;
 
-    if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) {
-        /* Reciprocal square root of a negative nonzero number */
-        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
-    } else {
+    if (unlikely(float64_is_any_nan(farg.d))) {
         if (unlikely(float64_is_signaling_nan(farg.d))) {
             /* sNaN reciprocal square root */
             fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
+            farg.ll = float64_snan_to_qnan(farg.ll);
         }
+    } else if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) {
+        /* Reciprocal square root of a negative nonzero number */
+        farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
+    } else {
         farg.d = float64_sqrt(farg.d, &env->fp_status);
         farg.d = float64_div(float64_one, farg.d, &env->fp_status);
     }
+
     return farg.ll;
 }
 
@@ -1845,7 +1849,7 @@ void helper_##name(CPUPPCState *env, uint32_t opcode)                        \
         }                                                                    \
                                                                              \
         if (sfprf) {                                                         \
-            helper_compute_fprf(env, xt.fld, sfprf);                         \
+            helper_compute_fprf(env, xt.fld);                                \
         }                                                                    \
     }                                                                        \
     putVSR(xT(opcode), &xt, env);                                            \
@@ -1900,7 +1904,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                          \
         }                                                                    \
                                                                              \
         if (sfprf) {                                                         \
-            helper_compute_fprf(env, xt.fld, sfprf);                         \
+            helper_compute_fprf(env, xt.fld);                                \
         }                                                                    \
     }                                                                        \
                                                                              \
@@ -1954,7 +1958,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
         }                                                                     \
                                                                               \
         if (sfprf) {                                                          \
-            helper_compute_fprf(env, xt.fld, sfprf);                          \
+            helper_compute_fprf(env, xt.fld);                                 \
         }                                                                     \
     }                                                                         \
                                                                               \
@@ -1995,7 +1999,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
         }                                                                     \
                                                                               \
         if (sfprf) {                                                          \
-            helper_compute_fprf(env, xt.fld, sfprf);                          \
+            helper_compute_fprf(env, xt.fld);                                 \
         }                                                                     \
     }                                                                         \
                                                                               \
@@ -2044,7 +2048,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                          \
         }                                                                    \
                                                                              \
         if (sfprf) {                                                         \
-            helper_compute_fprf(env, xt.fld, sfprf);                         \
+            helper_compute_fprf(env, xt.fld);                                \
         }                                                                    \
     }                                                                        \
                                                                              \
@@ -2094,7 +2098,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                          \
         }                                                                    \
                                                                              \
         if (sfprf) {                                                         \
-            helper_compute_fprf(env, xt.fld, sfprf);                         \
+            helper_compute_fprf(env, xt.fld);                                \
         }                                                                    \
     }                                                                        \
                                                                              \
@@ -2294,7 +2298,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
         }                                                                     \
                                                                               \
         if (sfprf) {                                                          \
-            helper_compute_fprf(env, xt_out.fld, sfprf);                      \
+            helper_compute_fprf(env, xt_out.fld);                             \
         }                                                                     \
     }                                                                         \
     putVSR(xT(opcode), &xt_out, env);                                         \
@@ -2382,9 +2386,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                      \
 VSX_SCALAR_CMP(xscmpodp, 1)
 VSX_SCALAR_CMP(xscmpudp, 0)
 
-#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL)
-#define float32_snan_to_qnan(x) ((x) | 0x00400000)
-
 /* VSX_MAX_MIN - VSX floating point maximum/minimum
  *   name  - instruction mnemonic
  *   op    - operation (max or min)
@@ -2504,7 +2505,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                \
         }                                                          \
         if (sfprf) {                                               \
             helper_compute_fprf(env, ttp##_to_float64(xt.tfld,     \
-                                &env->fp_status), sfprf);          \
+                                &env->fp_status));                 \
         }                                                          \
     }                                                              \
                                                                    \
@@ -2614,7 +2615,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                     \
             xt.tfld = helper_frsp(env, xt.tfld);                        \
         }                                                               \
         if (sfprf) {                                                    \
-            helper_compute_fprf(env, xt.tfld, sfprf);                   \
+            helper_compute_fprf(env, xt.tfld);                          \
         }                                                               \
     }                                                                   \
                                                                         \
@@ -2669,7 +2670,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                    \
             xt.fld = tp##_round_to_int(xb.fld, &env->fp_status);       \
         }                                                              \
         if (sfprf) {                                                   \
-            helper_compute_fprf(env, xt.fld, sfprf);                   \
+            helper_compute_fprf(env, xt.fld);                          \
         }                                                              \
     }                                                                  \
                                                                        \
@@ -2709,7 +2710,7 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb)
 
     uint64_t xt = helper_frsp(env, xb);
 
-    helper_compute_fprf(env, xt, 1);
+    helper_compute_fprf(env, xt);
     helper_float_check_status(env);
     return xt;
 }
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 210fd97f6a..869be1509d 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -52,7 +52,7 @@ DEF_HELPER_FLAGS_2(brinc, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 
 DEF_HELPER_1(float_check_status, void, env)
 DEF_HELPER_1(reset_fpstatus, void, env)
-DEF_HELPER_3(compute_fprf, i32, env, i64, i32)
+DEF_HELPER_2(compute_fprf, void, env, i64)
 DEF_HELPER_3(store_fpscr, void, env, i64, i32)
 DEF_HELPER_2(fpscr_clrbit, void, env, i32)
 DEF_HELPER_2(fpscr_setbit, void, env, i32)
@@ -665,3 +665,5 @@ DEF_HELPER_4(dscri, void, env, fprp, fprp, i32)
 DEF_HELPER_4(dscriq, void, env, fprp, fprp, i32)
 DEF_HELPER_4(dscli, void, env, fprp, fprp, i32)
 DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
+
+DEF_HELPER_1(tbegin, void, env)
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 6843fa0b98..1edf2b5aeb 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -2246,8 +2246,23 @@ int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
                     strerror(errno));
             return rc;
         } else if (rc) {
-            /* Kernel already retuns data in BE format for the file */
-            qemu_put_buffer(f, buf, rc);
+            uint8_t *buffer = buf;
+            ssize_t n = rc;
+            while (n) {
+                struct kvm_get_htab_header *head =
+                    (struct kvm_get_htab_header *) buffer;
+                size_t chunksize = sizeof(*head) +
+                     HASH_PTE_SIZE_64 * head->n_valid;
+
+                qemu_put_be32(f, head->index);
+                qemu_put_be16(f, head->n_valid);
+                qemu_put_be16(f, head->n_invalid);
+                qemu_put_buffer(f, (void *)(head + 1),
+                                HASH_PTE_SIZE_64 * head->n_valid);
+
+                buffer += chunksize;
+                n -= chunksize;
+            }
         }
     } while ((rc != 0)
              && ((max_ns < 0)
@@ -2264,7 +2279,6 @@ int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
     ssize_t rc;
 
     buf = alloca(chunksize);
-    /* This is KVM on ppc, so this is all big-endian */
     buf->index = index;
     buf->n_valid = n_valid;
     buf->n_invalid = n_invalid;
@@ -2388,3 +2402,9 @@ out_close:
 error_out:
     return;
 }
+
+int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
+                             uint64_t address, uint32_t data)
+{
+    return 0;
+}
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 50344b81cf..6d37dae7b0 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -269,3 +269,25 @@ STVE(stvewx, cpu_stl_data, bswap32, u32)
 
 #undef HI_IDX
 #undef LO_IDX
+
+void helper_tbegin(CPUPPCState *env)
+{
+    /* As a degenerate implementation, always fail tbegin.  The reason
+     * given is "Nesting overflow".  The "persistent" bit is set,
+     * providing a hint to the error handler to not retry.  The TFIAR
+     * captures the address of the failure, which is this tbegin
+     * instruction.  Instruction execution will continue with the
+     * next instruction in memory, which is precisely what we want.
+     */
+
+    env->spr[SPR_TEXASR] =
+        (1ULL << TEXASR_FAILURE_PERSISTENT) |
+        (1ULL << TEXASR_NESTING_OVERFLOW) |
+        (msr_hv << TEXASR_PRIVILEGE_HV) |
+        (msr_pr << TEXASR_PRIVILEGE_PR) |
+        (1ULL << TEXASR_FAILURE_SUMMARY) |
+        (1ULL << TEXASR_TFIAR_EXACT);
+    env->spr[SPR_TFIAR] = env->nip | (msr_hv << 1) | msr_pr;
+    env->spr[SPR_TFHAR] = env->nip + 4;
+    env->crf[0] = 0xB; /* transaction failure; TODO confirm 0b1011 vs 0b1010 */
+}
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 2e32e8d8b8..7c801f36e3 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -203,6 +203,7 @@ struct DisasContext {
     int altivec_enabled;
     int vsx_enabled;
     int spe_enabled;
+    int tm_enabled;
     ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
     int singlestep_enabled;
     uint64_t insns_flags;
@@ -250,26 +251,10 @@ static inline void gen_reset_fpstatus(void)
     gen_helper_reset_fpstatus(cpu_env);
 }
 
-static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
+static inline void gen_compute_fprf(TCGv_i64 arg)
 {
-    TCGv_i32 t0 = tcg_temp_new_i32();
-
-    if (set_fprf != 0) {
-        /* This case might be optimized later */
-        tcg_gen_movi_i32(t0, 1);
-        gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-        if (unlikely(set_rc)) {
-            tcg_gen_mov_i32(cpu_crf[1], t0);
-        }
-        gen_helper_float_check_status(cpu_env);
-    } else if (unlikely(set_rc)) {
-        /* We always need to compute fpcc */
-        tcg_gen_movi_i32(t0, 0);
-        gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-        tcg_gen_mov_i32(cpu_crf[1], t0);
-    }
-
-    tcg_temp_free_i32(t0);
+    gen_helper_compute_fprf(cpu_env, arg);
+    gen_helper_float_check_status(cpu_env);
 }
 
 static inline void gen_set_access_type(DisasContext *ctx, int access_type)
@@ -346,11 +331,13 @@ static inline void gen_stop_exception(DisasContext *ctx)
     ctx->exception = POWERPC_EXCP_STOP;
 }
 
+#ifndef CONFIG_USER_ONLY
 /* No need to update nip here, as execution flow will change */
 static inline void gen_sync_exception(DisasContext *ctx)
 {
     ctx->exception = POWERPC_EXCP_SYNC;
 }
+#endif
 
 #define GEN_HANDLER(name, opc1, opc2, opc3, inval, type)                      \
 GEN_OPCODE(name, opc1, opc2, opc3, inval, type, PPC_NONE)
@@ -452,7 +439,10 @@ EXTRACT_HELPER(ME, 1, 5);
 EXTRACT_HELPER(TO, 21, 5);
 
 EXTRACT_HELPER(CRM, 12, 8);
+
+#ifndef CONFIG_USER_ONLY
 EXTRACT_HELPER(SR, 16, 4);
+#endif
 
 /* mtfsf/mtfsfi */
 EXTRACT_HELPER(FPBF, 23, 3);
@@ -2077,6 +2067,21 @@ static void gen_srd(DisasContext *ctx)
 }
 #endif
 
+#if defined(TARGET_PPC64)
+static void gen_set_cr1_from_fpscr(DisasContext *ctx)
+{
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
+    tcg_gen_shri_i32(cpu_crf[1], tmp, 28);
+    tcg_temp_free_i32(tmp);
+}
+#else
+static void gen_set_cr1_from_fpscr(DisasContext *ctx)
+{
+    tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28);
+}
+#endif
+
 /***                       Floating-Point arithmetic                       ***/
 #define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type)           \
 static void gen_f##name(DisasContext *ctx)                                    \
@@ -2095,8 +2100,12 @@ static void gen_f##name(DisasContext *ctx)                                    \
         gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env,                    \
                         cpu_fpr[rD(ctx->opcode)]);                            \
     }                                                                         \
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], set_fprf,                      \
-                     Rc(ctx->opcode) != 0);                                   \
+    if (set_fprf) {                                                           \
+        gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]);                           \
+    }                                                                         \
+    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
+        gen_set_cr1_from_fpscr(ctx);                                          \
+    }                                                                         \
 }
 
 #define GEN_FLOAT_ACB(name, op2, set_fprf, type)                              \
@@ -2120,8 +2129,12 @@ static void gen_f##name(DisasContext *ctx)                                    \
         gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env,                    \
                         cpu_fpr[rD(ctx->opcode)]);                            \
     }                                                                         \
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)],                                \
-                     set_fprf, Rc(ctx->opcode) != 0);                         \
+    if (set_fprf) {                                                           \
+        gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]);                           \
+    }                                                                         \
+    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
+        gen_set_cr1_from_fpscr(ctx);                                          \
+    }                                                                         \
 }
 #define GEN_FLOAT_AB(name, op2, inval, set_fprf, type)                        \
 _GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type);               \
@@ -2144,8 +2157,12 @@ static void gen_f##name(DisasContext *ctx)                                    \
         gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env,                    \
                         cpu_fpr[rD(ctx->opcode)]);                            \
     }                                                                         \
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)],                                \
-                     set_fprf, Rc(ctx->opcode) != 0);                         \
+    if (set_fprf) {                                                           \
+        gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]);                           \
+    }                                                                         \
+    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
+        gen_set_cr1_from_fpscr(ctx);                                          \
+    }                                                                         \
 }
 #define GEN_FLOAT_AC(name, op2, inval, set_fprf, type)                        \
 _GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type);               \
@@ -2163,8 +2180,12 @@ static void gen_f##name(DisasContext *ctx)                                    \
     gen_reset_fpstatus();                                                     \
     gen_helper_f##name(cpu_fpr[rD(ctx->opcode)], cpu_env,                     \
                        cpu_fpr[rB(ctx->opcode)]);                             \
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)],                                \
-                     set_fprf, Rc(ctx->opcode) != 0);                         \
+    if (set_fprf) {                                                           \
+        gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]);                           \
+    }                                                                         \
+    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
+        gen_set_cr1_from_fpscr(ctx);                                          \
+    }                                                                         \
 }
 
 #define GEN_FLOAT_BS(name, op1, op2, set_fprf, type)                          \
@@ -2179,8 +2200,12 @@ static void gen_f##name(DisasContext *ctx)                                    \
     gen_reset_fpstatus();                                                     \
     gen_helper_f##name(cpu_fpr[rD(ctx->opcode)], cpu_env,                     \
                        cpu_fpr[rB(ctx->opcode)]);                             \
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)],                                \
-                     set_fprf, Rc(ctx->opcode) != 0);                         \
+    if (set_fprf) {                                                           \
+        gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]);                           \
+    }                                                                         \
+    if (unlikely(Rc(ctx->opcode) != 0)) {                                     \
+        gen_set_cr1_from_fpscr(ctx);                                          \
+    }                                                                         \
 }
 
 /* fadd - fadds */
@@ -2213,7 +2238,10 @@ static void gen_frsqrtes(DisasContext *ctx)
                        cpu_fpr[rB(ctx->opcode)]);
     gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env,
                     cpu_fpr[rD(ctx->opcode)]);
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 1, Rc(ctx->opcode) != 0);
+    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 /* fsel */
@@ -2234,7 +2262,10 @@ static void gen_fsqrt(DisasContext *ctx)
     gen_reset_fpstatus();
     gen_helper_fsqrt(cpu_fpr[rD(ctx->opcode)], cpu_env,
                      cpu_fpr[rB(ctx->opcode)]);
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 1, Rc(ctx->opcode) != 0);
+    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 static void gen_fsqrts(DisasContext *ctx)
@@ -2250,7 +2281,10 @@ static void gen_fsqrts(DisasContext *ctx)
                      cpu_fpr[rB(ctx->opcode)]);
     gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env,
                     cpu_fpr[rD(ctx->opcode)]);
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 1, Rc(ctx->opcode) != 0);
+    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 /***                     Floating-Point multiply-and-add                   ***/
@@ -2370,7 +2404,9 @@ static void gen_fabs(DisasContext *ctx)
     }
     tcg_gen_andi_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
                      ~(1ULL << 63));
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 /* fmr  - fmr. */
@@ -2382,7 +2418,9 @@ static void gen_fmr(DisasContext *ctx)
         return;
     }
     tcg_gen_mov_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 /* fnabs */
@@ -2395,7 +2433,9 @@ static void gen_fnabs(DisasContext *ctx)
     }
     tcg_gen_ori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
                     1ULL << 63);
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 /* fneg */
@@ -2408,7 +2448,9 @@ static void gen_fneg(DisasContext *ctx)
     }
     tcg_gen_xori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
                      1ULL << 63);
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 /* fcpsgn: PowerPC 2.05 specification */
@@ -2421,7 +2463,9 @@ static void gen_fcpsgn(DisasContext *ctx)
     }
     tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
                         cpu_fpr[rB(ctx->opcode)], 0, 63);
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 static void gen_fmrgew(DisasContext *ctx)
@@ -2479,7 +2523,9 @@ static void gen_mffs(DisasContext *ctx)
     }
     gen_reset_fpstatus();
     tcg_gen_extu_tl_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr);
-    gen_compute_fprf(cpu_fpr[rD(ctx->opcode)], 0, Rc(ctx->opcode) != 0);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
 }
 
 /* mtfsb0 */
@@ -6743,7 +6789,7 @@ static void gen_st##name(DisasContext *ctx)                                   \
     tcg_temp_free(EA);                                                        \
 }
 
-#define GEN_VR_LVE(name, opc2, opc3)                                    \
+#define GEN_VR_LVE(name, opc2, opc3, size)                              \
 static void gen_lve##name(DisasContext *ctx)                            \
     {                                                                   \
         TCGv EA;                                                        \
@@ -6755,13 +6801,16 @@ static void gen_lve##name(DisasContext *ctx)                            \
         gen_set_access_type(ctx, ACCESS_INT);                           \
         EA = tcg_temp_new();                                            \
         gen_addr_reg_index(ctx, EA);                                    \
+        if (size > 1) {                                                 \
+            tcg_gen_andi_tl(EA, EA, ~(size - 1));                       \
+        }                                                               \
         rs = gen_avr_ptr(rS(ctx->opcode));                              \
         gen_helper_lve##name(cpu_env, rs, EA);                          \
         tcg_temp_free(EA);                                              \
         tcg_temp_free_ptr(rs);                                          \
     }
 
-#define GEN_VR_STVE(name, opc2, opc3)                                   \
+#define GEN_VR_STVE(name, opc2, opc3, size)                             \
 static void gen_stve##name(DisasContext *ctx)                           \
     {                                                                   \
         TCGv EA;                                                        \
@@ -6773,6 +6822,9 @@ static void gen_stve##name(DisasContext *ctx)                           \
         gen_set_access_type(ctx, ACCESS_INT);                           \
         EA = tcg_temp_new();                                            \
         gen_addr_reg_index(ctx, EA);                                    \
+        if (size > 1) {                                                 \
+            tcg_gen_andi_tl(EA, EA, ~(size - 1));                       \
+        }                                                               \
         rs = gen_avr_ptr(rS(ctx->opcode));                              \
         gen_helper_stve##name(cpu_env, rs, EA);                         \
         tcg_temp_free(EA);                                              \
@@ -6783,17 +6835,17 @@ GEN_VR_LDX(lvx, 0x07, 0x03);
 /* As we don't emulate the cache, lvxl is stricly equivalent to lvx */
 GEN_VR_LDX(lvxl, 0x07, 0x0B);
 
-GEN_VR_LVE(bx, 0x07, 0x00);
-GEN_VR_LVE(hx, 0x07, 0x01);
-GEN_VR_LVE(wx, 0x07, 0x02);
+GEN_VR_LVE(bx, 0x07, 0x00, 1);
+GEN_VR_LVE(hx, 0x07, 0x01, 2);
+GEN_VR_LVE(wx, 0x07, 0x02, 4);
 
 GEN_VR_STX(svx, 0x07, 0x07);
 /* As we don't emulate the cache, stvxl is stricly equivalent to stvx */
 GEN_VR_STX(svxl, 0x07, 0x0F);
 
-GEN_VR_STVE(bx, 0x07, 0x04);
-GEN_VR_STVE(hx, 0x07, 0x05);
-GEN_VR_STVE(wx, 0x07, 0x06);
+GEN_VR_STVE(bx, 0x07, 0x04, 1);
+GEN_VR_STVE(hx, 0x07, 0x05, 2);
+GEN_VR_STVE(wx, 0x07, 0x06, 4);
 
 static void gen_lvsl(DisasContext *ctx)
 {
@@ -8205,21 +8257,6 @@ static inline TCGv_ptr gen_fprp_ptr(int reg)
     return r;
 }
 
-#if defined(TARGET_PPC64)
-static void gen_set_cr1_from_fpscr(DisasContext *ctx)
-{
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
-    tcg_gen_shri_i32(cpu_crf[1], tmp, 28);
-    tcg_temp_free_i32(tmp);
-}
-#else
-static void gen_set_cr1_from_fpscr(DisasContext *ctx)
-{
-        tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28);
-}
-#endif
-
 #define GEN_DFP_T_A_B_Rc(name)                   \
 static void gen_##name(DisasContext *ctx)        \
 {                                                \
@@ -9642,6 +9679,88 @@ GEN_SPE(efdctsiz,  speundef,  0x1D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE
 GEN_SPE(efdtstgt,  efdtstlt,  0x1E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE); //
 GEN_SPE(efdtsteq,  speundef,  0x1F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_DOUBLE); //
 
+static void gen_tbegin(DisasContext *ctx)
+{
+    if (unlikely(!ctx->tm_enabled)) {
+        gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);
+        return;
+    }
+    gen_helper_tbegin(cpu_env);
+}
+
+#define GEN_TM_NOOP(name)                                      \
+static inline void gen_##name(DisasContext *ctx)               \
+{                                                              \
+    if (unlikely(!ctx->tm_enabled)) {                          \
+        gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);   \
+        return;                                                \
+    }                                                          \
+    /* Because tbegin always fails in QEMU, these user         \
+     * space instructions all have a simple implementation:    \
+     *                                                         \
+     *     CR[0] = 0b0 || MSR[TS] || 0b0                       \
+     *           = 0b0 || 0b00    || 0b0                       \
+     */                                                        \
+    tcg_gen_movi_i32(cpu_crf[0], 0);                           \
+}
+
+GEN_TM_NOOP(tend);
+GEN_TM_NOOP(tabort);
+GEN_TM_NOOP(tabortwc);
+GEN_TM_NOOP(tabortwci);
+GEN_TM_NOOP(tabortdc);
+GEN_TM_NOOP(tabortdci);
+GEN_TM_NOOP(tsr);
+
+static void gen_tcheck(DisasContext *ctx)
+{
+    if (unlikely(!ctx->tm_enabled)) {
+        gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);
+        return;
+    }
+    /* Because tbegin always fails, the tcheck implementation
+     * is simple:
+     *
+     * CR[CRF] = TDOOMED || MSR[TS] || 0b0
+     *         = 0b1 || 0b00 || 0b0
+     */
+    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0x8);
+}
+
+#if defined(CONFIG_USER_ONLY)
+#define GEN_TM_PRIV_NOOP(name)                                 \
+static inline void gen_##name(DisasContext *ctx)               \
+{                                                              \
+    gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);           \
+}
+
+#else
+
+#define GEN_TM_PRIV_NOOP(name)                                 \
+static inline void gen_##name(DisasContext *ctx)               \
+{                                                              \
+    if (unlikely(ctx->pr)) {                                   \
+        gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);       \
+        return;                                                \
+    }                                                          \
+    if (unlikely(!ctx->tm_enabled)) {                          \
+        gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);   \
+        return;                                                \
+    }                                                          \
+    /* Because tbegin always fails, the implementation is      \
+     * simple:                                                 \
+     *                                                         \
+     *   CR[0] = 0b0 || MSR[TS] || 0b0                         \
+     *         = 0b0 || 0b00 || 0b0                            \
+     */                                                        \
+    tcg_gen_movi_i32(cpu_crf[0], 0);                           \
+}
+
+#endif
+
+GEN_TM_PRIV_NOOP(treclaim);
+GEN_TM_PRIV_NOOP(trechkpt);
+
 static opcode_t opcodes[] = {
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
 GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER),
@@ -11054,6 +11173,29 @@ GEN_SPEOP_LDST(evstwhe, 0x18, 2),
 GEN_SPEOP_LDST(evstwho, 0x1A, 2),
 GEN_SPEOP_LDST(evstwwe, 0x1C, 2),
 GEN_SPEOP_LDST(evstwwo, 0x1E, 2),
+
+GEN_HANDLER2_E(tbegin, "tbegin", 0x1F, 0x0E, 0x14, 0x01DFF800, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tend,   "tend",   0x1F, 0x0E, 0x15, 0x01FFF800, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabort, "tabort", 0x1F, 0x0E, 0x1C, 0x03E0F800, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabortwc, "tabortwc", 0x1F, 0x0E, 0x18, 0x00000000, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabortwci, "tabortwci", 0x1F, 0x0E, 0x1A, 0x00000000, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabortdc, "tabortdc", 0x1F, 0x0E, 0x19, 0x00000000, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabortdci, "tabortdci", 0x1F, 0x0E, 0x1B, 0x00000000, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tsr, "tsr", 0x1F, 0x0E, 0x17, 0x03DFF800, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tcheck, "tcheck", 0x1F, 0x0E, 0x16, 0x007FF800, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(treclaim, "treclaim", 0x1F, 0x0E, 0x1D, 0x03E0F800, \
+               PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(trechkpt, "trechkpt", 0x1F, 0x0E, 0x1F, 0x03FFF800, \
+               PPC_NONE, PPC2_TM),
 };
 
 #include "helper_regs.h"
@@ -11311,6 +11453,13 @@ static inline void gen_intermediate_code_internal(PowerPCCPU *cpu,
     } else {
         ctx.vsx_enabled = 0;
     }
+#if defined(TARGET_PPC64)
+    if ((env->flags & POWERPC_FLAG_TM) && msr_tm) {
+        ctx.tm_enabled = msr_tm;
+    } else {
+        ctx.tm_enabled = 0;
+    }
+#endif
     if ((env->flags & POWERPC_FLAG_SE) && msr_se)
         ctx.singlestep_enabled = CPU_SINGLE_STEP;
     else
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index f0a29992df..df1a62c4c6 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8214,7 +8214,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
                         PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
                         PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
                         PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
-                        PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64;
+                        PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
+                        PPC2_TM;
     pcc->msr_mask = (1ull << MSR_SF) |
                     (1ull << MSR_TM) |
                     (1ull << MSR_VR) |
@@ -8242,7 +8243,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
     pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
                  POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
                  POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
-                 POWERPC_FLAG_VSX;
+                 POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
     pcc->l1_dcache_size = 0x8000;
     pcc->l1_icache_size = 0x8000;
     pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index fe2f95d084..23ad336803 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -457,87 +457,6 @@ int css_enable_mss(void);
 int css_do_rsch(SubchDev *sch);
 int css_do_rchp(uint8_t cssid, uint8_t chpid);
 bool css_present(uint8_t cssid);
-#else
-static inline SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
-                                       uint16_t schid)
-{
-    return NULL;
-}
-static inline bool css_subch_visible(SubchDev *sch)
-{
-    return false;
-}
-static inline void css_conditional_io_interrupt(SubchDev *sch)
-{
-}
-static inline int css_do_stsch(SubchDev *sch, SCHIB *schib)
-{
-    return -ENODEV;
-}
-static inline bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid)
-{
-    return true;
-}
-static inline int css_do_msch(SubchDev *sch, SCHIB *schib)
-{
-    return -ENODEV;
-}
-static inline int css_do_xsch(SubchDev *sch)
-{
-    return -ENODEV;
-}
-static inline int css_do_csch(SubchDev *sch)
-{
-    return -ENODEV;
-}
-static inline int css_do_hsch(SubchDev *sch)
-{
-    return -ENODEV;
-}
-static inline int css_do_ssch(SubchDev *sch, ORB *orb)
-{
-    return -ENODEV;
-}
-static inline int css_do_tsch(SubchDev *sch, IRB *irb)
-{
-    return -ENODEV;
-}
-static inline int css_do_stcrw(CRW *crw)
-{
-    return 1;
-}
-static inline int css_do_tpi(IOIntCode *int_code, int lowcore)
-{
-    return 0;
-}
-static inline int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid,
-                                       int rfmt, uint8_t l_chpid, void *buf)
-{
-    return 0;
-}
-static inline void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo)
-{
-}
-static inline int css_enable_mss(void)
-{
-    return -EINVAL;
-}
-static inline int css_enable_mcsse(void)
-{
-    return -EINVAL;
-}
-static inline int css_do_rsch(SubchDev *sch)
-{
-    return -ENODEV;
-}
-static inline int css_do_rchp(uint8_t cssid, uint8_t chpid)
-{
-    return -ENODEV;
-}
-static inline bool css_present(uint8_t cssid)
-{
-    return false;
-}
 #endif
 
 #define cpu_init(model) (&cpu_s390x_init(model)->env)
diff --git a/target-s390x/ioinst.c b/target-s390x/ioinst.c
index b8a6486f51..1ac5d61c56 100644
--- a/target-s390x/ioinst.c
+++ b/target-s390x/ioinst.c
@@ -14,6 +14,7 @@
 #include "cpu.h"
 #include "ioinst.h"
 #include "trace.h"
+#include "hw/s390x/s390-pci-bus.h"
 
 int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
                                  int *schid)
@@ -398,6 +399,7 @@ typedef struct ChscResp {
 #define CHSC_SCPD 0x0002
 #define CHSC_SCSC 0x0010
 #define CHSC_SDA  0x0031
+#define CHSC_SEI  0x000e
 
 #define CHSC_SCPD_0_M 0x20000000
 #define CHSC_SCPD_0_C 0x10000000
@@ -566,6 +568,53 @@ out:
     res->param = 0;
 }
 
+static int chsc_sei_nt0_get_event(void *res)
+{
+    /* no events yet */
+    return 1;
+}
+
+static int chsc_sei_nt0_have_event(void)
+{
+    /* no events yet */
+    return 0;
+}
+
+#define CHSC_SEI_NT0    (1ULL << 63)
+#define CHSC_SEI_NT2    (1ULL << 61)
+static void ioinst_handle_chsc_sei(ChscReq *req, ChscResp *res)
+{
+    uint64_t selection_mask = ldq_p(&req->param1);
+    uint8_t *res_flags = (uint8_t *)res->data;
+    int have_event = 0;
+    int have_more = 0;
+
+    /* according to the architecture, nt0 cannot be masked */
+    have_event = !chsc_sei_nt0_get_event(res);
+    have_more = chsc_sei_nt0_have_event();
+
+    if (selection_mask & CHSC_SEI_NT2) {
+        if (!have_event) {
+            have_event = !chsc_sei_nt2_get_event(res);
+        }
+
+        if (!have_more) {
+            have_more = chsc_sei_nt2_have_event();
+        }
+    }
+
+    if (have_event) {
+        res->code = cpu_to_be16(0x0001);
+        if (have_more) {
+            (*res_flags) |= 0x80;
+        } else {
+            (*res_flags) &= ~0x80;
+        }
+    } else {
+        res->code = cpu_to_be16(0x0004);
+    }
+}
+
 static void ioinst_handle_chsc_unimplemented(ChscResp *res)
 {
     res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
@@ -617,6 +666,9 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb)
     case CHSC_SDA:
         ioinst_handle_chsc_sda(req, res);
         break;
+    case CHSC_SEI:
+        ioinst_handle_chsc_sei(req, res);
+        break;
     default:
         ioinst_handle_chsc_unimplemented(res);
         break;
diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h
index 29f6423df4..1efe16c3ae 100644
--- a/target-s390x/ioinst.h
+++ b/target-s390x/ioinst.h
@@ -204,6 +204,7 @@ typedef struct CRW {
 
 #define CRW_RSC_SUBCH 0x3
 #define CRW_RSC_CHP   0x4
+#define CRW_RSC_CSS   0xb
 
 /* I/O interruption code */
 typedef struct IOIntCode {
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index f3f8f2c2ca..dcd75055c1 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -40,6 +40,8 @@
 #include "exec/gdbstub.h"
 #include "trace.h"
 #include "qapi-event.h"
+#include "hw/s390x/s390-pci-inst.h"
+#include "hw/s390x/s390-pci-bus.h"
 
 /* #define DEBUG_KVM */
 
@@ -56,6 +58,7 @@
 #define IPA0_B2                         0xb200
 #define IPA0_B9                         0xb900
 #define IPA0_EB                         0xeb00
+#define IPA0_E3                         0xe300
 
 #define PRIV_B2_SCLP_CALL               0x20
 #define PRIV_B2_CSCH                    0x30
@@ -76,8 +79,17 @@
 #define PRIV_B2_XSCH                    0x76
 
 #define PRIV_EB_SQBS                    0x8a
+#define PRIV_EB_PCISTB                  0xd0
+#define PRIV_EB_SIC                     0xd1
 
 #define PRIV_B9_EQBS                    0x9c
+#define PRIV_B9_CLP                     0xa0
+#define PRIV_B9_PCISTG                  0xd0
+#define PRIV_B9_PCILG                   0xd2
+#define PRIV_B9_RPCIT                   0xd3
+
+#define PRIV_E3_MPCIFC                  0xd0
+#define PRIV_E3_STPCIFC                 0xd4
 
 #define DIAG_IPL                        0x308
 #define DIAG_KVM_HYPERCALL              0x500
@@ -202,6 +214,11 @@ void kvm_s390_reset_vcpu(S390CPU *cpu)
     }
 }
 
+static int can_sync_regs(CPUState *cs, int regs)
+{
+    return cap_sync_regs && (cs->kvm_run->kvm_valid_regs & regs) == regs;
+}
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
     S390CPU *cpu = S390_CPU(cs);
@@ -216,7 +233,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     cs->kvm_run->psw_addr = env->psw.addr;
     cs->kvm_run->psw_mask = env->psw.mask;
 
-    if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) {
+    if (can_sync_regs(cs, KVM_SYNC_GPRS)) {
         for (i = 0; i < 16; i++) {
             cs->kvm_run->s.regs.gprs[i] = env->regs[i];
             cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS;
@@ -247,18 +264,33 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         return 0;
     }
 
-    /*
-     * These ONE_REGS are not protected by a capability. As they are only
-     * necessary for migration we just trace a possible error, but don't
-     * return with an error return code.
-     */
-    kvm_set_one_reg(cs, KVM_REG_S390_CPU_TIMER, &env->cputm);
-    kvm_set_one_reg(cs, KVM_REG_S390_CLOCK_COMP, &env->ckc);
-    kvm_set_one_reg(cs, KVM_REG_S390_TODPR, &env->todpr);
-    kvm_set_one_reg(cs, KVM_REG_S390_GBEA, &env->gbea);
-    kvm_set_one_reg(cs, KVM_REG_S390_PP, &env->pp);
+    if (can_sync_regs(cs, KVM_SYNC_ARCH0)) {
+        cs->kvm_run->s.regs.cputm = env->cputm;
+        cs->kvm_run->s.regs.ckc = env->ckc;
+        cs->kvm_run->s.regs.todpr = env->todpr;
+        cs->kvm_run->s.regs.gbea = env->gbea;
+        cs->kvm_run->s.regs.pp = env->pp;
+        cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ARCH0;
+    } else {
+        /*
+         * These ONE_REGS are not protected by a capability. As they are only
+         * necessary for migration we just trace a possible error, but don't
+         * return with an error return code.
+         */
+        kvm_set_one_reg(cs, KVM_REG_S390_CPU_TIMER, &env->cputm);
+        kvm_set_one_reg(cs, KVM_REG_S390_CLOCK_COMP, &env->ckc);
+        kvm_set_one_reg(cs, KVM_REG_S390_TODPR, &env->todpr);
+        kvm_set_one_reg(cs, KVM_REG_S390_GBEA, &env->gbea);
+        kvm_set_one_reg(cs, KVM_REG_S390_PP, &env->pp);
+    }
 
-    if (cap_async_pf) {
+    /* pfault parameters */
+    if (can_sync_regs(cs, KVM_SYNC_PFAULT)) {
+        cs->kvm_run->s.regs.pft = env->pfault_token;
+        cs->kvm_run->s.regs.pfs = env->pfault_select;
+        cs->kvm_run->s.regs.pfc = env->pfault_compare;
+        cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_PFAULT;
+    } else if (cap_async_pf) {
         r = kvm_set_one_reg(cs, KVM_REG_S390_PFTOKEN, &env->pfault_token);
         if (r < 0) {
             return r;
@@ -273,9 +305,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         }
     }
 
-    if (cap_sync_regs &&
-        cs->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS &&
-        cs->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) {
+    /* access registers and control registers */
+    if (can_sync_regs(cs, KVM_SYNC_ACRS | KVM_SYNC_CRS)) {
         for (i = 0; i < 16; i++) {
             cs->kvm_run->s.regs.acrs[i] = env->aregs[i];
             cs->kvm_run->s.regs.crs[i] = env->cregs[i];
@@ -294,7 +325,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     }
 
     /* Finally the prefix */
-    if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) {
+    if (can_sync_regs(cs, KVM_SYNC_PREFIX)) {
         cs->kvm_run->s.regs.prefix = env->psa;
         cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
     } else {
@@ -317,7 +348,7 @@ int kvm_arch_get_registers(CPUState *cs)
     env->psw.mask = cs->kvm_run->psw_mask;
 
     /* the GPRS */
-    if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) {
+    if (can_sync_regs(cs, KVM_SYNC_GPRS)) {
         for (i = 0; i < 16; i++) {
             env->regs[i] = cs->kvm_run->s.regs.gprs[i];
         }
@@ -332,9 +363,7 @@ int kvm_arch_get_registers(CPUState *cs)
     }
 
     /* The ACRS and CRS */
-    if (cap_sync_regs &&
-        cs->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS &&
-        cs->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) {
+    if (can_sync_regs(cs, KVM_SYNC_ACRS | KVM_SYNC_CRS)) {
         for (i = 0; i < 16; i++) {
             env->aregs[i] = cs->kvm_run->s.regs.acrs[i];
             env->cregs[i] = cs->kvm_run->s.regs.crs[i];
@@ -361,22 +390,35 @@ int kvm_arch_get_registers(CPUState *cs)
     env->fpc = fpu.fpc;
 
     /* The prefix */
-    if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) {
+    if (can_sync_regs(cs, KVM_SYNC_PREFIX)) {
         env->psa = cs->kvm_run->s.regs.prefix;
     }
 
-    /*
-     * These ONE_REGS are not protected by a capability. As they are only
-     * necessary for migration we just trace a possible error, but don't
-     * return with an error return code.
-     */
-    kvm_get_one_reg(cs, KVM_REG_S390_CPU_TIMER, &env->cputm);
-    kvm_get_one_reg(cs, KVM_REG_S390_CLOCK_COMP, &env->ckc);
-    kvm_get_one_reg(cs, KVM_REG_S390_TODPR, &env->todpr);
-    kvm_get_one_reg(cs, KVM_REG_S390_GBEA, &env->gbea);
-    kvm_get_one_reg(cs, KVM_REG_S390_PP, &env->pp);
+    if (can_sync_regs(cs, KVM_SYNC_ARCH0)) {
+        env->cputm = cs->kvm_run->s.regs.cputm;
+        env->ckc = cs->kvm_run->s.regs.ckc;
+        env->todpr = cs->kvm_run->s.regs.todpr;
+        env->gbea = cs->kvm_run->s.regs.gbea;
+        env->pp = cs->kvm_run->s.regs.pp;
+    } else {
+        /*
+         * These ONE_REGS are not protected by a capability. As they are only
+         * necessary for migration we just trace a possible error, but don't
+         * return with an error return code.
+         */
+        kvm_get_one_reg(cs, KVM_REG_S390_CPU_TIMER, &env->cputm);
+        kvm_get_one_reg(cs, KVM_REG_S390_CLOCK_COMP, &env->ckc);
+        kvm_get_one_reg(cs, KVM_REG_S390_TODPR, &env->todpr);
+        kvm_get_one_reg(cs, KVM_REG_S390_GBEA, &env->gbea);
+        kvm_get_one_reg(cs, KVM_REG_S390_PP, &env->pp);
+    }
 
-    if (cap_async_pf) {
+    /* pfault parameters */
+    if (can_sync_regs(cs, KVM_SYNC_PFAULT)) {
+        env->pfault_token = cs->kvm_run->s.regs.pft;
+        env->pfault_select = cs->kvm_run->s.regs.pfs;
+        env->pfault_compare = cs->kvm_run->s.regs.pfc;
+    } else if (cap_async_pf) {
         r = kvm_get_one_reg(cs, KVM_REG_S390_PFTOKEN, &env->pfault_token);
         if (r < 0) {
             return r;
@@ -809,11 +851,124 @@ static int handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
     return rc;
 }
 
+static uint64_t get_base_disp_rxy(S390CPU *cpu, struct kvm_run *run)
+{
+    CPUS390XState *env = &cpu->env;
+    uint32_t x2 = (run->s390_sieic.ipa & 0x000f);
+    uint32_t base2 = run->s390_sieic.ipb >> 28;
+    uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) +
+                     ((run->s390_sieic.ipb & 0xff00) << 4);
+
+    if (disp2 & 0x80000) {
+        disp2 += 0xfff00000;
+    }
+
+    return (base2 ? env->regs[base2] : 0) +
+           (x2 ? env->regs[x2] : 0) + (long)(int)disp2;
+}
+
+static uint64_t get_base_disp_rsy(S390CPU *cpu, struct kvm_run *run)
+{
+    CPUS390XState *env = &cpu->env;
+    uint32_t base2 = run->s390_sieic.ipb >> 28;
+    uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) +
+                     ((run->s390_sieic.ipb & 0xff00) << 4);
+
+    if (disp2 & 0x80000) {
+        disp2 += 0xfff00000;
+    }
+
+    return (base2 ? env->regs[base2] : 0) + (long)(int)disp2;
+}
+
+static int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run)
+{
+    uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16;
+
+    return clp_service_call(cpu, r2);
+}
+
+static int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run)
+{
+    uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20;
+    uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16;
+
+    return pcilg_service_call(cpu, r1, r2);
+}
+
+static int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run)
+{
+    uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20;
+    uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16;
+
+    return pcistg_service_call(cpu, r1, r2);
+}
+
+static int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run)
+{
+    uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4;
+    uint64_t fiba;
+
+    cpu_synchronize_state(CPU(cpu));
+    fiba = get_base_disp_rxy(cpu, run);
+
+    return stpcifc_service_call(cpu, r1, fiba);
+}
+
+static int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run)
+{
+    /* NOOP */
+    return 0;
+}
+
+static int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run)
+{
+    uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20;
+    uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16;
+
+    return rpcit_service_call(cpu, r1, r2);
+}
+
+static int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run)
+{
+    uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4;
+    uint8_t r3 = run->s390_sieic.ipa & 0x000f;
+    uint64_t gaddr;
+
+    cpu_synchronize_state(CPU(cpu));
+    gaddr = get_base_disp_rsy(cpu, run);
+
+    return pcistb_service_call(cpu, r1, r3, gaddr);
+}
+
+static int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run)
+{
+    uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4;
+    uint64_t fiba;
+
+    cpu_synchronize_state(CPU(cpu));
+    fiba = get_base_disp_rxy(cpu, run);
+
+    return mpcifc_service_call(cpu, r1, fiba);
+}
+
 static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
 {
     int r = 0;
 
     switch (ipa1) {
+    case PRIV_B9_CLP:
+        r = kvm_clp_service_call(cpu, run);
+        break;
+    case PRIV_B9_PCISTG:
+        r = kvm_pcistg_service_call(cpu, run);
+        break;
+    case PRIV_B9_PCILG:
+        r = kvm_pcilg_service_call(cpu, run);
+        break;
+    case PRIV_B9_RPCIT:
+        r = kvm_rpcit_service_call(cpu, run);
+        break;
     case PRIV_B9_EQBS:
         /* just inject exception */
         r = -1;
@@ -832,6 +987,12 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl)
     int r = 0;
 
     switch (ipbl) {
+    case PRIV_EB_PCISTB:
+        r = kvm_pcistb_service_call(cpu, run);
+        break;
+    case PRIV_EB_SIC:
+        r = kvm_sic_service_call(cpu, run);
+        break;
     case PRIV_EB_SQBS:
         /* just inject exception */
         r = -1;
@@ -845,6 +1006,26 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl)
     return r;
 }
 
+static int handle_e3(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl)
+{
+    int r = 0;
+
+    switch (ipbl) {
+    case PRIV_E3_MPCIFC:
+        r = kvm_mpcifc_service_call(cpu, run);
+        break;
+    case PRIV_E3_STPCIFC:
+        r = kvm_stpcifc_service_call(cpu, run);
+        break;
+    default:
+        r = -1;
+        DPRINTF("KVM: unhandled PRIV: 0xe3%x\n", ipbl);
+        break;
+    }
+
+    return r;
+}
+
 static int handle_hypercall(S390CPU *cpu, struct kvm_run *run)
 {
     CPUS390XState *env = &cpu->env;
@@ -1041,6 +1222,9 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run)
     case IPA0_EB:
         r = handle_eb(cpu, run, run->s390_sieic.ipb & 0xff);
         break;
+    case IPA0_E3:
+        r = handle_e3(cpu, run, run->s390_sieic.ipb & 0xff);
+        break;
     case IPA0_DIAG:
         r = handle_diag(cpu, run, run->s390_sieic.ipb);
         break;
@@ -1361,3 +1545,28 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state)
 
     return ret;
 }
+
+int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
+                              uint64_t address, uint32_t data)
+{ /* note: 'address' is not used; only 'data' is decoded below */
+    S390PCIBusDevice *pbdev;
+    uint32_t fid = data >> ZPCI_MSI_VEC_BITS; /* upper bits: device lookup */
+    uint32_t vec = data & ZPCI_MSI_VEC_MASK; /* lower bits: vector */
+
+    pbdev = s390_pci_find_dev_by_fid(fid);
+    if (!pbdev) {
+        DPRINTF("add_msi_route no dev\n");
+        return -ENODEV;
+    }
+
+    pbdev->routes.adapter.ind_offset = vec; /* also stored on the device */
+
+    route->type = KVM_IRQ_ROUTING_S390_ADAPTER; /* rewritten as adapter route */
+    route->flags = 0;
+    route->u.adapter.summary_addr = pbdev->routes.adapter.summary_addr;
+    route->u.adapter.ind_addr = pbdev->routes.adapter.ind_addr;
+    route->u.adapter.summary_offset = pbdev->routes.adapter.summary_offset;
+    route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset;
+    route->u.adapter.adapter_id = pbdev->routes.adapter.adapter_id;
+    return 0;
+}
diff --git a/tests/Makefile b/tests/Makefile
index e4ddb6a8c1..c2e2e52f22 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -298,7 +298,7 @@ tests/test-opts-visitor$(EXESUF): tests/test-opts-visitor.o $(test-qapi-obj-y) l
 tests/test-mul64$(EXESUF): tests/test-mul64.o libqemuutil.a
 tests/test-bitops$(EXESUF): tests/test-bitops.o libqemuutil.a
 
-libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o
+libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o tests/libqos/malloc.o
 libqos-obj-y += tests/libqos/i2c.o
 libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o
 libqos-pc-obj-y += tests/libqos/malloc-pc.o
diff --git a/tests/libqos/malloc-pc.c b/tests/libqos/malloc-pc.c
index f4218c6451..c9c48fddc9 100644
--- a/tests/libqos/malloc-pc.c
+++ b/tests/libqos/malloc-pc.c
@@ -17,296 +17,28 @@
 #include "hw/nvram/fw_cfg.h"
 
 #include "qemu-common.h"
-#include "qemu/queue.h"
 #include <glib.h>
 
 #define PAGE_SIZE (4096)
 
-#define MLIST_ENTNAME entries
-typedef QTAILQ_HEAD(MemList, MemBlock) MemList;
-typedef struct MemBlock {
-    QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME;
-    uint64_t size;
-    uint64_t addr;
-} MemBlock;
-
-typedef struct PCAlloc
-{
-    QGuestAllocator alloc;
-    PCAllocOpts opts;
-    uint64_t start;
-    uint64_t end;
-
-    MemList used;
-    MemList free;
-} PCAlloc;
-
-static MemBlock *mlist_new(uint64_t addr, uint64_t size)
-{
-    MemBlock *block;
-
-    if (!size) {
-        return NULL;
-    }
-    block = g_malloc0(sizeof(MemBlock));
-
-    block->addr = addr;
-    block->size = size;
-
-    return block;
-}
-
-static void mlist_delete(MemList *list, MemBlock *node)
-{
-    g_assert(list && node);
-    QTAILQ_REMOVE(list, node, MLIST_ENTNAME);
-    g_free(node);
-}
-
-static MemBlock *mlist_find_key(MemList *head, uint64_t addr)
-{
-    MemBlock *node;
-    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
-        if (node->addr == addr) {
-            return node;
-        }
-    }
-    return NULL;
-}
-
-static MemBlock *mlist_find_space(MemList *head, uint64_t size)
-{
-    MemBlock *node;
-
-    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
-        if (node->size >= size) {
-            return node;
-        }
-    }
-    return NULL;
-}
-
-static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr)
-{
-    MemBlock *node;
-    g_assert(head && insr);
-
-    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
-        if (insr->addr < node->addr) {
-            QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME);
-            return insr;
-        }
-    }
-
-    QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME);
-    return insr;
-}
-
-static inline uint64_t mlist_boundary(MemBlock *node)
-{
-    return node->size + node->addr;
-}
-
-static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right)
-{
-    g_assert(head && left && right);
-
-    left->size += right->size;
-    mlist_delete(head, right);
-    return left;
-}
-
-static void mlist_coalesce(MemList *head, MemBlock *node)
-{
-    g_assert(node);
-    MemBlock *left;
-    MemBlock *right;
-    char merge;
-
-    do {
-        merge = 0;
-        left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME);
-        right = QTAILQ_NEXT(node, MLIST_ENTNAME);
-
-        /* clowns to the left of me */
-        if (left && mlist_boundary(left) == node->addr) {
-            node = mlist_join(head, left, node);
-            merge = 1;
-        }
-
-        /* jokers to the right */
-        if (right && mlist_boundary(node) == right->addr) {
-            node = mlist_join(head, node, right);
-            merge = 1;
-        }
-
-    } while (merge);
-}
-
-static uint64_t pc_mlist_fulfill(PCAlloc *s, MemBlock *freenode, uint64_t size)
-{
-    uint64_t addr;
-    MemBlock *usednode;
-
-    g_assert(freenode);
-    g_assert_cmpint(freenode->size, >=, size);
-
-    addr = freenode->addr;
-    if (freenode->size == size) {
-        /* re-use this freenode as our used node */
-        QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME);
-        usednode = freenode;
-    } else {
-        /* adjust the free node and create a new used node */
-        freenode->addr += size;
-        freenode->size -= size;
-        usednode = mlist_new(addr, size);
-    }
-
-    mlist_sort_insert(&s->used, usednode);
-    return addr;
-}
-
-/* To assert the correctness of the list.
- * Used only if PC_ALLOC_PARANOID is set. */
-static void pc_mlist_check(PCAlloc *s)
-{
-    MemBlock *node;
-    uint64_t addr = s->start > 0 ? s->start - 1 : 0;
-    uint64_t next = s->start;
-
-    QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) {
-        g_assert_cmpint(node->addr, >, addr);
-        g_assert_cmpint(node->addr, >=, next);
-        addr = node->addr;
-        next = node->addr + node->size;
-    }
-
-    addr = s->start > 0 ? s->start - 1 : 0;
-    next = s->start;
-    QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) {
-        g_assert_cmpint(node->addr, >, addr);
-        g_assert_cmpint(node->addr, >=, next);
-        addr = node->addr;
-        next = node->addr + node->size;
-    }
-}
-
-static uint64_t pc_mlist_alloc(PCAlloc *s, uint64_t size)
-{
-    MemBlock *node;
-
-    node = mlist_find_space(&s->free, size);
-    if (!node) {
-        fprintf(stderr, "Out of guest memory.\n");
-        g_assert_not_reached();
-    }
-    return pc_mlist_fulfill(s, node, size);
-}
-
-static void pc_mlist_free(PCAlloc *s, uint64_t addr)
-{
-    MemBlock *node;
-
-    if (addr == 0) {
-        return;
-    }
-
-    node = mlist_find_key(&s->used, addr);
-    if (!node) {
-        fprintf(stderr, "Error: no record found for an allocation at "
-                "0x%016" PRIx64 ".\n",
-                addr);
-        g_assert_not_reached();
-    }
-
-    /* Rip it out of the used list and re-insert back into the free list. */
-    QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME);
-    mlist_sort_insert(&s->free, node);
-    mlist_coalesce(&s->free, node);
-}
-
-static uint64_t pc_alloc(QGuestAllocator *allocator, size_t size)
-{
-    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
-    uint64_t rsize = size;
-    uint64_t naddr;
-
-    rsize += (PAGE_SIZE - 1);
-    rsize &= -PAGE_SIZE;
-    g_assert_cmpint((s->start + rsize), <=, s->end);
-    g_assert_cmpint(rsize, >=, size);
-
-    naddr = pc_mlist_alloc(s, rsize);
-    if (s->opts & PC_ALLOC_PARANOID) {
-        pc_mlist_check(s);
-    }
-
-    return naddr;
-}
-
-static void pc_free(QGuestAllocator *allocator, uint64_t addr)
-{
-    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
-
-    pc_mlist_free(s, addr);
-    if (s->opts & PC_ALLOC_PARANOID) {
-        pc_mlist_check(s);
-    }
-}
-
 /*
  * Mostly for valgrind happiness, but it does offer
  * a chokepoint for debugging guest memory leaks, too.
  */
 void pc_alloc_uninit(QGuestAllocator *allocator)
 {
-    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
-    MemBlock *node;
-    MemBlock *tmp;
-    PCAllocOpts mask;
-
-    /* Check for guest leaks, and destroy the list. */
-    QTAILQ_FOREACH_SAFE(node, &s->used, MLIST_ENTNAME, tmp) {
-        if (s->opts & (PC_ALLOC_LEAK_WARN | PC_ALLOC_LEAK_ASSERT)) {
-            fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; "
-                    "size 0x%016" PRIx64 ".\n",
-                    node->addr, node->size);
-        }
-        if (s->opts & (PC_ALLOC_LEAK_ASSERT)) {
-            g_assert_not_reached();
-        }
-        g_free(node);
-    }
-
-    /* If we have previously asserted that there are no leaks, then there
-     * should be only one node here with a specific address and size. */
-    mask = PC_ALLOC_LEAK_ASSERT | PC_ALLOC_PARANOID;
-    QTAILQ_FOREACH_SAFE(node, &s->free, MLIST_ENTNAME, tmp) {
-        if ((s->opts & mask) == mask) {
-            if ((node->addr != s->start) ||
-                (node->size != s->end - s->start)) {
-                fprintf(stderr, "Free list is corrupted.\n");
-                g_assert_not_reached();
-            }
-        }
-
-        g_free(node);
-    }
-
-    g_free(s);
+    alloc_uninit(allocator);
 }
 
-QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags)
+QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags)
 {
-    PCAlloc *s = g_malloc0(sizeof(*s));
+    QGuestAllocator *s = g_malloc0(sizeof(*s));
     uint64_t ram_size;
     QFWCFG *fw_cfg = pc_fw_cfg_init();
     MemBlock *node;
 
     s->opts = flags;
-    s->alloc.alloc = pc_alloc;
-    s->alloc.free = pc_free;
+    s->page_size = PAGE_SIZE;
 
     ram_size = qfw_cfg_get_u64(fw_cfg, FW_CFG_RAM_SIZE);
 
@@ -325,10 +57,10 @@ QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags)
     node = mlist_new(s->start, s->end - s->start);
     QTAILQ_INSERT_HEAD(&s->free, node, MLIST_ENTNAME);
 
-    return &s->alloc;
+    return s;
 }
 
 inline QGuestAllocator *pc_alloc_init(void)
 {
-    return pc_alloc_init_flags(PC_ALLOC_NO_FLAGS);
+    return pc_alloc_init_flags(ALLOC_NO_FLAGS);
 }
diff --git a/tests/libqos/malloc-pc.h b/tests/libqos/malloc-pc.h
index 9f525e3b99..86ab9f0429 100644
--- a/tests/libqos/malloc-pc.h
+++ b/tests/libqos/malloc-pc.h
@@ -15,15 +15,8 @@
 
 #include "libqos/malloc.h"
 
-typedef enum {
-    PC_ALLOC_NO_FLAGS    = 0x00,
-    PC_ALLOC_LEAK_WARN   = 0x01,
-    PC_ALLOC_LEAK_ASSERT = 0x02,
-    PC_ALLOC_PARANOID    = 0x04
-} PCAllocOpts;
-
 QGuestAllocator *pc_alloc_init(void);
-QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags);
-void             pc_alloc_uninit(QGuestAllocator *allocator);
+QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags);
+void pc_alloc_uninit(QGuestAllocator *allocator);
 
 #endif
diff --git a/tests/libqos/malloc.c b/tests/libqos/malloc.c
new file mode 100644
index 0000000000..5debf18497
--- /dev/null
+++ b/tests/libqos/malloc.c
@@ -0,0 +1,270 @@
+/*
+ * libqos malloc support
+ *
+ * Copyright (c) 2014
+ *
+ * Author:
+ *  John Snow <jsnow@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "libqos/malloc.h"
+#include "qemu-common.h"
+#include <stdio.h>
+#include <inttypes.h>
+#include <glib.h>
+
+static void mlist_delete(MemList *list, MemBlock *node)
+{
+    g_assert(list && node);
+    QTAILQ_REMOVE(list, node, MLIST_ENTNAME); /* unlink from the list... */
+    g_free(node); /* ...then release it; node is invalid afterwards */
+}
+
+static MemBlock *mlist_find_key(MemList *head, uint64_t addr)
+{
+    MemBlock *node;
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (node->addr == addr) { /* only an exact start address matches */
+            return node;
+        }
+    }
+    return NULL; /* no block in this list starts at addr */
+}
+
+static MemBlock *mlist_find_space(MemList *head, uint64_t size)
+{
+    MemBlock *node;
+
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (node->size >= size) { /* first block big enough wins */
+            return node;
+        }
+    }
+    return NULL; /* nothing in the list is large enough */
+}
+
+static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr)
+{
+    MemBlock *node;
+    g_assert(head && insr);
+
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (insr->addr < node->addr) { /* first node with a higher addr */
+            QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME);
+            return insr;
+        }
+    }
+
+    QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME); /* no higher addr found */
+    return insr;
+}
+
+static inline uint64_t mlist_boundary(MemBlock *node)
+{
+    return node->size + node->addr; /* address just past the block */
+}
+
+static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right)
+{
+    g_assert(head && left && right);
+
+    left->size += right->size; /* adjacency is not checked here */
+    mlist_delete(head, right); /* right is unlinked and freed */
+    return left;
+}
+
+static void mlist_coalesce(MemList *head, MemBlock *node)
+{
+    g_assert(node);
+    MemBlock *left;
+    MemBlock *right;
+    char merge;
+
+    do {
+        merge = 0;
+        left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME);
+        right = QTAILQ_NEXT(node, MLIST_ENTNAME);
+
+        /* left neighbour ends exactly where node starts: absorb node */
+        if (left && mlist_boundary(left) == node->addr) {
+            node = mlist_join(head, left, node);
+            merge = 1;
+        }
+
+        /* node ends exactly where right neighbour starts: absorb it */
+        if (right && mlist_boundary(node) == right->addr) {
+            node = mlist_join(head, node, right);
+            merge = 1;
+        }
+
+    } while (merge); /* retry until neither neighbour is adjacent */
+}
+
+static uint64_t mlist_fulfill(QGuestAllocator *s, MemBlock *freenode,
+                                                                uint64_t size)
+{
+    uint64_t addr;
+    MemBlock *usednode;
+
+    g_assert(freenode);
+    g_assert_cmpint(freenode->size, >=, size);
+
+    addr = freenode->addr; /* allocation is carved from the block's start */
+    if (freenode->size == size) {
+        /* re-use this freenode as our used node */
+        QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME);
+        usednode = freenode;
+    } else {
+        /* adjust the free node and create a new used node */
+        freenode->addr += size;
+        freenode->size -= size;
+        usednode = mlist_new(addr, size); /* NULL when size is 0 */
+    }
+
+    mlist_sort_insert(&s->used, usednode); /* asserts usednode != NULL */
+    return addr;
+}
+
+/* Assert that both lists are address-sorted with no overlapping blocks.
+ * Used only if ALLOC_PARANOID is set. */
+static void mlist_check(QGuestAllocator *s)
+{
+    MemBlock *node;
+    uint64_t addr = s->start > 0 ? s->start - 1 : 0; /* previous block start */
+    uint64_t next = s->start; /* end of the previous block */
+
+    QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) {
+        g_assert_cmpint(node->addr, >, addr); /* strictly ascending */
+        g_assert_cmpint(node->addr, >=, next); /* no overlap with previous */
+        addr = node->addr;
+        next = node->addr + node->size;
+    }
+
+    addr = s->start > 0 ? s->start - 1 : 0; /* same walk for the used list */
+    next = s->start;
+    QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) {
+        g_assert_cmpint(node->addr, >, addr);
+        g_assert_cmpint(node->addr, >=, next);
+        addr = node->addr;
+        next = node->addr + node->size;
+    }
+}
+
+static uint64_t mlist_alloc(QGuestAllocator *s, uint64_t size)
+{
+    MemBlock *node;
+
+    node = mlist_find_space(&s->free, size); /* first free block that fits */
+    if (!node) {
+        fprintf(stderr, "Out of guest memory.\n");
+        g_assert_not_reached(); /* exhaustion aborts instead of returning */
+    }
+    return mlist_fulfill(s, node, size);
+}
+
+static void mlist_free(QGuestAllocator *s, uint64_t addr)
+{
+    MemBlock *node;
+
+    if (addr == 0) { /* freeing address 0 is a silent no-op */
+        return;
+    }
+
+    node = mlist_find_key(&s->used, addr); /* must be a block's start addr */
+    if (!node) {
+        fprintf(stderr, "Error: no record found for an allocation at "
+                "0x%016" PRIx64 ".\n",
+                addr);
+        g_assert_not_reached(); /* unknown address aborts */
+    }
+
+    /* Rip it out of the used list and re-insert back into the free list. */
+    QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME);
+    mlist_sort_insert(&s->free, node);
+    mlist_coalesce(&s->free, node); /* merge with adjacent free blocks */
+}
+
+MemBlock *mlist_new(uint64_t addr, uint64_t size)
+{
+    MemBlock *block;
+
+    if (!size) {
+        return NULL; /* zero-sized blocks are never created */
+    }
+    block = g_malloc0(sizeof(MemBlock)); /* list linkage starts zeroed */
+
+    block->addr = addr;
+    block->size = size;
+
+    return block; /* not yet linked into any list */
+}
+
+/*
+ * Mostly for valgrind happiness, but it does offer
+ * a chokepoint for debugging guest memory leaks, too.
+ */
+void alloc_uninit(QGuestAllocator *allocator)
+{
+    MemBlock *node;
+    MemBlock *tmp;
+    QAllocOpts mask;
+
+    /* Check for guest leaks, and destroy the list. */
+    QTAILQ_FOREACH_SAFE(node, &allocator->used, MLIST_ENTNAME, tmp) {
+        if (allocator->opts & (ALLOC_LEAK_WARN | ALLOC_LEAK_ASSERT)) {
+            fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; "
+                    "size 0x%016" PRIx64 ".\n",
+                    node->addr, node->size);
+        }
+        if (allocator->opts & (ALLOC_LEAK_ASSERT)) {
+            g_assert_not_reached(); /* aborts on the first leaked block */
+        }
+        g_free(node); /* freed without unlinking; the list is discarded */
+    }
+
+    /* If we have previously asserted that there are no leaks, then there
+     * should be only one node here with a specific address and size. */
+    mask = ALLOC_LEAK_ASSERT | ALLOC_PARANOID; /* both flags are required */
+    QTAILQ_FOREACH_SAFE(node, &allocator->free, MLIST_ENTNAME, tmp) {
+        if ((allocator->opts & mask) == mask) {
+            if ((node->addr != allocator->start) ||
+                (node->size != allocator->end - allocator->start)) {
+                fprintf(stderr, "Free list is corrupted.\n");
+                g_assert_not_reached();
+            }
+        }
+
+        g_free(node);
+    }
+
+    g_free(allocator); /* the allocator object itself is released too */
+}
+
+uint64_t guest_alloc(QGuestAllocator *allocator, size_t size)
+{
+    uint64_t rsize = size;
+    uint64_t naddr;
+
+    rsize += (allocator->page_size - 1); /* round up to a page multiple */
+    rsize &= ~((uint64_t)allocator->page_size - 1); /* 64-bit wide mask */
+    g_assert_cmpuint((allocator->start + rsize), <=, allocator->end);
+    g_assert_cmpuint(rsize, >=, size); /* unsigned: catches wrap-around */
+
+    naddr = mlist_alloc(allocator, rsize); /* aborts if nothing fits */
+    if (allocator->opts & ALLOC_PARANOID) {
+        mlist_check(allocator); /* re-validate both lists after the change */
+    }
+
+    return naddr; /* start address of the page-rounded allocation */
+}
+
+void guest_free(QGuestAllocator *allocator, uint64_t addr)
+{
+    mlist_free(allocator, addr); /* addr 0 is ignored; unknown addr aborts */
+    if (allocator->opts & ALLOC_PARANOID) {
+        mlist_check(allocator); /* re-validate both lists after the change */
+    }
+}
diff --git a/tests/libqos/malloc.h b/tests/libqos/malloc.h
index 556538121e..465efeb8fb 100644
--- a/tests/libqos/malloc.h
+++ b/tests/libqos/malloc.h
@@ -15,24 +15,39 @@
 
 #include <stdint.h>
 #include <sys/types.h>
+#include "qemu/queue.h"
 
-typedef struct QGuestAllocator QGuestAllocator;
+#define MLIST_ENTNAME entries
 
-struct QGuestAllocator
-{
-    uint64_t (*alloc)(QGuestAllocator *allocator, size_t size);
-    void (*free)(QGuestAllocator *allocator, uint64_t addr);
-};
+typedef enum {
+    ALLOC_NO_FLAGS    = 0x00,
+    ALLOC_LEAK_WARN   = 0x01, /* alloc_uninit() reports leaked blocks */
+    ALLOC_LEAK_ASSERT = 0x02, /* ...and aborts on the first one */
+    ALLOC_PARANOID    = 0x04  /* check list ordering after alloc/free */
+} QAllocOpts;
+
+typedef QTAILQ_HEAD(MemList, MemBlock) MemList;
+typedef struct MemBlock {
+    QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME;
+    uint64_t size; /* length of the range */
+    uint64_t addr; /* first address of the range */
+} MemBlock;
+
+typedef struct QGuestAllocator {
+    QAllocOpts opts;
+    uint64_t start; /* lower bound of the managed range */
+    uint64_t end; /* upper bound; guest_alloc asserts start + size <= end */
+    uint32_t page_size; /* guest_alloc rounds sizes up to a multiple */
+
+    MemList used; /* allocated blocks, inserted sorted by addr */
+    MemList free; /* available blocks, inserted sorted by addr */
+} QGuestAllocator;
+
+MemBlock *mlist_new(uint64_t addr, uint64_t size);
+void alloc_uninit(QGuestAllocator *allocator);
 
 /* Always returns page aligned values */
-static inline uint64_t guest_alloc(QGuestAllocator *allocator, size_t size)
-{
-    return allocator->alloc(allocator, size);
-}
-
-static inline void guest_free(QGuestAllocator *allocator, uint64_t addr)
-{
-    allocator->free(allocator, addr);
-}
+uint64_t guest_alloc(QGuestAllocator *allocator, size_t size);
+void guest_free(QGuestAllocator *allocator, uint64_t addr);
 
 #endif
diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
index 12af731c68..0e554bb972 100755
--- a/tests/qemu-iotests-quick.sh
+++ b/tests/qemu-iotests-quick.sh
@@ -3,6 +3,6 @@
 cd tests/qemu-iotests
 
 ret=0
-./check -T -qcow2 -g quick || ret=1
+TEST_DIR=${TEST_DIR:-/tmp/qemu-iotests-quick-$$} ./check -T -qcow2 -g quick || ret=1
 
 exit $ret
diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index 0872444811..e81d4d0d83 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 #
-# Tests for drive-backup
+# Tests for drive-backup and blockdev-backup
 #
-# Copyright (C) 2013 Red Hat, Inc.
+# Copyright (C) 2013, 2014 Red Hat, Inc.
 #
 # Based on 041.
 #
@@ -27,6 +27,7 @@ from iotests import qemu_img, qemu_io
 
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
+blockdev_target_img = os.path.join(iotests.test_dir, 'blockdev-target.img')
 
 class TestSingleDrive(iotests.QMPTestCase):
     image_len = 64 * 1024 * 1024 # MB
@@ -38,34 +39,41 @@ class TestSingleDrive(iotests.QMPTestCase):
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img)
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img)
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img)
+        qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len))
 
-        self.vm = iotests.VM().add_drive(test_img)
+        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
         self.vm.launch()
 
     def tearDown(self):
         self.vm.shutdown()
         os.remove(test_img)
+        os.remove(blockdev_target_img)
         try:
             os.remove(target_img)
         except OSError:
             pass
 
-    def test_cancel(self):
+    def do_test_cancel(self, cmd, target):
         self.assert_no_active_block_jobs()
 
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full')
+        result = self.vm.qmp(cmd, device='drive0', target=target, sync='full')
         self.assert_qmp(result, 'return', {})
 
         event = self.cancel_and_wait()
         self.assert_qmp(event, 'data/type', 'backup')
 
-    def test_pause(self):
+    def test_cancel_drive_backup(self):
+        self.do_test_cancel('drive-backup', target_img)
+
+    def test_cancel_blockdev_backup(self):
+        self.do_test_cancel('blockdev-backup', 'drive1')
+
+    def do_test_pause(self, cmd, target, image):
         self.assert_no_active_block_jobs()
 
         self.vm.pause_drive('drive0')
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full')
+        result = self.vm.qmp(cmd, device='drive0',
+                             target=target, sync='full')
         self.assert_qmp(result, 'return', {})
 
         result = self.vm.qmp('block-job-pause', device='drive0')
@@ -86,14 +94,25 @@ class TestSingleDrive(iotests.QMPTestCase):
         self.wait_until_completed()
 
         self.vm.shutdown()
-        self.assertTrue(iotests.compare_images(test_img, target_img),
+        self.assertTrue(iotests.compare_images(test_img, image),
                         'target image does not match source after backup')
 
+    def test_pause_drive_backup(self):
+        self.do_test_pause('drive-backup', target_img, target_img)
+
+    def test_pause_blockdev_backup(self):
+        self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img)
+
     def test_medium_not_found(self):
         result = self.vm.qmp('drive-backup', device='ide1-cd0',
                              target=target_img, sync='full')
         self.assert_qmp(result, 'error/class', 'GenericError')
 
+    def test_medium_not_found_blockdev_backup(self):
+        result = self.vm.qmp('blockdev-backup', device='ide1-cd0',
+                             target='drive1', sync='full')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
     def test_image_not_found(self):
         result = self.vm.qmp('drive-backup', device='drive0',
                              target=target_img, sync='full', mode='existing')
@@ -105,31 +124,53 @@ class TestSingleDrive(iotests.QMPTestCase):
                              format='spaghetti-noodles')
         self.assert_qmp(result, 'error/class', 'GenericError')
 
-    def test_device_not_found(self):
-        result = self.vm.qmp('drive-backup', device='nonexistent',
-                             target=target_img, sync='full')
+    def do_test_device_not_found(self, cmd, **args):
+        result = self.vm.qmp(cmd, **args)
         self.assert_qmp(result, 'error/class', 'DeviceNotFound')
 
+    def test_device_not_found(self):
+        self.do_test_device_not_found('drive-backup', device='nonexistent',
+                                      target=target_img, sync='full')
+
+        self.do_test_device_not_found('blockdev-backup', device='nonexistent',
+                                      target='drive0', sync='full')
+
+        self.do_test_device_not_found('blockdev-backup', device='drive0',
+                                      target='nonexistent', sync='full')
+
+        self.do_test_device_not_found('blockdev-backup', device='nonexistent',
+                                      target='nonexistent', sync='full')
+
+    def test_target_is_source(self):
+        result = self.vm.qmp('blockdev-backup', device='drive0',
+                             target='drive0', sync='full')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
 class TestSetSpeed(iotests.QMPTestCase):
     image_len = 80 * 1024 * 1024 # MB
 
     def setUp(self):
         qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSetSpeed.image_len))
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P1 0 512', test_img)
-        self.vm = iotests.VM().add_drive(test_img)
+        qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len))
+
+        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
         self.vm.launch()
 
     def tearDown(self):
         self.vm.shutdown()
         os.remove(test_img)
-        os.remove(target_img)
+        os.remove(blockdev_target_img)
+        try:
+            os.remove(target_img)
+        except OSError:
+            pass
 
-    def test_set_speed(self):
+    def do_test_set_speed(self, cmd, target):
         self.assert_no_active_block_jobs()
 
         self.vm.pause_drive('drive0')
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full')
+        result = self.vm.qmp(cmd, device='drive0', target=target, sync='full')
         self.assert_qmp(result, 'return', {})
 
         # Default speed is 0
@@ -148,10 +189,10 @@ class TestSetSpeed(iotests.QMPTestCase):
         event = self.cancel_and_wait(resume=True)
         self.assert_qmp(event, 'data/type', 'backup')
 
-        # Check setting speed in drive-backup works
+        # Check setting speed option works
         self.vm.pause_drive('drive0')
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full', speed=4*1024*1024)
+        result = self.vm.qmp(cmd, device='drive0',
+                             target=target, sync='full', speed=4*1024*1024)
         self.assert_qmp(result, 'return', {})
 
         result = self.vm.qmp('query-block-jobs')
@@ -161,18 +202,24 @@ class TestSetSpeed(iotests.QMPTestCase):
         event = self.cancel_and_wait(resume=True)
         self.assert_qmp(event, 'data/type', 'backup')
 
-    def test_set_speed_invalid(self):
+    def test_set_speed_drive_backup(self):
+        self.do_test_set_speed('drive-backup', target_img)
+
+    def test_set_speed_blockdev_backup(self):
+        self.do_test_set_speed('blockdev-backup', 'drive1')
+
+    def do_test_set_speed_invalid(self, cmd, target):
         self.assert_no_active_block_jobs()
 
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full', speed=-1)
+        result = self.vm.qmp(cmd, device='drive0',
+                             target=target, sync='full', speed=-1)
         self.assert_qmp(result, 'error/class', 'GenericError')
 
         self.assert_no_active_block_jobs()
 
         self.vm.pause_drive('drive0')
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full')
+        result = self.vm.qmp(cmd, device='drive0',
+                             target=target, sync='full')
         self.assert_qmp(result, 'return', {})
 
         result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1)
@@ -181,6 +228,12 @@ class TestSetSpeed(iotests.QMPTestCase):
         event = self.cancel_and_wait(resume=True)
         self.assert_qmp(event, 'data/type', 'backup')
 
+    def test_set_speed_invalid_drive_backup(self):
+        self.do_test_set_speed_invalid('drive-backup', target_img)
+
+    def test_set_speed_invalid_blockdev_backup(self):
+        self.do_test_set_speed_invalid('blockdev-backup',  'drive1')
+
 class TestSingleTransaction(iotests.QMPTestCase):
     image_len = 64 * 1024 * 1024 # MB
 
@@ -190,41 +243,50 @@ class TestSingleTransaction(iotests.QMPTestCase):
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img)
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img)
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img)
+        qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len))
 
-        self.vm = iotests.VM().add_drive(test_img)
+        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
         self.vm.launch()
 
     def tearDown(self):
         self.vm.shutdown()
         os.remove(test_img)
+        os.remove(blockdev_target_img)
         try:
             os.remove(target_img)
         except OSError:
             pass
 
-    def test_cancel(self):
+    def do_test_cancel(self, cmd, target):
         self.assert_no_active_block_jobs()
 
         result = self.vm.qmp('transaction', actions=[{
-                'type': 'drive-backup',
+                'type': cmd,
                 'data': { 'device': 'drive0',
-                          'target': target_img,
+                          'target': target,
                           'sync': 'full' },
             }
         ])
+
         self.assert_qmp(result, 'return', {})
 
         event = self.cancel_and_wait()
         self.assert_qmp(event, 'data/type', 'backup')
 
-    def test_pause(self):
+    def test_cancel_drive_backup(self):
+        self.do_test_cancel('drive-backup', target_img)
+
+    def test_cancel_blockdev_backup(self):
+        self.do_test_cancel('blockdev-backup', 'drive1')
+
+    def do_test_pause(self, cmd, target, image):
         self.assert_no_active_block_jobs()
 
         self.vm.pause_drive('drive0')
         result = self.vm.qmp('transaction', actions=[{
-                'type': 'drive-backup',
+                'type': cmd,
                 'data': { 'device': 'drive0',
-                          'target': target_img,
+                          'target': target,
                           'sync': 'full' },
             }
         ])
@@ -248,19 +310,31 @@ class TestSingleTransaction(iotests.QMPTestCase):
         self.wait_until_completed()
 
         self.vm.shutdown()
-        self.assertTrue(iotests.compare_images(test_img, target_img),
+        self.assertTrue(iotests.compare_images(test_img, image),
                         'target image does not match source after backup')
 
-    def test_medium_not_found(self):
+    def test_pause_drive_backup(self):
+        self.do_test_pause('drive-backup', target_img, target_img)
+
+    def test_pause_blockdev_backup(self):
+        self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img)
+
+    def do_test_medium_not_found(self, cmd, target):
         result = self.vm.qmp('transaction', actions=[{
-                'type': 'drive-backup',
+                'type': cmd,
                 'data': { 'device': 'ide1-cd0',
-                          'target': target_img,
+                          'target': target,
                           'sync': 'full' },
             }
         ])
         self.assert_qmp(result, 'error/class', 'GenericError')
 
+    def test_medium_not_found_drive_backup(self):
+        self.do_test_medium_not_found('drive-backup', target_img)
+
+    def test_medium_not_found_blockdev_backup(self):
+        self.do_test_medium_not_found('blockdev-backup', 'drive1')
+
     def test_image_not_found(self):
         result = self.vm.qmp('transaction', actions=[{
                 'type': 'drive-backup',
@@ -283,6 +357,43 @@ class TestSingleTransaction(iotests.QMPTestCase):
         ])
         self.assert_qmp(result, 'error/class', 'DeviceNotFound')
 
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'nonexistent',
+                          'target': 'drive1',
+                          'sync': 'full' },
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'drive0',
+                          'target': 'nonexistent',
+                          'sync': 'full' },
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'nonexistent',
+                          'target': 'nonexistent',
+                          'sync': 'full' },
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_target_is_source(self):
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'drive0',
+                          'target': 'drive0',
+                          'sync': 'full' },
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
     def test_abort(self):
         result = self.vm.qmp('transaction', actions=[{
                 'type': 'drive-backup',
@@ -298,5 +409,31 @@ class TestSingleTransaction(iotests.QMPTestCase):
         self.assert_qmp(result, 'error/class', 'GenericError')
         self.assert_no_active_block_jobs()
 
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'nonexistent',
+                          'target': 'drive1',
+                          'sync': 'full' },
+            }, {
+                'type': 'Abort',
+                'data': {},
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'drive0',
+                          'target': 'nonexistent',
+                          'sync': 'full' },
+            }, {
+                'type': 'Abort',
+                'data': {},
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_no_active_block_jobs()
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['raw', 'qcow2'])
diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out
index 6323079e08..42314e9c00 100644
--- a/tests/qemu-iotests/055.out
+++ b/tests/qemu-iotests/055.out
@@ -1,5 +1,5 @@
-..............
+........................
 ----------------------------------------------------------------------
-Ran 14 tests
+Ran 24 tests
 
 OK
diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058
index 2d5ca85ddc..a60b34b46c 100755
--- a/tests/qemu-iotests/058
+++ b/tests/qemu-iotests/058
@@ -87,6 +87,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 _supported_fmt qcow2
 _supported_proto file
+_supported_os Linux
 _require_command QEMU_NBD
 
 # Use -f raw instead of -f $IMGFMT for the NBD connection
diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067
index 29cd6b5aff..0508c696a8 100755
--- a/tests/qemu-iotests/067
+++ b/tests/qemu-iotests/067
@@ -45,7 +45,8 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \
+                          | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
 }
 
 size=128M
diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
index 5d61ef6d81..9eaa49b419 100755
--- a/tests/qemu-iotests/071
+++ b/tests/qemu-iotests/071
@@ -51,7 +51,7 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io
 }
 
 IMG_SIZE=64M
diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
index 46484ff69c..9205ce2512 100644
--- a/tests/qemu-iotests/071.out
+++ b/tests/qemu-iotests/071.out
@@ -52,8 +52,8 @@ read failed: Input/output error
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
-qemu-system-x86_64: Failed to flush the L2 table cache: Input/output error
-qemu-system-x86_64: Failed to flush the refcount block cache: Input/output error
+QEMU_PROG: Failed to flush the L2 table cache: Input/output error
+QEMU_PROG: Failed to flush the refcount block cache: Input/output error
 
 
 === Testing blkverify on existing block device ===
@@ -92,7 +92,7 @@ read failed: Input/output error
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
-qemu-system-x86_64: Failed to flush the L2 table cache: Input/output error
-qemu-system-x86_64: Failed to flush the refcount block cache: Input/output error
+QEMU_PROG: Failed to flush the L2 table cache: Input/output error
+QEMU_PROG: Failed to flush the refcount block cache: Input/output error
 
 *** done
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index 9ab93ff89e..d9b042cfc7 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -53,7 +53,7 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io
 }
 
 test_quorum=$($QEMU_IMG --help|grep quorum)
diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index d7454d13da..8694749947 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -45,7 +45,8 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \
+                          | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
 }
 
 size=128M
diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index 7d38cc26c5..91f4ea1a8b 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -21,7 +21,6 @@ QMP_VERSION
 {"return": {}}
 {"error": {"class": "GenericError", "desc": "Device with id 'disk' already exists"}}
 {"error": {"class": "GenericError", "desc": "Device name 'test-node' conflicts with an existing node name"}}
-main-loop: WARNING: I/O thread spun for 1000 iterations
 {"error": {"class": "GenericError", "desc": "could not open disk image disk2: node-name=disk is conflicting with a device id"}}
 {"error": {"class": "GenericError", "desc": "could not open disk image disk2: Duplicate node name"}}
 {"error": {"class": "GenericError", "desc": "could not open disk image disk3: node-name=disk3 is conflicting with a device id"}}
diff --git a/tests/qemu-iotests/099 b/tests/qemu-iotests/099
index 948afff28b..80f3d9aaf3 100755
--- a/tests/qemu-iotests/099
+++ b/tests/qemu-iotests/099
@@ -57,7 +57,7 @@ function run_qemu()
     # Get the "file": "foo" entry ($foo may only contain escaped double quotes,
     # which is how we can extract it)
     do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_imgfmt | _filter_qmp \
-        | grep "drv0" \
+        | _filter_qemu | grep "drv0" \
         | sed -e 's/^.*"file": "\(\(\\"\|[^"]\)*\)".*$/\1/' -e 's/\\"/"/g'
 }
 
diff --git a/tests/qemu-iotests/110 b/tests/qemu-iotests/110
new file mode 100755
index 0000000000..a687f9567d
--- /dev/null
+++ b/tests/qemu-iotests/110
@@ -0,0 +1,94 @@
+#!/bin/bash
+#
+# Test case for relative backing file names in complex BDS trees
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+# Any format supporting backing files
+_supported_fmt qed qcow qcow2 vmdk
+_supported_proto file
+_supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
+
+TEST_IMG_REL=$(basename "$TEST_IMG")
+
+echo
+echo '=== Reconstructable filename ==='
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img 64M
+_make_test_img -b "$TEST_IMG_REL.base" 64M
+# qemu should be able to reconstruct the filename, so relative backing names
+# should work
+TEST_IMG="json:{'driver':'$IMGFMT','file':{'driver':'file','filename':'$TEST_IMG'}}" \
+    _img_info | _filter_img_info
+
+echo
+echo '=== Non-reconstructable filename ==='
+echo
+
+# Across blkdebug without a config file, you cannot reconstruct filenames, so
+# qemu is incapable of knowing the directory of the top image
+TEST_IMG="json:{
+    'driver': '$IMGFMT',
+    'file': {
+        'driver': 'blkdebug',
+        'image': {
+            'driver': 'file',
+            'filename': '$TEST_IMG'
+        },
+        'set-state': [
+            {
+                'event': 'read_aio',
+                'new_state': 42
+            }
+        ]
+    }
+}" _img_info | _filter_img_info
+
+echo
+echo '=== Backing name is always relative to the backed image ==='
+echo
+
+# omit the image size; it should work anyway
+_make_test_img -b "$TEST_IMG_REL.base"
+
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/110.out b/tests/qemu-iotests/110.out
new file mode 100644
index 0000000000..152bacf41e
--- /dev/null
+++ b/tests/qemu-iotests/110.out
@@ -0,0 +1,19 @@
+QA output created by 110
+
+=== Reconstructable filename ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='t.IMGFMT.base'
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+backing file: t.IMGFMT.base (actual path: TEST_DIR/t.IMGFMT.base)
+
+=== Non-reconstructable filename ===
+
+qemu-img: Cannot use relative backing file names for 'json:{"driver": "IMGFMT", "file": {"set-state.0.event": "read_aio", "image": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}, "driver": "blkdebug", "set-state.0.new_state": 42}}'
+
+=== Backing name is always relative to the backed image ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='t.IMGFMT.base'
+*** done
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 8ca40116d7..baeae80f96 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -238,6 +238,7 @@ QEMU_NBD      -- $QEMU_NBD
 IMGFMT        -- $FULL_IMGFMT_DETAILS
 IMGPROTO      -- $FULL_IMGPROTO_DETAILS
 PLATFORM      -- $FULL_HOST_DETAILS
+TEST_DIR      -- $TEST_DIR
 SOCKET_SCM_HELPER -- $SOCKET_SCM_HELPER
 
 EOF
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index 91a5ef696b..a1973ad9d0 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -155,4 +155,4 @@ _readlink()
 }
 
 # make sure this script returns success
-/bin/true
+true
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 6c14590594..b73c70be95 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -159,6 +159,7 @@ _filter_qemu()
 {
     sed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
         -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
+        -e '/main-loop: WARNING: I\/O thread spun for [0-9]\+ iterations/d' \
         -e $'s#\r##' # QEMU monitor uses \r\n line endings
 }
 
@@ -223,4 +224,4 @@ _filter_qemu_img_map()
 }
 
 # make sure this script returns success
-/bin/true
+true
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 3b14053790..aa093d9d84 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -490,4 +490,4 @@ _die()
 }
 
 # make sure this script returns success
-/bin/true
+true
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index a4742c6d01..f8bf354156 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -97,7 +97,7 @@
 088 rw auto quick
 089 rw auto quick
 090 rw auto quick
-091 rw auto quick
+091 rw auto
 092 rw auto quick
 095 rw auto quick
 097 rw auto backing
@@ -112,6 +112,7 @@
 107 rw auto quick
 108 rw auto quick
 109 rw auto
+110 rw auto backing quick
 111 rw auto quick
 113 rw auto quick
 114 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index f57f1548ac..87002e0e2c 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -282,12 +282,17 @@ def notrun(reason):
     print '%s not run: %s' % (seq, reason)
     sys.exit(0)
 
-def main(supported_fmts=[]):
+def main(supported_fmts=[], supported_oses=['linux']):
     '''Run tests'''
 
     if supported_fmts and (imgfmt not in supported_fmts):
         notrun('not suitable for this image format: %s' % imgfmt)
 
+    # sys.platform carries a version suffix on some interpreters (Python 2
+    # reports 'linux2'), so match each supported OS name as a prefix.
+    if not any(sys.platform.startswith(x) for x in supported_oses):
+        notrun('not suitable for this OS: %s' % sys.platform)
+
     # We need to filter out the time taken from the output so that qemu-iotest
     # can reliably diff the results against master output.
     import StringIO
diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c
index e22fae170a..27d1b6f8e8 100644
--- a/tests/test-coroutine.c
+++ b/tests/test-coroutine.c
@@ -337,7 +337,7 @@ static void perf_cost(void)
                    "%luns per coroutine",
                    maxcycles,
                    duration, ops,
-                   (unsigned long)(1000000000 * duration) / maxcycles);
+                   (unsigned long)(1000000000.0 * duration / maxcycles));
 }
 
 int main(int argc, char **argv)
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index d05a6497e1..41cb23df0c 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -26,6 +26,7 @@
 #endif
 #include "qemu/thread.h"
 #include "qemu/atomic.h"
+#include "qemu/notify.h"
 
 static bool name_threads;
 
@@ -401,6 +402,68 @@ void qemu_event_wait(QemuEvent *ev)
     }
 }
 
+static pthread_key_t exit_key;
+
+/* The per-thread notifier list head is a single pointer, so the pointer
+ * itself is kept as the pthread-specific value and unpacked into a stack
+ * copy of this union whenever the list has to be manipulated.
+ */
+union NotifierThreadData {
+    void *ptr;
+    NotifierList list;
+};
+QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *));
+
+/* Unpack the list head @ptr into the stack copy @ntd.
+ *
+ * The first notifier's back-pointer still refers to whichever stack copy of
+ * the head was live when it was last touched.  Re-anchor it to @ntd so that
+ * unlinking the first notifier updates @ntd rather than a dead stack slot.
+ */
+static void notifier_thread_data_load(union NotifierThreadData *ntd, void *ptr)
+{
+    Notifier *first;
+
+    ntd->ptr = ptr;
+    first = QLIST_FIRST(&ntd->list.notifiers);
+    if (first) {
+        first->node.le_prev = &ntd->list.notifiers.lh_first;
+    }
+}
+
+/* Register @notifier on the calling thread's exit list. */
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+    union NotifierThreadData ntd;
+    notifier_thread_data_load(&ntd, pthread_getspecific(exit_key));
+    notifier_list_add(&ntd.list, notifier);
+    pthread_setspecific(exit_key, ntd.ptr);
+}
+
+/* Unlink @notifier from the calling thread's exit list. */
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+    union NotifierThreadData ntd;
+    notifier_thread_data_load(&ntd, pthread_getspecific(exit_key));
+    notifier_remove(notifier);
+    pthread_setspecific(exit_key, ntd.ptr);
+}
+
+/* Destructor registered for exit_key: notify every notifier on the list. */
+static void qemu_thread_atexit_run(void *arg)
+{
+    union NotifierThreadData ntd;
+    notifier_thread_data_load(&ntd, arg);
+    notifier_list_notify(&ntd.list, NULL);
+}
+
+/* Runs as a constructor so the key exists before any notifier is added. */
+static void __attribute__((constructor)) qemu_thread_atexit_init(void)
+{
+    pthread_key_create(&exit_key, qemu_thread_atexit_run);
+}
+
+
 /* Attempt to set the threads name; note that this is for debug, so
  * we're not going to fail if we can't set it.
  */
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index c405c9bef6..406b52f91d 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -12,6 +12,7 @@
  */
 #include "qemu-common.h"
 #include "qemu/thread.h"
+#include "qemu/notify.h"
 #include <process.h>
 #include <assert.h>
 #include <limits.h>
@@ -268,6 +269,7 @@ struct QemuThreadData {
     void             *(*start_routine)(void *);
     void             *arg;
     short             mode;
+    NotifierList      exit;
 
     /* Only used for joinable threads. */
     bool              exited;
@@ -275,18 +277,40 @@ struct QemuThreadData {
     CRITICAL_SECTION  cs;
 };
 
+static bool atexit_registered;
+static NotifierList main_thread_exit;
+
 static __thread QemuThreadData *qemu_thread_data;
 
+static void run_main_thread_exit(void)
+{
+    notifier_list_notify(&main_thread_exit, NULL);
+}
+
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+    if (!qemu_thread_data) {
+        if (!atexit_registered) {
+            atexit_registered = true;
+            atexit(run_main_thread_exit);
+        }
+        notifier_list_add(&main_thread_exit, notifier);
+    } else {
+        notifier_list_add(&qemu_thread_data->exit, notifier);
+    }
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+    notifier_remove(notifier);
+}
+
 static unsigned __stdcall win32_start_routine(void *arg)
 {
     QemuThreadData *data = (QemuThreadData *) arg;
     void *(*start_routine)(void *) = data->start_routine;
     void *thread_arg = data->arg;
 
-    if (data->mode == QEMU_THREAD_DETACHED) {
-        g_free(data);
-        data = NULL;
-    }
     qemu_thread_data = data;
     qemu_thread_exit(start_routine(thread_arg));
     abort();
@@ -296,12 +320,14 @@ void qemu_thread_exit(void *arg)
 {
     QemuThreadData *data = qemu_thread_data;
 
-    if (data) {
-        assert(data->mode != QEMU_THREAD_DETACHED);
+    notifier_list_notify(&data->exit, NULL);
+    if (data->mode == QEMU_THREAD_JOINABLE) {
         data->ret = arg;
         EnterCriticalSection(&data->cs);
         data->exited = true;
         LeaveCriticalSection(&data->cs);
+    } else {
+        g_free(data);
     }
     _endthreadex(0);
 }
@@ -313,9 +339,10 @@ void *qemu_thread_join(QemuThread *thread)
     HANDLE handle;
 
     data = thread->data;
-    if (!data) {
+    if (data->mode == QEMU_THREAD_DETACHED) {
         return NULL;
     }
+
     /*
      * Because multiple copies of the QemuThread can exist via
      * qemu_thread_get_self, we need to store a value that cannot
@@ -329,7 +356,6 @@ void *qemu_thread_join(QemuThread *thread)
         CloseHandle(handle);
     }
     ret = data->ret;
-    assert(data->mode != QEMU_THREAD_DETACHED);
     DeleteCriticalSection(&data->cs);
     g_free(data);
     return ret;
@@ -347,6 +373,7 @@ void qemu_thread_create(QemuThread *thread, const char *name,
     data->arg = arg;
     data->mode = mode;
     data->exited = false;
+    notifier_list_init(&data->exit);
 
     if (data->mode != QEMU_THREAD_DETACHED) {
         InitializeCriticalSection(&data->cs);
@@ -358,7 +385,7 @@ void qemu_thread_create(QemuThread *thread, const char *name,
         error_exit(GetLastError(), __func__);
     }
     CloseHandle(hThread);
-    thread->data = (mode == QEMU_THREAD_DETACHED) ? NULL : data;
+    thread->data = data;
 }
 
 void qemu_thread_get_self(QemuThread *thread)
@@ -373,11 +400,10 @@ HANDLE qemu_thread_get_handle(QemuThread *thread)
     HANDLE handle;
 
     data = thread->data;
-    if (!data) {
+    if (data->mode == QEMU_THREAD_DETACHED) {
         return NULL;
     }
 
-    assert(data->mode != QEMU_THREAD_DETACHED);
     EnterCriticalSection(&data->cs);
     if (!data->exited) {
         handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
diff --git a/xen-hvm.c b/xen-hvm.c
index 754879481e..e2e575b099 100644
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -90,6 +90,12 @@ static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
 #endif
 
 #define BUFFER_IO_MAX_DELAY  100
+/* Leave some slack so that hvmloader does not complain about lack of
+ * memory at boot time ("Could not allocate order=0 extent").
+ * Once hvmloader is modified to cope with that situation without
+ * printing warning messages, QEMU_SPARE_PAGES can be removed.
+ */
+#define QEMU_SPARE_PAGES 16
 
 typedef struct XenPhysmap {
     hwaddr start_addr;
@@ -244,6 +250,8 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr)
     unsigned long nr_pfn;
     xen_pfn_t *pfn_list;
     int i;
+    xc_domaininfo_t info;
+    unsigned long free_pages;
 
     if (runstate_check(RUN_STATE_INMIGRATE)) {
         /* RAM already populated in Xen */
@@ -266,6 +274,22 @@ void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr)
         pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
     }
 
+    if ((xc_domain_getinfolist(xen_xc, xen_domid, 1, &info) != 1) ||
+        (info.domain != xen_domid)) {
+        hw_error("xc_domain_getinfolist failed");
+    }
+    free_pages = info.max_pages - info.tot_pages;
+    if (free_pages > QEMU_SPARE_PAGES) {
+        free_pages -= QEMU_SPARE_PAGES;
+    } else {
+        free_pages = 0;
+    }
+    if ((free_pages < nr_pfn) &&
+        (xc_domain_setmaxmem(xen_xc, xen_domid,
+                             ((info.max_pages + nr_pfn - free_pages)
+                              << (XC_PAGE_SHIFT - 10))) < 0)) {
+        hw_error("xc_domain_setmaxmem failed");
+    }
     if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
         hw_error("xen: failed to populate ram at " RAM_ADDR_FMT, ram_addr);
     }